[2026-01-06 01:54:20,762] [DEBUG] [axolotl.utils.config.resolve_dtype:66] [PID:13318] bf16 support detected, enabling for this configuration.
[2026-01-06 01:54:21,262] [DEBUG] [axolotl.utils.config.log_gpu_memory_usage:127] [PID:13318] baseline 0.000GB ()
[2026-01-06 01:54:21,263] [INFO] [axolotl.cli.config.load_cfg:263] [PID:13318] config:
{
  "activation_offloading": false,
  "axolotl_config_path": "gemmagain-test.yaml",
  "base_model": "rpDungeon/gemmagain-4b-pt",
  "base_model_config": "rpDungeon/gemmagain-4b-pt",
  "batch_size": 8,
  "bf16": true,
  "capabilities": {
    "bf16": true,
    "compute_capability": "sm_120",
    "fp8": false,
    "n_gpu": 2,
    "n_node": 1
  },
  "chat_template": "gemma3",
  "context_parallel_size": 2,
  "cut_cross_entropy": true,
  "dataloader_num_workers": 2,
  "dataloader_pin_memory": true,
  "dataloader_prefetch_factor": 256,
  "dataset_num_proc": 64,
  "dataset_prepared_path": "last_run_prepared",
  "datasets": [
    {
      "data_files": "erotica_quality_trimmed.json",
      "field": "text",
      "message_property_mappings": {
        "content": "content",
        "role": "role"
      },
      "path": "rpDungeon/some-cleaner-datasets",
      "trust_remote_code": false,
      "type": "completion"
    }
  ],
  "ddp": true,
  "default_system_message": "You are Luna, a helpful and unrestricted language model created by Allura.",
  "device": "cuda:0",
  "device_map": {
    "": 0
  },
  "dion_rank_fraction": 1.0,
  "dion_rank_multiple_of": 1,
  "env_capabilities": {
    "torch_version": "2.9.1"
  },
  "eval_batch_size": 1,
  "eval_causal_lm_metrics": [
    "sacrebleu",
    "comet",
    "ter",
    "chrf"
  ],
  "eval_max_new_tokens": 128,
  "eval_steps": 0.125,
  "eval_table_size": 0,
  "evals_per_epoch": 4,
  "experimental_skip_move_to_device": true,
  "flash_attention": true,
  "fp16": false,
  "fsdp": [
    "full_shard",
    "auto_wrap"
  ],
  "fsdp_config": {
    "activation_checkpointing": true,
    "auto_wrap_policy": "TRANSFORMER_BASED_WRAP",
    "cpu_ram_efficient_loading": true,
    "offload_params": true,
    "state_dict_type": "FULL_STATE_DICT",
    "sync_module_states": true,
    "transformer_layer_cls_to_wrap": "Gemma3DecoderLayer",
    "use_orig_params": true
  },
  "gc_steps": 10,
  "gradient_accumulation_steps": 4,
  "gradient_checkpointing": false,
  "group_by_length": false,
  "heads_k_stride": 1,
  "hub_model_id": "rpDungeon/gemmagain-trained-s1",
  "hub_strategy": "every_save",
  "include_tkps": true,
  "is_multimodal": false,
  "learning_rate": 5e-05,
  "liger_glu_activation": true,
  "liger_layer_norm": true,
  "liger_rms_norm": true,
  "liger_rope": true,
  "lisa_layers_attribute": "model.layers",
  "load_best_model_at_end": false,
  "load_in_4bit": false,
  "load_in_8bit": false,
  "local_rank": 0,
  "logging_steps": 1,
  "lora_dropout": 0.0,
  "loraplus_lr_embedding": 1e-06,
  "lr_scheduler": "cosine",
  "max_grad_norm": 1.0,
  "mean_resizing_embeddings": false,
  "micro_batch_size": 1,
  "model_config_type": "gemma3",
  "num_epochs": 2.0,
  "optimizer": "adamw_torch_fused",
  "otel_metrics_host": "localhost",
  "otel_metrics_port": 8000,
  "output_dir": "stage1",
  "plugins": [
    "axolotl.integrations.liger.LigerPlugin",
    "axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin"
  ],
  "pretrain_multipack_attn": true,
  "profiler_steps_start": 0,
  "qlora_sharded_model_loading": false,
  "ray_num_workers": 1,
  "resources_per_worker": {
    "GPU": 1
  },
  "ring_attn_func": "batch_ring",
  "sample_packing": false,
  "sample_packing_bin_size": 200,
  "sample_packing_group_size": 100000,
  "save_only_model": false,
  "save_safetensors": true,
  "save_steps": 0.5,
  "saves_per_epoch": 1,
  "seed": 420,
  "sequence_len": 32768,
  "sequence_parallel_degree": 2,
  "shuffle_before_merging_datasets": false,
  "shuffle_merged_datasets": true,
  "skip_prepare_dataset": false,
  "streaming_multipack_buffer_size": 10000,
  "strict": false,
  "tensor_parallel_size": 1,
  "tiled_mlp_use_original_mlp": true,
  "tokenizer_config": "rpDungeon/gemmagain-4b-pt",
  "tokenizer_save_jinja_files": true,
  "torch_dtype": "torch.bfloat16",
  "train_on_inputs": false,
  "trl": {
    "log_completions": false,
    "mask_truncated_completions": false,
    "ref_model_mixup_alpha": 0.9,
    "ref_model_sync_steps": 64,
    "scale_rewards": true,
    "sync_ref_model": false,
    "use_vllm": false,
    "vllm_server_host": "0.0.0.0",
    "vllm_server_port": 8000
  },
  "trust_remote_code": true,
  "use_otel_metrics": false,
  "use_ray": false,
  "use_wandb": true,
  "val_set_size": 0.01,
  "vllm": {
    "device": "auto",
    "dtype": "auto",
    "gpu_memory_utilization": 0.9,
    "host": "0.0.0.0",
    "port": 8000
  },
  "wandb_name": "stage-1",
  "wandb_project": "Gemmagain-Tests",
  "warmup_ratio": 0.025,
  "weight_decay": 0.01,
  "world_size": 2
}
[2026-01-06 01:54:22,729] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:285] [PID:13318] EOS: 1 / <eos>
[2026-01-06 01:54:22,729] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:286] [PID:13318] BOS: 2 / <bos>
[2026-01-06 01:54:22,729] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:287] [PID:13318] PAD: 0 / <pad>
[2026-01-06 01:54:22,729] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:288] [PID:13318] UNK: 3 / <unk>
[2026-01-06 01:54:22,730] [INFO] [axolotl.utils.data.shared.load_preprocessed_dataset:475] [PID:13318] Loading prepared dataset from disk at last_run_prepared/b77b22402667c44a458deb3cdbd0f672...
[2026-01-06 01:54:22,752] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:417] [PID:13318] total_num_tokens: 12_050_432
[2026-01-06 01:54:22,788] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:435] [PID:13318] `total_supervised_tokens: 12_050_432`
[2026-01-06 01:54:22,788] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:533] [PID:13318] total_num_steps: 292
[2026-01-06 01:54:22,788] [INFO] [axolotl.utils.data.sft._prepare_standard_dataset:121] [PID:13318] Maximum number of steps set at 292
[2026-01-06 01:54:22,830] [DEBUG] [axolotl.train.setup_model_and_tokenizer:70] [PID:13318] loading tokenizer... rpDungeon/gemmagain-4b-pt
[2026-01-06 01:54:24,150] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:285] [PID:13318] EOS: 1 / <eos>
[2026-01-06 01:54:24,150] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:286] [PID:13318] BOS: 2 / <bos>
[2026-01-06 01:54:24,150] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:287] [PID:13318] PAD: 0 / <pad>
[2026-01-06 01:54:24,150] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:288] [PID:13318] UNK: 3 / <unk>
[2026-01-06 01:54:24,150] [DEBUG] [axolotl.train.setup_model_and_tokenizer:82] [PID:13318] Loading model
[2026-01-06 01:54:24,574] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_evaluation_loop:87] [PID:13318] Patched Trainer.evaluation_loop with nanmean loss calculation
[2026-01-06 01:54:24,576] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_maybe_log_save_evaluate:138] [PID:13318] Patched Trainer._maybe_log_save_evaluate with nanmean loss calculation
[2026-01-06 01:54:24,580] [DEBUG] [axolotl.monkeypatch.transformers.trainer_context_parallel.patch_prepare_context_parallel_inputs:64] [PID:13318] Patched Trainer._prepare_context_parallel_inputs for FlashAttention + CP
[2026-01-06 01:54:24,698] [INFO] [axolotl.integrations.liger.plugin.pre_model_load:98] [PID:13318] Applying LIGER to gemma3 with kwargs: {'rope': True, 'cross_entropy': None, 'fused_linear_cross_entropy': None, 'rms_norm': True, 'layer_norm': True, 'geglu': True}
[2026-01-06 01:54:24,743] [INFO] [axolotl.integrations.cut_cross_entropy.pre_model_load:94] [PID:13318] Applying Cut Cross Entropy to model type: gemma3
Loading checkpoint shards:   0%|                                                                                                                                                                        | 0/2 [00:00<?, ?it/s]Loading checkpoint shards: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 179.46it/s]
[2026-01-06 01:54:25,257] [INFO] [axolotl.loaders.model._configure_embedding_dtypes:347] [PID:13318] Converting modules to torch.bfloat16
[2026-01-06 01:54:25,262] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:13318] Memory usage after model load 0.000GB ()
[2026-01-06 01:54:28,223] [INFO] [axolotl.train.save_initial_configs:417] [PID:13318] Pre-saving tokenizer to stage1...
[2026-01-06 01:54:28,462] [INFO] [axolotl.train.save_initial_configs:422] [PID:13318] Pre-saving model config to stage1...
[2026-01-06 01:54:28,470] [INFO] [axolotl.monkeypatch.ring_attn.patch.register_ring_attn_from_device_mesh:154] [PID:13318] Enabling ring attention sequence parallelism using DeviceMesh dimension '('cp',)'
[2026-01-06 01:54:28,471] [INFO] [axolotl.monkeypatch.ring_attn.patch.register_ring_attn_from_device_mesh:174] [PID:13318] Sequence parallel degree: 2, mesh shape: torch.Size([2])
[2026-01-06 01:54:28,478] [INFO] [axolotl.train.execute_training:212] [PID:13318] Starting trainer...
[2026-01-06 01:54:32,693] [WARNING] [py.warnings._showwarnmsg:110] [PID:13318] /workspace/venv/lib/python3.12/site-packages/accelerate/accelerator.py:1992: UserWarning: Upcasted low precision parameters in GemmagainForCausalLM because mixed precision turned on in FSDP. Affects: model.embed_tokens.weight, model.norm.weight.
  warnings.warn(

[2026-01-06 01:54:32,693] [WARNING] [py.warnings._showwarnmsg:110] [PID:13318] /workspace/venv/lib/python3.12/site-packages/accelerate/accelerator.py:1992: UserWarning: Upcasted low precision parameters in Gemma3DecoderLayer because mixed precision turned on in FSDP. Affects: self_attn.q_proj.weight, self_attn.k_proj.weight, self_attn.v_proj.weight, self_attn.o_proj.weight, self_attn.q_norm.weight, self_attn.k_norm.weight, mlp.gate_proj.weight, mlp.up_proj.weight, mlp.down_proj.weight, input_layernorm.weight, post_attention_layernorm.weight, pre_feedforward_layernorm.weight, post_feedforward_layernorm.weight.
  warnings.warn(

[2026-01-06 01:54:32,693] [WARNING] [py.warnings._showwarnmsg:110] [PID:13318] /workspace/venv/lib/python3.12/site-packages/accelerate/accelerator.py:1998: UserWarning: FSDP upcast of low precision parameters may affect the precision of model checkpoints.
  warnings.warn(

[34m[1mwandb[0m: Currently logged in as: [33mcooawoo[0m ([33mcooawoo-personal[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: [38;5;178m⢿[0m Waiting for wandb.init()...
[Am[2K[34m[1mwandb[0m: [38;5;178m⣻[0m setting up run zwmo802e (0.2s)
[Am[2K[34m[1mwandb[0m: [38;5;178m⣽[0m setting up run zwmo802e (0.2s)
[Am[2K[34m[1mwandb[0m: Tracking run with wandb version 0.23.1
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/workspace/wandb/run-20260106_015433-zwmo802e[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mstage-1[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/cooawoo-personal/Gemmagain-Tests[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/cooawoo-personal/Gemmagain-Tests/runs/zwmo802e[0m
[34m[1mwandb[0m: Detected [huggingface_hub.inference] in use.
[34m[1mwandb[0m: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
[34m[1mwandb[0m: For more information, check out the docs at: https://weave-docs.wandb.ai/
[34m[1mwandb[0m: [33mWARNING[0m Saving files without folders. If you want to preserve subdirectories pass base_path to wandb.save, i.e. wandb.save("/mnt/folder/file.h5", base_path="/mnt")
[34m[1mwandb[0m: [33mWARNING[0m Symlinked 1 file into the W&B run directory; call wandb.save again to sync new files.
[2026-01-06 01:54:35,447] [INFO] [axolotl.utils.callbacks.on_train_begin:757] [PID:13318] The Axolotl config has been saved to the WandB run under files.
  0%|                                                                                                                                                                                                 | 0/292 [00:00<?, ?it/s][2026-01-06 01:54:35,453] [INFO] [axolotl.core.trainers.base.evaluate:400] [PID:13318] Running evaluation step...

  0%|                                                                                                                                                                                                   | 0/6 [00:00<?, ?it/s][A
 33%|██████████████████████████████████████████████████████████████▎                                                                                                                            | 2/6 [00:01<00:03,  1.01it/s][A
 50%|█████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                             | 3/6 [00:02<00:02,  1.05it/s][A
 67%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                              | 4/6 [00:04<00:02,  1.14s/it][A
 83%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                               | 5/6 [00:05<00:01,  1.15s/it][A
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:07<00:00,  1.30s/it][A                                                                                                                                                                                                                              
                                                                                                                                                                                                                              [A{'eval_loss': 5.71091890335083, 'eval_runtime': 17.4715, 'eval_samples_per_second': 0.343, 'eval_steps_per_second': 0.172, 'eval_ppl': 302.14859, 'memory/max_active (GiB)': 33.34, 'memory/max_allocated (GiB)': 33.34, 'memory/device_reserved (GiB)': 78.77, 'epoch': 0}
  0%|                                                                                                                                                                                                 | 0/292 [00:17<?, ?it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:07<00:00,  1.30s/it][A
                                                                                                                                                                                                                              [A  0%|▋                                                                                                                                                                                      | 1/292 [00:39<3:11:22, 39.46s/it]                                                                                                                                                                                                                              {'loss': 21.0792, 'grad_norm': 195.2947540283203, 'learning_rate': 0.0, 'ppl': 1427513562.03611, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.25, 'tokens/train_per_sec_per_gpu': 850.7900390625, 'epoch': 0.01, 'tokens/total': 130816.0, 'tokens/trainable': 130787.0}
  0%|▋                                                                                                                                                                                      | 1/292 [00:39<3:11:22, 39.46s/it]  1%|█▎                                                                                                                                                                                     | 2/292 [00:49<1:48:08, 22.38s/it]                                                                                                                                                                                                                              {'loss': 22.2601, 'grad_norm': 208.32066345214844, 'learning_rate': 7.142857142857143e-06, 'ppl': 4649846289.60319, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.25, 'tokens/train_per_sec_per_gpu': 1054.1312255859375, 'epoch': 0.01, 'tokens/total': 199744.0, 'tokens/trainable': 199671.0}
  1%|█▎                                                                                                                                                                                     | 2/292 [00:49<1:48:08, 22.38s/it]  1%|█▉                                                                                                                                                                                     | 3/292 [00:59<1:20:45, 16.77s/it]                                                                                                                                                                                                                              {'loss': 20.457, 'grad_norm': 181.6300048828125, 'learning_rate': 1.4285714285714285e-05, 'ppl': 766235406.74321, 'memory/max_active (GiB)': 30.79, 'memory/max_allocated (GiB)': 28.69, 'memory/device_reserved (GiB)': 50.25, 'tokens/train_per_sec_per_gpu': 482.83038330078125, 'epoch': 0.02, 'tokens/total': 277184.0, 'tokens/trainable': 277008.0}
  1%|█▉                                                                                                                                                                                     | 3/292 [00:59<1:20:45, 16.77s/it]  1%|██▌                                                                                                                                                                                    | 4/292 [01:09<1:07:40, 14.10s/it]                                                                                                                                                                                                                              {'loss': 19.3173, 'grad_norm': 116.59762573242188, 'learning_rate': 2.1428571428571428e-05, 'ppl': 245130186.29176, 'memory/max_active (GiB)': 31.05, 'memory/max_allocated (GiB)': 28.95, 'memory/device_reserved (GiB)': 50.25, 'tokens/train_per_sec_per_gpu': 1275.0469970703125, 'epoch': 0.03, 'tokens/total': 340608.0, 'tokens/trainable': 340365.0}
  1%|██▌                                                                                                                                                                                    | 4/292 [01:09<1:07:40, 14.10s/it]  2%|███▏                                                                                                                                                                                   | 5/292 [01:22<1:04:51, 13.56s/it]                                                                                                                                                                                                                              {'loss': 17.594, 'grad_norm': 52.553707122802734, 'learning_rate': 2.857142857142857e-05, 'ppl': 43749905.02901, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.25, 'tokens/train_per_sec_per_gpu': 1300.5919189453125, 'epoch': 0.03, 'tokens/total': 439680.0, 'tokens/trainable': 439369.0}
  2%|███▏                                                                                                                                                                                   | 5/292 [01:22<1:04:51, 13.56s/it]  2%|███▊                                                                                                                                                                                   | 6/292 [01:34<1:01:47, 12.96s/it]                                                                                                                                                                                                                              {'loss': 17.7433, 'grad_norm': 53.69865798950195, 'learning_rate': 3.571428571428572e-05, 'ppl': 50794569.13284, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.25, 'tokens/train_per_sec_per_gpu': 495.872314453125, 'epoch': 0.04, 'tokens/total': 537216.0, 'tokens/trainable': 536851.0}
  2%|███▊                                                                                                                                                                                   | 6/292 [01:34<1:01:47, 12.96s/it]  2%|████▍                                                                                                                                                                                    | 7/292 [01:44<56:58, 12.00s/it]                                                                                                                                                                                                                              {'loss': 17.3529, 'grad_norm': 64.52742004394531, 'learning_rate': 4.2857142857142856e-05, 'ppl': 34377058.64458, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.25, 'tokens/train_per_sec_per_gpu': 165.50241088867188, 'epoch': 0.05, 'tokens/total': 596480.0, 'tokens/trainable': 595995.0}
  2%|████▍                                                                                                                                                                                    | 7/292 [01:44<56:58, 12.00s/it]  3%|█████                                                                                                                                                                                    | 8/292 [01:55<54:54, 11.60s/it]                                                                                                                                                                                                                              {'loss': 15.4887, 'grad_norm': 26.642906188964844, 'learning_rate': 5e-05, 'ppl': 5329137.69632, 'memory/max_active (GiB)': 33.22, 'memory/max_allocated (GiB)': 31.0, 'memory/device_reserved (GiB)': 50.25, 'tokens/train_per_sec_per_gpu': 949.2601318359375, 'epoch': 0.05, 'tokens/total': 674624.0, 'tokens/trainable': 674059.0}
  3%|█████                                                                                                                                                                                    | 8/292 [01:55<54:54, 11.60s/it]  3%|█████▋                                                                                                                                                                                   | 9/292 [02:05<53:32, 11.35s/it]                                                                                                                                                                                                                              {'loss': 15.7201, 'grad_norm': 31.065916061401367, 'learning_rate': 4.999848114735858e-05, 'ppl': 6716649.48755, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.27, 'tokens/train_per_sec_per_gpu': 215.44378662109375, 'epoch': 0.06, 'tokens/total': 732992.0, 'tokens/trainable': 732341.0}
  3%|█████▋                                                                                                                                                                                   | 9/292 [02:05<53:32, 11.35s/it]  3%|██████▎                                                                                                                                                                                 | 10/292 [02:18<54:36, 11.62s/it]                                                                                                                                                                                                                              {'loss': 13.8299, 'grad_norm': 23.727582931518555, 'learning_rate': 4.999392477398737e-05, 'ppl': 1014493.46842, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.27, 'tokens/train_per_sec_per_gpu': 319.8927917480469, 'epoch': 0.07, 'tokens/total': 834304.0, 'tokens/trainable': 833573.0}
  3%|██████▎                                                                                                                                                                                 | 10/292 [02:18<54:36, 11.62s/it]  4%|██████▉                                                                                                                                                                                 | 11/292 [02:27<50:34, 10.80s/it]                                                                                                                                                                                                                              {'loss': 13.9798, 'grad_norm': 23.9365291595459, 'learning_rate': 4.9986331433523156e-05, 'ppl': 1178555.3892, 'memory/max_active (GiB)': 24.61, 'memory/max_allocated (GiB)': 22.55, 'memory/device_reserved (GiB)': 45.54, 'tokens/train_per_sec_per_gpu': 265.8936767578125, 'epoch': 0.08, 'tokens/total': 889728.0, 'tokens/trainable': 888856.0}
  4%|██████▉                                                                                                                                                                                 | 11/292 [02:27<50:34, 10.80s/it]  4%|███████▌                                                                                                                                                                                | 12/292 [02:39<51:59, 11.14s/it]                                                                                                                                                                                                                              {'loss': 13.3933, 'grad_norm': 19.26011085510254, 'learning_rate': 4.997570204861915e-05, 'ppl': 655595.9839, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 61.55, 'tokens/train_per_sec_per_gpu': 552.9033203125, 'epoch': 0.08, 'tokens/total': 991360.0, 'tokens/trainable': 990439.0}
  4%|███████▌                                                                                                                                                                                | 12/292 [02:39<51:59, 11.14s/it]  4%|████████▏                                                                                                                                                                               | 13/292 [02:51<54:12, 11.66s/it]                                                                                                                                                                                                                              {'loss': 12.6176, 'grad_norm': 20.421977996826172, 'learning_rate': 4.996203791083291e-05, 'ppl': 301824.19769, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 61.55, 'tokens/train_per_sec_per_gpu': 738.143798828125, 'epoch': 0.09, 'tokens/total': 1108672.0, 'tokens/trainable': 1107691.0}
  4%|████████▏                                                                                                                                                                               | 13/292 [02:51<54:12, 11.66s/it]  5%|████████▊                                                                                                                                                                               | 14/292 [03:01<51:21, 11.09s/it]                                                                                                                                                                                                                              {'loss': 12.4769, 'grad_norm': 18.907896041870117, 'learning_rate': 4.994534068046937e-05, 'ppl': 262209.74083, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 61.55, 'tokens/train_per_sec_per_gpu': 368.9007263183594, 'epoch': 0.1, 'tokens/total': 1173248.0, 'tokens/trainable': 1172178.0}
  5%|████████▊                                                                                                                                                                               | 14/292 [03:01<51:21, 11.09s/it]  5%|█████████▍                                                                                                                                                                              | 15/292 [03:11<49:14, 10.67s/it]                                                                                                                                                                                                                              {'loss': 12.1845, 'grad_norm': 15.544273376464844, 'learning_rate': 4.992561238637912e-05, 'ppl': 195731.67595, 'memory/max_active (GiB)': 22.99, 'memory/max_allocated (GiB)': 20.9, 'memory/device_reserved (GiB)': 61.55, 'tokens/train_per_sec_per_gpu': 686.980712890625, 'epoch': 0.1, 'tokens/total': 1223360.0, 'tokens/trainable': 1222166.0}
  5%|█████████▍                                                                                                                                                                              | 15/292 [03:11<49:14, 10.67s/it]  5%|██████████                                                                                                                                                                              | 16/292 [03:24<52:57, 11.51s/it]                                                                                                                                                                                                                              {'loss': 11.7715, 'grad_norm': 15.398914337158203, 'learning_rate': 4.9902855425711905e-05, 'ppl': 129508.26753, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 61.55, 'tokens/train_per_sec_per_gpu': 1215.7496337890625, 'epoch': 0.11, 'tokens/total': 1348736.0, 'tokens/trainable': 1347484.0}
  5%|██████████                                                                                                                                                                              | 16/292 [03:24<52:57, 11.51s/it]  6%|██████████▋                                                                                                                                                                             | 17/292 [03:36<53:18, 11.63s/it]                                                                                                                                                                                                                              {'loss': 11.9459, 'grad_norm': 17.642976760864258, 'learning_rate': 4.9877072563625285e-05, 'ppl': 154183.69574, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 61.55, 'tokens/train_per_sec_per_gpu': 796.2022705078125, 'epoch': 0.12, 'tokens/total': 1451648.0, 'tokens/trainable': 1450333.0}
  6%|██████████▋                                                                                                                                                                             | 17/292 [03:36<53:18, 11.63s/it]  6%|███████████▎                                                                                                                                                                            | 18/292 [03:46<51:08, 11.20s/it]                                                                                                                                                                                                                              {'loss': 12.417, 'grad_norm': 16.296375274658203, 'learning_rate': 4.984826693294874e-05, 'ppl': 246964.52949, 'memory/max_active (GiB)': 32.13, 'memory/max_allocated (GiB)': 29.93, 'memory/device_reserved (GiB)': 61.55, 'tokens/train_per_sec_per_gpu': 145.33132934570312, 'epoch': 0.12, 'tokens/total': 1524224.0, 'tokens/trainable': 1522758.0}
  6%|███████████▎                                                                                                                                                                            | 18/292 [03:46<51:08, 11.20s/it]  7%|███████████▉                                                                                                                                                                            | 19/292 [03:58<51:25, 11.30s/it]                                                                                                                                                                                                                              {'loss': 11.4194, 'grad_norm': 15.501102447509766, 'learning_rate': 4.981644203380291e-05, 'ppl': 91071.48258, 'memory/max_active (GiB)': 34.37, 'memory/max_allocated (GiB)': 32.13, 'memory/device_reserved (GiB)': 61.55, 'tokens/train_per_sec_per_gpu': 1276.0517578125, 'epoch': 0.13, 'tokens/total': 1620864.0, 'tokens/trainable': 1619240.0}
  7%|███████████▉                                                                                                                                                                            | 19/292 [03:58<51:25, 11.30s/it]  7%|████████████▌                                                                                                                                                                           | 20/292 [04:09<50:36, 11.16s/it]                                                                                                                                                                                                                              {'loss': 12.018, 'grad_norm': 13.411163330078125, 'learning_rate': 4.978160173317438e-05, 'ppl': 165710.90285, 'memory/max_active (GiB)': 29.71, 'memory/max_allocated (GiB)': 27.52, 'memory/device_reserved (GiB)': 61.55, 'tokens/train_per_sec_per_gpu': 846.8701171875, 'epoch': 0.14, 'tokens/total': 1690880.0, 'tokens/trainable': 1689162.0}
  7%|████████████▌                                                                                                                                                                           | 20/292 [04:09<50:36, 11.16s/it]  7%|█████████████▏                                                                                                                                                                          | 21/292 [04:19<49:44, 11.01s/it]                                                                                                                                                                                                                              {'loss': 11.2688, 'grad_norm': 16.117755889892578, 'learning_rate': 4.974375026444575e-05, 'ppl': 78338.93402, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 64.14, 'tokens/train_per_sec_per_gpu': 1538.0128173828125, 'epoch': 0.14, 'tokens/total': 1769024.0, 'tokens/trainable': 1767255.0}
  7%|█████████████▏                                                                                                                                                                          | 21/292 [04:19<49:44, 11.01s/it]  8%|█████████████▊                                                                                                                                                                          | 22/292 [04:30<48:31, 10.78s/it]                                                                                                                                                                                                                              {'loss': 10.833, 'grad_norm': 14.527398109436035, 'learning_rate': 4.970289222688129e-05, 'ppl': 50665.47545, 'memory/max_active (GiB)': 33.49, 'memory/max_allocated (GiB)': 31.26, 'memory/device_reserved (GiB)': 64.14, 'tokens/train_per_sec_per_gpu': 449.59429931640625, 'epoch': 0.15, 'tokens/total': 1855616.0, 'tokens/trainable': 1853734.0}
  8%|█████████████▊                                                                                                                                                                          | 22/292 [04:30<48:31, 10.78s/it]  8%|██████████████▍                                                                                                                                                                         | 23/292 [04:40<47:07, 10.51s/it]                                                                                                                                                                                                                              {'loss': 11.817, 'grad_norm': 15.794232368469238, 'learning_rate': 4.965903258506806e-05, 'ppl': 135537.00749, 'memory/max_active (GiB)': 32.77, 'memory/max_allocated (GiB)': 30.64, 'memory/device_reserved (GiB)': 64.14, 'tokens/train_per_sec_per_gpu': 1078.495361328125, 'epoch': 0.16, 'tokens/total': 1925504.0, 'tokens/trainable': 1923526.0}
  8%|██████████████▍                                                                                                                                                                         | 23/292 [04:40<47:07, 10.51s/it]  8%|███████████████                                                                                                                                                                         | 24/292 [04:49<45:57, 10.29s/it]                                                                                                                                                                                                                              {'loss': 11.2778, 'grad_norm': 17.108484268188477, 'learning_rate': 4.961217666831268e-05, 'ppl': 79047.16669, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.74, 'memory/device_reserved (GiB)': 64.14, 'tokens/train_per_sec_per_gpu': 867.5133666992188, 'epoch': 0.16, 'tokens/total': 1983552.0, 'tokens/trainable': 1981463.0}
  8%|███████████████                                                                                                                                                                         | 24/292 [04:49<45:57, 10.29s/it]  9%|███████████████▊                                                                                                                                                                        | 25/292 [05:00<46:45, 10.51s/it]                                                                                                                                                                                                                              {'loss': 11.9643, 'grad_norm': 16.070518493652344, 'learning_rate': 4.956233016999379e-05, 'ppl': 157046.93677, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.74, 'memory/device_reserved (GiB)': 64.14, 'tokens/train_per_sec_per_gpu': 1290.3115234375, 'epoch': 0.17, 'tokens/total': 2052928.0, 'tokens/trainable': 2050749.0}
  9%|███████████████▊                                                                                                                                                                        | 25/292 [05:00<46:45, 10.51s/it]  9%|████████████████▍                                                                                                                                                                       | 26/292 [05:12<47:32, 10.72s/it]                                                                                                                                                                                                                              {'loss': 10.7069, 'grad_norm': 12.805251121520996, 'learning_rate': 4.9509499146870236e-05, 'ppl': 44662.96885, 'memory/max_active (GiB)': 36.47, 'memory/max_allocated (GiB)': 34.28, 'memory/device_reserved (GiB)': 64.14, 'tokens/train_per_sec_per_gpu': 1435.0052490234375, 'epoch': 0.18, 'tokens/total': 2147328.0, 'tokens/trainable': 2144974.0}
  9%|████████████████▍                                                                                                                                                                       | 26/292 [05:12<47:32, 10.72s/it]  9%|█████████████████                                                                                                                                                                       | 27/292 [05:24<49:12, 11.14s/it]                                                                                                                                                                                                                              {'loss': 11.9309, 'grad_norm': 13.481685638427734, 'learning_rate': 4.9453690018345144e-05, 'ppl': 151888.19956, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 64.14, 'tokens/train_per_sec_per_gpu': 1424.1138916015625, 'epoch': 0.18, 'tokens/total': 2254144.0, 'tokens/trainable': 2251688.0}
  9%|█████████████████                                                                                                                                                                       | 27/292 [05:24<49:12, 11.14s/it] 10%|█████████████████▋                                                                                                                                                                      | 28/292 [05:36<50:28, 11.47s/it]                                                                                                                                                                                                                              {'loss': 10.6407, 'grad_norm': 13.475818634033203, 'learning_rate': 4.9394909565685894e-05, 'ppl': 41802.0224, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 64.14, 'tokens/train_per_sec_per_gpu': 1424.8941650390625, 'epoch': 0.19, 'tokens/total': 2361216.0, 'tokens/trainable': 2358707.0}
 10%|█████████████████▋                                                                                                                                                                      | 28/292 [05:36<50:28, 11.47s/it] 10%|██████████████████▎                                                                                                                                                                     | 29/292 [05:45<47:40, 10.88s/it]                                                                                                                                                                                                                              {'loss': 11.3206, 'grad_norm': 38.63097381591797, 'learning_rate': 4.933316493120015e-05, 'ppl': 82503.83037, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.74, 'memory/device_reserved (GiB)': 64.19, 'tokens/train_per_sec_per_gpu': 1813.5946044921875, 'epoch': 0.2, 'tokens/total': 2412992.0, 'tokens/trainable': 2410409.0}
 10%|██████████████████▎                                                                                                                                                                     | 29/292 [05:46<47:40, 10.88s/it] 10%|██████████████████▉                                                                                                                                                                     | 30/292 [05:58<49:06, 11.25s/it]                                                                                                                                                                                                                              {'loss': 11.4835, 'grad_norm': 15.340786933898926, 'learning_rate': 4.9268463617368e-05, 'ppl': 97100.32487, 'memory/max_active (GiB)': 33.12, 'memory/max_allocated (GiB)': 30.9, 'memory/device_reserved (GiB)': 64.19, 'tokens/train_per_sec_per_gpu': 760.5748901367188, 'epoch': 0.21, 'tokens/total': 2492608.0, 'tokens/trainable': 2489857.0}
 10%|██████████████████▉                                                                                                                                                                     | 30/292 [05:58<49:06, 11.25s/it] 11%|███████████████████▌                                                                                                                                                                    | 31/292 [06:08<47:38, 10.95s/it]                                                                                                                                                                                                                              {'loss': 11.3565, 'grad_norm': 12.422508239746094, 'learning_rate': 4.9200813485930375e-05, 'ppl': 85519.52573, 'memory/max_active (GiB)': 35.9, 'memory/max_allocated (GiB)': 33.72, 'memory/device_reserved (GiB)': 59.28, 'tokens/train_per_sec_per_gpu': 218.08758544921875, 'epoch': 0.21, 'tokens/total': 2561920.0, 'tokens/trainable': 2559030.0}
 11%|███████████████████▌                                                                                                                                                                    | 31/292 [06:08<47:38, 10.95s/it] 11%|████████████████████▏                                                                                                                                                                   | 32/292 [06:20<48:39, 11.23s/it]                                                                                                                                                                                                                              {'loss': 11.0364, 'grad_norm': 11.367571830749512, 'learning_rate': 4.913022275693372e-05, 'ppl': 62093.71158, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 75.29, 'tokens/train_per_sec_per_gpu': 1419.3466796875, 'epoch': 0.22, 'tokens/total': 2639104.0, 'tokens/trainable': 2636118.0}
 11%|████████████████████▏                                                                                                                                                                   | 32/292 [06:20<48:39, 11.23s/it] 11%|████████████████████▊                                                                                                                                                                   | 33/292 [06:33<51:11, 11.86s/it]                                                                                                                                                                                                                              {'loss': 11.2332, 'grad_norm': 10.562846183776855, 'learning_rate': 4.905670000773126e-05, 'ppl': 75599.12591, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 75.29, 'tokens/train_per_sec_per_gpu': 1229.2611083984375, 'epoch': 0.23, 'tokens/total': 2762688.0, 'tokens/trainable': 2759643.0}
 11%|████████████████████▊                                                                                                                                                                   | 33/292 [06:33<51:11, 11.86s/it] 12%|█████████████████████▍                                                                                                                                                                  | 34/292 [06:43<49:02, 11.40s/it]                                                                                                                                                                                                                              {'loss': 11.3295, 'grad_norm': 23.760316848754883, 'learning_rate': 4.8980254171940746e-05, 'ppl': 83241.39174, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 75.29, 'tokens/train_per_sec_per_gpu': 1061.787841796875, 'epoch': 0.23, 'tokens/total': 2833216.0, 'tokens/trainable': 2830108.0}
 12%|█████████████████████▍                                                                                                                                                                  | 34/292 [06:44<49:02, 11.40s/it] 12%|██████████████████████                                                                                                                                                                  | 35/292 [06:55<48:45, 11.38s/it]                                                                                                                                                                                                                              {'loss': 10.6174, 'grad_norm': 14.137269973754883, 'learning_rate': 4.8900894538358944e-05, 'ppl': 40839.29461, 'memory/max_active (GiB)': 27.85, 'memory/max_allocated (GiB)': 25.72, 'memory/device_reserved (GiB)': 75.29, 'tokens/train_per_sec_per_gpu': 799.6109619140625, 'epoch': 0.24, 'tokens/total': 2894144.0, 'tokens/trainable': 2890910.0}
 12%|██████████████████████                                                                                                                                                                  | 35/292 [06:55<48:45, 11.38s/it] 12%|██████████████████████▋                                                                                                                                                                 | 36/292 [07:03<45:14, 10.60s/it]                                                                                                                                                                                                                              {'loss': 11.4218, 'grad_norm': 25.23195457458496, 'learning_rate': 4.881863074983298e-05, 'ppl': 91290.31663, 'memory/max_active (GiB)': 30.16, 'memory/max_allocated (GiB)': 28.08, 'memory/device_reserved (GiB)': 75.35, 'tokens/train_per_sec_per_gpu': 28.542003631591797, 'epoch': 0.25, 'tokens/total': 2927808.0, 'tokens/trainable': 2924456.0}
 12%|██████████████████████▋                                                                                                                                                                 | 36/292 [07:03<45:14, 10.60s/it] 13%|███████████████████████▎                                                                                                                                                                | 37/292 [07:14<45:22, 10.68s/it]                                                                                                                                                                                                                              {'loss': 11.6046, 'grad_norm': 19.723880767822266, 'learning_rate': 4.8733472802088654e-05, 'ppl': 109600.80518, 'memory/max_active (GiB)': 29.71, 'memory/max_allocated (GiB)': 27.52, 'memory/device_reserved (GiB)': 75.35, 'tokens/train_per_sec_per_gpu': 1004.7235717773438, 'epoch': 0.25, 'tokens/total': 3003008.0, 'tokens/trainable': 2999547.0}
 13%|███████████████████████▎                                                                                                                                                                | 37/292 [07:14<45:22, 10.68s/it][2026-01-06 02:01:50,277] [INFO] [axolotl.core.trainers.base.evaluate:400] [PID:13318] Running evaluation step...

  0%|                                                                                                                                                                                                   | 0/6 [00:00<?, ?it/s][A
 33%|██████████████████████████████████████████████████████████████▎                                                                                                                            | 2/6 [00:01<00:03,  1.18it/s][A
 50%|█████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                             | 3/6 [00:02<00:02,  1.18it/s][A
 67%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                              | 4/6 [00:03<00:01,  1.01it/s][A
 83%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                               | 5/6 [00:05<00:01,  1.14s/it][A
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:06<00:00,  1.18s/it][A                                                                                                                                                                                                                              
                                                                                                                                                                                                                              [A{'eval_loss': 2.774874448776245, 'eval_runtime': 8.0352, 'eval_samples_per_second': 0.747, 'eval_steps_per_second': 0.373, 'eval_ppl': 16.03661, 'memory/max_active (GiB)': 34.82, 'memory/max_allocated (GiB)': 33.36, 'memory/device_reserved (GiB)': 92.77, 'epoch': 0.25, 'tokens/train_per_sec_per_gpu': 0.0, 'tokens/total': 3003008.0, 'tokens/trainable': 2999547.0}
 13%|███████████████████████▎                                                                                                                                                                | 37/292 [07:22<45:22, 10.68s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:06<00:00,  1.18s/it][A
                                                                                                                                                                                                                              [A 13%|███████████████████████▋                                                                                                                                                              | 38/292 [07:38<1:01:07, 14.44s/it]                                                                                                                                                                                                                              {'loss': 10.8229, 'grad_norm': 13.797052383422852, 'learning_rate': 4.864543104251587e-05, 'ppl': 50156.32966, 'memory/max_active (GiB)': 37.03, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 63.32, 'tokens/train_per_sec_per_gpu': 1079.5362548828125, 'epoch': 0.26, 'tokens/total': 3134080.0, 'tokens/trainable': 3130619.0}
 13%|███████████████████████▋                                                                                                                                                              | 38/292 [07:38<1:01:07, 14.44s/it] 13%|████████████████████████▌                                                                                                                                                               | 39/292 [07:48<56:00, 13.28s/it]                                                                                                                                                                                                                              {'loss': 10.3932, 'grad_norm': 15.505252838134766, 'learning_rate': 4.855451616891136e-05, 'ppl': 32636.93822, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 63.32, 'tokens/train_per_sec_per_gpu': 64.1013412475586, 'epoch': 0.27, 'tokens/total': 3190208.0, 'tokens/trainable': 3186621.0}
 13%|████████████████████████▌                                                                                                                                                               | 39/292 [07:48<56:00, 13.28s/it] 14%|█████████████████████████▏                                                                                                                                                              | 40/292 [07:58<51:13, 12.20s/it]                                                                                                                                                                                                                              {'loss': 11.0801, 'grad_norm': 14.136200904846191, 'learning_rate': 4.8460739228178806e-05, 'ppl': 64867.36982, 'memory/max_active (GiB)': 27.52, 'memory/max_allocated (GiB)': 25.36, 'memory/device_reserved (GiB)': 61.09, 'tokens/train_per_sec_per_gpu': 738.7182006835938, 'epoch': 0.27, 'tokens/total': 3257344.0, 'tokens/trainable': 3253688.0}
 14%|█████████████████████████▏                                                                                                                                                              | 40/292 [07:58<51:13, 12.20s/it] 14%|█████████████████████████▊                                                                                                                                                              | 41/292 [08:09<49:54, 11.93s/it]                                                                                                                                                                                                                              {'loss': 10.7441, 'grad_norm': 14.392645835876465, 'learning_rate': 4.8364111614986527e-05, 'ppl': 46355.72129, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 60.74, 'tokens/train_per_sec_per_gpu': 697.6207275390625, 'epoch': 0.28, 'tokens/total': 3333184.0, 'tokens/trainable': 3329481.0}
 14%|█████████████████████████▊                                                                                                                                                              | 41/292 [08:10<49:54, 11.93s/it] 14%|██████████████████████████▍                                                                                                                                                             | 42/292 [08:21<49:05, 11.78s/it]                                                                                                                                                                                                                              {'loss': 11.3238, 'grad_norm': 10.756410598754883, 'learning_rate': 4.8264645070382964e-05, 'ppl': 82768.2655, 'memory/max_active (GiB)': 36.68, 'memory/max_allocated (GiB)': 34.49, 'memory/device_reserved (GiB)': 60.74, 'tokens/train_per_sec_per_gpu': 1499.282470703125, 'epoch': 0.29, 'tokens/total': 3423424.0, 'tokens/trainable': 3419569.0}
 14%|██████████████████████████▍                                                                                                                                                             | 42/292 [08:21<49:05, 11.78s/it] 15%|███████████████████████████                                                                                                                                                             | 43/292 [08:31<47:31, 11.45s/it]                                                                                                                                                                                                                              {'loss': 11.5321, 'grad_norm': 12.793756484985352, 'learning_rate': 4.8162351680370044e-05, 'ppl': 101935.9547, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 60.74, 'tokens/train_per_sec_per_gpu': 397.74896240234375, 'epoch': 0.29, 'tokens/total': 3496640.0, 'tokens/trainable': 3492684.0}
 15%|███████████████████████████                                                                                                                                                             | 43/292 [08:32<47:31, 11.45s/it] 15%|███████████████████████████▋                                                                                                                                                            | 44/292 [08:43<47:57, 11.60s/it]                                                                                                                                                                                                                              {'loss': 9.1615, 'grad_norm': 13.25418472290039, 'learning_rate': 4.805724387443462e-05, 'ppl': 9523.33137, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 60.74, 'tokens/train_per_sec_per_gpu': 1432.5128173828125, 'epoch': 0.3, 'tokens/total': 3592768.0, 'tokens/trainable': 3588733.0}
 15%|███████████████████████████▋                                                                                                                                                            | 44/292 [08:43<47:57, 11.60s/it] 15%|████████████████████████████▎                                                                                                                                                           | 45/292 [08:55<48:33, 11.80s/it]                                                                                                                                                                                                                              {'loss': 10.5462, 'grad_norm': 13.964683532714844, 'learning_rate': 4.7949334424038176e-05, 'ppl': 38032.63934, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 60.74, 'tokens/train_per_sec_per_gpu': 460.2379455566406, 'epoch': 0.31, 'tokens/total': 3702400.0, 'tokens/trainable': 3698311.0}
 15%|████████████████████████████▎                                                                                                                                                           | 45/292 [08:56<48:33, 11.80s/it] 16%|████████████████████████████▉                                                                                                                                                           | 46/292 [09:05<45:58, 11.21s/it]                                                                                                                                                                                                                              {'loss': 11.8846, 'grad_norm': 15.072256088256836, 'learning_rate': 4.783863644106502e-05, 'ppl': 145016.09279, 'memory/max_active (GiB)': 28.12, 'memory/max_allocated (GiB)': 25.98, 'memory/device_reserved (GiB)': 60.74, 'tokens/train_per_sec_per_gpu': 1163.6376953125, 'epoch': 0.32, 'tokens/total': 3770688.0, 'tokens/trainable': 3766466.0}
 16%|████████████████████████████▉                                                                                                                                                           | 46/292 [09:06<45:58, 11.21s/it] 16%|█████████████████████████████▌                                                                                                                                                          | 47/292 [09:15<43:32, 10.66s/it]                                                                                                                                                                                                                              {'loss': 10.8525, 'grad_norm': 16.8549861907959, 'learning_rate': 4.7725163376229064e-05, 'ppl': 51663.14791, 'memory/max_active (GiB)': 34.85, 'memory/max_allocated (GiB)': 32.69, 'memory/device_reserved (GiB)': 60.76, 'tokens/train_per_sec_per_gpu': 1726.6768798828125, 'epoch': 0.32, 'tokens/total': 3811200.0, 'tokens/trainable': 3806864.0}
 16%|█████████████████████████████▌                                                                                                                                                          | 47/292 [09:15<43:32, 10.66s/it] 16%|██████████████████████████████▏                                                                                                                                                         | 48/292 [09:24<41:27, 10.20s/it]                                                                                                                                                                                                                              {'loss': 10.7622, 'grad_norm': 16.35452651977539, 'learning_rate': 4.760892901743944e-05, 'ppl': 47202.39916, 'memory/max_active (GiB)': 29.71, 'memory/max_allocated (GiB)': 27.52, 'memory/device_reserved (GiB)': 60.77, 'tokens/train_per_sec_per_gpu': 79.54596710205078, 'epoch': 0.33, 'tokens/total': 3865792.0, 'tokens/trainable': 3861355.0}
 16%|██████████████████████████████▏                                                                                                                                                         | 48/292 [09:24<41:27, 10.20s/it] 17%|██████████████████████████████▉                                                                                                                                                         | 49/292 [09:35<42:29, 10.49s/it]                                                                                                                                                                                                                              {'loss': 11.1251, 'grad_norm': 13.279970169067383, 'learning_rate': 4.7489947488125175e-05, 'ppl': 67853.07603, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 60.77, 'tokens/train_per_sec_per_gpu': 1489.8870849609375, 'epoch': 0.34, 'tokens/total': 3964992.0, 'tokens/trainable': 3960495.0}
 17%|██████████████████████████████▉                                                                                                                                                         | 49/292 [09:36<42:29, 10.49s/it] 17%|███████████████████████████████▌                                                                                                                                                        | 50/292 [09:48<44:56, 11.14s/it]                                                                                                                                                                                                                              {'loss': 11.056, 'grad_norm': 12.65727710723877, 'learning_rate': 4.736823324551909e-05, 'ppl': 63322.75359, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 60.77, 'tokens/train_per_sec_per_gpu': 533.77294921875, 'epoch': 0.34, 'tokens/total': 4040832.0, 'tokens/trainable': 4036267.0}
 17%|███████████████████████████████▌                                                                                                                                                        | 50/292 [09:48<44:56, 11.14s/it] 17%|████████████████████████████████▏                                                                                                                                                       | 51/292 [09:58<44:14, 11.01s/it]                                                                                                                                                                                                                              {'loss': 10.9602, 'grad_norm': 30.799306869506836, 'learning_rate': 4.7243801078901084e-05, 'ppl': 57537.94948, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.71, 'tokens/train_per_sec_per_gpu': 1635.2841796875, 'epoch': 0.35, 'tokens/total': 4121152.0, 'tokens/trainable': 4116481.0}
 17%|████████████████████████████████▏                                                                                                                                                       | 51/292 [09:59<44:14, 11.01s/it] 18%|████████████████████████████████▊                                                                                                                                                       | 52/292 [10:10<45:00, 11.25s/it]                                                                                                                                                                                                                              {'loss': 11.51, 'grad_norm': 15.754761695861816, 'learning_rate': 4.711666610780115e-05, 'ppl': 99707.881, 'memory/max_active (GiB)': 37.03, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.72, 'tokens/train_per_sec_per_gpu': 729.2482299804688, 'epoch': 0.36, 'tokens/total': 4206976.0, 'tokens/trainable': 4202243.0}
 18%|████████████████████████████████▊                                                                                                                                                       | 52/292 [10:11<45:00, 11.25s/it] 18%|█████████████████████████████████▍                                                                                                                                                      | 53/292 [10:22<45:31, 11.43s/it]                                                                                                                                                                                                                              {'loss': 10.4531, 'grad_norm': 11.586980819702148, 'learning_rate': 4.698684378016222e-05, 'ppl': 34651.62843, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.72, 'tokens/train_per_sec_per_gpu': 1478.4158935546875, 'epoch': 0.36, 'tokens/total': 4296512.0, 'tokens/trainable': 4291678.0}
 18%|█████████████████████████████████▍                                                                                                                                                      | 53/292 [10:23<45:31, 11.43s/it] 18%|██████████████████████████████████                                                                                                                                                      | 54/292 [10:34<45:52, 11.57s/it]                                                                                                                                                                                                                              {'loss': 10.4701, 'grad_norm': 10.563597679138184, 'learning_rate': 4.685434987046314e-05, 'ppl': 35245.74177, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.72, 'tokens/train_per_sec_per_gpu': 719.87890625, 'epoch': 0.37, 'tokens/total': 4398080.0, 'tokens/trainable': 4393186.0}
 18%|██████████████████████████████████                                                                                                                                                      | 54/292 [10:34<45:52, 11.57s/it] 19%|██████████████████████████████████▋                                                                                                                                                     | 55/292 [10:45<45:38, 11.55s/it]                                                                                                                                                                                                                              {'loss': 11.1638, 'grad_norm': 12.953259468078613, 'learning_rate': 4.671920047780186e-05, 'ppl': 70530.46337, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.72, 'tokens/train_per_sec_per_gpu': 1469.0869140625, 'epoch': 0.38, 'tokens/total': 4488000.0, 'tokens/trainable': 4483060.0}
 19%|██████████████████████████████████▋                                                                                                                                                     | 55/292 [10:46<45:38, 11.55s/it] 19%|███████████████████████████████████▎                                                                                                                                                    | 56/292 [10:55<43:16, 11.00s/it]                                                                                                                                                                                                                              {'loss': 11.5271, 'grad_norm': 15.039900779724121, 'learning_rate': 4.6581412023939354e-05, 'ppl': 101427.54701, 'memory/max_active (GiB)': 33.5, 'memory/max_allocated (GiB)': 31.36, 'memory/device_reserved (GiB)': 50.72, 'tokens/train_per_sec_per_gpu': 204.4135284423828, 'epoch': 0.38, 'tokens/total': 4548480.0, 'tokens/trainable': 4543354.0}
 19%|███████████████████████████████████▎                                                                                                                                                    | 56/292 [10:56<43:16, 11.00s/it] 20%|███████████████████████████████████▉                                                                                                                                                    | 57/292 [11:07<44:22, 11.33s/it]                                                                                                                                                                                                                              {'loss': 10.9329, 'grad_norm': 10.062328338623047, 'learning_rate': 4.644100125130418e-05, 'ppl': 55988.4109, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.72, 'tokens/train_per_sec_per_gpu': 1426.9908447265625, 'epoch': 0.39, 'tokens/total': 4639616.0, 'tokens/trainable': 4634454.0}
 20%|███████████████████████████████████▉                                                                                                                                                    | 57/292 [11:08<44:22, 11.33s/it] 20%|████████████████████████████████████▌                                                                                                                                                   | 58/292 [11:17<42:57, 11.01s/it]                                                                                                                                                                                                                              {'loss': 11.3378, 'grad_norm': 13.23015308380127, 'learning_rate': 4.629798522095818e-05, 'ppl': 83935.17049, 'memory/max_active (GiB)': 34.0, 'memory/max_allocated (GiB)': 31.77, 'memory/device_reserved (GiB)': 50.75, 'tokens/train_per_sec_per_gpu': 638.570556640625, 'epoch': 0.4, 'tokens/total': 4711040.0, 'tokens/trainable': 4705722.0}
 20%|████████████████████████████████████▌                                                                                                                                                   | 58/292 [11:18<42:57, 11.01s/it] 20%|█████████████████████████████████████▏                                                                                                                                                  | 59/292 [11:28<42:24, 10.92s/it]                                                                                                                                                                                                                              {'loss': 10.8083, 'grad_norm': 11.932737350463867, 'learning_rate': 4.6152381310523387e-05, 'ppl': 49429.36699, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.75, 'tokens/train_per_sec_per_gpu': 366.0463562011719, 'epoch': 0.4, 'tokens/total': 4789120.0, 'tokens/trainable': 4783685.0}
 20%|█████████████████████████████████████▏                                                                                                                                                  | 59/292 [11:29<42:24, 10.92s/it] 21%|█████████████████████████████████████▊                                                                                                                                                  | 60/292 [11:39<41:53, 10.83s/it]                                                                                                                                                                                                                              {'loss': 11.6797, 'grad_norm': 15.43041706085205, 'learning_rate': 4.600420721207053e-05, 'ppl': 118148.78512, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.75, 'tokens/train_per_sec_per_gpu': 1641.2930908203125, 'epoch': 0.41, 'tokens/total': 4845376.0, 'tokens/trainable': 4839813.0}
 21%|█████████████████████████████████████▊                                                                                                                                                  | 60/292 [11:39<41:53, 10.83s/it] 21%|██████████████████████████████████████▍                                                                                                                                                 | 61/292 [11:49<40:56, 10.63s/it]                                                                                                                                                                                                                              {'loss': 10.0608, 'grad_norm': 17.60348892211914, 'learning_rate': 4.585348092996925e-05, 'ppl': 23407.22466, 'memory/max_active (GiB)': 33.92, 'memory/max_allocated (GiB)': 31.77, 'memory/device_reserved (GiB)': 61.65, 'tokens/train_per_sec_per_gpu': 719.4760131835938, 'epoch': 0.42, 'tokens/total': 4917824.0, 'tokens/trainable': 4912173.0}
 21%|██████████████████████████████████████▍                                                                                                                                                 | 61/292 [11:49<40:56, 10.63s/it] 21%|███████████████████████████████████████                                                                                                                                                 | 62/292 [12:00<41:46, 10.90s/it]                                                                                                                                                                                                                              {'loss': 11.233, 'grad_norm': 13.747428894042969, 'learning_rate': 4.5700220778700504e-05, 'ppl': 75584.00759, 'memory/max_active (GiB)': 36.58, 'memory/max_allocated (GiB)': 34.39, 'memory/device_reserved (GiB)': 77.53, 'tokens/train_per_sec_per_gpu': 552.70458984375, 'epoch': 0.42, 'tokens/total': 5020160.0, 'tokens/trainable': 5014332.0}
 21%|███████████████████████████████████████                                                                                                                                                 | 62/292 [12:01<41:46, 10.90s/it] 22%|███████████████████████████████████████▋                                                                                                                                                | 63/292 [12:14<44:46, 11.73s/it]                                                                                                                                                                                                                              {'loss': 10.0545, 'grad_norm': 8.750018119812012, 'learning_rate': 4.554444538063113e-05, 'ppl': 23260.22269, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 93.53, 'tokens/train_per_sec_per_gpu': 1267.486083984375, 'epoch': 0.43, 'tokens/total': 5149888.0, 'tokens/trainable': 5144023.0}
 22%|███████████████████████████████████████▋                                                                                                                                                | 63/292 [12:15<44:46, 11.73s/it] 22%|████████████████████████████████████████▎                                                                                                                                               | 64/292 [12:25<43:13, 11.38s/it]                                                                                                                                                                                                                              {'loss': 11.1081, 'grad_norm': 11.333261489868164, 'learning_rate': 4.538617366375112e-05, 'ppl': 66709.32318, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 93.53, 'tokens/train_per_sec_per_gpu': 372.28546142578125, 'epoch': 0.44, 'tokens/total': 5228416.0, 'tokens/trainable': 5222496.0}
 22%|████████████████████████████████████████▎                                                                                                                                               | 64/292 [12:25<43:13, 11.38s/it] 22%|████████████████████████████████████████▉                                                                                                                                               | 65/292 [12:36<42:24, 11.21s/it]                                                                                                                                                                                                                              {'loss': 10.8496, 'grad_norm': 9.708237648010254, 'learning_rate': 4.522542485937369e-05, 'ppl': 51513.54182, 'memory/max_active (GiB)': 27.4, 'memory/max_allocated (GiB)': 25.16, 'memory/device_reserved (GiB)': 93.53, 'tokens/train_per_sec_per_gpu': 554.6248168945312, 'epoch': 0.45, 'tokens/total': 5299008.0, 'tokens/trainable': 5292988.0}
 22%|████████████████████████████████████████▉                                                                                                                                               | 65/292 [12:36<42:24, 11.21s/it] 23%|█████████████████████████████████████████▌                                                                                                                                              | 66/292 [12:48<43:34, 11.57s/it]                                                                                                                                                                                                                              {'loss': 10.8682, 'grad_norm': 10.23025131225586, 'learning_rate': 4.5062218499798526e-05, 'ppl': 52480.66001, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 93.53, 'tokens/train_per_sec_per_gpu': 546.8458862304688, 'epoch': 0.45, 'tokens/total': 5401600.0, 'tokens/trainable': 5395538.0}
 23%|█████████████████████████████████████████▌                                                                                                                                              | 66/292 [12:48<43:34, 11.57s/it] 23%|██████████████████████████████████████████▏                                                                                                                                             | 67/292 [13:02<45:55, 12.25s/it]                                                                                                                                                                                                                              {'loss': 10.0076, 'grad_norm': 10.006569862365723, 'learning_rate': 4.4896574415938465e-05, 'ppl': 22194.50467, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 93.53, 'tokens/train_per_sec_per_gpu': 1185.2337646484375, 'epoch': 0.46, 'tokens/total': 5532672.0, 'tokens/trainable': 5526610.0}
 23%|██████████████████████████████████████████▏                                                                                                                                             | 67/292 [13:02<45:55, 12.25s/it] 23%|██████████████████████████████████████████▊                                                                                                                                             | 68/292 [13:14<45:56, 12.31s/it]                                                                                                                                                                                                                              {'loss': 10.6382, 'grad_norm': 16.791748046875, 'learning_rate': 4.4728512734909844e-05, 'ppl': 41697.64786, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 93.53, 'tokens/train_per_sec_per_gpu': 1317.06640625, 'epoch': 0.47, 'tokens/total': 5636096.0, 'tokens/trainable': 5629934.0}
 23%|██████████████████████████████████████████▊                                                                                                                                             | 68/292 [13:14<45:56, 12.31s/it] 24%|███████████████████████████████████████████▍                                                                                                                                            | 69/292 [13:25<44:13, 11.90s/it]                                                                                                                                                                                                                              {'loss': 10.7609, 'grad_norm': 13.112563133239746, 'learning_rate': 4.455805387758691e-05, 'ppl': 47141.07591, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 93.53, 'tokens/train_per_sec_per_gpu': 861.1243286132812, 'epoch': 0.47, 'tokens/total': 5707264.0, 'tokens/trainable': 5701013.0}
 24%|███████████████████████████████████████████▍                                                                                                                                            | 69/292 [13:25<44:13, 11.90s/it] 24%|████████████████████████████████████████████                                                                                                                                            | 70/292 [13:38<45:23, 12.27s/it]                                                                                                                                                                                                                              {'loss': 11.5494, 'grad_norm': 11.526847839355469, 'learning_rate': 4.438521855612054e-05, 'ppl': 103714.78927, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 93.53, 'tokens/train_per_sec_per_gpu': 774.8507690429688, 'epoch': 0.48, 'tokens/total': 5807872.0, 'tokens/trainable': 5801589.0}
 24%|████████████████████████████████████████████                                                                                                                                            | 70/292 [13:38<45:23, 12.27s/it] 24%|████████████████████████████████████████████▋                                                                                                                                           | 71/292 [13:50<44:22, 12.05s/it]                                                                                                                                                                                                                              {'loss': 11.1166, 'grad_norm': 10.137777328491211, 'learning_rate': 4.421002777142148e-05, 'ppl': 67278.76915, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 59.46, 'tokens/train_per_sec_per_gpu': 1211.06640625, 'epoch': 0.49, 'tokens/total': 5899456.0, 'tokens/trainable': 5893073.0}
 24%|████████████████████████████████████████████▋                                                                                                                                           | 71/292 [13:50<44:22, 12.05s/it] 25%|█████████████████████████████████████████████▎                                                                                                                                          | 72/292 [13:59<41:19, 11.27s/it]                                                                                                                                                                                                                              {'loss': 11.2561, 'grad_norm': 10.271337509155273, 'learning_rate': 4.4032502810608614e-05, 'ppl': 77350.32054, 'memory/max_active (GiB)': 23.21, 'memory/max_allocated (GiB)': 21.16, 'memory/device_reserved (GiB)': 59.46, 'tokens/train_per_sec_per_gpu': 840.1568603515625, 'epoch': 0.49, 'tokens/total': 5950528.0, 'tokens/trainable': 5944044.0}
 25%|█████████████████████████████████████████████▎                                                                                                                                          | 72/292 [13:59<41:19, 11.27s/it] 25%|██████████████████████████████████████████████                                                                                                                                          | 73/292 [14:09<39:05, 10.71s/it]                                                                                                                                                                                                                              {'loss': 10.531, 'grad_norm': 12.101551055908203, 'learning_rate': 4.385266524442241e-05, 'ppl': 37458.91457, 'memory/max_active (GiB)': 28.69, 'memory/max_allocated (GiB)': 26.54, 'memory/device_reserved (GiB)': 59.46, 'tokens/train_per_sec_per_gpu': 382.9737548828125, 'epoch': 0.5, 'tokens/total': 6008512.0, 'tokens/trainable': 6001852.0}
 25%|██████████████████████████████████████████████                                                                                                                                          | 73/292 [14:09<39:05, 10.71s/it] 25%|██████████████████████████████████████████████▋                                                                                                                                         | 74/292 [14:20<39:56, 10.99s/it]                                                                                                                                                                                                                              {'loss': 10.9169, 'grad_norm': 11.274868965148926, 'learning_rate': 4.367053692460385e-05, 'ppl': 55099.72477, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 59.46, 'tokens/train_per_sec_per_gpu': 1092.1724853515625, 'epoch': 0.51, 'tokens/total': 6094144.0, 'tokens/trainable': 6087400.0}
 25%|██████████████████████████████████████████████▋                                                                                                                                         | 74/292 [14:20<39:56, 10.99s/it][2026-01-06 02:08:56,272] [INFO] [axolotl.core.trainers.base.evaluate:400] [PID:13318] Running evaluation step...

  0%|                                                                                                                                                                                                   | 0/6 [00:00<?, ?it/s][A
 33%|██████████████████████████████████████████████████████████████▎                                                                                                                            | 2/6 [00:01<00:03,  1.17it/s][A
 50%|█████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                             | 3/6 [00:02<00:02,  1.18it/s][A
 67%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                              | 4/6 [00:03<00:01,  1.00it/s][A
 83%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                               | 5/6 [00:05<00:01,  1.13s/it][A
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:06<00:00,  1.20s/it][A                                                                                                                                                                                                                              
                                                                                                                                                                                                                              [A{'eval_loss': 2.68322491645813, 'eval_runtime': 8.0693, 'eval_samples_per_second': 0.744, 'eval_steps_per_second': 0.372, 'eval_ppl': 14.6322, 'memory/max_active (GiB)': 34.82, 'memory/max_allocated (GiB)': 33.36, 'memory/device_reserved (GiB)': 92.77, 'epoch': 0.51, 'tokens/train_per_sec_per_gpu': 0.0, 'tokens/total': 6094144.0, 'tokens/trainable': 6087400.0}
 25%|██████████████████████████████████████████████▋                                                                                                                                         | 74/292 [14:28<39:56, 10.99s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:06<00:00,  1.20s/it][A
                                                                                                                                                                                                                              [A 26%|███████████████████████████████████████████████▎                                                                                                                                        | 75/292 [14:39<48:29, 13.41s/it]                                                                                                                                                                                                                              {'loss': 10.8045, 'grad_norm': 10.694170951843262, 'learning_rate': 4.3486139981239304e-05, 'ppl': 49241.89182, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.74, 'memory/device_reserved (GiB)': 63.32, 'tokens/train_per_sec_per_gpu': 1494.0018310546875, 'epoch': 0.51, 'tokens/total': 6168384.0, 'tokens/trainable': 6161526.0}
 26%|███████████████████████████████████████████████▎                                                                                                                                        | 75/292 [14:39<48:29, 13.41s/it] 26%|███████████████████████████████████████████████▉                                                                                                                                        | 76/292 [14:50<45:23, 12.61s/it]                                                                                                                                                                                                                              {'loss': 11.2288, 'grad_norm': 9.749185562133789, 'learning_rate': 4.3299496820071546e-05, 'ppl': 75267.22048, 'memory/max_active (GiB)': 28.81, 'memory/max_allocated (GiB)': 26.75, 'memory/device_reserved (GiB)': 63.32, 'tokens/train_per_sec_per_gpu': 376.3951416015625, 'epoch': 0.52, 'tokens/total': 6236224.0, 'tokens/trainable': 6229191.0}
 26%|███████████████████████████████████████████████▉                                                                                                                                        | 76/292 [14:50<45:23, 12.61s/it] 26%|████████████████████████████████████████████████▌                                                                                                                                       | 77/292 [15:00<41:52, 11.69s/it]                                                                                                                                                                                                                              {'loss': 10.2271, 'grad_norm': 12.151470184326172, 'learning_rate': 4.311063011977723e-05, 'ppl': 27642.23125, 'memory/max_active (GiB)': 25.98, 'memory/max_allocated (GiB)': 23.88, 'memory/device_reserved (GiB)': 64.4, 'tokens/train_per_sec_per_gpu': 1004.1243286132812, 'epoch': 0.53, 'tokens/total': 6303040.0, 'tokens/trainable': 6295879.0}
 26%|████████████████████████████████████████████████▌                                                                                                                                       | 77/292 [15:00<41:52, 11.69s/it] 27%|█████████████████████████████████████████████████▏                                                                                                                                      | 78/292 [15:10<40:09, 11.26s/it]                                                                                                                                                                                                                              {'loss': 10.454, 'grad_norm': 11.687957763671875, 'learning_rate': 4.2919562829211283e-05, 'ppl': 34682.82893, 'memory/max_active (GiB)': 27.67, 'memory/max_allocated (GiB)': 25.62, 'memory/device_reserved (GiB)': 64.4, 'tokens/train_per_sec_per_gpu': 1079.1378173828125, 'epoch': 0.53, 'tokens/total': 6356992.0, 'tokens/trainable': 6349721.0}
 27%|█████████████████████████████████████████████████▏                                                                                                                                      | 78/292 [15:10<40:09, 11.26s/it] 27%|█████████████████████████████████████████████████▊                                                                                                                                      | 79/292 [15:19<37:57, 10.69s/it]                                                                                                                                                                                                                              {'loss': 11.6481, 'grad_norm': 10.91482925415039, 'learning_rate': 4.2726318164618435e-05, 'ppl': 114473.65635, 'memory/max_active (GiB)': 28.71, 'memory/max_allocated (GiB)': 26.64, 'memory/device_reserved (GiB)': 64.4, 'tokens/train_per_sec_per_gpu': 1210.665771484375, 'epoch': 0.54, 'tokens/total': 6412160.0, 'tokens/trainable': 6404719.0}
 27%|█████████████████████████████████████████████████▊                                                                                                                                      | 79/292 [15:19<37:57, 10.69s/it] 27%|██████████████████████████████████████████████████▍                                                                                                                                     | 80/292 [15:33<40:39, 11.51s/it]                                                                                                                                                                                                                              {'loss': 10.3332, 'grad_norm': 9.496088981628418, 'learning_rate': 4.2530919606812216e-05, 'ppl': 30736.3109, 'memory/max_active (GiB)': 36.16, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 87.94, 'tokens/train_per_sec_per_gpu': 1222.5479736328125, 'epoch': 0.55, 'tokens/total': 6514944.0, 'tokens/trainable': 6507372.0}
 27%|██████████████████████████████████████████████████▍                                                                                                                                     | 80/292 [15:33<40:39, 11.51s/it] 28%|███████████████████████████████████████████████████                                                                                                                                     | 81/292 [15:45<41:25, 11.78s/it]                                                                                                                                                                                                                              {'loss': 10.4892, 'grad_norm': 8.433793067932129, 'learning_rate': 4.233339089832189e-05, 'ppl': 35925.40556, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 61.47, 'tokens/train_per_sec_per_gpu': 555.1525268554688, 'epoch': 0.55, 'tokens/total': 6620928.0, 'tokens/trainable': 6613308.0}
 28%|███████████████████████████████████████████████████                                                                                                                                     | 81/292 [15:45<41:25, 11.78s/it] 28%|███████████████████████████████████████████████████▋                                                                                                                                    | 82/292 [15:54<38:38, 11.04s/it]                                                                                                                                                                                                                              {'loss': 10.6658, 'grad_norm': 17.788841247558594, 'learning_rate': 4.21337560405075e-05, 'ppl': 42864.53187, 'memory/max_active (GiB)': 27.16, 'memory/max_allocated (GiB)': 25.0, 'memory/device_reserved (GiB)': 61.47, 'tokens/train_per_sec_per_gpu': 766.8110961914062, 'epoch': 0.56, 'tokens/total': 6683456.0, 'tokens/trainable': 6675699.0}
 28%|███████████████████████████████████████████████████▋                                                                                                                                    | 82/292 [15:54<38:38, 11.04s/it] 28%|████████████████████████████████████████████████████▎                                                                                                                                   | 83/292 [16:05<38:02, 10.92s/it]                                                                                                                                                                                                                              {'loss': 10.6055, 'grad_norm': 10.50762939453125, 'learning_rate': 4.193203929064353e-05, 'ppl': 40356.18719, 'memory/max_active (GiB)': 32.09, 'memory/max_allocated (GiB)': 29.98, 'memory/device_reserved (GiB)': 61.47, 'tokens/train_per_sec_per_gpu': 392.1805419921875, 'epoch': 0.57, 'tokens/total': 6768960.0, 'tokens/trainable': 6761058.0}
 28%|████████████████████████████████████████████████████▎                                                                                                                                   | 83/292 [16:05<38:02, 10.92s/it] 29%|████████████████████████████████████████████████████▉                                                                                                                                   | 84/292 [16:18<39:36, 11.43s/it]                                                                                                                                                                                                                              {'loss': 10.3089, 'grad_norm': 9.794668197631836, 'learning_rate': 4.172826515897146e-05, 'ppl': 29998.42022, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 61.47, 'tokens/train_per_sec_per_gpu': 1299.6966552734375, 'epoch': 0.58, 'tokens/total': 6884160.0, 'tokens/trainable': 6876233.0}
 29%|████████████████████████████████████████████████████▉                                                                                                                                   | 84/292 [16:18<39:36, 11.43s/it] 29%|█████████████████████████████████████████████████████▌                                                                                                                                  | 85/292 [16:29<39:39, 11.49s/it]                                                                                                                                                                                                                              {'loss': 10.1642, 'grad_norm': 10.840988159179688, 'learning_rate': 4.152245840572153e-05, 'ppl': 25957.08821, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 61.47, 'tokens/train_per_sec_per_gpu': 145.31387329101562, 'epoch': 0.58, 'tokens/total': 6966208.0, 'tokens/trainable': 6958230.0}
 29%|█████████████████████████████████████████████████████▌                                                                                                                                  | 85/292 [16:29<39:39, 11.49s/it] 29%|██████████████████████████████████████████████████████▏                                                                                                                                 | 86/292 [16:40<39:06, 11.39s/it]                                                                                                                                                                                                                              {'loss': 9.883, 'grad_norm': 11.022953987121582, 'learning_rate': 4.131464403810422e-05, 'ppl': 19594.41783, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 61.47, 'tokens/train_per_sec_per_gpu': 673.7172241210938, 'epoch': 0.59, 'tokens/total': 7040896.0, 'tokens/trainable': 7032844.0}
 29%|██████████████████████████████████████████████████████▏                                                                                                                                 | 86/292 [16:40<39:06, 11.39s/it] 30%|██████████████████████████████████████████████████████▊                                                                                                                                 | 87/292 [16:52<38:48, 11.36s/it]                                                                                                                                                                                                                              {'loss': 10.1785, 'grad_norm': 10.094700813293457, 'learning_rate': 4.110484730727161e-05, 'ppl': 26330.94125, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 61.47, 'tokens/train_per_sec_per_gpu': 293.856201171875, 'epoch': 0.6, 'tokens/total': 7121664.0, 'tokens/trainable': 7113579.0}
 30%|██████████████████████████████████████████████████████▊                                                                                                                                 | 87/292 [16:52<38:48, 11.36s/it] 30%|███████████████████████████████████████████████████████▍                                                                                                                                | 88/292 [17:01<36:23, 10.70s/it]                                                                                                                                                                                                                              {'loss': 11.3251, 'grad_norm': 10.585225105285645, 'learning_rate': 4.089309370524921e-05, 'ppl': 82875.93421, 'memory/max_active (GiB)': 29.8, 'memory/max_allocated (GiB)': 27.52, 'memory/device_reserved (GiB)': 61.47, 'tokens/train_per_sec_per_gpu': 578.405517578125, 'epoch': 0.6, 'tokens/total': 7187072.0, 'tokens/trainable': 7178922.0}
 30%|███████████████████████████████████████████████████████▍                                                                                                                                | 88/292 [17:01<36:23, 10.70s/it] 30%|████████████████████████████████████████████████████████                                                                                                                                | 89/292 [17:13<37:27, 11.07s/it]                                                                                                                                                                                                                              {'loss': 10.1031, 'grad_norm': 8.632939338684082, 'learning_rate': 4.067940896183843e-05, 'ppl': 24418.58984, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 61.47, 'tokens/train_per_sec_per_gpu': 1374.6007080078125, 'epoch': 0.61, 'tokens/total': 7284480.0, 'tokens/trainable': 7276286.0}
 30%|████████████████████████████████████████████████████████                                                                                                                                | 89/292 [17:13<37:27, 11.07s/it] 31%|████████████████████████████████████████████████████████▋                                                                                                                               | 90/292 [17:23<36:39, 10.89s/it]                                                                                                                                                                                                                              {'loss': 10.0951, 'grad_norm': 11.287801742553711, 'learning_rate': 4.046381904149024e-05, 'ppl': 24224.02044, 'memory/max_active (GiB)': 26.76, 'memory/max_allocated (GiB)': 24.64, 'memory/device_reserved (GiB)': 61.47, 'tokens/train_per_sec_per_gpu': 960.5406494140625, 'epoch': 0.62, 'tokens/total': 7340608.0, 'tokens/trainable': 7332236.0}
 31%|████████████████████████████████████████████████████████▋                                                                                                                               | 90/292 [17:23<36:39, 10.89s/it] 31%|█████████████████████████████████████████████████████████▎                                                                                                                              | 91/292 [17:35<36:50, 11.00s/it]                                                                                                                                                                                                                              {'loss': 11.5662, 'grad_norm': 10.730464935302734, 'learning_rate': 4.024635014015023e-05, 'ppl': 105471.91627, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 58.28, 'tokens/train_per_sec_per_gpu': 545.7241821289062, 'epoch': 0.62, 'tokens/total': 7429440.0, 'tokens/trainable': 7420991.0}
 31%|█████████████████████████████████████████████████████████▎                                                                                                                              | 91/292 [17:35<36:50, 11.00s/it] 32%|█████████████████████████████████████████████████████████▉                                                                                                                              | 92/292 [17:45<35:46, 10.73s/it]                                                                                                                                                                                                                              {'loss': 10.5496, 'grad_norm': 10.551201820373535, 'learning_rate': 4.002702868207563e-05, 'ppl': 38162.17039, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 58.28, 'tokens/train_per_sec_per_gpu': 689.8139038085938, 'epoch': 0.63, 'tokens/total': 7501760.0, 'tokens/trainable': 7493277.0}
 32%|█████████████████████████████████████████████████████████▉                                                                                                                              | 92/292 [17:45<35:46, 10.73s/it] 32%|██████████████████████████████████████████████████████████▌                                                                                                                             | 93/292 [17:56<36:21, 10.96s/it]                                                                                                                                                                                                                              {'loss': 10.709, 'grad_norm': 9.262650489807129, 'learning_rate': 3.9805881316624506e-05, 'ppl': 44756.85964, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 58.28, 'tokens/train_per_sec_per_gpu': 436.4534912109375, 'epoch': 0.64, 'tokens/total': 7585600.0, 'tokens/trainable': 7577058.0}
 32%|██████████████████████████████████████████████████████████▌                                                                                                                             | 93/292 [17:56<36:21, 10.96s/it] 32%|███████████████████████████████████████████████████████████▏                                                                                                                            | 94/292 [18:08<37:18, 11.30s/it]                                                                                                                                                                                                                              {'loss': 10.2785, 'grad_norm': 10.444571495056152, 'learning_rate': 3.9582934915017665e-05, 'ppl': 29100.19051, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 58.28, 'tokens/train_per_sec_per_gpu': 1354.94970703125, 'epoch': 0.64, 'tokens/total': 7681152.0, 'tokens/trainable': 7672526.0}
 32%|███████████████████████████████████████████████████████████▏                                                                                                                            | 94/292 [18:08<37:18, 11.30s/it] 33%|███████████████████████████████████████████████████████████▊                                                                                                                            | 95/292 [18:19<36:19, 11.07s/it]                                                                                                                                                                                                                              {'loss': 10.0399, 'grad_norm': 11.018216133117676, 'learning_rate': 3.935821656707359e-05, 'ppl': 22923.09049, 'memory/max_active (GiB)': 30.77, 'memory/max_allocated (GiB)': 28.59, 'memory/device_reserved (GiB)': 58.28, 'tokens/train_per_sec_per_gpu': 297.35467529296875, 'epoch': 0.65, 'tokens/total': 7741056.0, 'tokens/trainable': 7732286.0}
 33%|███████████████████████████████████████████████████████████▊                                                                                                                            | 95/292 [18:19<36:19, 11.07s/it] 33%|████████████████████████████████████████████████████████████▍                                                                                                                           | 96/292 [18:31<36:53, 11.29s/it]                                                                                                                                                                                                                              {'loss': 11.2594, 'grad_norm': 9.483807563781738, 'learning_rate': 3.91317535779168e-05, 'ppl': 77605.99823, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 58.28, 'tokens/train_per_sec_per_gpu': 738.2098388671875, 'epoch': 0.66, 'tokens/total': 7842560.0, 'tokens/trainable': 7833746.0}
 33%|████████████████████████████████████████████████████████████▍                                                                                                                           | 96/292 [18:31<36:53, 11.29s/it] 33%|█████████████████████████████████████████████████████████████                                                                                                                           | 97/292 [18:41<35:34, 10.94s/it]                                                                                                                                                                                                                              {'loss': 11.4877, 'grad_norm': 10.450533866882324, 'learning_rate': 3.890357346466001e-05, 'ppl': 97509.00386, 'memory/max_active (GiB)': 23.65, 'memory/max_allocated (GiB)': 21.47, 'memory/device_reserved (GiB)': 58.28, 'tokens/train_per_sec_per_gpu': 688.2056274414062, 'epoch': 0.66, 'tokens/total': 7904896.0, 'tokens/trainable': 7895934.0}
 33%|█████████████████████████████████████████████████████████████                                                                                                                           | 97/292 [18:41<35:34, 10.94s/it] 34%|█████████████████████████████████████████████████████████████▊                                                                                                                          | 98/292 [18:51<34:43, 10.74s/it]                                                                                                                                                                                                                              {'loss': 11.2753, 'grad_norm': 16.486268997192383, 'learning_rate': 3.867370395306068e-05, 'ppl': 78849.79559, 'memory/max_active (GiB)': 33.59, 'memory/max_allocated (GiB)': 31.36, 'memory/device_reserved (GiB)': 58.28, 'tokens/train_per_sec_per_gpu': 533.9088745117188, 'epoch': 0.67, 'tokens/total': 7979904.0, 'tokens/trainable': 7970857.0}
 34%|█████████████████████████████████████████████████████████████▊                                                                                                                          | 98/292 [18:51<34:43, 10.74s/it] 34%|██████████████████████████████████████████████████████████████▍                                                                                                                         | 99/292 [19:03<35:46, 11.12s/it]                                                                                                                                                                                                                              {'loss': 9.8709, 'grad_norm': 14.060924530029297, 'learning_rate': 3.844217297415196e-05, 'ppl': 19358.75401, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 58.28, 'tokens/train_per_sec_per_gpu': 715.5191650390625, 'epoch': 0.68, 'tokens/total': 8092864.0, 'tokens/trainable': 8083789.0}
 34%|██████████████████████████████████████████████████████████████▍                                                                                                                         | 99/292 [19:04<35:46, 11.12s/it] 34%|██████████████████████████████████████████████████████████████▋                                                                                                                        | 100/292 [19:15<36:17, 11.34s/it]                                                                                                                                                                                                                              {'loss': 11.2412, 'grad_norm': 11.018702507019043, 'learning_rate': 3.8209008660848974e-05, 'ppl': 76206.34455, 'memory/max_active (GiB)': 29.8, 'memory/max_allocated (GiB)': 27.52, 'memory/device_reserved (GiB)': 58.28, 'tokens/train_per_sec_per_gpu': 951.6370239257812, 'epoch': 0.68, 'tokens/total': 8163968.0, 'tokens/trainable': 8154823.0}
 34%|██████████████████████████████████████████████████████████████▋                                                                                                                        | 100/292 [19:15<36:17, 11.34s/it] 35%|███████████████████████████████████████████████████████████████▎                                                                                                                       | 101/292 [19:25<35:17, 11.09s/it]                                                                                                                                                                                                                              {'loss': 10.5795, 'grad_norm': 11.04176139831543, 'learning_rate': 3.797423934453038e-05, 'ppl': 39320.44926, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 55.01, 'tokens/train_per_sec_per_gpu': 135.9639129638672, 'epoch': 0.69, 'tokens/total': 8236544.0, 'tokens/trainable': 8227264.0}
 35%|███████████████████████████████████████████████████████████████▎                                                                                                                       | 101/292 [19:25<35:17, 11.09s/it] 35%|███████████████████████████████████████████████████████████████▉                                                                                                                       | 102/292 [19:37<35:36, 11.24s/it]                                                                                                                                                                                                                              {'loss': 10.7414, 'grad_norm': 9.757561683654785, 'learning_rate': 3.773789355159587e-05, 'ppl': 46230.72965, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 55.01, 'tokens/train_per_sec_per_gpu': 801.3583374023438, 'epoch': 0.7, 'tokens/total': 8324608.0, 'tokens/trainable': 8315299.0}
 35%|███████████████████████████████████████████████████████████████▉                                                                                                                       | 102/292 [19:38<35:36, 11.24s/it] 35%|████████████████████████████████████████████████████████████████▌                                                                                                                      | 103/292 [19:48<35:16, 11.20s/it]                                                                                                                                                                                                                              {'loss': 10.6848, 'grad_norm': 13.148045539855957, 'learning_rate': 3.7500000000000003e-05, 'ppl': 43686.74426, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 55.01, 'tokens/train_per_sec_per_gpu': 1283.6297607421875, 'epoch': 0.71, 'tokens/total': 8402368.0, 'tokens/trainable': 8392961.0}
 35%|████████████████████████████████████████████████████████████████▌                                                                                                                      | 103/292 [19:49<35:16, 11.20s/it] 36%|█████████████████████████████████████████████████████████████████▏                                                                                                                     | 104/292 [19:59<34:44, 11.09s/it]                                                                                                                                                                                                                              {'loss': 10.4184, 'grad_norm': 10.46907901763916, 'learning_rate': 3.726058759576271e-05, 'ppl': 33469.83954, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 55.01, 'tokens/train_per_sec_per_gpu': 225.94459533691406, 'epoch': 0.71, 'tokens/total': 8487872.0, 'tokens/trainable': 8478427.0}
 36%|█████████████████████████████████████████████████████████████████▏                                                                                                                     | 104/292 [19:59<34:44, 11.09s/it] 36%|█████████████████████████████████████████████████████████████████▊                                                                                                                     | 105/292 [20:11<35:27, 11.38s/it]                                                                                                                                                                                                                              {'loss': 9.8588, 'grad_norm': 8.773171424865723, 'learning_rate': 3.7019685429456986e-05, 'ppl': 19125.92455, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 55.01, 'tokens/train_per_sec_per_gpu': 1409.4853515625, 'epoch': 0.72, 'tokens/total': 8593344.0, 'tokens/trainable': 8583809.0}
 36%|█████████████████████████████████████████████████████████████████▊                                                                                                                     | 105/292 [20:12<35:27, 11.38s/it] 36%|██████████████████████████████████████████████████████████████████▍                                                                                                                    | 106/292 [20:21<33:39, 10.86s/it]                                                                                                                                                                                                                              {'loss': 9.157, 'grad_norm': 8.994891166687012, 'learning_rate': 3.6777322772674186e-05, 'ppl': 9480.57265, 'memory/max_active (GiB)': 31.83, 'memory/max_allocated (GiB)': 29.72, 'memory/device_reserved (GiB)': 55.01, 'tokens/train_per_sec_per_gpu': 1490.2008056640625, 'epoch': 0.73, 'tokens/total': 8645248.0, 'tokens/trainable': 8635545.0}
 36%|██████████████████████████████████████████████████████████████████▍                                                                                                                    | 106/292 [20:21<33:39, 10.86s/it] 37%|███████████████████████████████████████████████████████████████████                                                                                                                    | 107/292 [20:31<32:57, 10.69s/it]                                                                                                                                                                                                                              {'loss': 9.9594, 'grad_norm': 12.329538345336914, 'learning_rate': 3.65335290744672e-05, 'ppl': 21150.10185, 'memory/max_active (GiB)': 25.32, 'memory/max_allocated (GiB)': 23.31, 'memory/device_reserved (GiB)': 55.01, 'tokens/train_per_sec_per_gpu': 779.9320678710938, 'epoch': 0.73, 'tokens/total': 8718528.0, 'tokens/trainable': 8708704.0}
 37%|███████████████████████████████████████████████████████████████████                                                                                                                    | 107/292 [20:31<32:57, 10.69s/it] 37%|███████████████████████████████████████████████████████████████████▋                                                                                                                   | 108/292 [20:43<33:38, 10.97s/it]                                                                                                                                                                                                                              {'loss': 11.1763, 'grad_norm': 33.85904312133789, 'learning_rate': 3.628833395777224e-05, 'ppl': 71417.62738, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 55.01, 'tokens/train_per_sec_per_gpu': 1167.7686767578125, 'epoch': 0.74, 'tokens/total': 8802304.0, 'tokens/trainable': 8792305.0}
 37%|███████████████████████████████████████████████████████████████████▋                                                                                                                   | 108/292 [20:43<33:38, 10.97s/it] 37%|████████████████████████████████████████████████████████████████████▎                                                                                                                  | 109/292 [20:54<34:01, 11.16s/it]                                                                                                                                                                                                                              {'loss': 10.4702, 'grad_norm': 8.886824607849121, 'learning_rate': 3.604176721580935e-05, 'ppl': 35249.26652, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 55.01, 'tokens/train_per_sec_per_gpu': 799.2709350585938, 'epoch': 0.75, 'tokens/total': 8893632.0, 'tokens/trainable': 8883483.0}
 37%|████████████████████████████████████████████████████████████████████▎                                                                                                                  | 109/292 [20:54<34:01, 11.16s/it] 38%|████████████████████████████████████████████████████████████████████▉                                                                                                                  | 110/292 [21:07<35:10, 11.59s/it]                                                                                                                                                                                                                              {'loss': 10.5194, 'grad_norm': 9.039085388183594, 'learning_rate': 3.579385880846232e-05, 'ppl': 37026.90168, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 55.01, 'tokens/train_per_sec_per_gpu': 670.75048828125, 'epoch': 0.75, 'tokens/total': 8998144.0, 'tokens/trainable': 8987971.0}
 38%|████████████████████████████████████████████████████████████████████▉                                                                                                                  | 110/292 [21:07<35:10, 11.59s/it] 38%|█████████████████████████████████████████████████████████████████████▌                                                                                                                 | 111/292 [21:17<34:09, 11.32s/it]                                                                                                                                                                                                                              {'loss': 10.691, 'grad_norm': 12.088001251220703, 'learning_rate': 3.5544638858638304e-05, 'ppl': 43958.44347, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.56, 'tokens/train_per_sec_per_gpu': 320.46234130859375, 'epoch': 0.76, 'tokens/total': 9073536.0, 'tokens/trainable': 9063337.0}
 38%|█████████████████████████████████████████████████████████████████████▌                                                                                                                 | 111/292 [21:18<34:09, 11.32s/it][2026-01-06 02:15:53,533] [INFO] [axolotl.core.trainers.base.evaluate:400] [PID:13318] Running evaluation step...

  0%|                                                                                                                                                                                                   | 0/6 [00:00<?, ?it/s][A
 33%|██████████████████████████████████████████████████████████████▎                                                                                                                            | 2/6 [00:01<00:03,  1.20it/s][A
 50%|█████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                             | 3/6 [00:02<00:02,  1.19it/s][A
 67%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                              | 4/6 [00:03<00:01,  1.01it/s][A
 83%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                               | 5/6 [00:05<00:01,  1.13s/it][A
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:06<00:00,  1.17s/it][A                                                                                                                                                                                                                              
                                                                                                                                                                                                                              [A{'eval_loss': 2.656898260116577, 'eval_runtime': 7.9858, 'eval_samples_per_second': 0.751, 'eval_steps_per_second': 0.376, 'eval_ppl': 14.25201, 'memory/max_active (GiB)': 34.82, 'memory/max_allocated (GiB)': 33.36, 'memory/device_reserved (GiB)': 92.77, 'epoch': 0.76, 'tokens/train_per_sec_per_gpu': 0.0, 'tokens/total': 9073536.0, 'tokens/trainable': 9063337.0}
 38%|█████████████████████████████████████████████████████████████████████▌                                                                                                                 | 111/292 [21:26<34:09, 11.32s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:06<00:00,  1.17s/it][A
                                                                                                                                                                                                                              [A 38%|██████████████████████████████████████████████████████████████████████▏                                                                                                                | 112/292 [21:39<43:24, 14.47s/it]                                                                                                                                                                                                                              {'loss': 10.7228, 'grad_norm': 14.350635528564453, 'learning_rate': 3.5294137648607625e-05, 'ppl': 45378.78572, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 63.32, 'tokens/train_per_sec_per_gpu': 1172.4033203125, 'epoch': 0.77, 'tokens/total': 9187456.0, 'tokens/trainable': 9177214.0}
 38%|██████████████████████████████████████████████████████████████████████▏                                                                                                                | 112/292 [21:39<43:24, 14.47s/it] 39%|██████████████████████████████████████████████████████████████████████▊                                                                                                                | 113/292 [21:48<38:05, 12.77s/it]                                                                                                                                                                                                                              {'loss': 9.6772, 'grad_norm': 12.27252197265625, 'learning_rate': 3.504238561632424e-05, 'ppl': 15949.77498, 'memory/max_active (GiB)': 16.54, 'memory/max_allocated (GiB)': 14.59, 'memory/device_reserved (GiB)': 63.32, 'tokens/train_per_sec_per_gpu': 459.08099365234375, 'epoch': 0.77, 'tokens/total': 9219712.0, 'tokens/trainable': 9209289.0}
 39%|██████████████████████████████████████████████████████████████████████▊                                                                                                                | 113/292 [21:48<38:05, 12.77s/it] 39%|███████████████████████████████████████████████████████████████████████▍                                                                                                               | 114/292 [22:00<37:11, 12.54s/it]                                                                                                                                                                                                                              {'loss': 11.0268, 'grad_norm': 14.863158226013184, 'learning_rate': 3.478941335172729e-05, 'ppl': 61500.46409, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 67.93, 'tokens/train_per_sec_per_gpu': 1365.73828125, 'epoch': 0.78, 'tokens/total': 9302592.0, 'tokens/trainable': 9292148.0}
 39%|███████████████████████████████████████████████████████████████████████▍                                                                                                               | 114/292 [22:00<37:11, 12.54s/it] 39%|████████████████████████████████████████████████████████████████████████                                                                                                               | 115/292 [22:11<35:21, 11.99s/it]                                                                                                                                                                                                                              {'loss': 10.041, 'grad_norm': 9.471566200256348, 'learning_rate': 3.453525159302415e-05, 'ppl': 22948.31977, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 67.93, 'tokens/train_per_sec_per_gpu': 787.76708984375, 'epoch': 0.79, 'tokens/total': 9374592.0, 'tokens/trainable': 9364093.0}
 39%|████████████████████████████████████████████████████████████████████████                                                                                                               | 115/292 [22:11<35:21, 11.99s/it] 40%|████████████████████████████████████████████████████████████████████████▋                                                                                                              | 116/292 [22:22<34:42, 11.83s/it]                                                                                                                                                                                                                              {'loss': 10.4018, 'grad_norm': 8.656614303588867, 'learning_rate': 3.427993122295552e-05, 'ppl': 32918.82627, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 67.93, 'tokens/train_per_sec_per_gpu': 1445.7984619140625, 'epoch': 0.79, 'tokens/total': 9458176.0, 'tokens/trainable': 9447589.0}
 40%|████████████████████████████████████████████████████████████████████████▋                                                                                                              | 116/292 [22:22<34:42, 11.83s/it] 40%|█████████████████████████████████████████████████████████████████████████▎                                                                                                             | 117/292 [22:34<34:09, 11.71s/it]                                                                                                                                                                                                                              {'loss': 9.9748, 'grad_norm': 14.357237815856934, 'learning_rate': 3.4023483265042874e-05, 'ppl': 21478.33432, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 67.93, 'tokens/train_per_sec_per_gpu': 126.87413024902344, 'epoch': 0.8, 'tokens/total': 9535296.0, 'tokens/trainable': 9524622.0}
 40%|█████████████████████████████████████████████████████████████████████████▎                                                                                                             | 117/292 [22:34<34:09, 11.71s/it] 40%|█████████████████████████████████████████████████████████████████████████▉                                                                                                             | 118/292 [22:45<33:30, 11.55s/it]                                                                                                                                                                                                                              {'loss': 11.3553, 'grad_norm': 16.961658477783203, 'learning_rate': 3.376593887981887e-05, 'ppl': 85416.96385, 'memory/max_active (GiB)': 37.03, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 67.93, 'tokens/train_per_sec_per_gpu': 1465.13671875, 'epoch': 0.81, 'tokens/total': 9606592.0, 'tokens/trainable': 9595884.0}
 40%|█████████████████████████████████████████████████████████████████████████▉                                                                                                             | 118/292 [22:45<33:30, 11.55s/it] 41%|██████████████████████████████████████████████████████████████████████████▌                                                                                                            | 119/292 [22:55<31:54, 11.07s/it]                                                                                                                                                                                                                              {'loss': 10.6742, 'grad_norm': 10.10484790802002, 'learning_rate': 3.350732936104108e-05, 'ppl': 43226.11044, 'memory/max_active (GiB)': 30.36, 'memory/max_allocated (GiB)': 28.18, 'memory/device_reserved (GiB)': 67.93, 'tokens/train_per_sec_per_gpu': 375.2933349609375, 'epoch': 0.82, 'tokens/total': 9687552.0, 'tokens/trainable': 9676724.0}
 41%|██████████████████████████████████████████████████████████████████████████▌                                                                                                            | 119/292 [22:55<31:54, 11.07s/it] 41%|███████████████████████████████████████████████████████████████████████████▏                                                                                                           | 120/292 [23:06<31:29, 10.98s/it]                                                                                                                                                                                                                              {'loss': 10.4299, 'grad_norm': 12.969881057739258, 'learning_rate': 3.3247686131889574e-05, 'ppl': 33856.96439, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 67.93, 'tokens/train_per_sec_per_gpu': 1535.1326904296875, 'epoch': 0.82, 'tokens/total': 9752384.0, 'tokens/trainable': 9741447.0}
 41%|███████████████████████████████████████████████████████████████████████████▏                                                                                                           | 120/292 [23:06<31:29, 10.98s/it] 41%|███████████████████████████████████████████████████████████████████████████▊                                                                                                           | 121/292 [23:16<30:44, 10.79s/it]                                                                                                                                                                                                                              {'loss': 11.4844, 'grad_norm': 10.693540573120117, 'learning_rate': 3.29870407411487e-05, 'ppl': 97187.7545, 'memory/max_active (GiB)': 30.27, 'memory/max_allocated (GiB)': 28.23, 'memory/device_reserved (GiB)': 61.54, 'tokens/train_per_sec_per_gpu': 1191.47216796875, 'epoch': 0.83, 'tokens/total': 9824192.0, 'tokens/trainable': 9813109.0}
 41%|███████████████████████████████████████████████████████████████████████████▊                                                                                                           | 121/292 [23:16<30:44, 10.79s/it] 42%|████████████████████████████████████████████████████████████████████████████▍                                                                                                          | 122/292 [23:27<31:17, 11.04s/it]                                                                                                                                                                                                                              {'loss': 10.199, 'grad_norm': 8.101689338684082, 'learning_rate': 3.272542485937369e-05, 'ppl': 26876.29634, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 77.54, 'tokens/train_per_sec_per_gpu': 1422.23876953125, 'epoch': 0.84, 'tokens/total': 9919616.0, 'tokens/trainable': 9908514.0}
 42%|████████████████████████████████████████████████████████████████████████████▍                                                                                                          | 122/292 [23:28<31:17, 11.04s/it] 42%|█████████████████████████████████████████████████████████████████████████████                                                                                                          | 123/292 [23:36<29:21, 10.42s/it]                                                                                                                                                                                                                              {'loss': 10.0138, 'grad_norm': 10.399201393127441, 'learning_rate': 3.246287027504237e-05, 'ppl': 22332.53806, 'memory/max_active (GiB)': 26.55, 'memory/max_allocated (GiB)': 24.44, 'memory/device_reserved (GiB)': 77.55, 'tokens/train_per_sec_per_gpu': 417.74249267578125, 'epoch': 0.84, 'tokens/total': 9980864.0, 'tokens/trainable': 9969639.0}
 42%|█████████████████████████████████████████████████████████████████████████████                                                                                                          | 123/292 [23:36<29:21, 10.42s/it] 42%|█████████████████████████████████████████████████████████████████████████████▋                                                                                                         | 124/292 [23:48<30:26, 10.87s/it]                                                                                                                                                                                                                              {'loss': 10.2421, 'grad_norm': 8.768990516662598, 'learning_rate': 3.2199408890692655e-05, 'ppl': 28059.99008, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 77.55, 'tokens/train_per_sec_per_gpu': 1035.2977294921875, 'epoch': 0.85, 'tokens/total': 10087168.0, 'tokens/trainable': 10075841.0}
 42%|█████████████████████████████████████████████████████████████████████████████▋                                                                                                         | 124/292 [23:48<30:26, 10.87s/it] 43%|██████████████████████████████████████████████████████████████████████████████▎                                                                                                        | 125/292 [24:00<30:29, 10.96s/it]                                                                                                                                                                                                                              {'loss': 11.6325, 'grad_norm': 10.096231460571289, 'learning_rate': 3.1935072719046115e-05, 'ppl': 112701.72432, 'memory/max_active (GiB)': 31.15, 'memory/max_allocated (GiB)': 29.05, 'memory/device_reserved (GiB)': 77.55, 'tokens/train_per_sec_per_gpu': 435.2079162597656, 'epoch': 0.86, 'tokens/total': 10162176.0, 'tokens/trainable': 10150736.0}
 43%|██████████████████████████████████████████████████████████████████████████████▎                                                                                                        | 125/292 [24:00<30:29, 10.96s/it] 43%|██████████████████████████████████████████████████████████████████████████████▉                                                                                                        | 126/292 [24:10<29:46, 10.76s/it]                                                                                                                                                                                                                              {'loss': 10.1556, 'grad_norm': 12.037337303161621, 'learning_rate': 3.1669893879118156e-05, 'ppl': 25734.8144, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 77.55, 'tokens/train_per_sec_per_gpu': 423.6293640136719, 'epoch': 0.86, 'tokens/total': 10238656.0, 'tokens/trainable': 10227125.0}
 43%|██████████████████████████████████████████████████████████████████████████████▉                                                                                                        | 126/292 [24:10<29:46, 10.76s/it] 43%|███████████████████████████████████████████████████████████████████████████████▌                                                                                                       | 127/292 [24:22<30:56, 11.25s/it]                                                                                                                                                                                                                              {'loss': 11.0659, 'grad_norm': 10.323676109313965, 'learning_rate': 3.140390459231528e-05, 'ppl': 63952.76225, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 77.55, 'tokens/train_per_sec_per_gpu': 985.3286743164062, 'epoch': 0.87, 'tokens/total': 10339200.0, 'tokens/trainable': 10327615.0}
 43%|███████████████████████████████████████████████████████████████████████████████▌                                                                                                       | 127/292 [24:22<30:56, 11.25s/it] 44%|████████████████████████████████████████████████████████████████████████████████▏                                                                                                      | 128/292 [24:36<32:44, 11.98s/it]                                                                                                                                                                                                                              {'loss': 9.6758, 'grad_norm': 11.889599800109863, 'learning_rate': 3.1137137178519985e-05, 'ppl': 15927.46091, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 77.55, 'tokens/train_per_sec_per_gpu': 1198.954345703125, 'epoch': 0.88, 'tokens/total': 10462272.0, 'tokens/trainable': 10450636.0}
 44%|████████████████████████████████████████████████████████████████████████████████▏                                                                                                      | 128/292 [24:36<32:44, 11.98s/it] 44%|████████████████████████████████████████████████████████████████████████████████▊                                                                                                      | 129/292 [24:47<31:50, 11.72s/it]                                                                                                                                                                                                                              {'loss': 11.0907, 'grad_norm': 9.236298561096191, 'learning_rate': 3.086962405216353e-05, 'ppl': 65558.6211, 'memory/max_active (GiB)': 33.12, 'memory/max_allocated (GiB)': 30.9, 'memory/device_reserved (GiB)': 77.55, 'tokens/train_per_sec_per_gpu': 1256.813720703125, 'epoch': 0.88, 'tokens/total': 10540416.0, 'tokens/trainable': 10528638.0}
 44%|████████████████████████████████████████████████████████████████████████████████▊                                                                                                      | 129/292 [24:47<31:50, 11.72s/it] 45%|█████████████████████████████████████████████████████████████████████████████████▍                                                                                                     | 130/292 [25:00<32:35, 12.07s/it]                                                                                                                                                                                                                              {'loss': 10.5846, 'grad_norm': 15.494653701782227, 'learning_rate': 3.06013977182874e-05, 'ppl': 39521.49579, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 77.55, 'tokens/train_per_sec_per_gpu': 1271.8621826171875, 'epoch': 0.89, 'tokens/total': 10633088.0, 'tokens/trainable': 10621187.0}
 45%|█████████████████████████████████████████████████████████████████████████████████▍                                                                                                     | 130/292 [25:00<32:35, 12.07s/it] 45%|██████████████████████████████████████████████████████████████████████████████████                                                                                                     | 131/292 [25:11<32:00, 11.93s/it]                                                                                                                                                                                                                              {'loss': 10.9597, 'grad_norm': 11.114017486572266, 'learning_rate': 3.0332490768593675e-05, 'ppl': 57509.18769, 'memory/max_active (GiB)': 36.68, 'memory/max_allocated (GiB)': 34.49, 'memory/device_reserved (GiB)': 63.89, 'tokens/train_per_sec_per_gpu': 1417.88916015625, 'epoch': 0.9, 'tokens/total': 10720448.0, 'tokens/trainable': 10708447.0}
 45%|██████████████████████████████████████████████████████████████████████████████████                                                                                                     | 131/292 [25:11<32:00, 11.93s/it] 45%|██████████████████████████████████████████████████████████████████████████████████▋                                                                                                    | 132/292 [25:22<30:32, 11.46s/it]                                                                                                                                                                                                                              {'loss': 9.7906, 'grad_norm': 9.437528610229492, 'learning_rate': 3.0062935877484804e-05, 'ppl': 17865.02197, 'memory/max_active (GiB)': 31.03, 'memory/max_allocated (GiB)': 28.85, 'memory/device_reserved (GiB)': 79.89, 'tokens/train_per_sec_per_gpu': 203.33883666992188, 'epoch': 0.9, 'tokens/total': 10805376.0, 'tokens/trainable': 10793257.0}
 45%|██████████████████████████████████████████████████████████████████████████████████▋                                                                                                    | 132/292 [25:22<30:32, 11.46s/it] 46%|███████████████████████████████████████████████████████████████████████████████████▎                                                                                                   | 133/292 [25:33<29:48, 11.25s/it]                                                                                                                                                                                                                              {'loss': 9.6089, 'grad_norm': 9.345086097717285, 'learning_rate': 2.9792765798093465e-05, 'ppl': 14896.77462, 'memory/max_active (GiB)': 29.83, 'memory/max_allocated (GiB)': 27.67, 'memory/device_reserved (GiB)': 79.89, 'tokens/train_per_sec_per_gpu': 225.55430603027344, 'epoch': 0.91, 'tokens/total': 10880576.0, 'tokens/trainable': 10868390.0}
 46%|███████████████████████████████████████████████████████████████████████████████████▎                                                                                                   | 133/292 [25:33<29:48, 11.25s/it] 46%|███████████████████████████████████████████████████████████████████████████████████▉                                                                                                   | 134/292 [25:44<29:32, 11.22s/it]                                                                                                                                                                                                                              {'loss': 10.917, 'grad_norm': 16.293935775756836, 'learning_rate': 2.952201335830275e-05, 'ppl': 55105.23502, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 79.96, 'tokens/train_per_sec_per_gpu': 568.6598510742188, 'epoch': 0.92, 'tokens/total': 10955008.0, 'tokens/trainable': 10942706.0}
 46%|███████████████████████████████████████████████████████████████████████████████████▉                                                                                                   | 134/292 [25:44<29:32, 11.22s/it] 46%|████████████████████████████████████████████████████████████████████████████████████▌                                                                                                  | 135/292 [25:56<30:14, 11.56s/it]                                                                                                                                                                                                                              {'loss': 10.7512, 'grad_norm': 9.124157905578613, 'learning_rate': 2.925071145675733e-05, 'ppl': 46686.01807, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 79.96, 'tokens/train_per_sec_per_gpu': 586.0256958007812, 'epoch': 0.92, 'tokens/total': 11050048.0, 'tokens/trainable': 11037662.0}
 46%|████████████████████████████████████████████████████████████████████████████████████▌                                                                                                  | 135/292 [25:56<30:14, 11.56s/it] 47%|█████████████████████████████████████████████████████████████████████████████████████▏                                                                                                 | 136/292 [26:08<30:17, 11.65s/it]                                                                                                                                                                                                                              {'loss': 10.1961, 'grad_norm': 8.995081901550293, 'learning_rate': 2.8978893058865987e-05, 'ppl': 26798.46798, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 79.96, 'tokens/train_per_sec_per_gpu': 312.3091125488281, 'epoch': 0.93, 'tokens/total': 11151424.0, 'tokens/trainable': 11139004.0}
 47%|█████████████████████████████████████████████████████████████████████████████████████▏                                                                                                 | 136/292 [26:08<30:17, 11.65s/it] 47%|█████████████████████████████████████████████████████████████████████████████████████▊                                                                                                 | 137/292 [26:19<29:20, 11.36s/it]                                                                                                                                                                                                                              {'loss': 10.2117, 'grad_norm': 7.647458553314209, 'learning_rate': 2.870659119279605e-05, 'ppl': 27219.80194, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 79.96, 'tokens/train_per_sec_per_gpu': 536.2960205078125, 'epoch': 0.94, 'tokens/total': 11238976.0, 'tokens/trainable': 11226540.0}
 47%|█████████████████████████████████████████████████████████████████████████████████████▊                                                                                                 | 137/292 [26:19<29:20, 11.36s/it] 47%|██████████████████████████████████████████████████████████████████████████████████████▍                                                                                                | 138/292 [26:31<29:33, 11.52s/it]                                                                                                                                                                                                                              {'loss': 10.1882, 'grad_norm': 7.739211082458496, 'learning_rate': 2.8433838945460205e-05, 'ppl': 26587.59413, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 79.96, 'tokens/train_per_sec_per_gpu': 1415.2030029296875, 'epoch': 0.95, 'tokens/total': 11335872.0, 'tokens/trainable': 11323388.0}
 47%|██████████████████████████████████████████████████████████████████████████████████████▍                                                                                                | 138/292 [26:31<29:33, 11.52s/it] 48%|███████████████████████████████████████████████████████████████████████████████████████                                                                                                | 139/292 [26:44<31:02, 12.17s/it]                                                                                                                                                                                                                              {'loss': 9.965, 'grad_norm': 9.073641777038574, 'learning_rate': 2.8160669458496158e-05, 'ppl': 21268.87467, 'memory/max_active (GiB)': 37.03, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 79.96, 'tokens/train_per_sec_per_gpu': 1196.521728515625, 'epoch': 0.95, 'tokens/total': 11466944.0, 'tokens/trainable': 11454460.0}
 48%|███████████████████████████████████████████████████████████████████████████████████████                                                                                                | 139/292 [26:44<31:02, 12.17s/it] 48%|███████████████████████████████████████████████████████████████████████████████████████▋                                                                                               | 140/292 [26:56<30:46, 12.15s/it]                                                                                                                                                                                                                              {'loss': 10.5671, 'grad_norm': 9.36669921875, 'learning_rate': 2.788711592423966e-05, 'ppl': 38835.88619, 'memory/max_active (GiB)': 37.03, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 79.96, 'tokens/train_per_sec_per_gpu': 220.32659912109375, 'epoch': 0.96, 'tokens/total': 11546176.0, 'tokens/trainable': 11533660.0}
 48%|███████████████████████████████████████████████████████████████████████████████████████▋                                                                                               | 140/292 [26:57<30:46, 12.15s/it] 48%|████████████████████████████████████████████████████████████████████████████████████████▎                                                                                              | 141/292 [27:07<29:33, 11.75s/it]                                                                                                                                                                                                                              {'loss': 10.9121, 'grad_norm': 10.059173583984375, 'learning_rate': 2.761321158169134e-05, 'ppl': 54835.87982, 'memory/max_active (GiB)': 28.48, 'memory/max_allocated (GiB)': 26.34, 'memory/device_reserved (GiB)': 48.05, 'tokens/train_per_sec_per_gpu': 956.5413208007812, 'epoch': 0.97, 'tokens/total': 11610560.0, 'tokens/trainable': 11597948.0}
 48%|████████████████████████████████████████████████████████████████████████████████████████▎                                                                                              | 141/292 [27:07<29:33, 11.75s/it] 49%|████████████████████████████████████████████████████████████████████████████████████████▉                                                                                              | 142/292 [27:18<29:03, 11.62s/it]                                                                                                                                                                                                                              {'loss': 10.7182, 'grad_norm': 8.981560707092285, 'learning_rate': 2.7338989712477945e-05, 'ppl': 45170.52268, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 77.06, 'tokens/train_per_sec_per_gpu': 330.9703369140625, 'epoch': 0.97, 'tokens/total': 11690432.0, 'tokens/trainable': 11677687.0}
 49%|████████████████████████████████████████████████████████████████████████████████████████▉                                                                                              | 142/292 [27:19<29:03, 11.62s/it] 49%|█████████████████████████████████████████████████████████████████████████████████████████▌                                                                                             | 143/292 [27:30<28:32, 11.49s/it]                                                                                                                                                                                                                              {'loss': 10.2219, 'grad_norm': 10.942479133605957, 'learning_rate': 2.7064483636808313e-05, 'ppl': 27498.86472, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 77.06, 'tokens/train_per_sec_per_gpu': 1468.89599609375, 'epoch': 0.98, 'tokens/total': 11764160.0, 'tokens/trainable': 11751331.0}
 49%|█████████████████████████████████████████████████████████████████████████████████████████▌                                                                                             | 143/292 [27:30<28:32, 11.49s/it] 49%|██████████████████████████████████████████████████████████████████████████████████████████▏                                                                                            | 144/292 [27:43<29:21, 11.90s/it]                                                                                                                                                                                                                              {'loss': 9.7564, 'grad_norm': 7.87283992767334, 'learning_rate': 2.678972670942468e-05, 'ppl': 17264.36794, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 77.06, 'tokens/train_per_sec_per_gpu': 1274.6510009765625, 'epoch': 0.99, 'tokens/total': 11879552.0, 'tokens/trainable': 11866694.0}
 49%|██████████████████████████████████████████████████████████████████████████████████████████▏                                                                                            | 144/292 [27:43<29:21, 11.90s/it] 50%|██████████████████████████████████████████████████████████████████████████████████████████▊                                                                                            | 145/292 [27:55<29:34, 12.07s/it]                                                                                                                                                                                                                              {'loss': 9.9985, 'grad_norm': 7.173329830169678, 'learning_rate': 2.6514752315549847e-05, 'ppl': 21993.45086, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 77.06, 'tokens/train_per_sec_per_gpu': 805.0530395507812, 'epoch': 0.99, 'tokens/total': 11980224.0, 'tokens/trainable': 11967322.0}
 50%|██████████████████████████████████████████████████████████████████████████████████████████▊                                                                                            | 145/292 [27:56<29:34, 12.07s/it] 50%|███████████████████████████████████████████████████████████████████████████████████████████▌                                                                                           | 146/292 [28:09<30:51, 12.68s/it]                                                                                                                                                                                                                              {'loss': 9.8355, 'grad_norm': 10.376947402954102, 'learning_rate': 2.623959386683056e-05, 'ppl': 18685.44206, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 77.09, 'tokens/train_per_sec_per_gpu': 1282.34814453125, 'epoch': 1.0, 'tokens/total': 12063424.0, 'tokens/trainable': 12050432.0}
 50%|███████████████████████████████████████████████████████████████████████████████████████████▌                                                                                           | 146/292 [28:09<30:51, 12.68s/it][2026-01-06 02:22:45,061] [INFO] [axolotl.core.trainers.base._save:722] [PID:13318] Saving model checkpoint to stage1/checkpoint-146
[2026-01-06 02:22:58,721] [WARNING] [py.warnings._showwarnmsg:110] [PID:13318] /workspace/venv/lib/python3.12/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py:675: FutureWarning: FSDP.state_dict_type() and FSDP.set_state_dict_type() are being deprecated. Please use APIs, get_state_dict() and set_state_dict(), which can support different parallelisms, FSDP1, FSDP2, DDP. API doc: https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.state_dict.get_state_dict .Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html .
  warnings.warn(

 50%|███████████████████████████████████████████████████████████████████████████████████████████                                                                                          | 147/292 [29:24<1:16:01, 31.46s/it]                                                                                                                                                                                                                              {'loss': 8.8876, 'grad_norm': 33.536434173583984, 'learning_rate': 2.5964284797277762e-05, 'ppl': 7241.61843, 'memory/max_active (GiB)': 30.36, 'memory/max_allocated (GiB)': 28.18, 'memory/device_reserved (GiB)': 50.71, 'tokens/train_per_sec_per_gpu': 123.207763671875, 'epoch': 1.01, 'tokens/total': 12133632.0, 'tokens/trainable': 12120546.0}
 50%|███████████████████████████████████████████████████████████████████████████████████████████                                                                                          | 147/292 [29:24<1:16:01, 31.46s/it] 51%|███████████████████████████████████████████████████████████████████████████████████████████▋                                                                                         | 148/292 [29:38<1:02:41, 26.12s/it]                                                                                                                                                                                                                              {'loss': 8.4355, 'grad_norm': 12.238924980163574, 'learning_rate': 2.5688858559204053e-05, 'ppl': 4607.77328, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.72, 'tokens/train_per_sec_per_gpu': 1198.18212890625, 'epoch': 1.01, 'tokens/total': 12218432.0, 'tokens/trainable': 12205268.0}
 51%|███████████████████████████████████████████████████████████████████████████████████████████▋                                                                                         | 148/292 [29:41<1:02:41, 26.12s/it][2026-01-06 02:24:16,583] [INFO] [axolotl.core.trainers.base.evaluate:400] [PID:13318] Running evaluation step...

  0%|                                                                                                                                                                                                   | 0/6 [00:00<?, ?it/s][A
 33%|██████████████████████████████████████████████████████████████▎                                                                                                                            | 2/6 [00:01<00:03,  1.16it/s][A
 50%|█████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                             | 3/6 [00:02<00:02,  1.16it/s][A
 67%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                              | 4/6 [00:03<00:01,  1.00it/s][A
 83%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                               | 5/6 [00:05<00:01,  1.13s/it][A
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:06<00:00,  1.22s/it][A                                                                                                                                                                                                                              
                                                                                                                                                                                                                              [A{'eval_loss': 2.6064236164093018, 'eval_runtime': 8.4702, 'eval_samples_per_second': 0.708, 'eval_steps_per_second': 0.354, 'eval_ppl': 13.5505, 'memory/max_active (GiB)': 34.82, 'memory/max_allocated (GiB)': 33.36, 'memory/device_reserved (GiB)': 92.77, 'epoch': 1.01, 'tokens/train_per_sec_per_gpu': 0.0, 'tokens/total': 12218432.0, 'tokens/trainable': 12205268.0}
 51%|███████████████████████████████████████████████████████████████████████████████████████████▋                                                                                         | 148/292 [29:49<1:02:41, 26.12s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:06<00:00,  1.22s/it][A
                                                                                                                                                                                                                              [A 51%|████████████████████████████████████████████████████████████████████████████████████████████▎                                                                                        | 149/292 [30:03<1:01:08, 25.66s/it]                                                                                                                                                                                                                              {'loss': 7.8666, 'grad_norm': 10.17717456817627, 'learning_rate': 2.5413348619158967e-05, 'ppl': 2608.68098, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 63.35, 'tokens/train_per_sec_per_gpu': 1212.4832763671875, 'epoch': 1.02, 'tokens/total': 12318336.0, 'tokens/trainable': 12305150.0}
 51%|████████████████████████████████████████████████████████████████████████████████████████████▎                                                                                        | 149/292 [30:04<1:01:08, 25.66s/it] 51%|██████████████████████████████████████████████████████████████████████████████████████████████                                                                                         | 150/292 [30:17<52:57, 22.37s/it]                                                                                                                                                                                                                              {'loss': 8.4875, 'grad_norm': 11.336738586425781, 'learning_rate': 2.5137788453862515e-05, 'ppl': 4853.7166, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 74.78, 'tokens/train_per_sec_per_gpu': 704.2633666992188, 'epoch': 1.03, 'tokens/total': 12415744.0, 'tokens/trainable': 12402482.0}
 51%|██████████████████████████████████████████████████████████████████████████████████████████████                                                                                         | 150/292 [30:18<52:57, 22.37s/it] 52%|██████████████████████████████████████████████████████████████████████████████████████████████▋                                                                                        | 151/292 [30:28<44:04, 18.75s/it]                                                                                                                                                                                                                              {'loss': 7.4085, 'grad_norm': 15.87648868560791, 'learning_rate': 2.486221154613749e-05, 'ppl': 1649.94957, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 53.29, 'tokens/train_per_sec_per_gpu': 393.7394714355469, 'epoch': 1.03, 'tokens/total': 12469952.0, 'tokens/trainable': 12456647.0}
 52%|██████████████████████████████████████████████████████████████████████████████████████████████▋                                                                                        | 151/292 [30:28<44:04, 18.75s/it] 52%|███████████████████████████████████████████████████████████████████████████████████████████████▎                                                                                       | 152/292 [30:37<37:17, 15.98s/it]                                                                                                                                                                                                                              {'loss': 6.8745, 'grad_norm': 12.569140434265137, 'learning_rate': 2.458665138084104e-05, 'ppl': 967.2916, 'memory/max_active (GiB)': 29.71, 'memory/max_allocated (GiB)': 27.52, 'memory/device_reserved (GiB)': 53.29, 'tokens/train_per_sec_per_gpu': 221.4871826171875, 'epoch': 1.04, 'tokens/total': 12532736.0, 'tokens/trainable': 12519341.0}
 52%|███████████████████████████████████████████████████████████████████████████████████████████████▎                                                                                       | 152/292 [30:37<37:17, 15.98s/it] 52%|███████████████████████████████████████████████████████████████████████████████████████████████▉                                                                                       | 153/292 [30:47<32:56, 14.22s/it]                                                                                                                                                                                                                              {'loss': 6.8506, 'grad_norm': 17.732160568237305, 'learning_rate': 2.4311141440795953e-05, 'ppl': 944.44741, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 53.29, 'tokens/train_per_sec_per_gpu': 103.26007080078125, 'epoch': 1.05, 'tokens/total': 12606336.0, 'tokens/trainable': 12592898.0}
 52%|███████████████████████████████████████████████████████████████████████████████████████████████▉                                                                                       | 153/292 [30:47<32:56, 14.22s/it] 53%|████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                      | 154/292 [30:59<31:08, 13.54s/it]                                                                                                                                                                                                                              {'loss': 9.3339, 'grad_norm': 14.820151329040527, 'learning_rate': 2.4035715202722237e-05, 'ppl': 11315.17473, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 53.29, 'tokens/train_per_sec_per_gpu': 1371.7255859375, 'epoch': 1.05, 'tokens/total': 12710144.0, 'tokens/trainable': 12696679.0}
 53%|████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                      | 154/292 [30:59<31:08, 13.54s/it] 53%|█████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                                     | 155/292 [31:12<30:21, 13.29s/it]                                                                                                                                                                                                                              {'loss': 8.7127, 'grad_norm': 11.660552978515625, 'learning_rate': 2.3760406133169443e-05, 'ppl': 6079.63536, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 53.29, 'tokens/train_per_sec_per_gpu': 1289.1514892578125, 'epoch': 1.06, 'tokens/total': 12817664.0, 'tokens/trainable': 12804138.0}
 53%|█████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                                     | 155/292 [31:12<30:21, 13.29s/it] 53%|█████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                                     | 156/292 [31:24<29:22, 12.96s/it]                                                                                                                                                                                                                              {'loss': 8.8289, 'grad_norm': 8.447288513183594, 'learning_rate': 2.3485247684450166e-05, 'ppl': 6828.77103, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 53.29, 'tokens/train_per_sec_per_gpu': 1344.69189453125, 'epoch': 1.07, 'tokens/total': 12917760.0, 'tokens/trainable': 12904217.0}
 53%|█████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                                     | 156/292 [31:24<29:22, 12.96s/it] 54%|██████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                    | 157/292 [31:36<28:45, 12.78s/it]                                                                                                                                                                                                                              {'loss': 9.2479, 'grad_norm': 10.372071266174316, 'learning_rate': 2.3210273290575333e-05, 'ppl': 10382.73905, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 53.29, 'tokens/train_per_sec_per_gpu': 1325.2635498046875, 'epoch': 1.08, 'tokens/total': 13030208.0, 'tokens/trainable': 13016598.0}
 54%|██████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                    | 157/292 [31:36<28:45, 12.78s/it] 54%|███████████████████████████████████████████████████████████████████████████████████████████████████                                                                                    | 158/292 [31:46<26:12, 11.73s/it]                                                                                                                                                                                                                              {'loss': 6.3662, 'grad_norm': 14.479423522949219, 'learning_rate': 2.2935516363191693e-05, 'ppl': 581.84262, 'memory/max_active (GiB)': 29.71, 'memory/max_allocated (GiB)': 27.52, 'memory/device_reserved (GiB)': 53.29, 'tokens/train_per_sec_per_gpu': 465.51220703125, 'epoch': 1.08, 'tokens/total': 13089536.0, 'tokens/trainable': 13075785.0}
 54%|███████████████████████████████████████████████████████████████████████████████████████████████████                                                                                    | 158/292 [31:46<26:12, 11.73s/it] 54%|███████████████████████████████████████████████████████████████████████████████████████████████████▋                                                                                   | 159/292 [31:57<25:43, 11.61s/it]                                                                                                                                                                                                                              {'loss': 8.7601, 'grad_norm': 10.794559478759766, 'learning_rate': 2.2661010287522057e-05, 'ppl': 6374.74902, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 53.29, 'tokens/train_per_sec_per_gpu': 1220.4619140625, 'epoch': 1.09, 'tokens/total': 13172672.0, 'tokens/trainable': 13158813.0}
 54%|███████████████████████████████████████████████████████████████████████████████████████████████████▋                                                                                   | 159/292 [31:57<25:43, 11.61s/it] 55%|████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                                  | 160/292 [32:10<26:32, 12.06s/it]                                                                                                                                                                                                                              {'loss': 8.5249, 'grad_norm': 8.886340141296387, 'learning_rate': 2.238678841830867e-05, 'ppl': 5038.68291, 'memory/max_active (GiB)': 36.68, 'memory/max_allocated (GiB)': 34.49, 'memory/device_reserved (GiB)': 53.29, 'tokens/train_per_sec_per_gpu': 500.83099365234375, 'epoch': 1.1, 'tokens/total': 13281408.0, 'tokens/trainable': 13267448.0}
 55%|████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                                  | 160/292 [32:10<26:32, 12.06s/it] 55%|████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                                                  | 161/292 [32:19<24:22, 11.16s/it]                                                                                                                                                                                                                              {'loss': 7.4727, 'grad_norm': 9.952726364135742, 'learning_rate': 2.2112884075760347e-05, 'ppl': 1759.35052, 'memory/max_active (GiB)': 25.3, 'memory/max_allocated (GiB)': 23.21, 'memory/device_reserved (GiB)': 42.89, 'tokens/train_per_sec_per_gpu': 1013.79296875, 'epoch': 1.1, 'tokens/total': 13335040.0, 'tokens/trainable': 13320939.0}
 55%|████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                                                  | 161/292 [32:19<24:22, 11.16s/it] 55%|█████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                 | 162/292 [32:32<25:23, 11.72s/it]                                                                                                                                                                                                                              {'loss': 9.0886, 'grad_norm': 9.424476623535156, 'learning_rate': 2.1839330541503845e-05, 'ppl': 8853.78208, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 58.9, 'tokens/train_per_sec_per_gpu': 1258.2423095703125, 'epoch': 1.11, 'tokens/total': 13453632.0, 'tokens/trainable': 13439518.0}
 55%|█████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                 | 162/292 [32:32<25:23, 11.72s/it] 56%|██████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                                | 163/292 [32:44<25:00, 11.63s/it]                                                                                                                                                                                                                              {'loss': 8.5789, 'grad_norm': 8.801314353942871, 'learning_rate': 2.1566161054539798e-05, 'ppl': 5318.25223, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 58.9, 'tokens/train_per_sec_per_gpu': 821.4688720703125, 'epoch': 1.12, 'tokens/total': 13530944.0, 'tokens/trainable': 13516725.0}
 56%|██████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                                | 163/292 [32:44<25:00, 11.63s/it] 56%|██████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                                | 164/292 [32:55<24:41, 11.57s/it]                                                                                                                                                                                                                              {'loss': 7.9726, 'grad_norm': 8.361584663391113, 'learning_rate': 2.1293408807203947e-05, 'ppl': 2900.38858, 'memory/max_active (GiB)': 35.94, 'memory/max_allocated (GiB)': 33.68, 'memory/device_reserved (GiB)': 58.9, 'tokens/train_per_sec_per_gpu': 671.0490112304688, 'epoch': 1.12, 'tokens/total': 13633024.0, 'tokens/trainable': 13618702.0}
 56%|██████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                                | 164/292 [32:55<24:41, 11.57s/it] 57%|███████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                               | 165/292 [33:06<24:08, 11.40s/it]                                                                                                                                                                                                                              {'loss': 8.0882, 'grad_norm': 8.317357063293457, 'learning_rate': 2.1021106941134012e-05, 'ppl': 3255.82181, 'memory/max_active (GiB)': 31.68, 'memory/max_allocated (GiB)': 29.57, 'memory/device_reserved (GiB)': 58.9, 'tokens/train_per_sec_per_gpu': 1194.756103515625, 'epoch': 1.13, 'tokens/total': 13719488.0, 'tokens/trainable': 13705040.0}
 57%|███████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                               | 165/292 [33:06<24:08, 11.40s/it] 57%|████████████████████████████████████████████████████████████████████████████████████████████████████████                                                                               | 166/292 [33:16<22:41, 10.80s/it]                                                                                                                                                                                                                              {'loss': 6.8081, 'grad_norm': 13.565043449401855, 'learning_rate': 2.074928854324268e-05, 'ppl': 905.14939, 'memory/max_active (GiB)': 26.62, 'memory/max_allocated (GiB)': 24.59, 'memory/device_reserved (GiB)': 58.9, 'tokens/train_per_sec_per_gpu': 1067.6107177734375, 'epoch': 1.14, 'tokens/total': 13774912.0, 'tokens/trainable': 13760338.0}
 57%|████████████████████████████████████████████████████████████████████████████████████████████████████████                                                                               | 166/292 [33:16<22:41, 10.80s/it] 57%|████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                                              | 167/292 [33:27<22:57, 11.02s/it]                                                                                                                                                                                                                              {'loss': 7.1945, 'grad_norm': 10.28644847869873, 'learning_rate': 2.047798664169726e-05, 'ppl': 1332.08412, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 58.9, 'tokens/train_per_sec_per_gpu': 92.330810546875, 'epoch': 1.14, 'tokens/total': 13875392.0, 'tokens/trainable': 13860771.0}
 57%|████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                                              | 167/292 [33:27<22:57, 11.02s/it] 58%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                             | 168/292 [33:38<22:53, 11.08s/it]                                                                                                                                                                                                                              {'loss': 8.6391, 'grad_norm': 10.852195739746094, 'learning_rate': 2.0207234201906547e-05, 'ppl': 5648.24412, 'memory/max_active (GiB)': 34.68, 'memory/max_allocated (GiB)': 32.44, 'memory/device_reserved (GiB)': 58.9, 'tokens/train_per_sec_per_gpu': 747.2620239257812, 'epoch': 1.15, 'tokens/total': 13970112.0, 'tokens/trainable': 13955354.0}
 58%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                             | 168/292 [33:38<22:53, 11.08s/it] 58%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                                             | 169/292 [33:48<22:08, 10.80s/it]                                                                                                                                                                                                                              {'loss': 6.9627, 'grad_norm': 9.488106727600098, 'learning_rate': 1.9937064122515202e-05, 'ppl': 1056.48221, 'memory/max_active (GiB)': 30.95, 'memory/max_allocated (GiB)': 28.85, 'memory/device_reserved (GiB)': 58.9, 'tokens/train_per_sec_per_gpu': 269.4940490722656, 'epoch': 1.16, 'tokens/total': 14039488.0, 'tokens/trainable': 14024581.0}
 58%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                                             | 169/292 [33:48<22:08, 10.80s/it] 58%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                            | 170/292 [34:02<23:32, 11.58s/it]                                                                                                                                                                                                                              {'loss': 8.9894, 'grad_norm': 9.464667320251465, 'learning_rate': 1.9667509231406334e-05, 'ppl': 8017.64486, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 58.9, 'tokens/train_per_sec_per_gpu': 1223.881591796875, 'epoch': 1.16, 'tokens/total': 14146368.0, 'tokens/trainable': 14131297.0}
 58%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                            | 170/292 [34:02<23:32, 11.58s/it] 59%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                           | 171/292 [34:14<23:36, 11.71s/it]                                                                                                                                                                                                                              {'loss': 8.5484, 'grad_norm': 9.088114738464355, 'learning_rate': 1.9398602281712604e-05, 'ppl': 5158.49423, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 55.98, 'tokens/train_per_sec_per_gpu': 551.7069702148438, 'epoch': 1.17, 'tokens/total': 14245184.0, 'tokens/trainable': 14230076.0}
 59%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                           | 171/292 [34:14<23:36, 11.71s/it] 59%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                           | 172/292 [34:24<22:33, 11.28s/it]                                                                                                                                                                                                                              {'loss': 8.9253, 'grad_norm': 11.491279602050781, 'learning_rate': 1.913037594783648e-05, 'ppl': 7519.83895, 'memory/max_active (GiB)': 29.71, 'memory/max_allocated (GiB)': 27.52, 'memory/device_reserved (GiB)': 55.98, 'tokens/train_per_sec_per_gpu': 334.81158447265625, 'epoch': 1.18, 'tokens/total': 14324096.0, 'tokens/trainable': 14308879.0}
 59%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                           | 172/292 [34:24<22:33, 11.28s/it] 59%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                          | 173/292 [34:34<21:42, 10.95s/it]                                                                                                                                                                                                                              {'loss': 8.1928, 'grad_norm': 12.732453346252441, 'learning_rate': 1.8862862821480025e-05, 'ppl': 3614.82961, 'memory/max_active (GiB)': 27.7, 'memory/max_allocated (GiB)': 25.57, 'memory/device_reserved (GiB)': 55.98, 'tokens/train_per_sec_per_gpu': 1044.738037109375, 'epoch': 1.18, 'tokens/total': 14394624.0, 'tokens/trainable': 14379235.0}
 59%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                          | 173/292 [34:34<21:42, 10.95s/it] 60%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                                          | 174/292 [34:45<21:39, 11.02s/it]                                                                                                                                                                                                                              {'loss': 9.1031, 'grad_norm': 12.279065132141113, 'learning_rate': 1.859609540768471e-05, 'ppl': 8983.09719, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 55.98, 'tokens/train_per_sec_per_gpu': 1466.5863037109375, 'epoch': 1.19, 'tokens/total': 14474816.0, 'tokens/trainable': 14459356.0}
 60%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                                          | 174/292 [34:45<21:39, 11.02s/it] 60%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                                         | 175/292 [34:56<21:28, 11.01s/it]                                                                                                                                                                                                                              {'loss': 7.7153, 'grad_norm': 13.07933521270752, 'learning_rate': 1.8330106120881846e-05, 'ppl': 2242.39552, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 56.01, 'tokens/train_per_sec_per_gpu': 1490.773681640625, 'epoch': 1.2, 'tokens/total': 14545408.0, 'tokens/trainable': 14529869.0}
 60%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                                         | 175/292 [34:56<21:28, 11.01s/it] 60%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                        | 176/292 [35:06<20:21, 10.53s/it]                                                                                                                                                                                                                              {'loss': 8.1223, 'grad_norm': 13.329166412353516, 'learning_rate': 1.806492728095389e-05, 'ppl': 3368.75999, 'memory/max_active (GiB)': 27.51, 'memory/max_allocated (GiB)': 25.46, 'memory/device_reserved (GiB)': 56.01, 'tokens/train_per_sec_per_gpu': 68.52429962158203, 'epoch': 1.21, 'tokens/total': 14609280.0, 'tokens/trainable': 14593589.0}
 60%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                        | 176/292 [35:06<20:21, 10.53s/it] 61%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                                        | 177/292 [35:17<20:37, 10.76s/it]                                                                                                                                                                                                                              {'loss': 7.4187, 'grad_norm': 19.3292293548584, 'learning_rate': 1.780059110930735e-05, 'ppl': 1666.86517, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 56.03, 'tokens/train_per_sec_per_gpu': 45.11967849731445, 'epoch': 1.21, 'tokens/total': 14701568.0, 'tokens/trainable': 14685862.0}
 61%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                                        | 177/292 [35:17<20:37, 10.76s/it] 61%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                       | 178/292 [35:27<19:52, 10.46s/it]                                                                                                                                                                                                                              {'loss': 9.0387, 'grad_norm': 11.683582305908203, 'learning_rate': 1.7537129724957642e-05, 'ppl': 8422.82027, 'memory/max_active (GiB)': 28.65, 'memory/max_allocated (GiB)': 26.59, 'memory/device_reserved (GiB)': 56.03, 'tokens/train_per_sec_per_gpu': 1157.07568359375, 'epoch': 1.22, 'tokens/total': 14764544.0, 'tokens/trainable': 14748735.0}
 61%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                       | 178/292 [35:27<19:52, 10.46s/it] 61%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                      | 179/292 [35:37<19:42, 10.46s/it]                                                                                                                                                                                                                              {'loss': 8.4925, 'grad_norm': 12.265822410583496, 'learning_rate': 1.7274575140626318e-05, 'ppl': 4878.04596, 'memory/max_active (GiB)': 37.03, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 56.03, 'tokens/train_per_sec_per_gpu': 1565.35498046875, 'epoch': 1.23, 'tokens/total': 14845952.0, 'tokens/trainable': 14830081.0}
 61%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                      | 179/292 [35:37<19:42, 10.46s/it] 62%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                      | 180/292 [35:48<19:50, 10.63s/it]                                                                                                                                                                                                                              {'loss': 7.6745, 'grad_norm': 10.35807991027832, 'learning_rate': 1.70129592588513e-05, 'ppl': 2152.74704, 'memory/max_active (GiB)': 37.03, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 56.03, 'tokens/train_per_sec_per_gpu': 1488.306884765625, 'epoch': 1.23, 'tokens/total': 14922752.0, 'tokens/trainable': 14906826.0}
 62%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                      | 180/292 [35:48<19:50, 10.63s/it] 62%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                     | 181/292 [36:00<20:24, 11.03s/it]                                                                                                                                                                                                                              {'loss': 8.838, 'grad_norm': 8.179500579833984, 'learning_rate': 1.675231386811043e-05, 'ppl': 6891.19646, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 63.76, 'tokens/train_per_sec_per_gpu': 1052.1103515625, 'epoch': 1.24, 'tokens/total': 15017856.0, 'tokens/trainable': 15001780.0}
 62%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                     | 181/292 [36:00<20:24, 11.03s/it] 62%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                                     | 182/292 [36:13<21:19, 11.64s/it]                                                                                                                                                                                                                              {'loss': 9.1344, 'grad_norm': 7.368348598480225, 'learning_rate': 1.6492670638958924e-05, 'ppl': 9268.71472, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 63.76, 'tokens/train_per_sec_per_gpu': 1255.8795166015625, 'epoch': 1.25, 'tokens/total': 15139072.0, 'tokens/trainable': 15122949.0}
 62%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                                     | 182/292 [36:14<21:19, 11.64s/it] 63%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                                    | 183/292 [36:25<21:19, 11.74s/it]                                                                                                                                                                                                                              {'loss': 8.9797, 'grad_norm': 7.638699531555176, 'learning_rate': 1.6234061120181142e-05, 'ppl': 7940.24968, 'memory/max_active (GiB)': 33.48, 'memory/max_allocated (GiB)': 31.26, 'memory/device_reserved (GiB)': 63.76, 'tokens/train_per_sec_per_gpu': 1192.135498046875, 'epoch': 1.25, 'tokens/total': 15242112.0, 'tokens/trainable': 15225876.0}
 63%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                                    | 183/292 [36:25<21:19, 11.74s/it] 63%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                   | 184/292 [36:37<21:05, 11.72s/it]                                                                                                                                                                                                                              {'loss': 8.0119, 'grad_norm': 9.075273513793945, 'learning_rate': 1.5976516734957138e-05, 'ppl': 3016.64329, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 63.76, 'tokens/train_per_sec_per_gpu': 136.06011962890625, 'epoch': 1.26, 'tokens/total': 15342656.0, 'tokens/trainable': 15326352.0}
 63%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                   | 184/292 [36:37<21:05, 11.72s/it] 63%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                                   | 185/292 [36:49<20:44, 11.63s/it]                                                                                                                                                                                                                              {'loss': 7.3686, 'grad_norm': 11.18115520477295, 'learning_rate': 1.5720068777044476e-05, 'ppl': 1585.41265, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 63.76, 'tokens/train_per_sec_per_gpu': 231.77915954589844, 'epoch': 1.27, 'tokens/total': 15419200.0, 'tokens/trainable': 15402829.0}
 63%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                                   | 185/292 [36:49<20:44, 11.63s/it][2026-01-06 02:31:24,488] [INFO] [axolotl.core.trainers.base.evaluate:400] [PID:13318] Running evaluation step...

  0%|                                                                                                                                                                                                   | 0/6 [00:00<?, ?it/s][A
 33%|██████████████████████████████████████████████████████████████▎                                                                                                                            | 2/6 [00:01<00:03,  1.19it/s][A
 50%|█████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                             | 3/6 [00:02<00:02,  1.18it/s][A
 67%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                              | 4/6 [00:03<00:01,  1.01it/s][A
 83%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                               | 5/6 [00:05<00:01,  1.13s/it][A
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:06<00:00,  1.28s/it][A                                                                                                                                                                                                                              
                                                                                                                                                                                                                              [A{'eval_loss': 2.584646463394165, 'eval_runtime': 8.512, 'eval_samples_per_second': 0.705, 'eval_steps_per_second': 0.352, 'eval_ppl': 13.2586, 'memory/max_active (GiB)': 34.82, 'memory/max_allocated (GiB)': 33.36, 'memory/device_reserved (GiB)': 92.77, 'epoch': 1.27, 'tokens/train_per_sec_per_gpu': 0.0, 'tokens/total': 15419200.0, 'tokens/trainable': 15402829.0}
 63%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                                   | 185/292 [36:57<20:44, 11.63s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:06<00:00,  1.28s/it][A
                                                                                                                                                                                                                              [A 64%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                  | 186/292 [37:08<24:32, 13.89s/it]                                                                                                                                                                                                                              {'loss': 7.6793, 'grad_norm': 11.250419616699219, 'learning_rate': 1.5464748406975847e-05, 'ppl': 2163.10507, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.74, 'memory/device_reserved (GiB)': 63.32, 'tokens/train_per_sec_per_gpu': 228.8728790283203, 'epoch': 1.27, 'tokens/total': 15481664.0, 'tokens/trainable': 15465160.0}
 64%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                  | 186/292 [37:08<24:32, 13.89s/it] 64%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                 | 187/292 [37:19<22:53, 13.08s/it]                                                                                                                                                                                                                              {'loss': 7.0724, 'grad_norm': 11.877015113830566, 'learning_rate': 1.521058664827272e-05, 'ppl': 1178.97418, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 63.32, 'tokens/train_per_sec_per_gpu': 912.2022094726562, 'epoch': 1.28, 'tokens/total': 15543872.0, 'tokens/trainable': 15527249.0}
 64%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                 | 187/292 [37:19<22:53, 13.08s/it] 64%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                 | 188/292 [37:29<21:10, 12.21s/it]                                                                                                                                                                                                                              {'loss': 8.4001, 'grad_norm': 13.853951454162598, 'learning_rate': 1.495761438367577e-05, 'ppl': 4447.51148, 'memory/max_active (GiB)': 25.82, 'memory/max_allocated (GiB)': 23.72, 'memory/device_reserved (GiB)': 51.6, 'tokens/train_per_sec_per_gpu': 930.4054565429688, 'epoch': 1.29, 'tokens/total': 15610112.0, 'tokens/trainable': 15593330.0}
 64%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                 | 188/292 [37:29<21:10, 12.21s/it] 65%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                | 189/292 [37:38<19:06, 11.13s/it]                                                                                                                                                                                                                              {'loss': 6.0725, 'grad_norm': 18.349655151367188, 'learning_rate': 1.4705862351392379e-05, 'ppl': 433.76374, 'memory/max_active (GiB)': 27.35, 'memory/max_allocated (GiB)': 25.31, 'memory/device_reserved (GiB)': 51.62, 'tokens/train_per_sec_per_gpu': 169.90524291992188, 'epoch': 1.29, 'tokens/total': 15643712.0, 'tokens/trainable': 15626784.0}
 65%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                | 189/292 [37:38<19:06, 11.13s/it] 65%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                                | 190/292 [37:50<19:20, 11.38s/it]                                                                                                                                                                                                                              {'loss': 8.5625, 'grad_norm': 10.19249439239502, 'learning_rate': 1.44553611413617e-05, 'ppl': 5231.7442, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 51.62, 'tokens/train_per_sec_per_gpu': 566.8071899414062, 'epoch': 1.3, 'tokens/total': 15738432.0, 'tokens/trainable': 15721456.0}
 65%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                                | 190/292 [37:50<19:20, 11.38s/it] 65%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                               | 191/292 [38:02<19:24, 11.53s/it]                                                                                                                                                                                                                              {'loss': 8.1905, 'grad_norm': 8.731797218322754, 'learning_rate': 1.4206141191537682e-05, 'ppl': 3606.52506, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.72, 'tokens/train_per_sec_per_gpu': 1392.1507568359375, 'epoch': 1.31, 'tokens/total': 15843648.0, 'tokens/trainable': 15826630.0}
 65%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                               | 191/292 [38:02<19:24, 11.53s/it] 66%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                              | 192/292 [38:11<18:12, 10.92s/it]                                                                                                                                                                                                                              {'loss': 8.3661, 'grad_norm': 9.280367851257324, 'learning_rate': 1.395823278419065e-05, 'ppl': 4298.83786, 'memory/max_active (GiB)': 25.98, 'memory/max_allocated (GiB)': 23.87, 'memory/device_reserved (GiB)': 50.72, 'tokens/train_per_sec_per_gpu': 829.3560791015625, 'epoch': 1.32, 'tokens/total': 15898368.0, 'tokens/trainable': 15881238.0}
 66%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                              | 192/292 [38:11<18:12, 10.92s/it] 66%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                              | 193/292 [38:21<17:22, 10.53s/it]                                                                                                                                                                                                                              {'loss': 6.9534, 'grad_norm': 12.326271057128906, 'learning_rate': 1.3711666042227772e-05, 'ppl': 1046.70247, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.72, 'tokens/train_per_sec_per_gpu': 240.16000366210938, 'epoch': 1.32, 'tokens/total': 15949248.0, 'tokens/trainable': 15932056.0}
 66%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                              | 193/292 [38:21<17:22, 10.53s/it] 66%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                             | 194/292 [38:31<17:11, 10.53s/it]                                                                                                                                                                                                                              {'loss': 7.4213, 'grad_norm': 8.948957443237305, 'learning_rate': 1.346647092553281e-05, 'ppl': 1671.20466, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.72, 'tokens/train_per_sec_per_gpu': 631.2174072265625, 'epoch': 1.33, 'tokens/total': 16030656.0, 'tokens/trainable': 16013387.0}
 66%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                             | 194/292 [38:31<17:11, 10.53s/it] 67%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                            | 195/292 [38:44<18:06, 11.20s/it]                                                                                                                                                                                                                              {'loss': 7.955, 'grad_norm': 7.481887340545654, 'learning_rate': 1.322267722732582e-05, 'ppl': 2849.78833, 'memory/max_active (GiB)': 33.03, 'memory/max_allocated (GiB)': 30.9, 'memory/device_reserved (GiB)': 50.72, 'tokens/train_per_sec_per_gpu': 663.0271606445312, 'epoch': 1.34, 'tokens/total': 16134720.0, 'tokens/trainable': 16117279.0}
 67%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                            | 195/292 [38:44<18:06, 11.20s/it] 67%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                            | 196/292 [38:53<17:00, 10.64s/it]                                                                                                                                                                                                                              {'loss': 8.2259, 'grad_norm': 12.571401596069336, 'learning_rate': 1.2980314570543006e-05, 'ppl': 3736.48272, 'memory/max_active (GiB)': 27.66, 'memory/max_allocated (GiB)': 25.62, 'memory/device_reserved (GiB)': 50.72, 'tokens/train_per_sec_per_gpu': 221.12901306152344, 'epoch': 1.34, 'tokens/total': 16193856.0, 'tokens/trainable': 16176251.0}
 67%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                            | 196/292 [38:54<17:00, 10.64s/it] 67%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                           | 197/292 [39:02<16:09, 10.20s/it]                                                                                                                                                                                                                              {'loss': 7.1773, 'grad_norm': 11.956565856933594, 'learning_rate': 1.2739412404237306e-05, 'ppl': 1309.36819, 'memory/max_active (GiB)': 29.71, 'memory/max_allocated (GiB)': 27.52, 'memory/device_reserved (GiB)': 50.72, 'tokens/train_per_sec_per_gpu': 168.97547912597656, 'epoch': 1.35, 'tokens/total': 16244544.0, 'tokens/trainable': 16226803.0}
 67%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                           | 197/292 [39:02<16:09, 10.20s/it] 68%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                           | 198/292 [39:13<16:07, 10.29s/it]                                                                                                                                                                                                                              {'loss': 8.9923, 'grad_norm': 11.351778030395508, 'learning_rate': 1.2500000000000006e-05, 'ppl': 8040.92978, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.72, 'tokens/train_per_sec_per_gpu': 488.45367431640625, 'epoch': 1.36, 'tokens/total': 16325120.0, 'tokens/trainable': 16307272.0}
 68%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                           | 198/292 [39:14<16:07, 10.29s/it] 68%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                          | 199/292 [39:25<16:44, 10.80s/it]                                                                                                                                                                                                                              {'loss': 9.1495, 'grad_norm': 9.053028106689453, 'learning_rate': 1.2262106448404132e-05, 'ppl': 9409.73434, 'memory/max_active (GiB)': 30.95, 'memory/max_allocated (GiB)': 28.85, 'memory/device_reserved (GiB)': 50.72, 'tokens/train_per_sec_per_gpu': 998.4190063476562, 'epoch': 1.36, 'tokens/total': 16417664.0, 'tokens/trainable': 16399782.0}
 68%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                          | 199/292 [39:25<16:44, 10.80s/it] 68%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                         | 200/292 [39:40<18:27, 12.03s/it]                                                                                                                                                                                                                              {'loss': 8.529, 'grad_norm': 7.48803186416626, 'learning_rate': 1.202576065546963e-05, 'ppl': 5059.38392, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.72, 'tokens/train_per_sec_per_gpu': 1098.966064453125, 'epoch': 1.37, 'tokens/total': 16548736.0, 'tokens/trainable': 16530854.0}
 68%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                         | 200/292 [39:40<18:27, 12.03s/it] 69%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                         | 201/292 [39:50<17:25, 11.49s/it]                                                                                                                                                                                                                              {'loss': 7.4978, 'grad_norm': 10.250720977783203, 'learning_rate': 1.1790991339151031e-05, 'ppl': 1804.06909, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 61.18, 'tokens/train_per_sec_per_gpu': 1601.96728515625, 'epoch': 1.38, 'tokens/total': 16619072.0, 'tokens/trainable': 16601065.0}
 69%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                         | 201/292 [39:50<17:25, 11.49s/it] 69%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                        | 202/292 [40:00<16:43, 11.15s/it]                                                                                                                                                                                                                              {'loss': 8.3431, 'grad_norm': 10.58807373046875, 'learning_rate': 1.1557827025848047e-05, 'ppl': 4201.09296, 'memory/max_active (GiB)': 30.16, 'memory/max_allocated (GiB)': 28.08, 'memory/device_reserved (GiB)': 61.18, 'tokens/train_per_sec_per_gpu': 1229.1123046875, 'epoch': 1.38, 'tokens/total': 16684800.0, 'tokens/trainable': 16666687.0}
 69%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                        | 202/292 [40:00<16:43, 11.15s/it] 70%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                       | 203/292 [40:13<17:14, 11.63s/it]                                                                                                                                                                                                                              {'loss': 8.7407, 'grad_norm': 7.187331676483154, 'learning_rate': 1.1326296046939333e-05, 'ppl': 6252.27077, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 61.18, 'tokens/train_per_sec_per_gpu': 902.08203125, 'epoch': 1.39, 'tokens/total': 16801344.0, 'tokens/trainable': 16783156.0}
 70%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                       | 203/292 [40:13<17:14, 11.63s/it] 70%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                       | 204/292 [40:25<17:08, 11.69s/it]                                                                                                                                                                                                                              {'loss': 9.6375, 'grad_norm': 10.33745288848877, 'learning_rate': 1.1096426535339985e-05, 'ppl': 15328.97336, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 61.18, 'tokens/train_per_sec_per_gpu': 1394.552490234375, 'epoch': 1.4, 'tokens/total': 16910272.0, 'tokens/trainable': 16892034.0}
 70%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                       | 204/292 [40:25<17:08, 11.69s/it] 70%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                      | 205/292 [40:37<16:58, 11.70s/it]                                                                                                                                                                                                                              {'loss': 8.1809, 'grad_norm': 7.694809913635254, 'learning_rate': 1.0868246422083204e-05, 'ppl': 3572.06808, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 61.18, 'tokens/train_per_sec_per_gpu': 715.5293579101562, 'epoch': 1.4, 'tokens/total': 17003328.0, 'tokens/trainable': 16985036.0}
 70%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                      | 205/292 [40:37<16:58, 11.70s/it] 71%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                      | 206/292 [40:49<16:51, 11.76s/it]                                                                                                                                                                                                                              {'loss': 8.4949, 'grad_norm': 7.882662773132324, 'learning_rate': 1.064178343292641e-05, 'ppl': 4889.76733, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 61.18, 'tokens/train_per_sec_per_gpu': 476.4204406738281, 'epoch': 1.41, 'tokens/total': 17112192.0, 'tokens/trainable': 17093872.0}
 71%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                      | 206/292 [40:49<16:51, 11.76s/it] 71%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                     | 207/292 [40:59<16:13, 11.45s/it]                                                                                                                                                                                                                              {'loss': 9.5406, 'grad_norm': 14.968565940856934, 'learning_rate': 1.0417065084982346e-05, 'ppl': 13913.2931, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 61.18, 'tokens/train_per_sec_per_gpu': 1528.200439453125, 'epoch': 1.42, 'tokens/total': 17179136.0, 'tokens/trainable': 17160748.0}
 71%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                     | 207/292 [40:59<16:13, 11.45s/it] 71%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                    | 208/292 [41:09<15:11, 10.85s/it]                                                                                                                                                                                                                              {'loss': 7.3037, 'grad_norm': 15.098634719848633, 'learning_rate': 1.0194118683375503e-05, 'ppl': 1485.78718, 'memory/max_active (GiB)': 28.39, 'memory/max_allocated (GiB)': 26.34, 'memory/device_reserved (GiB)': 61.18, 'tokens/train_per_sec_per_gpu': 157.43849182128906, 'epoch': 1.42, 'tokens/total': 17228800.0, 'tokens/trainable': 17210284.0}
 71%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                    | 208/292 [41:09<15:11, 10.85s/it] 72%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                    | 209/292 [41:19<14:50, 10.72s/it]                                                                                                                                                                                                                              {'loss': 8.6694, 'grad_norm': 7.866952419281006, 'learning_rate': 9.972971317924374e-06, 'ppl': 5822.0051, 'memory/max_active (GiB)': 36.06, 'memory/max_allocated (GiB)': 33.87, 'memory/device_reserved (GiB)': 61.18, 'tokens/train_per_sec_per_gpu': 494.049072265625, 'epoch': 1.43, 'tokens/total': 17315072.0, 'tokens/trainable': 17296430.0}
 72%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                    | 209/292 [41:19<14:50, 10.72s/it] 72%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                   | 210/292 [41:30<14:32, 10.64s/it]                                                                                                                                                                                                                              {'loss': 8.0547, 'grad_norm': 11.04082202911377, 'learning_rate': 9.753649859849775e-06, 'ppl': 3148.55847, 'memory/max_active (GiB)': 27.75, 'memory/max_allocated (GiB)': 25.62, 'memory/device_reserved (GiB)': 61.18, 'tokens/train_per_sec_per_gpu': 1022.7557983398438, 'epoch': 1.44, 'tokens/total': 17375680.0, 'tokens/trainable': 17356894.0}
 72%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                   | 210/292 [41:30<14:32, 10.64s/it] 72%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                  | 211/292 [41:42<15:09, 11.23s/it]                                                                                                                                                                                                                              {'loss': 8.5136, 'grad_norm': 13.34084415435791, 'learning_rate': 9.536180958509768e-06, 'ppl': 4982.06628, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.71, 'tokens/train_per_sec_per_gpu': 945.4319458007812, 'epoch': 1.45, 'tokens/total': 17497856.0, 'tokens/trainable': 17479062.0}
 72%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                  | 211/292 [41:42<15:09, 11.23s/it] 73%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                  | 212/292 [41:51<13:56, 10.45s/it]                                                                                                                                                                                                                              {'loss': 6.2838, 'grad_norm': 8.978821754455566, 'learning_rate': 9.320591038161574e-06, 'ppl': 535.82092, 'memory/max_active (GiB)': 27.73, 'memory/max_allocated (GiB)': 25.57, 'memory/device_reserved (GiB)': 50.72, 'tokens/train_per_sec_per_gpu': 822.3914794921875, 'epoch': 1.45, 'tokens/total': 17554816.0, 'tokens/trainable': 17535880.0}
 73%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                  | 212/292 [41:51<13:56, 10.45s/it] 73%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                 | 213/292 [42:00<13:08,  9.98s/it]                                                                                                                                                                                                                              {'loss': 7.2265, 'grad_norm': 10.690631866455078, 'learning_rate': 9.106906294750805e-06, 'ppl': 1375.40017, 'memory/max_active (GiB)': 29.71, 'memory/max_allocated (GiB)': 27.52, 'memory/device_reserved (GiB)': 50.72, 'tokens/train_per_sec_per_gpu': 328.83941650390625, 'epoch': 1.46, 'tokens/total': 17619776.0, 'tokens/trainable': 17600736.0}
 73%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                 | 213/292 [42:00<13:08,  9.98s/it] 73%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                 | 214/292 [42:11<13:18, 10.24s/it]                                                                                                                                                                                                                              {'loss': 8.7425, 'grad_norm': 8.969985961914062, 'learning_rate': 8.895152692728397e-06, 'ppl': 6263.53499, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.72, 'tokens/train_per_sec_per_gpu': 1007.0833129882812, 'epoch': 1.47, 'tokens/total': 17708672.0, 'tokens/trainable': 17689532.0}
 73%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                 | 214/292 [42:11<13:18, 10.24s/it] 74%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                | 215/292 [42:23<13:46, 10.74s/it]                                                                                                                                                                                                                              {'loss': 6.8899, 'grad_norm': 10.145347595214844, 'learning_rate': 8.685355961895784e-06, 'ppl': 982.30318, 'memory/max_active (GiB)': 37.03, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.74, 'tokens/train_per_sec_per_gpu': 1378.9794921875, 'epoch': 1.47, 'tokens/total': 17791744.0, 'tokens/trainable': 17772496.0}
 74%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                | 215/292 [42:23<13:46, 10.74s/it] 74%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                               | 216/292 [42:32<13:02, 10.29s/it]                                                                                                                                                                                                                              {'loss': 7.8082, 'grad_norm': 10.8076810836792, 'learning_rate': 8.477541594278474e-06, 'ppl': 2460.69719, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.74, 'tokens/train_per_sec_per_gpu': 358.6339111328125, 'epoch': 1.48, 'tokens/total': 17850176.0, 'tokens/trainable': 17830834.0}
 74%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                               | 216/292 [42:32<13:02, 10.29s/it] 74%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                               | 217/292 [42:42<12:52, 10.30s/it]                                                                                                                                                                                                                              {'loss': 8.3398, 'grad_norm': 10.170552253723145, 'learning_rate': 8.271734841028553e-06, 'ppl': 4187.25221, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.74, 'tokens/train_per_sec_per_gpu': 1591.6998291015625, 'epoch': 1.49, 'tokens/total': 17927040.0, 'tokens/trainable': 17907670.0}
 74%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                               | 217/292 [42:42<12:52, 10.30s/it] 75%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                              | 218/292 [42:53<12:46, 10.35s/it]                                                                                                                                                                                                                              {'loss': 8.1129, 'grad_norm': 16.44310760498047, 'learning_rate': 8.067960709356478e-06, 'ppl': 3337.24201, 'memory/max_active (GiB)': 37.03, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.77, 'tokens/train_per_sec_per_gpu': 967.4773559570312, 'epoch': 1.49, 'tokens/total': 18013376.0, 'tokens/trainable': 17993954.0}
 75%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                              | 218/292 [42:53<12:46, 10.35s/it] 75%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                             | 219/292 [43:05<13:21, 10.98s/it]                                                                                                                                                                                                                              {'loss': 9.1255, 'grad_norm': 9.386290550231934, 'learning_rate': 7.866243959492509e-06, 'ppl': 9186.58916, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.77, 'tokens/train_per_sec_per_gpu': 1316.1553955078125, 'epoch': 1.5, 'tokens/total': 18121664.0, 'tokens/trainable': 18102176.0}
 75%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                             | 219/292 [43:05<13:21, 10.98s/it] 75%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                             | 220/292 [43:18<13:50, 11.53s/it]                                                                                                                                                                                                                              {'loss': 6.9988, 'grad_norm': 12.271045684814453, 'learning_rate': 7.666609101678121e-06, 'ppl': 1095.31799, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 50.8, 'tokens/train_per_sec_per_gpu': 1278.7352294921875, 'epoch': 1.51, 'tokens/total': 18214592.0, 'tokens/trainable': 18195078.0}
 75%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                             | 220/292 [43:18<13:50, 11.53s/it] 76%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                            | 221/292 [43:28<13:17, 11.23s/it]                                                                                                                                                                                                                              {'loss': 7.7838, 'grad_norm': 8.78132438659668, 'learning_rate': 7.469080393187786e-06, 'ppl': 2401.38276, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 53.41, 'tokens/train_per_sec_per_gpu': 500.48944091796875, 'epoch': 1.51, 'tokens/total': 18287872.0, 'tokens/trainable': 18268240.0}
 76%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                            | 221/292 [43:28<13:17, 11.23s/it] 76%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                           | 222/292 [43:40<13:04, 11.20s/it]                                                                                                                                                                                                                              {'loss': 8.6552, 'grad_norm': 9.850422859191895, 'learning_rate': 7.273681835381569e-06, 'ppl': 5739.91683, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 53.41, 'tokens/train_per_sec_per_gpu': 1478.1253662109375, 'epoch': 1.52, 'tokens/total': 18351104.0, 'tokens/trainable': 18331382.0}
 76%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                           | 222/292 [43:40<13:04, 11.20s/it][2026-01-06 02:38:15,462] [INFO] [axolotl.core.trainers.base.evaluate:400] [PID:13318] Running evaluation step...

  0%|                                                                                                                                                                                                   | 0/6 [00:00<?, ?it/s][A
 33%|██████████████████████████████████████████████████████████████▎                                                                                                                            | 2/6 [00:01<00:03,  1.19it/s][A
 50%|█████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                             | 3/6 [00:02<00:02,  1.19it/s][A
 67%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                              | 4/6 [00:03<00:01,  1.01it/s][A
 83%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                               | 5/6 [00:05<00:01,  1.12s/it][A
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:06<00:00,  1.17s/it][A                                                                                                                                                                                                                              
                                                                                                                                                                                                                              [A{'eval_loss': 2.561933994293213, 'eval_runtime': 8.044, 'eval_samples_per_second': 0.746, 'eval_steps_per_second': 0.373, 'eval_ppl': 12.96086, 'memory/max_active (GiB)': 34.82, 'memory/max_allocated (GiB)': 33.35, 'memory/device_reserved (GiB)': 92.77, 'epoch': 1.52, 'tokens/train_per_sec_per_gpu': 0.0, 'tokens/total': 18351104.0, 'tokens/trainable': 18331382.0}
 76%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                           | 222/292 [43:48<13:04, 11.20s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:06<00:00,  1.17s/it][A
                                                                                                                                                                                                                              [A 76%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                           | 223/292 [44:00<16:03, 13.96s/it]                                                                                                                                                                                                                              {'loss': 7.6601, 'grad_norm': 7.473888397216797, 'learning_rate': 7.080437170788723e-06, 'ppl': 2121.96961, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 63.32, 'tokens/train_per_sec_per_gpu': 1326.9090576171875, 'epoch': 1.53, 'tokens/total': 18445568.0, 'tokens/trainable': 18425764.0}
 76%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                           | 223/292 [44:00<16:03, 13.96s/it] 77%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                          | 224/292 [44:13<15:34, 13.75s/it]                                                                                                                                                                                                                              {'loss': 7.9502, 'grad_norm': 8.059795379638672, 'learning_rate': 6.889369880222776e-06, 'ppl': 2836.14212, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 64.67, 'tokens/train_per_sec_per_gpu': 1289.3660888671875, 'epoch': 1.53, 'tokens/total': 18544448.0, 'tokens/trainable': 18524622.0}
 77%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                          | 224/292 [44:13<15:34, 13.75s/it] 77%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                          | 225/292 [44:25<14:38, 13.12s/it]                                                                                                                                                                                                                              {'loss': 8.075, 'grad_norm': 9.339559555053711, 'learning_rate': 6.700503179928458e-06, 'ppl': 3213.12737, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 64.67, 'tokens/train_per_sec_per_gpu': 1406.6197509765625, 'epoch': 1.54, 'tokens/total': 18640000.0, 'tokens/trainable': 18620138.0}
 77%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                          | 225/292 [44:25<14:38, 13.12s/it] 77%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                         | 226/292 [44:35<13:25, 12.20s/it]                                                                                                                                                                                                                              {'loss': 7.1308, 'grad_norm': 12.448400497436523, 'learning_rate': 6.513860018760698e-06, 'ppl': 1249.87647, 'memory/max_active (GiB)': 32.02, 'memory/max_allocated (GiB)': 29.82, 'memory/device_reserved (GiB)': 64.67, 'tokens/train_per_sec_per_gpu': 998.8069458007812, 'epoch': 1.55, 'tokens/total': 18696832.0, 'tokens/trainable': 18676908.0}
 77%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                         | 226/292 [44:35<13:25, 12.20s/it] 78%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                        | 227/292 [44:44<12:06, 11.18s/it]                                                                                                                                                                                                                              {'loss': 6.6565, 'grad_norm': 11.307518005371094, 'learning_rate': 6.329463075396161e-06, 'ppl': 777.82378, 'memory/max_active (GiB)': 25.41, 'memory/max_allocated (GiB)': 23.31, 'memory/device_reserved (GiB)': 64.67, 'tokens/train_per_sec_per_gpu': 409.2963562011719, 'epoch': 1.55, 'tokens/total': 18742400.0, 'tokens/trainable': 18722300.0}
 78%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                        | 227/292 [44:44<12:06, 11.18s/it] 78%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                        | 228/292 [44:55<11:54, 11.16s/it]                                                                                                                                                                                                                              {'loss': 6.7035, 'grad_norm': 8.538440704345703, 'learning_rate': 6.147334755577596e-06, 'ppl': 815.25423, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 64.67, 'tokens/train_per_sec_per_gpu': 1475.2935791015625, 'epoch': 1.56, 'tokens/total': 18815424.0, 'tokens/trainable': 18795210.0}
 78%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                        | 228/292 [44:55<11:54, 11.16s/it] 78%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                       | 229/292 [45:08<12:15, 11.67s/it]                                                                                                                                                                                                                              {'loss': 8.421, 'grad_norm': 7.931164741516113, 'learning_rate': 5.967497189391386e-06, 'ppl': 4541.44263, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 64.67, 'tokens/train_per_sec_per_gpu': 1274.7518310546875, 'epoch': 1.57, 'tokens/total': 18935616.0, 'tokens/trainable': 18915380.0}
 78%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                       | 229/292 [45:08<12:15, 11.67s/it] 79%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                      | 230/292 [45:21<12:31, 12.13s/it]                                                                                                                                                                                                                              {'loss': 7.7738, 'grad_norm': 18.230396270751953, 'learning_rate': 5.78997222857853e-06, 'ppl': 2377.4886, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 64.68, 'tokens/train_per_sec_per_gpu': 1242.2275390625, 'epoch': 1.58, 'tokens/total': 19034816.0, 'tokens/trainable': 19014508.0}
 79%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                      | 230/292 [45:21<12:31, 12.13s/it] 79%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                      | 231/292 [45:32<12:02, 11.85s/it]                                                                                                                                                                                                                              {'loss': 8.1677, 'grad_norm': 9.821671485900879, 'learning_rate': 5.614781443879463e-06, 'ppl': 3525.22661, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 66.53, 'tokens/train_per_sec_per_gpu': 1464.2877197265625, 'epoch': 1.58, 'tokens/total': 19127168.0, 'tokens/trainable': 19106796.0}
 79%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                      | 231/292 [45:32<12:02, 11.85s/it] 79%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                     | 232/292 [45:44<11:58, 11.97s/it]                                                                                                                                                                                                                              {'loss': 9.2159, 'grad_norm': 7.2286458015441895, 'learning_rate': 5.441946122413086e-06, 'ppl': 10055.75111, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 66.53, 'tokens/train_per_sec_per_gpu': 1345.8397216796875, 'epoch': 1.59, 'tokens/total': 19227520.0, 'tokens/trainable': 19207030.0}
 79%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                     | 232/292 [45:44<11:58, 11.97s/it] 80%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                     | 233/292 [45:54<11:09, 11.35s/it]                                                                                                                                                                                                                              {'loss': 6.9809, 'grad_norm': 11.456761360168457, 'learning_rate': 5.271487265090163e-06, 'ppl': 1075.88623, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.74, 'memory/device_reserved (GiB)': 66.53, 'tokens/train_per_sec_per_gpu': 155.82644653320312, 'epoch': 1.6, 'tokens/total': 19287808.0, 'tokens/trainable': 19267146.0}
 80%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                     | 233/292 [45:54<11:09, 11.35s/it] 80%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                    | 234/292 [46:06<11:03, 11.45s/it]                                                                                                                                                                                                                              {'loss': 6.7577, 'grad_norm': 12.42844295501709, 'learning_rate': 5.103425584061538e-06, 'ppl': 860.6604, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 66.55, 'tokens/train_per_sec_per_gpu': 1442.8096923828125, 'epoch': 1.6, 'tokens/total': 19366272.0, 'tokens/trainable': 19345600.0}
 80%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                    | 234/292 [46:06<11:03, 11.45s/it] 80%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                   | 235/292 [46:17<10:45, 11.33s/it]                                                                                                                                                                                                                              {'loss': 8.5924, 'grad_norm': 12.590235710144043, 'learning_rate': 4.937781500201474e-06, 'ppl': 5390.53545, 'memory/max_active (GiB)': 31.24, 'memory/max_allocated (GiB)': 29.05, 'memory/device_reserved (GiB)': 66.55, 'tokens/train_per_sec_per_gpu': 1159.2744140625, 'epoch': 1.61, 'tokens/total': 19444544.0, 'tokens/trainable': 19423760.0}
 80%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                   | 235/292 [46:17<10:45, 11.33s/it] 81%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                   | 236/292 [46:28<10:35, 11.35s/it]                                                                                                                                                                                                                              {'loss': 7.5572, 'grad_norm': 10.289626121520996, 'learning_rate': 4.7745751406263165e-06, 'ppl': 1914.47746, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 66.55, 'tokens/train_per_sec_per_gpu': 619.739013671875, 'epoch': 1.62, 'tokens/total': 19526080.0, 'tokens/trainable': 19505204.0}
 81%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                   | 236/292 [46:28<10:35, 11.35s/it] 81%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                  | 237/292 [46:40<10:34, 11.54s/it]                                                                                                                                                                                                                              {'loss': 9.0411, 'grad_norm': 7.320796489715576, 'learning_rate': 4.613826336248881e-06, 'ppl': 8443.05932, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 66.55, 'tokens/train_per_sec_per_gpu': 1384.785888671875, 'epoch': 1.62, 'tokens/total': 19625600.0, 'tokens/trainable': 19604652.0}
 81%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                  | 237/292 [46:40<10:34, 11.54s/it] 82%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                 | 238/292 [46:48<09:20, 10.37s/it]                                                                                                                                                                                                                              {'loss': 6.2577, 'grad_norm': 13.113734245300293, 'learning_rate': 4.4555546193688735e-06, 'ppl': 522.01692, 'memory/max_active (GiB)': 15.46, 'memory/max_allocated (GiB)': 12.8, 'memory/device_reserved (GiB)': 66.55, 'tokens/train_per_sec_per_gpu': 221.78091430664062, 'epoch': 1.63, 'tokens/total': 19653056.0, 'tokens/trainable': 19632028.0}
 82%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                 | 238/292 [46:48<09:20, 10.37s/it] 82%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                 | 239/292 [46:58<09:10, 10.38s/it]                                                                                                                                                                                                                              {'loss': 7.8585, 'grad_norm': 10.8417329788208, 'learning_rate': 4.299779221299499e-06, 'ppl': 2587.63601, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 66.55, 'tokens/train_per_sec_per_gpu': 327.97491455078125, 'epoch': 1.64, 'tokens/total': 19729152.0, 'tokens/trainable': 19707996.0}
 82%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                 | 239/292 [46:58<09:10, 10.38s/it] 82%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                | 240/292 [47:10<09:19, 10.76s/it]                                                                                                                                                                                                                              {'loss': 8.787, 'grad_norm': 8.166531562805176, 'learning_rate': 4.146519070030757e-06, 'ppl': 6548.55701, 'memory/max_active (GiB)': 30.93, 'memory/max_allocated (GiB)': 28.75, 'memory/device_reserved (GiB)': 66.55, 'tokens/train_per_sec_per_gpu': 362.9252624511719, 'epoch': 1.64, 'tokens/total': 19810816.0, 'tokens/trainable': 19789476.0}
 82%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                | 240/292 [47:11<09:19, 10.76s/it] 83%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                | 241/292 [47:21<09:11, 10.81s/it]                                                                                                                                                                                                                              {'loss': 7.5547, 'grad_norm': 9.753114700317383, 'learning_rate': 3.995792787929481e-06, 'ppl': 1909.69725, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 58.25, 'tokens/train_per_sec_per_gpu': 1621.063232421875, 'epoch': 1.65, 'tokens/total': 19869184.0, 'tokens/trainable': 19847764.0}
 83%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                | 241/292 [47:21<09:11, 10.81s/it] 83%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                               | 242/292 [47:33<09:19, 11.20s/it]                                                                                                                                                                                                                              {'loss': 8.0673, 'grad_norm': 7.780124187469482, 'learning_rate': 3.847618689476612e-06, 'ppl': 3188.4813, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 58.25, 'tokens/train_per_sec_per_gpu': 779.6542358398438, 'epoch': 1.66, 'tokens/total': 19979136.0, 'tokens/trainable': 19957656.0}
 83%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                               | 242/292 [47:34<09:19, 11.20s/it] 83%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                              | 243/292 [47:44<09:02, 11.07s/it]                                                                                                                                                                                                                              {'loss': 6.5553, 'grad_norm': 8.798025131225586, 'learning_rate': 3.7020147790418263e-06, 'ppl': 702.96001, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 58.25, 'tokens/train_per_sec_per_gpu': 336.2445068359375, 'epoch': 1.66, 'tokens/total': 20051520.0, 'tokens/trainable': 20029954.0}
 83%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                              | 243/292 [47:44<09:02, 11.07s/it] 84%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                              | 244/292 [47:54<08:41, 10.87s/it]                                                                                                                                                                                                                              {'loss': 7.9483, 'grad_norm': 9.620014190673828, 'learning_rate': 3.5589987486958243e-06, 'ppl': 2830.75857, 'memory/max_active (GiB)': 33.38, 'memory/max_allocated (GiB)': 31.16, 'memory/device_reserved (GiB)': 58.25, 'tokens/train_per_sec_per_gpu': 532.8733520507812, 'epoch': 1.67, 'tokens/total': 20135296.0, 'tokens/trainable': 20113648.0}
 84%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                              | 244/292 [47:54<08:41, 10.87s/it] 84%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                             | 245/292 [48:04<08:21, 10.68s/it]                                                                                                                                                                                                                              {'loss': 6.9251, 'grad_norm': 18.902856826782227, 'learning_rate': 3.418587976060653e-06, 'ppl': 1017.49601, 'memory/max_active (GiB)': 30.16, 'memory/max_allocated (GiB)': 28.08, 'memory/device_reserved (GiB)': 58.34, 'tokens/train_per_sec_per_gpu': 32.994266510009766, 'epoch': 1.68, 'tokens/total': 20189120.0, 'tokens/trainable': 20167374.0}
 84%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                             | 245/292 [48:05<08:21, 10.68s/it] 84%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                            | 246/292 [48:16<08:20, 10.88s/it]                                                                                                                                                                                                                              {'loss': 9.231, 'grad_norm': 12.806941986083984, 'learning_rate': 3.280799522198144e-06, 'ppl': 10208.74515, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 58.34, 'tokens/train_per_sec_per_gpu': 434.17919921875, 'epoch': 1.68, 'tokens/total': 20280896.0, 'tokens/trainable': 20259076.0}
 84%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                            | 246/292 [48:16<08:20, 10.88s/it] 85%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                            | 247/292 [48:24<07:37, 10.17s/it]                                                                                                                                                                                                                              {'loss': 5.8349, 'grad_norm': 21.162952423095703, 'learning_rate': 3.145650129536862e-06, 'ppl': 342.03053, 'memory/max_active (GiB)': 26.67, 'memory/max_allocated (GiB)': 24.64, 'memory/device_reserved (GiB)': 58.34, 'tokens/train_per_sec_per_gpu': 1195.00537109375, 'epoch': 1.69, 'tokens/total': 20323904.0, 'tokens/trainable': 20301930.0}
 85%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                            | 247/292 [48:25<07:37, 10.17s/it] 85%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                           | 248/292 [48:36<07:41, 10.49s/it]                                                                                                                                                                                                                              {'loss': 8.2786, 'grad_norm': 7.937388896942139, 'learning_rate': 3.013156219837776e-06, 'ppl': 3938.67637, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 58.34, 'tokens/train_per_sec_per_gpu': 1484.173828125, 'epoch': 1.7, 'tokens/total': 20399040.0, 'tokens/trainable': 20377004.0}
 85%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                           | 248/292 [48:36<07:41, 10.49s/it] 85%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                           | 249/292 [48:46<07:30, 10.47s/it]                                                                                                                                                                                                                              {'loss': 6.3088, 'grad_norm': 13.317540168762207, 'learning_rate': 2.883333892198853e-06, 'ppl': 549.38529, 'memory/max_active (GiB)': 27.66, 'memory/max_allocated (GiB)': 25.62, 'memory/device_reserved (GiB)': 58.34, 'tokens/train_per_sec_per_gpu': 1026.387939453125, 'epoch': 1.71, 'tokens/total': 20454272.0, 'tokens/trainable': 20432168.0}
 85%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                           | 249/292 [48:46<07:30, 10.47s/it] 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                          | 250/292 [48:58<07:40, 10.97s/it]                                                                                                                                                                                                                              {'loss': 6.8559, 'grad_norm': 8.249980926513672, 'learning_rate': 2.7561989210989235e-06, 'ppl': 949.46626, 'memory/max_active (GiB)': 35.43, 'memory/max_allocated (GiB)': 33.26, 'memory/device_reserved (GiB)': 58.34, 'tokens/train_per_sec_per_gpu': 299.6693115234375, 'epoch': 1.71, 'tokens/total': 20532416.0, 'tokens/trainable': 20510212.0}
 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                          | 250/292 [48:58<07:40, 10.97s/it] 86%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                         | 251/292 [49:09<07:23, 10.82s/it]                                                                                                                                                                                                                              {'loss': 7.3455, 'grad_norm': 11.29924201965332, 'learning_rate': 2.6317667544809134e-06, 'ppl': 1549.20938, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 55.33, 'tokens/train_per_sec_per_gpu': 722.3812866210938, 'epoch': 1.72, 'tokens/total': 20592896.0, 'tokens/trainable': 20570548.0}
 86%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                         | 251/292 [49:09<07:23, 10.82s/it] 86%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                         | 252/292 [49:21<07:28, 11.20s/it]                                                                                                                                                                                                                              {'loss': 7.3622, 'grad_norm': 7.545140743255615, 'learning_rate': 2.510052511874822e-06, 'ppl': 1575.29841, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 55.33, 'tokens/train_per_sec_per_gpu': 564.7664184570312, 'epoch': 1.73, 'tokens/total': 20692544.0, 'tokens/trainable': 20670146.0}
 86%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                         | 252/292 [49:21<07:28, 11.20s/it] 87%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                        | 253/292 [49:31<07:05, 10.92s/it]                                                                                                                                                                                                                              {'loss': 7.421, 'grad_norm': 10.008145332336426, 'learning_rate': 2.391070982560564e-06, 'ppl': 1670.70338, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 55.33, 'tokens/train_per_sec_per_gpu': 323.4619140625, 'epoch': 1.73, 'tokens/total': 20765504.0, 'tokens/trainable': 20743038.0}
 87%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                        | 253/292 [49:31<07:05, 10.92s/it] 87%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                       | 254/292 [49:42<07:02, 11.12s/it]                                                                                                                                                                                                                              {'loss': 7.63, 'grad_norm': 9.905967712402344, 'learning_rate': 2.2748366237709374e-06, 'ppl': 2059.05002, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 55.33, 'tokens/train_per_sec_per_gpu': 1414.0296630859375, 'epoch': 1.74, 'tokens/total': 20854464.0, 'tokens/trainable': 20831944.0}
 87%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                       | 254/292 [49:42<07:02, 11.12s/it] 87%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                       | 255/292 [49:54<06:54, 11.20s/it]                                                                                                                                                                                                                              {'loss': 9.1398, 'grad_norm': 10.640007972717285, 'learning_rate': 2.1613635589349756e-06, 'ppl': 9318.90117, 'memory/max_active (GiB)': 35.9, 'memory/max_allocated (GiB)': 33.72, 'memory/device_reserved (GiB)': 55.33, 'tokens/train_per_sec_per_gpu': 204.31854248046875, 'epoch': 1.75, 'tokens/total': 20937856.0, 'tokens/trainable': 20915224.0}
 87%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                       | 255/292 [49:54<06:54, 11.20s/it] 88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                      | 256/292 [50:06<06:54, 11.51s/it]                                                                                                                                                                                                                              {'loss': 7.8613, 'grad_norm': 8.943666458129883, 'learning_rate': 2.0506655759618244e-06, 'ppl': 2594.89154, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 55.33, 'tokens/train_per_sec_per_gpu': 1208.9571533203125, 'epoch': 1.75, 'tokens/total': 21027712.0, 'tokens/trainable': 21005002.0}
 88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                      | 256/292 [50:06<06:54, 11.51s/it] 88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                      | 257/292 [50:17<06:33, 11.24s/it]                                                                                                                                                                                                                              {'loss': 6.8994, 'grad_norm': 14.281301498413086, 'learning_rate': 1.9427561255653816e-06, 'ppl': 991.67953, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 55.33, 'tokens/train_per_sec_per_gpu': 537.530029296875, 'epoch': 1.76, 'tokens/total': 21108416.0, 'tokens/trainable': 21085598.0}
 88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                      | 257/292 [50:17<06:33, 11.24s/it] 88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                     | 258/292 [50:26<06:03, 10.68s/it]                                                                                                                                                                                                                              {'loss': 7.6228, 'grad_norm': 7.619968891143799, 'learning_rate': 1.837648319629956e-06, 'ppl': 2044.2781, 'memory/max_active (GiB)': 29.71, 'memory/max_allocated (GiB)': 27.52, 'memory/device_reserved (GiB)': 55.33, 'tokens/train_per_sec_per_gpu': 624.7019653320312, 'epoch': 1.77, 'tokens/total': 21177152.0, 'tokens/trainable': 21154260.0}
 88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                     | 258/292 [50:26<06:03, 10.68s/it] 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                    | 259/292 [50:39<06:10, 11.23s/it]                                                                                                                                                                                                                              {'loss': 8.0194, 'grad_norm': 7.058052062988281, 'learning_rate': 1.735354929617042e-06, 'ppl': 3039.35317, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 55.33, 'tokens/train_per_sec_per_gpu': 1352.8214111328125, 'epoch': 1.77, 'tokens/total': 21268096.0, 'tokens/trainable': 21245092.0}
 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                    | 259/292 [50:39<06:10, 11.23s/it][2026-01-06 02:45:14,591] [INFO] [axolotl.core.trainers.base.evaluate:400] [PID:13318] Running evaluation step...

  0%|                                                                                                                                                                                                   | 0/6 [00:00<?, ?it/s][A
 33%|██████████████████████████████████████████████████████████████▎                                                                                                                            | 2/6 [00:01<00:03,  1.18it/s][A
 50%|█████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                             | 3/6 [00:02<00:02,  1.18it/s][A
 67%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                              | 4/6 [00:03<00:01,  1.01it/s][A
 83%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                               | 5/6 [00:05<00:01,  1.13s/it][A
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:06<00:00,  1.20s/it][A                                                                                                                                                                                                                              
                                                                                                                                                                                                                              [A{'eval_loss': 2.5516953468322754, 'eval_runtime': 8.1173, 'eval_samples_per_second': 0.739, 'eval_steps_per_second': 0.37, 'eval_ppl': 12.82883, 'memory/max_active (GiB)': 34.82, 'memory/max_allocated (GiB)': 33.36, 'memory/device_reserved (GiB)': 92.77, 'epoch': 1.77, 'tokens/train_per_sec_per_gpu': 0.0, 'tokens/total': 21268096.0, 'tokens/trainable': 21245092.0}
 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                    | 259/292 [50:47<06:10, 11.23s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:06<00:00,  1.20s/it][A
                                                                                                                                                                                                                              [A 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                    | 260/292 [50:59<07:31, 14.12s/it]                                                                                                                                                                                                                              {'loss': 8.7379, 'grad_norm': 8.328006744384766, 'learning_rate': 1.6358883850134816e-06, 'ppl': 6234.7889, 'memory/max_active (GiB)': 36.47, 'memory/max_allocated (GiB)': 34.28, 'memory/device_reserved (GiB)': 63.18, 'tokens/train_per_sec_per_gpu': 671.3817749023438, 'epoch': 1.78, 'tokens/total': 21358272.0, 'tokens/trainable': 21335118.0}
 89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                    | 260/292 [51:00<07:31, 14.12s/it] 89%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                   | 261/292 [51:09<06:37, 12.84s/it]                                                                                                                                                                                                                              {'loss': 7.2866, 'grad_norm': 11.861757278442383, 'learning_rate': 1.5392607718211994e-06, 'ppl': 1460.59622, 'memory/max_active (GiB)': 30.41, 'memory/max_allocated (GiB)': 28.23, 'memory/device_reserved (GiB)': 53.55, 'tokens/train_per_sec_per_gpu': 247.686767578125, 'epoch': 1.79, 'tokens/total': 21412864.0, 'tokens/trainable': 21389522.0}
 89%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                   | 261/292 [51:09<06:37, 12.84s/it] 90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                  | 262/292 [51:21<06:12, 12.42s/it]                                                                                                                                                                                                                              {'loss': 8.4668, 'grad_norm': 8.033477783203125, 'learning_rate': 1.4454838310886425e-06, 'ppl': 4754.27741, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 69.56, 'tokens/train_per_sec_per_gpu': 1448.756103515625, 'epoch': 1.79, 'tokens/total': 21498176.0, 'tokens/trainable': 21474796.0}
 90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                  | 262/292 [51:21<06:12, 12.42s/it] 90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                  | 263/292 [51:32<05:48, 12.02s/it]                                                                                                                                                                                                                              {'loss': 8.5983, 'grad_norm': 9.96855354309082, 'learning_rate': 1.3545689574841342e-06, 'ppl': 5422.43361, 'memory/max_active (GiB)': 33.9, 'memory/max_allocated (GiB)': 31.67, 'memory/device_reserved (GiB)': 69.56, 'tokens/train_per_sec_per_gpu': 759.2297973632812, 'epoch': 1.8, 'tokens/total': 21579264.0, 'tokens/trainable': 21555700.0}
 90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                  | 263/292 [51:32<05:48, 12.02s/it] 90%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                 | 264/292 [51:44<05:33, 11.93s/it]                                                                                                                                                                                                                              {'loss': 9.0178, 'grad_norm': 6.982646465301514, 'learning_rate': 1.266527197911352e-06, 'ppl': 8248.61016, 'memory/max_active (GiB)': 35.47, 'memory/max_allocated (GiB)': 33.21, 'memory/device_reserved (GiB)': 69.56, 'tokens/train_per_sec_per_gpu': 1369.138916015625, 'epoch': 1.81, 'tokens/total': 21678400.0, 'tokens/trainable': 21654652.0}
 90%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                 | 264/292 [51:44<05:33, 11.93s/it] 91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                 | 265/292 [51:55<05:18, 11.80s/it]                                                                                                                                                                                                                              {'loss': 8.3691, 'grad_norm': 8.018171310424805, 'learning_rate': 1.1813692501670276e-06, 'ppl': 4311.75374, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 69.56, 'tokens/train_per_sec_per_gpu': 413.8620300292969, 'epoch': 1.82, 'tokens/total': 21767744.0, 'tokens/trainable': 21743906.0}
 91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                 | 265/292 [51:55<05:18, 11.80s/it] 91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                | 266/292 [52:07<05:05, 11.76s/it]                                                                                                                                                                                                                              {'loss': 9.0205, 'grad_norm': 6.452097415924072, 'learning_rate': 1.0991054616410589e-06, 'ppl': 8270.9115, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 69.56, 'tokens/train_per_sec_per_gpu': 1192.5711669921875, 'epoch': 1.82, 'tokens/total': 21879552.0, 'tokens/trainable': 21855678.0}
 91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                | 266/292 [52:07<05:05, 11.76s/it] 91%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎               | 267/292 [52:17<04:40, 11.21s/it]                                                                                                                                                                                                                              {'loss': 7.8726, 'grad_norm': 7.977558135986328, 'learning_rate': 1.0197458280592542e-06, 'ppl': 2624.38011, 'memory/max_active (GiB)': 33.43, 'memory/max_allocated (GiB)': 31.21, 'memory/device_reserved (GiB)': 69.56, 'tokens/train_per_sec_per_gpu': 375.8406677246094, 'epoch': 1.83, 'tokens/total': 21952064.0, 'tokens/trainable': 21927996.0}
 91%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎               | 267/292 [52:17<04:40, 11.21s/it] 92%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉               | 268/292 [52:27<04:23, 10.97s/it]                                                                                                                                                                                                                              {'loss': 7.6466, 'grad_norm': 8.751847267150879, 'learning_rate': 9.432999922687396e-07, 'ppl': 2093.51552, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 69.56, 'tokens/train_per_sec_per_gpu': 1599.974365234375, 'epoch': 1.84, 'tokens/total': 22031296.0, 'tokens/trainable': 22007138.0}
 92%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉               | 268/292 [52:27<04:23, 10.97s/it] 92%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌              | 269/292 [52:37<04:07, 10.74s/it]                                                                                                                                                                                                                              {'loss': 7.974, 'grad_norm': 9.038701057434082, 'learning_rate': 8.697772430662859e-07, 'ppl': 2904.45197, 'memory/max_active (GiB)': 34.47, 'memory/max_allocated (GiB)': 32.23, 'memory/device_reserved (GiB)': 69.56, 'tokens/train_per_sec_per_gpu': 693.4686279296875, 'epoch': 1.84, 'tokens/total': 22100928.0, 'tokens/trainable': 22076672.0}
 92%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌              | 269/292 [52:38<04:07, 10.74s/it] 92%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏             | 270/292 [52:49<04:01, 11.00s/it]                                                                                                                                                                                                                              {'loss': 7.5606, 'grad_norm': 10.90684700012207, 'learning_rate': 7.991865140696331e-07, 'ppl': 1920.99777, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 69.56, 'tokens/train_per_sec_per_gpu': 158.62686157226562, 'epoch': 1.85, 'tokens/total': 22177856.0, 'tokens/trainable': 22153564.0}
 92%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏             | 270/292 [52:50<04:01, 11.00s/it] 93%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊             | 271/292 [53:01<03:59, 11.42s/it]                                                                                                                                                                                                                              {'loss': 8.4519, 'grad_norm': 6.492819309234619, 'learning_rate': 7.315363826320005e-07, 'ppl': 4683.96382, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 57.0, 'tokens/train_per_sec_per_gpu': 1392.9149169921875, 'epoch': 1.86, 'tokens/total': 22282240.0, 'tokens/trainable': 22257912.0}
 93%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊             | 271/292 [53:02<03:59, 11.42s/it] 93%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍            | 272/292 [53:14<03:54, 11.72s/it]                                                                                                                                                                                                                              {'loss': 9.1094, 'grad_norm': 6.967322826385498, 'learning_rate': 6.668350687998565e-07, 'ppl': 9039.86934, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 57.0, 'tokens/train_per_sec_per_gpu': 1371.2830810546875, 'epoch': 1.86, 'tokens/total': 22384512.0, 'tokens/trainable': 22360112.0}
 93%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍            | 272/292 [53:14<03:54, 11.72s/it] 93%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████            | 273/292 [53:24<03:35, 11.35s/it]                                                                                                                                                                                                                              {'loss': 8.5949, 'grad_norm': 8.484600067138672, 'learning_rate': 6.050904343141095e-07, 'ppl': 5404.02865, 'memory/max_active (GiB)': 37.03, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 57.0, 'tokens/train_per_sec_per_gpu': 822.1696166992188, 'epoch': 1.87, 'tokens/total': 22475008.0, 'tokens/trainable': 22450522.0}
 93%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████            | 273/292 [53:25<03:35, 11.35s/it] 94%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋           | 274/292 [53:36<03:25, 11.40s/it]                                                                                                                                                                                                                              {'loss': 8.24, 'grad_norm': 7.128989219665527, 'learning_rate': 5.463099816548579e-07, 'ppl': 3789.54031, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 57.0, 'tokens/train_per_sec_per_gpu': 1499.5008544921875, 'epoch': 1.88, 'tokens/total': 22573440.0, 'tokens/trainable': 22548884.0}
 94%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋           | 274/292 [53:36<03:25, 11.40s/it] 94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎          | 275/292 [53:47<03:12, 11.32s/it]                                                                                                                                                                                                                              {'loss': 6.9146, 'grad_norm': 13.135233879089355, 'learning_rate': 4.905008531297661e-07, 'ppl': 1006.8682, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.74, 'memory/device_reserved (GiB)': 57.02, 'tokens/train_per_sec_per_gpu': 1577.3818359375, 'epoch': 1.88, 'tokens/total': 22630528.0, 'tokens/trainable': 22605900.0}
 94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎          | 275/292 [53:47<03:12, 11.32s/it] 95%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉          | 276/292 [54:00<03:08, 11.80s/it]                                                                                                                                                                                                                              {'loss': 8.6944, 'grad_norm': 6.511268615722656, 'learning_rate': 4.3766983000621266e-07, 'ppl': 5969.38986, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 57.02, 'tokens/train_per_sec_per_gpu': 1318.8974609375, 'epoch': 1.89, 'tokens/total': 22752768.0, 'tokens/trainable': 22728108.0}
 95%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉          | 276/292 [54:00<03:08, 11.80s/it] 95%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌         | 277/292 [54:09<02:47, 11.17s/it]                                                                                                                                                                                                                              {'loss': 6.4458, 'grad_norm': 13.626824378967285, 'learning_rate': 3.8782333168732033e-07, 'ppl': 630.05052, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.74, 'memory/device_reserved (GiB)': 57.02, 'tokens/train_per_sec_per_gpu': 208.88296508789062, 'epoch': 1.9, 'tokens/total': 22799808.0, 'tokens/trainable': 22775052.0}
 95%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌         | 277/292 [54:10<02:47, 11.17s/it] 95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏        | 278/292 [54:20<02:35, 11.10s/it]                                                                                                                                                                                                                              {'loss': 7.7067, 'grad_norm': 8.478606224060059, 'learning_rate': 3.4096741493194197e-07, 'ppl': 2223.1936, 'memory/max_active (GiB)': 31.52, 'memory/max_allocated (GiB)': 29.41, 'memory/device_reserved (GiB)': 57.02, 'tokens/train_per_sec_per_gpu': 801.87744140625, 'epoch': 1.9, 'tokens/total': 22879360.0, 'tokens/trainable': 22854434.0}
 95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏        | 278/292 [54:21<02:35, 11.10s/it] 96%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊        | 279/292 [54:33<02:29, 11.47s/it]                                                                                                                                                                                                                              {'loss': 8.2794, 'grad_norm': 8.319933891296387, 'learning_rate': 2.9710777311871e-07, 'ppl': 3941.82858, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 57.02, 'tokens/train_per_sec_per_gpu': 843.2362670898438, 'epoch': 1.91, 'tokens/total': 22990784.0, 'tokens/trainable': 22965810.0}
 96%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊        | 279/292 [54:33<02:29, 11.47s/it] 96%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍       | 280/292 [54:46<02:25, 12.09s/it]                                                                                                                                                                                                                              {'loss': 8.7086, 'grad_norm': 6.078580856323242, 'learning_rate': 2.5624973555424815e-07, 'ppl': 6054.75989, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 57.02, 'tokens/train_per_sec_per_gpu': 1211.70556640625, 'epoch': 1.92, 'tokens/total': 23109120.0, 'tokens/trainable': 23084076.0}
 96%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍       | 280/292 [54:46<02:25, 12.09s/it] 96%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████       | 281/292 [54:55<02:02, 11.17s/it]                                                                                                                                                                                                                              {'loss': 6.9631, 'grad_norm': 9.414605140686035, 'learning_rate': 2.1839826682562015e-07, 'ppl': 1056.90489, 'memory/max_active (GiB)': 23.58, 'memory/max_allocated (GiB)': 21.52, 'memory/device_reserved (GiB)': 46.35, 'tokens/train_per_sec_per_gpu': 900.7665405273438, 'epoch': 1.92, 'tokens/total': 23161152.0, 'tokens/trainable': 23135964.0}
 96%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████       | 281/292 [54:55<02:02, 11.17s/it] 97%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋      | 282/292 [55:05<01:46, 10.65s/it]                                                                                                                                                                                                                              {'loss': 6.5326, 'grad_norm': 11.216902732849121, 'learning_rate': 1.8355796619708987e-07, 'ppl': 687.18257, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 70.36, 'tokens/train_per_sec_per_gpu': 1732.7987060546875, 'epoch': 1.93, 'tokens/total': 23218560.0, 'tokens/trainable': 23193270.0}
 97%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋      | 282/292 [55:05<01:46, 10.65s/it] 97%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎     | 283/292 [55:16<01:38, 10.89s/it]                                                                                                                                                                                                                              {'loss': 8.1908, 'grad_norm': 13.340356826782227, 'learning_rate': 1.517330670512629e-07, 'ppl': 3607.60718, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 70.36, 'tokens/train_per_sec_per_gpu': 1225.8748779296875, 'epoch': 1.94, 'tokens/total': 23325504.0, 'tokens/trainable': 23300172.0}
 97%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎     | 283/292 [55:16<01:38, 10.89s/it] 97%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉     | 284/292 [55:29<01:31, 11.38s/it]                                                                                                                                                                                                                              {'loss': 8.9583, 'grad_norm': 6.198460102081299, 'learning_rate': 1.229274363747146e-07, 'ppl': 7772.1336, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 70.36, 'tokens/train_per_sec_per_gpu': 1032.853271484375, 'epoch': 1.95, 'tokens/total': 23449664.0, 'tokens/trainable': 23424326.0}
 97%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉     | 284/292 [55:29<01:31, 11.38s/it] 98%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌    | 285/292 [55:41<01:22, 11.79s/it]                                                                                                                                                                                                                              {'loss': 8.3808, 'grad_norm': 6.886651992797852, 'learning_rate': 9.71445742881022e-08, 'ppl': 4362.49753, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 70.36, 'tokens/train_per_sec_per_gpu': 1058.468017578125, 'epoch': 1.95, 'tokens/total': 23556800.0, 'tokens/trainable': 23531384.0}
 98%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌    | 285/292 [55:41<01:22, 11.79s/it] 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏   | 286/292 [55:52<01:08, 11.44s/it]                                                                                                                                                                                                                              {'loss': 8.1776, 'grad_norm': 6.9330668449401855, 'learning_rate': 7.438761362087987e-08, 'ppl': 3560.29968, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 70.36, 'tokens/train_per_sec_per_gpu': 1539.2813720703125, 'epoch': 1.96, 'tokens/total': 23644416.0, 'tokens/trainable': 23618908.0}
 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏   | 286/292 [55:52<01:08, 11.44s/it] 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊   | 287/292 [56:02<00:54, 10.95s/it]                                                                                                                                                                                                                              {'loss': 7.4402, 'grad_norm': 9.739463806152344, 'learning_rate': 5.4659319530636633e-08, 'ppl': 1703.09081, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 70.36, 'tokens/train_per_sec_per_gpu': 225.29293823242188, 'epoch': 1.97, 'tokens/total': 23721728.0, 'tokens/trainable': 23696168.0}
 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊   | 287/292 [56:02<00:54, 10.95s/it] 99%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍  | 288/292 [56:13<00:43, 10.90s/it]                                                                                                                                                                                                                              {'loss': 7.3631, 'grad_norm': 9.398221969604492, 'learning_rate': 3.796208916709565e-08, 'ppl': 1576.71682, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 70.36, 'tokens/train_per_sec_per_gpu': 1522.9334716796875, 'epoch': 1.97, 'tokens/total': 23800640.0, 'tokens/trainable': 23774992.0}
 99%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍  | 288/292 [56:13<00:43, 10.90s/it] 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████  | 289/292 [56:24<00:33, 11.12s/it]                                                                                                                                                                                                                              {'loss': 8.0418, 'grad_norm': 10.373838424682617, 'learning_rate': 2.429795138085278e-08, 'ppl': 3108.20292, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 70.36, 'tokens/train_per_sec_per_gpu': 968.8304443359375, 'epoch': 1.98, 'tokens/total': 23904064.0, 'tokens/trainable': 23878400.0}
 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████  | 289/292 [56:24<00:33, 11.12s/it] 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 290/292 [56:36<00:22, 11.40s/it]                                                                                                                                                                                                                              {'loss': 7.3862, 'grad_norm': 8.32804012298584, 'learning_rate': 1.3668566476848777e-08, 'ppl': 1613.56291, 'memory/max_active (GiB)': 33.27, 'memory/max_allocated (GiB)': 31.05, 'memory/device_reserved (GiB)': 70.36, 'tokens/train_per_sec_per_gpu': 700.6759033203125, 'epoch': 1.99, 'tokens/total': 23976192.0, 'tokens/trainable': 23950424.0}
 99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 290/292 [56:36<00:22, 11.40s/it]100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎| 291/292 [56:46<00:11, 11.01s/it]                                                                                                                                                                                                                              {'loss': 8.0462, 'grad_norm': 9.239873886108398, 'learning_rate': 6.075226012636215e-09, 'ppl': 3121.90915, 'memory/max_active (GiB)': 31.94, 'memory/max_allocated (GiB)': 29.82, 'memory/device_reserved (GiB)': 48.87, 'tokens/train_per_sec_per_gpu': 350.1069641113281, 'epoch': 1.99, 'tokens/total': 24039936.0, 'tokens/trainable': 24014012.0}
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎| 291/292 [56:46<00:11, 11.01s/it]100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 292/292 [56:59<00:00, 11.51s/it]                                                                                                                                                                                                                              {'loss': 7.8723, 'grad_norm': 10.260153770446777, 'learning_rate': 1.5188526414244842e-09, 'ppl': 2623.59292, 'memory/max_active (GiB)': 36.94, 'memory/max_allocated (GiB)': 34.75, 'memory/device_reserved (GiB)': 64.88, 'tokens/train_per_sec_per_gpu': 171.65948486328125, 'epoch': 2.0, 'tokens/total': 24126848.0, 'tokens/trainable': 24100884.0}
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 292/292 [56:59<00:00, 11.51s/it][2026-01-06 02:51:35,104] [INFO] [axolotl.core.trainers.base._save:722] [PID:13318] Saving model checkpoint to stage1/checkpoint-292
[2026-01-06 02:51:43,950] [WARNING] [py.warnings._showwarnmsg:110] [PID:13318] /workspace/venv/lib/python3.12/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py:675: FutureWarning: FSDP.state_dict_type() and FSDP.set_state_dict_type() are being deprecated. Please use APIs, get_state_dict() and set_state_dict(), which can support different parallelisms, FSDP1, FSDP2, DDP. API doc: https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.state_dict.get_state_dict .Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html .
  warnings.warn(

                                                                                                                                                                                                                              {'train_runtime': 3477.3766, 'train_samples_per_second': 0.672, 'train_steps_per_second': 0.084, 'train_loss': 9.661695568528893, 'memory/max_active (GiB)': 19.48, 'memory/max_allocated (GiB)': 19.48, 'memory/device_reserved (GiB)': 20.15, 'epoch': 2.0, 'tokens/train_per_sec_per_gpu': 0.0, 'tokens/total': 24126848.0, 'tokens/trainable': 24100884.0}
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 292/292 [57:54<00:00, 11.51s/it]100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 292/292 [57:54<00:00, 11.90s/it]
[2026-01-06 02:53:22,189] [INFO] [axolotl.train.save_trained_model:233] [PID:13318] Training completed! Saving trained model to stage1.
[2026-01-06 02:53:22,193] [INFO] [axolotl.core.trainers.base._save:722] [PID:13318] Saving model checkpoint to stage1
[2026-01-06 02:53:36,296] [INFO] [axolotl.core.trainers.base._save:722] [PID:13318] Saving model checkpoint to stage1
Processing Files (0 / 0)      : |                                                                                                                                                                |  0.00B /  0.00B            
New Data Upload               : |                                                                                                                                                                |  0.00B /  0.00B            [A

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:   1%|█▎                                                                                                                                                          | 41.9MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:   1%|██▎                                                                                                                                                         | 50.3MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:   1%|█▌                                                                                                                                                          | 50.3MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:   1%|█▊                                                                                                                                                          | 58.7MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[A

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:   1%|█▎                                                                                                                                                          | 41.9MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:   1%|██▎                                                                                                                                                         | 50.3MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:   1%|█▌                                                                                                                                                          | 50.3MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:   1%|█▊                                                                                                                                                          | 58.7MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :   1%|██                                                                                                                                                          |  239MB / 18.2GB,   ???B/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:   2%|██▉                                                                                                                                                         | 92.2MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:   3%|████▎                                                                                                                                                       | 92.2MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:   2%|███▏                                                                                                                                                        |  101MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:   2%|███▏                                                                                                                                                        |  101MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :   2%|███▌                                                                                                                                                        |  424MB / 18.2GB,  923MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:   3%|███▉                                                                                                                                                        |  126MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:   4%|██████▌                                                                                                                                                     |  143MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:   3%|████▎                                                                                                                                                       |  134MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:   3%|████▏                                                                                                                                                       |  134MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :   3%|████▉                                                                                                                                                       |  575MB / 18.2GB,  838MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:   3%|████▉                                                                                                                                                       |  159MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:   6%|████████▉                                                                                                                                                   |  193MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:   3%|█████▎                                                                                                                                                      |  168MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:   4%|█████▌                                                                                                                                                      |  176MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :   4%|██████▎                                                                                                                                                     |  734MB / 18.2GB,  824MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:   4%|██████                                                                                                                                                      |  193MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:   7%|██████████▍                                                                                                                                                 |  226MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:   4%|██████▍                                                                                                                                                     |  201MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:   4%|██████▋                                                                                                                                                     |  210MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :   5%|███████▍                                                                                                                                                    |  868MB / 18.2GB,  787MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:   5%|███████                                                                                                                                                     |  226MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:   8%|████████████                                                                                                                                                |  260MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:   5%|███████▍                                                                                                                                                    |  235MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:   5%|███████▋                                                                                                                                                    |  243MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :   5%|████████▌                                                                                                                                                   | 1.00GB / 18.2GB,  763MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:   5%|████████                                                                                                                                                    |  260MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:   9%|█████████████▌                                                                                                                                              |  294MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:   5%|████████▌                                                                                                                                                   |  268MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:   6%|████████▊                                                                                                                                                   |  277MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :   6%|█████████▋                                                                                                                                                  | 1.14GB / 18.2GB,  748MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:   6%|█████████▏                                                                                                                                                  |  293MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  10%|███████████████▍                                                                                                                                            |  335MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:   6%|█████████▌                                                                                                                                                  |  302MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:   6%|█████████▊                                                                                                                                                  |  310MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :   7%|██████████▉                                                                                                                                                 | 1.28GB / 18.2GB,  743MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:   7%|██████████▍                                                                                                                                                 |  335MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  11%|█████████████████                                                                                                                                           |  369MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:   7%|██████████▉                                                                                                                                                 |  344MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:   7%|██████████▉                                                                                                                                                 |  344MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :   8%|████████████▏                                                                                                                                               | 1.43GB / 18.2GB,  745MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:   7%|███████████▌                                                                                                                                                |  369MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  12%|██████████████████▉                                                                                                                                         |  411MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:   8%|███████████▉                                                                                                                                                |  377MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:   8%|███████████▉                                                                                                                                                |  377MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :   9%|█████████████▍                                                                                                                                              | 1.57GB / 18.2GB,  741MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:   8%|████████████▌                                                                                                                                               |  403MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  13%|████████████████████▌                                                                                                                                       |  445MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:   8%|█████████████                                                                                                                                               |  411MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:   8%|█████████████                                                                                                                                               |  411MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :   9%|██████████████▌                                                                                                                                             | 1.71GB / 18.2GB,  734MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:   9%|█████████████▋                                                                                                                                              |  436MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  14%|██████████████████████                                                                                                                                      |  478MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:   9%|██████████████▏                                                                                                                                             |  445MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:   9%|██████████████                                                                                                                                              |  445MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  10%|███████████████▋                                                                                                                                            | 1.84GB / 18.2GB,  728MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:   9%|██████████████▋                                                                                                                                             |  470MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  15%|███████████████████████▋                                                                                                                                    |  512MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  10%|███████████████▍                                                                                                                                            |  487MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  10%|███████████████▏                                                                                                                                            |  478MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  11%|████████████████▉                                                                                                                                           | 1.98GB / 18.2GB,  727MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  10%|███████████████▋                                                                                                                                            |  503MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  16%|█████████████████████████▏                                                                                                                                  |  545MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  11%|████████████████▊                                                                                                                                           |  528MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  10%|████████████████▏                                                                                                                                           |  512MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  12%|██████████████████▏                                                                                                                                         | 2.13GB / 18.2GB,  726MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  11%|████████████████▊                                                                                                                                           |  537MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  17%|██████████████████████████▋                                                                                                                                 |  579MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  11%|█████████████████▊                                                                                                                                          |  562MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  11%|█████████████████▌                                                                                                                                          |  554MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  12%|███████████████████▍                                                                                                                                        | 2.27GB / 18.2GB,  725MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  11%|█████████████████▊                                                                                                                                          |  570MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  18%|████████████████████████████▋                                                                                                                               |  621MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  12%|██████████████████▉                                                                                                                                         |  596MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  12%|██████████████████▌                                                                                                                                         |  587MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  13%|████████████████████▌                                                                                                                                       | 2.41GB / 18.2GB,  724MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  12%|██████████████████▊                                                                                                                                         |  604MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  19%|██████████████████████████████▏                                                                                                                             |  654MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  13%|███████████████████▉                                                                                                                                        |  629MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  13%|███████████████████▋                                                                                                                                        |  621MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  14%|█████████████████████▊                                                                                                                                      | 2.55GB / 18.2GB,  721MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  13%|███████████████████▉                                                                                                                                        |  638MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  20%|███████████████████████████████▊                                                                                                                            |  688MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  14%|█████████████████████                                                                                                                                       |  663MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  13%|████████████████████▉                                                                                                                                       |  663MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  15%|██████████████████████▉                                                                                                                                     | 2.69GB / 18.2GB,  720MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  14%|█████████████████████▍                                                                                                                                      |  688MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  22%|█████████████████████████████████▋                                                                                                                          |  730MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  14%|██████████████████████▏                                                                                                                                     |  696MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  14%|██████████████████████                                                                                                                                      |  696MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  16%|████████████████████████▎                                                                                                                                   | 2.85GB / 18.2GB,  725MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  14%|██████████████████████▌                                                                                                                                     |  721MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  23%|███████████████████████████████████▋                                                                                                                        |  772MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  15%|███████████████████████▍                                                                                                                                    |  738MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  15%|███████████████████████                                                                                                                                     |  730MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  16%|█████████████████████████▋                                                                                                                                  | 3.00GB / 18.2GB,  726MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  15%|███████████████████████▌                                                                                                                                    |  755MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  24%|█████████████████████████████████████▏                                                                                                                      |  805MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  16%|████████████████████████▊                                                                                                                                   |  780MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  15%|████████████████████████▏                                                                                                                                   |  763MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  17%|██████████████████████████▊                                                                                                                                 | 3.14GB / 18.2GB,  726MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  16%|████████████████████████▋                                                                                                                                   |  788MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  25%|██████████████████████████████████████▋                                                                                                                     |  839MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  17%|██████████████████████████▍                                                                                                                                 |  830MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  16%|█████████████████████████▏                                                                                                                                  |  797MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  18%|████████████████████████████▏                                                                                                                               | 3.29GB / 18.2GB,  727MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  16%|█████████████████████████▋                                                                                                                                  |  822MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  26%|████████████████████████████████████████▎                                                                                                                   |  872MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  18%|███████████████████████████▋                                                                                                                                |  872MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  17%|██████████████████████████▎                                                                                                                                 |  830MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  19%|█████████████████████████████▎                                                                                                                              | 3.44GB / 18.2GB,  726MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  17%|██████████████████████████▉                                                                                                                                 |  864MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  27%|██████████████████████████████████████████▏                                                                                                                 |  914MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  18%|████████████████████████████▊                                                                                                                               |  906MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  18%|███████████████████████████▎                                                                                                                                |  864MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  20%|██████████████████████████████▋                                                                                                                             | 3.59GB / 18.2GB,  728MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  18%|████████████████████████████                                                                                                                                |  898MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  28%|████████████████████████████████████████████▏                                                                                                               |  956MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  19%|██████████████████████████████▏                                                                                                                             |  948MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  18%|████████████████████████████▋                                                                                                                               |  906MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  21%|████████████████████████████████                                                                                                                            | 3.75GB / 18.2GB,  731MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  19%|█████████████████████████████                                                                                                                               |  931MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  29%|█████████████████████████████████████████████▋                                                                                                              |  990MB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  20%|███████████████████████████████▏                                                                                                                            |  981MB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  19%|█████████████████████████████▋                                                                                                                              |  940MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  21%|█████████████████████████████████▏                                                                                                                          | 3.88GB / 18.2GB,  728MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  19%|██████████████████████████████▏                                                                                                                             |  965MB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  30%|███████████████████████████████████████████████▎                                                                                                            | 1.02GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  21%|████████████████████████████████▎                                                                                                                           | 1.01GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  20%|██████████████████████████████▊                                                                                                                             |  973MB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  22%|██████████████████████████████████▎                                                                                                                         | 4.01GB / 18.2GB,  726MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  20%|███████████████████████████████▋                                                                                                                            | 1.02GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  31%|████████████████████████████████████████████████▊                                                                                                           | 1.06GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  21%|█████████████████████████████████▎                                                                                                                          | 1.05GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  20%|███████████████████████████████▊                                                                                                                            | 1.01GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  23%|███████████████████████████████████▌                                                                                                                        | 4.17GB / 18.2GB,  727MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  21%|████████████████████████████████▊                                                                                                                           | 1.05GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  33%|██████████████████████████████████████████████████▊                                                                                                         | 1.10GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  22%|██████████████████████████████████▍                                                                                                                         | 1.08GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  21%|█████████████████████████████████▏                                                                                                                          | 1.05GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  24%|████████████████████████████████████▉                                                                                                                       | 4.32GB / 18.2GB,  728MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  22%|██████████████████████████████████                                                                                                                          | 1.09GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  34%|████████████████████████████████████████████████████▎                                                                                                       | 1.13GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  23%|███████████████████████████████████▍                                                                                                                        | 1.12GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  22%|██████████████████████████████████▏                                                                                                                         | 1.08GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  24%|██████████████████████████████████████                                                                                                                      | 4.46GB / 18.2GB,  727MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  23%|███████████████████████████████████▏                                                                                                                        | 1.12GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  35%|█████████████████████████████████████████████████████▊                                                                                                      | 1.17GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  23%|████████████████████████████████████▌                                                                                                                       | 1.15GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  23%|███████████████████████████████████▎                                                                                                                        | 1.12GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  25%|███████████████████████████████████████▎                                                                                                                    | 4.59GB / 18.2GB,  726MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  23%|████████████████████████████████████▏                                                                                                                       | 1.16GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  36%|███████████████████████████████████████████████████████▍                                                                                                    | 1.20GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  24%|█████████████████████████████████████▌                                                                                                                      | 1.18GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  23%|████████████████████████████████████▎                                                                                                                       | 1.15GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  26%|████████████████████████████████████████▍                                                                                                                   | 4.73GB / 18.2GB,  724MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  24%|█████████████████████████████████████▋                                                                                                                      | 1.21GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  37%|████████████████████████████████████████████████████████▉                                                                                                   | 1.23GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  25%|██████████████████████████████████████▋                                                                                                                     | 1.22GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  24%|█████████████████████████████████████▋                                                                                                                      | 1.19GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  27%|█████████████████████████████████████████▊                                                                                                                  | 4.89GB / 18.2GB,  726MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  25%|██████████████████████████████████████▊                                                                                                                     | 1.24GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  38%|██████████████████████████████████████████████████████████▌                                                                                                 | 1.27GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  26%|███████████████████████████████████████▉                                                                                                                    | 1.26GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  25%|██████████████████████████████████████▊                                                                                                                     | 1.22GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  28%|███████████████████████████████████████████                                                                                                                 | 5.03GB / 18.2GB,  726MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  26%|███████████████████████████████████████▊                                                                                                                    | 1.28GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  38%|████████████████████████████████████████████████████████████                                                                                                | 1.30GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  26%|█████████████████████████████████████████                                                                                                                   | 1.29GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  26%|███████████████████████████████████████▊                                                                                                                    | 1.26GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  28%|████████████████████████████████████████████▏                                                                                                               | 5.16GB / 18.2GB,  724MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  26%|████████████████████████████████████████▉                                                                                                                   | 1.31GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  39%|█████████████████████████████████████████████████████████████▌                                                                                              | 1.33GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  27%|██████████████████████████████████████████▏                                                                                                                 | 1.33GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  26%|█████████████████████████████████████████▏                                                                                                                  | 1.30GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  29%|█████████████████████████████████████████████▎                                                                                                              | 5.31GB / 18.2GB,  724MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  27%|█████████████████████████████████████████▉                                                                                                                  | 1.34GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  40%|███████████████████████████████████████████████████████████████▏                                                                                            | 1.37GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  28%|███████████████████████████████████████████▏                                                                                                                | 1.36GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  27%|██████████████████████████████████████████▏                                                                                                                 | 1.33GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  30%|██████████████████████████████████████████████▌                                                                                                             | 5.44GB / 18.2GB,  722MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  28%|██████████████████████████████████████████▉                                                                                                                 | 1.38GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  41%|████████████████████████████████████████████████████████████████▋                                                                                           | 1.40GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  28%|████████████████████████████████████████████▎                                                                                                               | 1.39GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  28%|███████████████████████████████████████████▎                                                                                                                | 1.37GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  31%|███████████████████████████████████████████████▋                                                                                                            | 5.57GB / 18.2GB,  721MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  28%|████████████████████████████████████████████                                                                                                                | 1.41GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  42%|██████████████████████████████████████████████████████████████████▎                                                                                         | 1.43GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  29%|█████████████████████████████████████████████▎                                                                                                              | 1.43GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  28%|████████████████████████████████████████████▎                                                                                                               | 1.40GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  31%|████████████████████████████████████████████████▊                                                                                                           | 5.71GB / 18.2GB,  720MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  29%|█████████████████████████████████████████████                                                                                                               | 1.44GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  43%|███████████████████████████████████████████████████████████████████▊                                                                                        | 1.47GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  30%|██████████████████████████████████████████████▍                                                                                                             | 1.46GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  29%|█████████████████████████████████████████████▍                                                                                                              | 1.43GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  32%|█████████████████████████████████████████████████▉                                                                                                          | 5.84GB / 18.2GB,  718MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  30%|██████████████████████████████████████████████▍                                                                                                             | 1.48GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  45%|█████████████████████████████████████████████████████████████████████▋                                                                                      | 1.51GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  30%|███████████████████████████████████████████████▍                                                                                                            | 1.49GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  30%|██████████████████████████████████████████████▍                                                                                                             | 1.47GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  33%|███████████████████████████████████████████████████▎                                                                                                        | 5.99GB / 18.2GB,  719MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  30%|███████████████████████████████████████████████▍                                                                                                            | 1.52GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  46%|███████████████████████████████████████████████████████████████████████▎                                                                                    | 1.54GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  31%|████████████████████████████████████████████████▌                                                                                                           | 1.53GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  30%|███████████████████████████████████████████████▌                                                                                                            | 1.50GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  34%|████████████████████████████████████████████████████▍                                                                                                       | 6.13GB / 18.2GB,  718MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  31%|████████████████████████████████████████████████▍                                                                                                           | 1.55GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  47%|████████████████████████████████████████████████████████████████████████▊                                                                                   | 1.58GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  32%|█████████████████████████████████████████████████▌                                                                                                          | 1.56GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  31%|████████████████████████████████████████████████▊                                                                                                           | 1.54GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  34%|█████████████████████████████████████████████████████▌                                                                                                      | 6.27GB / 18.2GB,  718MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  32%|█████████████████████████████████████████████████▊                                                                                                          | 1.59GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  48%|██████████████████████████████████████████████████████████████████████████▍                                                                                 | 1.61GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  32%|██████████████████████████████████████████████████▋                                                                                                         | 1.59GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  32%|█████████████████████████████████████████████████▉                                                                                                          | 1.58GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  35%|██████████████████████████████████████████████████████▊                                                                                                     | 6.41GB / 18.2GB,  718MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  32%|██████████████████████████████████████████████████▌                                                                                                         | 1.62GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  49%|████████████████████████████████████████████████████████████████████████████▎                                                                               | 1.65GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  33%|███████████████████████████████████████████████████▋                                                                                                        | 1.63GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  33%|██████████████████████████████████████████████████▉                                                                                                         | 1.61GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  36%|███████████████████████████████████████████████████████▉                                                                                                    | 6.55GB / 18.2GB,  717MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  33%|███████████████████████████████████████████████████▉                                                                                                        | 1.66GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  50%|█████████████████████████████████████████████████████████████████████████████▉                                                                              | 1.69GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  34%|████████████████████████████████████████████████████▊                                                                                                       | 1.66GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  33%|████████████████████████████████████████████████████                                                                                                        | 1.64GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  37%|█████████████████████████████████████████████████████████▏                                                                                                  | 6.69GB / 18.2GB,  717MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  34%|█████████████████████████████████████████████████████▏                                                                                                      | 1.70GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  51%|███████████████████████████████████████████████████████████████████████████████▊                                                                            | 1.73GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  35%|█████████████████████████████████████████████████████▊                                                                                                      | 1.69GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  34%|█████████████████████████████████████████████████████                                                                                                       | 1.68GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  37%|██████████████████████████████████████████████████████████▍                                                                                                 | 6.84GB / 18.2GB,  718MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  35%|██████████████████████████████████████████████████████▌                                                                                                     | 1.74GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  52%|█████████████████████████████████████████████████████████████████████████████████▊                                                                          | 1.77GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  35%|███████████████████████████████████████████████████████▏                                                                                                    | 1.74GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  35%|██████████████████████████████████████████████████████▏                                                                                                     | 1.71GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  38%|███████████████████████████████████████████████████████████▊                                                                                                | 7.00GB / 18.2GB,  719MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  36%|███████████████████████████████████████████████████████▌                                                                                                    | 1.78GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  53%|███████████████████████████████████████████████████████████████████████████████████▎                                                                        | 1.80GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  36%|████████████████████████████████████████████████████████▎                                                                                                   | 1.77GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  36%|███████████████████████████████████████████████████████▍                                                                                                    | 1.75GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  39%|█████████████████████████████████████████████████████████████                                                                                               | 7.14GB / 18.2GB,  719MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  36%|████████████████████████████████████████████████████████▉                                                                                                   | 1.82GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  55%|█████████████████████████████████████████████████████████████████████████████████████▏                                                                      | 1.85GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  37%|█████████████████████████████████████████████████████████▎                                                                                                  | 1.80GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  36%|████████████████████████████████████████████████████████▊                                                                                                   | 1.80GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  40%|██████████████████████████████████████████████████████████████▍                                                                                             | 7.30GB / 18.2GB,  721MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  37%|█████████████████████████████████████████████████████████▉                                                                                                  | 1.85GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  56%|██████████████████████████████████████████████████████████████████████████████████████▊                                                                     | 1.88GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  37%|██████████████████████████████████████████████████████████▍                                                                                                 | 1.84GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  37%|██████████████████████████████████████████████████████████▏                                                                                                 | 1.84GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  41%|███████████████████████████████████████████████████████████████▋                                                                                            | 7.45GB / 18.2GB,  721MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  38%|███████████████████████████████████████████████████████████▏                                                                                                | 1.90GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  57%|████████████████████████████████████████████████████████████████████████████████████████▋                                                                   | 1.92GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  38%|███████████████████████████████████████████████████████████▋                                                                                                | 1.88GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  38%|███████████████████████████████████████████████████████████▏                                                                                                | 1.87GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  42%|█████████████████████████████████████████████████████████████████                                                                                           | 7.60GB / 18.2GB,  722MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  39%|████████████████████████████████████████████████████████████▎                                                                                               | 1.93GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  58%|██████████████████████████████████████████████████████████████████████████████████████████▋                                                                 | 1.96GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  39%|████████████████████████████████████████████████████████████▊                                                                                               | 1.91GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  39%|████████████████████████████████████████████████████████████▌                                                                                               | 1.91GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  43%|██████████████████████████████████████████████████████████████████▎                                                                                         | 7.76GB / 18.2GB,  719MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  39%|█████████████████████████████████████████████████████████████▎                                                                                              | 1.96GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  59%|████████████████████████████████████████████████████████████████████████████████████████████▏                                                               | 2.00GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  40%|█████████████████████████████████████████████████████████████▊                                                                                              | 1.95GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  39%|█████████████████████████████████████████████████████████████▌                                                                                              | 1.95GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  43%|███████████████████████████████████████████████████████████████████▍                                                                                        | 7.89GB / 18.2GB,  717MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  40%|██████████████████████████████████████████████████████████████▉                                                                                             | 2.01GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  61%|██████████████████████████████████████████████████████████████████████████████████████████████▌                                                             | 2.05GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  41%|███████████████████████████████████████████████████████████████▏                                                                                            | 1.99GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  40%|██████████████████████████████████████████████████████████████▉                                                                                             | 1.99GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  44%|█████████████████████████████████████████████████████████████████████                                                                                       | 8.07GB / 18.2GB,  720MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  41%|███████████████████████████████████████████████████████████████▉                                                                                            | 2.05GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  62%|████████████████████████████████████████████████████████████████████████████████████████████████▍                                                           | 2.09GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  41%|████████████████████████████████████████████████████████████████▎                                                                                           | 2.02GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  41%|████████████████████████████████████████████████████████████████▎                                                                                           | 2.03GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  45%|██████████████████████████████████████████████████████████████████████▎                                                                                     | 8.23GB / 18.2GB,  721MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  42%|█████████████████████████████████████████████████████████████████▎                                                                                          | 2.09GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  63%|██████████████████████████████████████████████████████████████████████████████████████████████████                                                          | 2.12GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  42%|█████████████████████████████████████████████████████████████████▌                                                                                          | 2.06GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  42%|█████████████████████████████████████████████████████████████████▌                                                                                          | 2.07GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  46%|███████████████████████████████████████████████████████████████████████▋                                                                                    | 8.38GB / 18.2GB,  724MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  43%|██████████████████████████████████████████████████████████████████▌                                                                                         | 2.13GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  64%|███████████████████████████████████████████████████████████████████████████████████████████████████▌                                                        | 2.16GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  43%|██████████████████████████████████████████████████████████████████▋                                                                                         | 2.10GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  43%|██████████████████████████████████████████████████████████████████▋                                                                                         | 2.11GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  47%|████████████████████████████████████████████████████████████████████████▉                                                                                   | 8.53GB / 18.2GB,  725MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  43%|███████████████████████████████████████████████████████████████████▋                                                                                        | 2.16GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  65%|█████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                      | 2.19GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  44%|███████████████████████████████████████████████████████████████████▉                                                                                        | 2.14GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  43%|███████████████████████████████████████████████████████████████████▋                                                                                        | 2.14GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  48%|██████████████████████████████████████████████████████████████████████████▏                                                                                 | 8.67GB / 18.2GB,  725MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  44%|████████████████████████████████████████████████████████████████████▋                                                                                       | 2.20GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  66%|██████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                     | 2.22GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  44%|█████████████████████████████████████████████████████████████████████                                                                                       | 2.17GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  44%|████████████████████████████████████████████████████████████████████▊                                                                                       | 2.17GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  48%|███████████████████████████████████████████████████████████████████████████▎                                                                                | 8.80GB / 18.2GB,  723MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  46%|███████████████████████████████████████████████████████████████████████                                                                                     | 2.27GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  68%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                 | 2.30GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  46%|███████████████████████████████████████████████████████████████████████▏                                                                                    | 2.24GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  45%|██████████████████████████████████████████████████████████████████████▉                                                                                     | 2.24GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[A

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  46%|███████████████████████████████████████████████████████████████████████                                                                                     | 2.27GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  68%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                 | 2.31GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  46%|███████████████████████████████████████████████████████████████████████▏                                                                                    | 2.24GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  46%|███████████████████████████████████████████████████████████████████████▏                                                                                    | 2.25GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[A

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  46%|████████████████████████████████████████████████████████████████████████▎                                                                                   | 2.32GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  70%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                               | 2.36GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  46%|████████████████████████████████████████████████████████████████████████▌                                                                                   | 2.28GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  46%|████████████████████████████████████████████████████████████████████████▏                                                                                   | 2.28GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  51%|███████████████████████████████████████████████████████████████████████████████▎                                                                            | 9.27GB / 18.2GB,  729MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  47%|█████████████████████████████████████████████████████████████████████████▍                                                                                  | 2.35GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  71%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                             | 2.39GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  47%|█████████████████████████████████████████████████████████████████████████▊                                                                                  | 2.32GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  47%|█████████████████████████████████████████████████████████████████████████▌                                                                                  | 2.32GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  52%|████████████████████████████████████████████████████████████████████████████████▌                                                                           | 9.42GB / 18.2GB,  729MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  48%|██████████████████████████████████████████████████████████████████████████▋                                                                                 | 2.39GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  72%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                           | 2.44GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  48%|██████████████████████████████████████████████████████████████████████████▉                                                                                 | 2.36GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  48%|██████████████████████████████████████████████████████████████████████████▌                                                                                 | 2.36GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  53%|█████████████████████████████████████████████████████████████████████████████████▉                                                                          | 9.58GB / 18.2GB,  731MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  49%|███████████████████████████████████████████████████████████████████████████▊                                                                                | 2.42GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  73%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                         | 2.47GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  49%|████████████████████████████████████████████████████████████████████████████▎                                                                               | 2.40GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  49%|███████████████████████████████████████████████████████████████████████████▉                                                                                | 2.40GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  53%|███████████████████████████████████████████████████████████████████████████████████▏                                                                        | 9.74GB / 18.2GB,  732MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  49%|█████████████████████████████████████████████████████████████████████████████                                                                               | 2.47GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  75%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                       | 2.52GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  50%|█████████████████████████████████████████████████████████████████████████████▎                                                                              | 2.43GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  50%|█████████████████████████████████████████████████████████████████████████████▎                                                                              | 2.44GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  54%|████████████████████████████████████████████████████████████████████████████████████▌                                                                       | 9.89GB / 18.2GB,  734MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  50%|██████████████████████████████████████████████████████████████████████████████                                                                              | 2.50GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  76%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                     | 2.56GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  50%|██████████████████████████████████████████████████████████████████████████████▍                                                                             | 2.47GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  50%|██████████████████████████████████████████████████████████████████████████████▎                                                                             | 2.47GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  55%|█████████████████████████████████████████████████████████████████████████████████████▊                                                                      | 10.0GB / 18.2GB,  734MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  51%|███████████████████████████████████████████████████████████████████████████████▏                                                                            | 2.53GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  77%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                    | 2.59GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  51%|███████████████████████████████████████████████████████████████████████████████▍                                                                            | 2.50GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  51%|███████████████████████████████████████████████████████████████████████████████▍                                                                            | 2.51GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  56%|██████████████████████████████████████████████████████████████████████████████████████▉                                                                     | 10.2GB / 18.2GB,  734MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  51%|████████████████████████████████████████████████████████████████████████████████▏                                                                           | 2.57GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  78%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                  | 2.63GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  52%|████████████████████████████████████████████████████████████████████████████████▊                                                                           | 2.54GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  52%|████████████████████████████████████████████████████████████████████████████████▍                                                                           | 2.54GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  57%|████████████████████████████████████████████████████████████████████████████████████████▏                                                                   | 10.3GB / 18.2GB,  732MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  52%|█████████████████████████████████████████████████████████████████████████████████▎                                                                          | 2.60GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  79%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                 | 2.66GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  52%|█████████████████████████████████████████████████████████████████████████████████▊                                                                          | 2.58GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  52%|█████████████████████████████████████████████████████████████████████████████████▊                                                                          | 2.58GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  57%|█████████████████████████████████████████████████████████████████████████████████████████▍                                                                  | 10.5GB / 18.2GB,  731MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  53%|██████████████████████████████████████████████████████████████████████████████████▌                                                                         | 2.64GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                               | 2.69GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  53%|██████████████████████████████████████████████████████████████████████████████████▉                                                                         | 2.61GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  53%|███████████████████████████████████████████████████████████████████████████████████                                                                         | 2.63GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  58%|██████████████████████████████████████████████████████████████████████████████████████████▋                                                                 | 10.6GB / 18.2GB,  732MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  54%|███████████████████████████████████████████████████████████████████████████████████▌                                                                        | 2.68GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  81%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                              | 2.73GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  54%|███████████████████████████████████████████████████████████████████████████████████▉                                                                        | 2.64GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  54%|████████████████████████████████████████████████████████████████████████████████████▏                                                                       | 2.66GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  59%|███████████████████████████████████████████████████████████████████████████████████████████▊                                                                | 10.7GB / 18.2GB,  730MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  54%|████████████████████████████████████████████████████████████████████████████████████▋                                                                       | 2.71GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  82%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                            | 2.76GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  55%|█████████████████████████████████████████████████████████████████████████████████████                                                                       | 2.68GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  55%|█████████████████████████████████████████████████████████████████████████████████████▏                                                                      | 2.69GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  60%|████████████████████████████████████████████████████████████████████████████████████████████▉                                                               | 10.9GB / 18.2GB,  730MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  55%|█████████████████████████████████████████████████████████████████████████████████████▋                                                                      | 2.74GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                           | 2.79GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  55%|██████████████████████████████████████████████████████████████████████████████████████                                                                      | 2.71GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  55%|██████████████████████████████████████████████████████████████████████████████████████▎                                                                     | 2.73GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  60%|██████████████████████████████████████████████████████████████████████████████████████████████▏                                                             | 11.0GB / 18.2GB,  728MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  56%|██████████████████████████████████████████████████████████████████████████████████████▊                                                                     | 2.78GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  84%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                         | 2.83GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  56%|███████████████████████████████████████████████████████████████████████████████████████▏                                                                    | 2.74GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  56%|███████████████████████████████████████████████████████████████████████████████████████▎                                                                    | 2.76GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  61%|███████████████████████████████████████████████████████████████████████████████████████████████▎                                                            | 11.1GB / 18.2GB,  725MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  56%|███████████████████████████████████████████████████████████████████████████████████████▊                                                                    | 2.81GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  85%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                       | 2.86GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  57%|████████████████████████████████████████████████████████████████████████████████████████▎                                                                   | 2.78GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  57%|████████████████████████████████████████████████████████████████████████████████████████▍                                                                   | 2.79GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  62%|████████████████████████████████████████████████████████████████████████████████████████████████▍                                                           | 11.3GB / 18.2GB,  725MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  57%|████████████████████████████████████████████████████████████████████████████████████████▊                                                                   | 2.84GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  86%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                     | 2.91GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  57%|█████████████████████████████████████████████████████████████████████████████████████████▎                                                                  | 2.81GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  57%|█████████████████████████████████████████████████████████████████████████████████████████▍                                                                  | 2.83GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  63%|█████████████████████████████████████████████████████████████████████████████████████████████████▋                                                          | 11.4GB / 18.2GB,  727MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  58%|██████████████████████████████████████████████████████████████████████████████████████████▍                                                                 | 2.89GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  87%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                   | 2.95GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  58%|██████████████████████████████████████████████████████████████████████████████████████████▍                                                                 | 2.84GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  58%|██████████████████████████████████████████████████████████████████████████████████████████▊                                                                 | 2.87GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  64%|███████████████████████████████████████████████████████████████████████████████████████████████████▏                                                        | 11.6GB / 18.2GB,  729MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  59%|███████████████████████████████████████████████████████████████████████████████████████████▍                                                                | 2.93GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                  | 2.99GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  59%|███████████████████████████████████████████████████████████████████████████████████████████▋                                                                | 2.89GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  59%|███████████████████████████████████████████████████████████████████████████████████████████▊                                                                | 2.90GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  64%|████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                       | 11.7GB / 18.2GB,  728MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  59%|████████████████████████████████████████████████████████████████████████████████████████████▌                                                               | 2.96GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  89%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                | 3.02GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  59%|████████████████████████████████████████████████████████████████████████████████████████████▊                                                               | 2.92GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  60%|████████████████████████████████████████████████████████████████████████████████████████████▉                                                               | 2.94GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  65%|█████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                      | 11.9GB / 18.2GB,  727MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  60%|█████████████████████████████████████████████████████████████████████████████████████████████▌                                                              | 2.99GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  90%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████               | 3.05GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  60%|█████████████████████████████████████████████████████████████████████████████████████████████▊                                                              | 2.95GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  60%|█████████████████████████████████████████████████████████████████████████████████████████████▉                                                              | 2.97GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  66%|██████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                     | 12.0GB / 18.2GB,  727MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  61%|██████████████████████████████████████████████████████████████████████████████████████████████▉                                                             | 3.04GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌             | 3.09GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  61%|██████████████████████████████████████████████████████████████████████████████████████████████▉                                                             | 2.99GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  61%|███████████████████████████████████████████████████████████████████████████████████████████████                                                             | 3.00GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  67%|███████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                    | 12.2GB / 18.2GB,  728MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  62%|███████████████████████████████████████████████████████████████████████████████████████████████▉                                                            | 3.07GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  92%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏           | 3.12GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  62%|███████████████████████████████████████████████████████████████████████████████████████████████▉                                                            | 3.02GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  62%|████████████████████████████████████████████████████████████████████████████████████████████████                                                            | 3.04GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  67%|█████████████████████████████████████████████████████████████████████████████████████████████████████████                                                   | 12.3GB / 18.2GB,  725MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  62%|████████████████████████████████████████████████████████████████████████████████████████████████▉                                                           | 3.10GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  93%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋          | 3.15GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  62%|█████████████████████████████████████████████████████████████████████████████████████████████████                                                           | 3.05GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  62%|█████████████████████████████████████████████████████████████████████████████████████████████████▏                                                          | 3.07GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  68%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                 | 12.4GB / 18.2GB,  725MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  63%|██████████████████████████████████████████████████████████████████████████████████████████████████                                                          | 3.14GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  95%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋        | 3.20GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  63%|██████████████████████████████████████████████████████████████████████████████████████████████████                                                          | 3.09GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  63%|██████████████████████████████████████████████████████████████████████████████████████████████████▏                                                         | 3.10GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  69%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                | 12.6GB / 18.2GB,  725MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  64%|███████████████████████████████████████████████████████████████████████████████████████████████████                                                         | 3.17GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏      | 3.23GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  64%|███████████████████████████████████████████████████████████████████████████████████████████████████▏                                                        | 3.12GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  64%|███████████████████████████████████████████████████████████████████████████████████████████████████▎                                                        | 3.14GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  70%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                               | 12.7GB / 18.2GB,  725MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  64%|████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                       | 3.20GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋     | 3.26GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  64%|████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                       | 3.15GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  64%|████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                       | 3.17GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  70%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                              | 12.8GB / 18.2GB,  725MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  65%|█████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                      | 3.24GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  98%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎   | 3.30GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  65%|█████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                      | 3.20GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  65%|█████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                      | 3.21GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  71%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████                                             | 13.0GB / 18.2GB,  726MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  66%|██████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                     | 3.27GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors:  99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊  | 3.33GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  66%|██████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                     | 3.23GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  66%|██████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                     | 3.25GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  72%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                           | 13.1GB / 18.2GB,  726MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  66%|███████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                    | 3.31GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍| 3.36GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  66%|███████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                    | 3.26GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  67%|███████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                    | 3.28GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  73%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                          | 13.3GB / 18.2GB,  726MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  67%|████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                   | 3.34GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  67%|████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                   | 3.30GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  67%|████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                   | 3.31GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  73%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                         | 13.4GB / 18.2GB,  723MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  68%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                  | 3.37GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  68%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                  | 3.33GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  68%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                  | 3.35GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  74%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                        | 13.5GB / 18.2GB,  719MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  68%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                 | 3.41GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  69%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                | 3.37GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  69%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                 | 3.38GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  74%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                       | 13.6GB / 18.2GB,  717MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  69%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                | 3.45GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  69%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                               | 3.41GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  69%|████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                | 3.41GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  75%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                       | 13.7GB / 18.2GB,  713MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  70%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                               | 3.48GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  70%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                              | 3.44GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  70%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████                                               | 3.45GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  76%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                      | 13.8GB / 18.2GB,  709MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  70%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                              | 3.51GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  71%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                             | 3.47GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  71%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                             | 3.48GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  76%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                     | 13.9GB / 18.2GB,  705MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  71%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                             | 3.55GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  71%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                            | 3.51GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  71%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                            | 3.51GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  77%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                    | 14.0GB / 18.2GB,  700MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  72%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                            | 3.58GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  72%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                           | 3.54GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  72%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                           | 3.55GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  77%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                   | 14.1GB / 18.2GB,  695MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  72%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                           | 3.62GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  73%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                          | 3.57GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  73%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                          | 3.58GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  78%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                  | 14.2GB / 18.2GB,  690MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  73%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                          | 3.65GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  73%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                         | 3.61GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  73%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                         | 3.62GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  78%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                 | 14.3GB / 18.2GB,  685MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  74%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                         | 3.68GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  74%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                        | 3.64GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  74%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                        | 3.65GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  79%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                 | 14.4GB / 18.2GB,  681MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  74%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                       | 3.72GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  75%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                       | 3.67GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  75%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                       | 3.68GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  79%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                | 14.5GB / 18.2GB,  675MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  75%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                      | 3.75GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  76%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                      | 3.71GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  75%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                      | 3.72GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                               | 14.6GB / 18.2GB,  670MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  76%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                     | 3.78GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  76%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                     | 3.74GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  76%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                     | 3.76GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  81%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                              | 14.7GB / 18.2GB,  667MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  76%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                    | 3.82GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  77%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                   | 3.78GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  77%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                    | 3.79GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                             | 14.8GB / 18.2GB,  660MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  77%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                   | 3.85GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  78%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                  | 3.82GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  78%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                   | 3.83GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  82%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                            | 14.9GB / 18.2GB,  655MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  78%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                  | 3.88GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  78%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                 | 3.85GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  78%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                 | 3.86GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  82%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                           | 15.0GB / 18.2GB,  649MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  79%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                 | 3.93GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  79%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                | 3.88GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  79%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                | 3.89GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                          | 15.1GB / 18.2GB,  646MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  79%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                | 3.96GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                               | 3.93GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                               | 3.93GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  83%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                         | 15.2GB / 18.2GB,  643MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                               | 3.99GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                              | 3.97GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  81%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                              | 3.97GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  84%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                        | 15.3GB / 18.2GB,  641MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  81%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                              | 4.03GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  82%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                            | 4.00GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                             | 4.00GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  85%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                        | 15.4GB / 18.2GB,  634MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                             | 4.06GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  82%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                           | 4.03GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  82%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                            | 4.03GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  85%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                       | 15.5GB / 18.2GB,  631MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  82%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                            | 4.09GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                          | 4.07GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  83%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                           | 4.07GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  86%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                      | 15.6GB / 18.2GB,  625MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                          | 4.14GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  84%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                         | 4.10GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                          | 4.10GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  86%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                     | 15.8GB / 18.2GB,  621MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  84%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                         | 4.17GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  84%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                        | 4.14GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  84%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                         | 4.14GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  87%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                    | 15.9GB / 18.2GB,  615MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  84%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                        | 4.21GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  85%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                       | 4.17GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  85%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                        | 4.17GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                   | 16.0GB / 18.2GB,  611MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  85%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                       | 4.24GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  86%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                      | 4.20GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  85%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                       | 4.20GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                  | 16.1GB / 18.2GB,  605MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  86%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                      | 4.28GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  86%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                     | 4.24GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  86%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                     | 4.24GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                 | 16.2GB / 18.2GB,  602MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  86%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                     | 4.31GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  87%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                    | 4.27GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  87%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                    | 4.28GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  89%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                | 16.3GB / 18.2GB,  598MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  87%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                    | 4.35GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                   | 4.31GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  87%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                   | 4.31GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                | 16.4GB / 18.2GB,  595MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                   | 4.38GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                  | 4.35GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                  | 4.35GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉               | 16.5GB / 18.2GB,  591MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                  | 4.41GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  89%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                | 4.38GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                 | 4.38GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  91%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊              | 16.6GB / 18.2GB,  586MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  89%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                | 4.45GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏               | 4.41GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                | 4.41GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋             | 16.7GB / 18.2GB,  584MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏               | 4.49GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  91%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌              | 4.45GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉               | 4.45GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  92%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊            | 16.8GB / 18.2GB,  582MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  91%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎              | 4.52GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋             | 4.49GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████              | 4.49GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  93%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌           | 16.9GB / 18.2GB,  579MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎             | 4.55GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  92%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋            | 4.52GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  92%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████             | 4.52GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  93%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍          | 17.0GB / 18.2GB,  575MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  92%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍            | 4.59GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  93%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊           | 4.55GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  92%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏           | 4.56GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  94%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎         | 17.1GB / 18.2GB,  572MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  93%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋           | 4.63GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  93%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊          | 4.59GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  93%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏          | 4.59GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  94%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎        | 17.2GB / 18.2GB,  568MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  93%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋          | 4.66GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  94%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉         | 4.62GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  94%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌         | 4.63GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  95%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏       | 17.3GB / 18.2GB,  562MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  94%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊         | 4.70GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  95%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉        | 4.66GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  95%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌        | 4.66GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████       | 17.4GB / 18.2GB,  558MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  95%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊        | 4.73GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎      | 4.70GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  95%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋       | 4.70GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉      | 17.5GB / 18.2GB,  556MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  95%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉       | 4.76GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  96%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍     | 4.73GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋      | 4.73GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊     | 17.6GB / 18.2GB,  552MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉      | 4.80GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  97%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍    | 4.76GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊     | 4.76GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  97%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋    | 17.7GB / 18.2GB,  548MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉     | 4.83GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  98%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌   | 4.80GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  97%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊    | 4.80GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  98%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌   | 17.8GB / 18.2GB,  545MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  97%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████    | 4.87GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌  | 4.83GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  98%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉   | 4.83GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍  | 17.9GB / 18.2GB,  542MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████   | 4.90GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors:  99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋ | 4.87GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉  | 4.87GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 18.0GB / 18.2GB,  537MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors:  99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 4.94GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋| 4.90GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors:  99%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ | 4.90GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      : 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏| 18.2GB / 18.2GB,  535MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍| 4.97GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.91GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.93GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (6 / 7)      : 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊| 18.2GB / 18.2GB,  529MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.99GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.91GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.93GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (7 / 7)      : 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18.2GB / 18.2GB,  516MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.99GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.91GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.93GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[A

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.99GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.91GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.93GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[A

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.99GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.91GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.93GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[A

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.99GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.91GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.93GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[A

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.99GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.91GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.93GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[A

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.99GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.91GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.93GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[A

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.99GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.91GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.93GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[A

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.99GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.91GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.93GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[A

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.99GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.91GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.93GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[A

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.99GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.91GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.93GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[A

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.99GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.91GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.93GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[A

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...0001-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.99GB / 4.99GB            [A[A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A


  ...0003-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.91GB / 4.91GB            [A[A[A[A[A[A


  ...0002-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.93GB / 4.93GB            [A[A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A[AProcessing Files (7 / 7)      : 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18.2GB / 18.2GB,  396MB/s  
New Data Upload               : |                                                                                                                                                                |  0.00B /  0.00B,  0.00B/s  
  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            
  ...0001-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.99GB / 4.99GB            
  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            
  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            
  ...0003-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.91GB / 4.91GB            
  ...0002-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.93GB / 4.93GB            
  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            
[2026-01-06 02:56:04,271] [INFO] [axolotl.train.save_trained_model:351] [PID:13318] Model successfully saved to stage1
[2026-01-06 02:56:04,531] [INFO] [axolotl.core.trainers.base._save:722] [PID:13318] Saving model checkpoint to stage1
Processing Files (0 / 0)      : |                                                                                                                                                                |  0.00B /  0.00B            
New Data Upload               : |                                                                                                                                                                |  0.00B /  0.00B            [A

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:   1%|██                                                                                                                                                          | 67.1MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:   1%|█▊                                                                                                                                                          | 58.6MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:   2%|██▎                                                                                                                                                         | 75.5MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:   2%|███                                                                                                                                                         | 67.0MB / 3.38GB            [A[A[A[A[A[A[A[A

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:   1%|██                                                                                                                                                          | 67.1MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:   1%|█▊                                                                                                                                                          | 58.6MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:   2%|██▎                                                                                                                                                         | 75.5MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:   2%|███                                                                                                                                                         | 67.0MB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :   2%|██▌                                                                                                                                                         |  306MB / 18.2GB,   ???B/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:   2%|███▋                                                                                                                                                        |  117MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:   2%|███▍                                                                                                                                                        |  109MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:   3%|███▉                                                                                                                                                        |  126MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:   3%|█████▍                                                                                                                                                      |  117MB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :   3%|████▎                                                                                                                                                       |  508MB / 18.2GB, 1.01GB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:   3%|█████                                                                                                                                                       |  159MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:   3%|████▊                                                                                                                                                       |  151MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:   3%|████▉                                                                                                                                                       |  159MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:   5%|███████▎                                                                                                                                                    |  159MB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :   4%|█████▋                                                                                                                                                      |  667MB / 18.2GB,  902MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:   4%|██████                                                                                                                                                      |  193MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:   4%|██████▋                                                                                                                                                     |  210MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:   4%|██████▌                                                                                                                                                     |  210MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:   6%|█████████▎                                                                                                                                                  |  201MB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :   5%|███████▎                                                                                                                                                    |  852MB / 18.2GB,  908MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:   5%|███████▏                                                                                                                                                    |  226MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:   5%|███████▋                                                                                                                                                    |  243MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:   5%|███████▊                                                                                                                                                    |  252MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:   7%|███████████▏                                                                                                                                                |  243MB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :   5%|████████▌                                                                                                                                                   | 1.00GB / 18.2GB,  870MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:   5%|████████▏                                                                                                                                                   |  260MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:   6%|████████▊                                                                                                                                                   |  277MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:   6%|█████████▏                                                                                                                                                  |  293MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:   8%|█████████████▏                                                                                                                                              |  285MB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :   6%|█████████▊                                                                                                                                                  | 1.15GB / 18.2GB,  847MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:   6%|█████████▌                                                                                                                                                  |  302MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:   6%|██████████▏                                                                                                                                                 |  319MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:   7%|██████████▏                                                                                                                                                 |  327MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:   9%|██████████████▋                                                                                                                                             |  319MB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :   7%|███████████▏                                                                                                                                                | 1.30GB / 18.2GB,  832MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:   7%|██████████▌                                                                                                                                                 |  335MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:   7%|███████████▏                                                                                                                                                |  352MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:   7%|███████████▎                                                                                                                                                |  361MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  10%|████████████████▎                                                                                                                                           |  352MB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :   8%|████████████▎                                                                                                                                               | 1.44GB / 18.2GB,  809MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:   8%|███████████▉                                                                                                                                                |  377MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:   8%|████████████▎                                                                                                                                               |  386MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:   8%|████████████▎                                                                                                                                               |  394MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  12%|██████████████████▏                                                                                                                                         |  394MB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :   9%|█████████████▌                                                                                                                                              | 1.59GB / 18.2GB,  802MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:   9%|█████████████▎                                                                                                                                              |  419MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:   9%|█████████████▌                                                                                                                                              |  428MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:   9%|█████████████▎                                                                                                                                              |  428MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  13%|███████████████████▊                                                                                                                                        |  428MB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  10%|██████████████▉                                                                                                                                             | 1.74GB / 18.2GB,  797MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:   9%|██████████████▎                                                                                                                                             |  453MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:   9%|██████████████▋                                                                                                                                             |  461MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:   9%|██████████████▍                                                                                                                                             |  461MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  14%|█████████████████████▎                                                                                                                                      |  461MB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  10%|████████████████                                                                                                                                            | 1.88GB / 18.2GB,  785MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  10%|███████████████▍                                                                                                                                            |  487MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  10%|███████████████▋                                                                                                                                            |  495MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  10%|███████████████▋                                                                                                                                            |  503MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  15%|███████████████████████▏                                                                                                                                    |  503MB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  11%|█████████████████▎                                                                                                                                          | 2.03GB / 18.2GB,  782MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  11%|████████████████▍                                                                                                                                           |  520MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  11%|████████████████▊                                                                                                                                           |  528MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  11%|████████████████▊                                                                                                                                           |  537MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  16%|████████████████████████▊                                                                                                                                   |  537MB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  12%|██████████████████▍                                                                                                                                         | 2.16GB / 18.2GB,  772MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  11%|█████████████████▌                                                                                                                                          |  554MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  11%|█████████████████▊                                                                                                                                          |  562MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  11%|█████████████████▊                                                                                                                                          |  570MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  17%|██████████████████████████▎                                                                                                                                 |  570MB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  13%|███████████████████▌                                                                                                                                        | 2.29GB / 18.2GB,  765MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  12%|██████████████████▌                                                                                                                                         |  587MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  12%|██████████████████▉                                                                                                                                         |  596MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  12%|██████████████████▊                                                                                                                                         |  604MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  18%|███████████████████████████▉                                                                                                                                |  604MB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  13%|████████████████████▊                                                                                                                                       | 2.43GB / 18.2GB,  758MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  13%|███████████████████▋                                                                                                                                        |  621MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  13%|███████████████████▉                                                                                                                                        |  629MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  13%|███████████████████▉                                                                                                                                        |  638MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  19%|█████████████████████████████▍                                                                                                                              |  638MB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  14%|█████████████████████▉                                                                                                                                      | 2.56GB / 18.2GB,  752MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  13%|████████████████████▋                                                                                                                                       |  654MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  14%|█████████████████████                                                                                                                                       |  663MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  13%|████████████████████▉                                                                                                                                       |  671MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  20%|██████████████████████████████▉                                                                                                                             |  671MB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  15%|███████████████████████                                                                                                                                     | 2.70GB / 18.2GB,  747MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  14%|██████████████████████                                                                                                                                      |  696MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  15%|██████████████████████▋                                                                                                                                     |  713MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  14%|██████████████████████▌                                                                                                                                     |  721MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  21%|████████████████████████████████▌                                                                                                                           |  705MB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  16%|████████████████████████▌                                                                                                                                   | 2.87GB / 18.2GB,  755MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  15%|███████████████████████                                                                                                                                     |  730MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  15%|███████████████████████▋                                                                                                                                    |  746MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  15%|███████████████████████▊                                                                                                                                    |  763MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  22%|██████████████████████████████████                                                                                                                          |  738MB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  17%|█████████████████████████▊                                                                                                                                  | 3.02GB / 18.2GB,  753MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  15%|████████████████████████▏                                                                                                                                   |  763MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  16%|████████████████████████▊                                                                                                                                   |  780MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  16%|████████████████████████▉                                                                                                                                   |  797MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  23%|███████████████████████████████████▋                                                                                                                        |  772MB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  17%|██████████████████████████▉                                                                                                                                 | 3.15GB / 18.2GB,  748MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  16%|█████████████████████████▏                                                                                                                                  |  797MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  17%|█████████████████████████▊                                                                                                                                  |  814MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  17%|█████████████████████████▉                                                                                                                                  |  830MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  24%|█████████████████████████████████████▏                                                                                                                      |  805MB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  18%|████████████████████████████                                                                                                                                | 3.28GB / 18.2GB,  745MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  17%|██████████████████████████▎                                                                                                                                 |  830MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  17%|██████████████████████████▉                                                                                                                                 |  847MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  17%|██████████████████████████▉                                                                                                                                 |  864MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  25%|██████████████████████████████████████▋                                                                                                                     |  839MB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  19%|█████████████████████████████▏                                                                                                                              | 3.42GB / 18.2GB,  741MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  18%|███████████████████████████▎                                                                                                                                |  864MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  18%|███████████████████████████▉                                                                                                                                |  881MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  18%|████████████████████████████                                                                                                                                |  898MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  26%|████████████████████████████████████████▋                                                                                                                   |  881MB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  20%|██████████████████████████████▍                                                                                                                             | 3.56GB / 18.2GB,  740MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  18%|████████████████████████████▋                                                                                                                               |  906MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  19%|█████████████████████████████                                                                                                                               |  914MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  19%|█████████████████████████████                                                                                                                               |  931MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  27%|██████████████████████████████████████████▏                                                                                                                 |  914MB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  20%|███████████████████████████████▋                                                                                                                            | 3.70GB / 18.2GB,  739MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  19%|█████████████████████████████▋                                                                                                                              |  940MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  19%|██████████████████████████████▏                                                                                                                             |  948MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  19%|██████████████████████████████▏                                                                                                                             |  965MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  28%|███████████████████████████████████████████▊                                                                                                                |  948MB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  21%|████████████████████████████████▊                                                                                                                           | 3.84GB / 18.2GB,  736MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  20%|██████████████████████████████▊                                                                                                                             |  973MB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  20%|███████████████████████████████▏                                                                                                                            |  981MB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  20%|███████████████████████████████▏                                                                                                                            |  998MB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  29%|█████████████████████████████████████████████▎                                                                                                              |  981MB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  22%|█████████████████████████████████▉                                                                                                                          | 3.97GB / 18.2GB,  733MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  20%|███████████████████████████████▊                                                                                                                            | 1.01GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  21%|████████████████████████████████▎                                                                                                                           | 1.01GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  21%|████████████████████████████████▏                                                                                                                           | 1.03GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  30%|██████████████████████████████████████████████▉                                                                                                             | 1.02GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  23%|███████████████████████████████████                                                                                                                         | 4.11GB / 18.2GB,  731MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  21%|████████████████████████████████▉                                                                                                                           | 1.04GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  22%|█████████████████████████████████▌                                                                                                                          | 1.06GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  22%|█████████████████████████████████▌                                                                                                                          | 1.07GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  31%|████████████████████████████████████████████████▍                                                                                                           | 1.05GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  23%|████████████████████████████████████▍                                                                                                                       | 4.26GB / 18.2GB,  732MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  22%|█████████████████████████████████▉                                                                                                                          | 1.07GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  22%|██████████████████████████████████▋                                                                                                                         | 1.09GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  22%|██████████████████████████████████▌                                                                                                                         | 1.11GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  32%|█████████████████████████████████████████████████▉                                                                                                          | 1.08GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  24%|█████████████████████████████████████▌                                                                                                                      | 4.39GB / 18.2GB,  730MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  22%|███████████████████████████████████                                                                                                                         | 1.11GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  23%|███████████████████████████████████▋                                                                                                                        | 1.12GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  23%|███████████████████████████████████▋                                                                                                                        | 1.14GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  33%|███████████████████████████████████████████████████▌                                                                                                        | 1.12GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  25%|██████████████████████████████████████▋                                                                                                                     | 4.53GB / 18.2GB,  727MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  23%|████████████████████████████████████                                                                                                                        | 1.14GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  24%|████████████████████████████████████▊                                                                                                                       | 1.16GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  24%|████████████████████████████████████▋                                                                                                                       | 1.17GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  34%|█████████████████████████████████████████████████████                                                                                                       | 1.15GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  26%|███████████████████████████████████████▊                                                                                                                    | 4.66GB / 18.2GB,  726MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  24%|█████████████████████████████████████▏                                                                                                                      | 1.17GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  24%|█████████████████████████████████████▊                                                                                                                      | 1.19GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  24%|█████████████████████████████████████▋                                                                                                                      | 1.21GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  35%|██████████████████████████████████████████████████████▋                                                                                                     | 1.18GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  26%|████████████████████████████████████████▉                                                                                                                   | 4.79GB / 18.2GB,  724MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  25%|██████████████████████████████████████▏                                                                                                                     | 1.21GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  25%|██████████████████████████████████████▉                                                                                                                     | 1.22GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  25%|██████████████████████████████████████▊                                                                                                                     | 1.24GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  36%|████████████████████████████████████████████████████████▌                                                                                                   | 1.22GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  27%|██████████████████████████████████████████▏                                                                                                                 | 4.94GB / 18.2GB,  724MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  26%|███████████████████████████████████████▊                                                                                                                    | 1.26GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  26%|████████████████████████████████████████▊                                                                                                                   | 1.28GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  26%|████████████████████████████████████████                                                                                                                    | 1.28GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  38%|██████████████████████████████████████████████████████████▉                                                                                                 | 1.28GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  28%|███████████████████████████████████████████▉                                                                                                                | 5.14GB / 18.2GB,  732MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  27%|█████████████████████████████████████████▍                                                                                                                  | 1.31GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  27%|██████████████████████████████████████████▏                                                                                                                 | 1.33GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  27%|█████████████████████████████████████████▋                                                                                                                  | 1.33GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  39%|████████████████████████████████████████████████████████████▊                                                                                               | 1.32GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  29%|█████████████████████████████████████████████▌                                                                                                              | 5.32GB / 18.2GB,  738MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  27%|██████████████████████████████████████████▋                                                                                                                 | 1.35GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  28%|███████████████████████████████████████████▏                                                                                                                | 1.36GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  28%|███████████████████████████████████████████▏                                                                                                                | 1.38GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  40%|██████████████████████████████████████████████████████████████▊                                                                                             | 1.36GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  30%|██████████████████████████████████████████████▉                                                                                                             | 5.49GB / 18.2GB,  741MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  28%|███████████████████████████████████████████▊                                                                                                                | 1.38GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  29%|████████████████████████████████████████████▌                                                                                                               | 1.40GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  29%|█████████████████████████████████████████████                                                                                                               | 1.44GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  41%|████████████████████████████████████████████████████████████████▋                                                                                           | 1.40GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  31%|████████████████████████████████████████████████▍                                                                                                           | 5.67GB / 18.2GB,  744MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  29%|████████████████████████████████████████████▊                                                                                                               | 1.42GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  29%|█████████████████████████████████████████████▌                                                                                                              | 1.43GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  30%|██████████████████████████████████████████████▏                                                                                                             | 1.48GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  42%|██████████████████████████████████████████████████████████████████▎                                                                                         | 1.43GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  32%|█████████████████████████████████████████████████▌                                                                                                          | 5.80GB / 18.2GB,  742MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  29%|█████████████████████████████████████████████▉                                                                                                              | 1.45GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  30%|██████████████████████████████████████████████▋                                                                                                             | 1.47GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  30%|███████████████████████████████████████████████▏                                                                                                            | 1.51GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  43%|███████████████████████████████████████████████████████████████████▊                                                                                        | 1.47GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  33%|██████████████████████████████████████████████████▋                                                                                                         | 5.94GB / 18.2GB,  741MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  30%|██████████████████████████████████████████████▉                                                                                                             | 1.48GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  31%|███████████████████████████████████████████████▋                                                                                                            | 1.50GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  31%|████████████████████████████████████████████████▍                                                                                                           | 1.55GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  44%|█████████████████████████████████████████████████████████████████████▎                                                                                      | 1.50GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  33%|███████████████████████████████████████████████████▉                                                                                                        | 6.08GB / 18.2GB,  740MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  31%|████████████████████████████████████████████████▌                                                                                                           | 1.54GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  31%|█████████████████████████████████████████████████                                                                                                           | 1.54GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  32%|██████████████████████████████████████████████████                                                                                                          | 1.60GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  46%|███████████████████████████████████████████████████████████████████████▋                                                                                    | 1.55GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  34%|█████████████████████████████████████████████████████▌                                                                                                      | 6.27GB / 18.2GB,  745MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  32%|█████████████████████████████████████████████████▋                                                                                                          | 1.57GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  32%|██████████████████████████████████████████████████▍                                                                                                         | 1.59GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  33%|███████████████████████████████████████████████████▍                                                                                                        | 1.64GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  47%|██████████████████████████████████████████████████████████████████████████                                                                                  | 1.60GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  35%|███████████████████████████████████████████████████████                                                                                                     | 6.44GB / 18.2GB,  748MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  33%|██████████████████████████████████████████████████▋                                                                                                         | 1.60GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  33%|███████████████████████████████████████████████████▍                                                                                                        | 1.62GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  34%|████████████████████████████████████████████████████▍                                                                                                       | 1.68GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  49%|███████████████████████████████████████████████████████████████████████████▉                                                                                | 1.64GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  36%|████████████████████████████████████████████████████████▎                                                                                                   | 6.58GB / 18.2GB,  747MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  33%|███████████████████████████████████████████████████▊                                                                                                        | 1.64GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  34%|████████████████████████████████████████████████████▌                                                                                                       | 1.65GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  34%|█████████████████████████████████████████████████████▍                                                                                                      | 1.71GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  50%|█████████████████████████████████████████████████████████████████████████████▍                                                                              | 1.68GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  37%|█████████████████████████████████████████████████████████▍                                                                                                  | 6.72GB / 18.2GB,  745MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  34%|████████████████████████████████████████████████████▊                                                                                                       | 1.67GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  35%|█████████████████████████████████████████████████████▊                                                                                                      | 1.69GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  35%|██████████████████████████████████████████████████████▌                                                                                                     | 1.74GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  51%|███████████████████████████████████████████████████████████████████████████████                                                                             | 1.71GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  38%|██████████████████████████████████████████████████████████▋                                                                                                 | 6.86GB / 18.2GB,  744MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  35%|█████████████████████████████████████████████████████▉                                                                                                      | 1.70GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  35%|██████████████████████████████████████████████████████▉                                                                                                     | 1.73GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  36%|███████████████████████████████████████████████████████▌                                                                                                    | 1.78GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  52%|████████████████████████████████████████████████████████████████████████████████▌                                                                           | 1.74GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  38%|███████████████████████████████████████████████████████████▊                                                                                                | 6.99GB / 18.2GB,  743MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  35%|██████████████████████████████████████████████████████▉                                                                                                     | 1.74GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  36%|███████████████████████████████████████████████████████▉                                                                                                    | 1.76GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  36%|████████████████████████████████████████████████████████▌                                                                                                   | 1.81GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  53%|██████████████████████████████████████████████████████████████████████████████████▏                                                                         | 1.78GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  39%|████████████████████████████████████████████████████████████▉                                                                                               | 7.13GB / 18.2GB,  741MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  36%|████████████████████████████████████████████████████████▎                                                                                                   | 1.78GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  37%|█████████████████████████████████████████████████████████                                                                                                   | 1.80GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  37%|█████████████████████████████████████████████████████████▋                                                                                                  | 1.85GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  54%|███████████████████████████████████████████████████████████████████████████████████▋                                                                        | 1.81GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  40%|██████████████████████████████████████████████████████████████▏                                                                                             | 7.27GB / 18.2GB,  741MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  37%|█████████████████████████████████████████████████████████▎                                                                                                  | 1.81GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  37%|██████████████████████████████████████████████████████████▏                                                                                                 | 1.83GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  38%|██████████████████████████████████████████████████████████▋                                                                                                 | 1.88GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  55%|██████████████████████████████████████████████████████████████████████████████████████                                                                      | 1.86GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  41%|███████████████████████████████████████████████████████████████▍                                                                                            | 7.42GB / 18.2GB,  741MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  38%|██████████████████████████████████████████████████████████▋                                                                                                 | 1.85GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  38%|███████████████████████████████████████████████████████████▋                                                                                                | 1.88GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  39%|████████████████████████████████████████████████████████████▌                                                                                               | 1.94GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  56%|███████████████████████████████████████████████████████████████████████████████████████▌                                                                    | 1.90GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  42%|█████████████████████████████████████████████████████████████████                                                                                           | 7.60GB / 18.2GB,  745MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  38%|███████████████████████████████████████████████████████████▋                                                                                                | 1.89GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  39%|█████████████████████████████████████████████████████████████▎                                                                                              | 1.93GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  40%|█████████████████████████████████████████████████████████████▊                                                                                              | 1.98GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  57%|█████████████████████████████████████████████████████████████████████████████████████████▌                                                                  | 1.94GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  43%|██████████████████████████████████████████████████████████████████▍                                                                                         | 7.77GB / 18.2GB,  747MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  39%|█████████████████████████████████████████████████████████████                                                                                               | 1.93GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  40%|██████████████████████████████████████████████████████████████▋                                                                                             | 1.97GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  40%|██████████████████████████████████████████████████████████████▉                                                                                             | 2.01GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  59%|███████████████████████████████████████████████████████████████████████████████████████████▍                                                                | 1.98GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  43%|███████████████████████████████████████████████████████████████████▊                                                                                        | 7.93GB / 18.2GB,  748MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  40%|██████████████████████████████████████████████████████████████▏                                                                                             | 1.96GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  41%|███████████████████████████████████████████████████████████████▉                                                                                            | 2.01GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  41%|███████████████████████████████████████████████████████████████▉                                                                                            | 2.05GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  60%|████████████████████████████████████████████████████████████████████████████████████████████▉                                                               | 2.01GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  44%|█████████████████████████████████████████████████████████████████████                                                                                       | 8.07GB / 18.2GB,  742MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  41%|███████████████████████████████████████████████████████████████▏                                                                                            | 2.00GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  42%|█████████████████████████████████████████████████████████████████                                                                                           | 2.05GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  42%|█████████████████████████████████████████████████████████████████                                                                                           | 2.08GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  61%|██████████████████████████████████████████████████████████████████████████████████████████████▉                                                             | 2.06GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  45%|██████████████████████████████████████████████████████████████████████▎                                                                                     | 8.22GB / 18.2GB,  740MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  41%|████████████████████████████████████████████████████████████████▎                                                                                           | 2.03GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  42%|██████████████████████████████████████████████████████████████████▏                                                                                         | 2.08GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  42%|██████████████████████████████████████████████████████████████████                                                                                          | 2.11GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  62%|████████████████████████████████████████████████████████████████████████████████████████████████▍                                                           | 2.09GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  46%|███████████████████████████████████████████████████████████████████████▍                                                                                    | 8.35GB / 18.2GB,  735MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  42%|█████████████████████████████████████████████████████████████████▎                                                                                          | 2.06GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  43%|███████████████████████████████████████████████████████████████████▏                                                                                        | 2.11GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  43%|███████████████████████████████████████████████████████████████████▎                                                                                        | 2.16GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  63%|██████████████████████████████████████████████████████████████████████████████████████████████████                                                          | 2.12GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  47%|████████████████████████████████████████████████████████████████████████▋                                                                                   | 8.49GB / 18.2GB,  734MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  43%|██████████████████████████████████████████████████████████████████▎                                                                                         | 2.10GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  44%|████████████████████████████████████████████████████████████████████▎                                                                                       | 2.15GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  44%|████████████████████████████████████████████████████████████████████▍                                                                                       | 2.19GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  64%|███████████████████████████████████████████████████████████████████████████████████████████████████▌                                                        | 2.16GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  47%|█████████████████████████████████████████████████████████████████████████▊                                                                                  | 8.63GB / 18.2GB,  733MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  43%|███████████████████████████████████████████████████████████████████▍                                                                                        | 2.13GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  44%|█████████████████████████████████████████████████████████████████████▎                                                                                      | 2.18GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  45%|█████████████████████████████████████████████████████████████████████▍                                                                                      | 2.22GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  65%|█████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                      | 2.19GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  48%|██████████████████████████████████████████████████████████████████████████▉                                                                                 | 8.76GB / 18.2GB,  731MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  44%|████████████████████████████████████████████████████████████████████▍                                                                                       | 2.16GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  45%|██████████████████████████████████████████████████████████████████████▍                                                                                     | 2.21GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  45%|██████████████████████████████████████████████████████████████████████▌                                                                                     | 2.26GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  66%|██████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                     | 2.22GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  49%|████████████████████████████████████████████████████████████████████████████                                                                                | 8.90GB / 18.2GB,  731MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  45%|█████████████████████████████████████████████████████████████████████▌                                                                                      | 2.20GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  46%|███████████████████████████████████████████████████████████████████████▍                                                                                    | 2.25GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  46%|███████████████████████████████████████████████████████████████████████▌                                                                                    | 2.29GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  67%|████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                   | 2.26GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  49%|█████████████████████████████████████████████████████████████████████████████▏                                                                              | 9.03GB / 18.2GB,  729MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  45%|██████████████████████████████████████████████████████████████████████▌                                                                                     | 2.23GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  46%|████████████████████████████████████████████████████████████████████████▌                                                                                   | 2.28GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  47%|████████████████████████████████████████████████████████████████████████▌                                                                                   | 2.32GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  68%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                  | 2.29GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  50%|██████████████████████████████████████████████████████████████████████████████▎                                                                             | 9.16GB / 18.2GB,  728MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  46%|███████████████████████████████████████████████████████████████████████▋                                                                                    | 2.26GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  47%|█████████████████████████████████████████████████████████████████████████▌                                                                                  | 2.32GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  47%|█████████████████████████████████████████████████████████████████████████▋                                                                                  | 2.36GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  69%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                | 2.32GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  51%|███████████████████████████████████████████████████████████████████████████████▌                                                                            | 9.30GB / 18.2GB,  728MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  47%|████████████████████████████████████████████████████████████████████████▋                                                                                   | 2.30GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  48%|██████████████████████████████████████████████████████████████████████████▋                                                                                 | 2.35GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  48%|██████████████████████████████████████████████████████████████████████████▋                                                                                 | 2.39GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  70%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                               | 2.36GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  52%|████████████████████████████████████████████████████████████████████████████████▋                                                                           | 9.43GB / 18.2GB,  726MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  47%|█████████████████████████████████████████████████████████████████████████▊                                                                                  | 2.33GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  49%|███████████████████████████████████████████████████████████████████████████▋                                                                                | 2.38GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  49%|███████████████████████████████████████████████████████████████████████████▊                                                                                | 2.42GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  71%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                             | 2.39GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  52%|█████████████████████████████████████████████████████████████████████████████████▊                                                                          | 9.57GB / 18.2GB,  726MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  48%|██████████████████████████████████████████████████████████████████████████▊                                                                                 | 2.37GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  49%|████████████████████████████████████████████████████████████████████████████▊                                                                               | 2.42GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  49%|████████████████████████████████████████████████████████████████████████████▊                                                                               | 2.46GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  72%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                            | 2.42GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  53%|██████████████████████████████████████████████████████████████████████████████████▉                                                                         | 9.70GB / 18.2GB,  726MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  49%|███████████████████████████████████████████████████████████████████████████▉                                                                                | 2.40GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  50%|██████████████████████████████████████████████████████████████████████████████▍                                                                             | 2.47GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  50%|█████████████████████████████████████████████████████████████████████████████▊                                                                              | 2.49GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  73%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                          | 2.47GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  54%|████████████████████████████████████████████████████████████████████████████████████▎                                                                       | 9.86GB / 18.2GB,  729MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  50%|█████████████████████████████████████████████████████████████████████████████▎                                                                              | 2.44GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  51%|███████████████████████████████████████████████████████████████████████████████▋                                                                            | 2.51GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  51%|███████████████████████████████████████████████████████████████████████████████▍                                                                            | 2.54GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  74%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                        | 2.51GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  55%|█████████████████████████████████████████████████████████████████████████████████████▊                                                                      | 10.0GB / 18.2GB,  733MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  50%|██████████████████████████████████████████████████████████████████████████████▌                                                                             | 2.48GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  52%|████████████████████████████████████████████████████████████████████████████████▊                                                                           | 2.54GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  52%|████████████████████████████████████████████████████████████████████████████████▋                                                                           | 2.58GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  75%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                      | 2.54GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  56%|███████████████████████████████████████████████████████████████████████████████████████                                                                     | 10.2GB / 18.2GB,  734MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  51%|███████████████████████████████████████████████████████████████████████████████▋                                                                            | 2.52GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  52%|█████████████████████████████████████████████████████████████████████████████████▊                                                                          | 2.58GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  53%|██████████████████████████████████████████████████████████████████████████████████                                                                          | 2.63GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  77%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                    | 2.58GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  57%|████████████████████████████████████████████████████████████████████████████████████████▍                                                                   | 10.3GB / 18.2GB,  732MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  52%|████████████████████████████████████████████████████████████████████████████████▉                                                                           | 2.56GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  53%|███████████████████████████████████████████████████████████████████████████████████▏                                                                        | 2.62GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  53%|███████████████████████████████████████████████████████████████████████████████████                                                                         | 2.66GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  77%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                   | 2.62GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  57%|█████████████████████████████████████████████████████████████████████████████████████████▋                                                                  | 10.5GB / 18.2GB,  733MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  53%|██████████████████████████████████████████████████████████████████████████████████                                                                          | 2.59GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  54%|████████████████████████████████████████████████████████████████████████████████████▎                                                                       | 2.65GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  54%|████████████████████████████████████████████████████████████████████████████████████▏                                                                       | 2.69GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  79%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                 | 2.66GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  58%|██████████████████████████████████████████████████████████████████████████████████████████▉                                                                 | 10.6GB / 18.2GB,  734MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  53%|███████████████████████████████████████████████████████████████████████████████████                                                                         | 2.63GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  55%|█████████████████████████████████████████████████████████████████████████████████████▊                                                                      | 2.70GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  55%|█████████████████████████████████████████████████████████████████████████████████████▏                                                                      | 2.73GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                               | 2.69GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  59%|████████████████████████████████████████████████████████████████████████████████████████████▏                                                               | 10.8GB / 18.2GB,  735MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  54%|████████████████████████████████████████████████████████████████████████████████████▏                                                                       | 2.66GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  56%|██████████████████████████████████████████████████████████████████████████████████████▉                                                                     | 2.73GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  55%|██████████████████████████████████████████████████████████████████████████████████████▏                                                                     | 2.76GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  81%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                              | 2.73GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  60%|█████████████████████████████████████████████████████████████████████████████████████████████▎                                                              | 10.9GB / 18.2GB,  735MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  55%|█████████████████████████████████████████████████████████████████████████████████████▏                                                                      | 2.69GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  56%|███████████████████████████████████████████████████████████████████████████████████████▉                                                                    | 2.77GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  56%|███████████████████████████████████████████████████████████████████████████████████████▎                                                                    | 2.79GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  82%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                            | 2.76GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  61%|██████████████████████████████████████████████████████████████████████████████████████████████▌                                                             | 11.1GB / 18.2GB,  734MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  55%|██████████████████████████████████████████████████████████████████████████████████████▌                                                                     | 2.73GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  57%|█████████████████████████████████████████████████████████████████████████████████████████                                                                   | 2.80GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  57%|████████████████████████████████████████████████████████████████████████████████████████▎                                                                   | 2.83GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                           | 2.79GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  61%|███████████████████████████████████████████████████████████████████████████████████████████████▋                                                            | 11.2GB / 18.2GB,  734MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  56%|███████████████████████████████████████████████████████████████████████████████████████▌                                                                    | 2.77GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  58%|██████████████████████████████████████████████████████████████████████████████████████████                                                                  | 2.84GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  57%|█████████████████████████████████████████████████████████████████████████████████████████▍                                                                  | 2.86GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  84%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                         | 2.84GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  62%|████████████████████████████████████████████████████████████████████████████████████████████████▉                                                           | 11.3GB / 18.2GB,  735MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  57%|████████████████████████████████████████████████████████████████████████████████████████▋                                                                   | 2.80GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  58%|███████████████████████████████████████████████████████████████████████████████████████████▏                                                                | 2.87GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  58%|██████████████████████████████████████████████████████████████████████████████████████████▍                                                                 | 2.89GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  85%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                       | 2.87GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  63%|██████████████████████████████████████████████████████████████████████████████████████████████████                                                          | 11.5GB / 18.2GB,  735MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  58%|█████████████████████████████████████████████████████████████████████████████████████████▋                                                                  | 2.84GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  59%|████████████████████████████████████████████████████████████████████████████████████████████▎                                                               | 2.90GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  59%|███████████████████████████████████████████████████████████████████████████████████████████▍                                                                | 2.93GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  86%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                      | 2.90GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  64%|███████████████████████████████████████████████████████████████████████████████████████████████████▏                                                        | 11.6GB / 18.2GB,  735MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  58%|██████████████████████████████████████████████████████████████████████████████████████████▊                                                                 | 2.87GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  60%|█████████████████████████████████████████████████████████████████████████████████████████████▌                                                              | 2.94GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  59%|████████████████████████████████████████████████████████████████████████████████████████████▌                                                               | 2.96GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  87%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                    | 2.94GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  64%|████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                       | 11.7GB / 18.2GB,  734MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  59%|███████████████████████████████████████████████████████████████████████████████████████████▊                                                                | 2.90GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  61%|██████████████████████████████████████████████████████████████████████████████████████████████▋                                                             | 2.98GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  60%|█████████████████████████████████████████████████████████████████████████████████████████████▊                                                              | 3.00GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                  | 2.98GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  65%|█████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                      | 11.9GB / 18.2GB,  736MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  60%|████████████████████████████████████████████████████████████████████████████████████████████▉                                                               | 2.94GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  62%|███████████████████████████████████████████████████████████████████████████████████████████████▉                                                            | 3.02GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  61%|███████████████████████████████████████████████████████████████████████████████████████████████▏                                                            | 3.05GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  89%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                 | 3.01GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  66%|███████████████████████████████████████████████████████████████████████████████████████████████████████                                                     | 12.1GB / 18.2GB,  738MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  60%|██████████████████████████████████████████████████████████████████████████████████████████████▎                                                             | 2.98GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  62%|█████████████████████████████████████████████████████████████████████████████████████████████████                                                           | 3.05GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  62%|████████████████████████████████████████████████████████████████████████████████████████████████▏                                                           | 3.08GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋               | 3.05GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  67%|████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                   | 12.2GB / 18.2GB,  738MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  61%|███████████████████████████████████████████████████████████████████████████████████████████████▎                                                            | 3.01GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  63%|██████████████████████████████████████████████████████████████████████████████████████████████████                                                          | 3.09GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  62%|█████████████████████████████████████████████████████████████████████████████████████████████████▎                                                          | 3.11GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏             | 3.08GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  68%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                  | 12.3GB / 18.2GB,  739MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  62%|████████████████████████████████████████████████████████████████████████████████████████████████▍                                                           | 3.05GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  64%|███████████████████████████████████████████████████████████████████████████████████████████████████▏                                                        | 3.12GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  63%|██████████████████████████████████████████████████████████████████████████████████████████████████▎                                                         | 3.15GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  92%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏           | 3.12GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  68%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                 | 12.5GB / 18.2GB,  738MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  62%|█████████████████████████████████████████████████████████████████████████████████████████████████▍                                                          | 3.08GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  64%|████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                       | 3.15GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  64%|███████████████████████████████████████████████████████████████████████████████████████████████████▎                                                        | 3.18GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  93%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋          | 3.15GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  69%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                | 12.6GB / 18.2GB,  732MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  63%|██████████████████████████████████████████████████████████████████████████████████████████████████▍                                                         | 3.11GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  65%|█████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                      | 3.19GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  65%|████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                       | 3.22GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  94%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏        | 3.19GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  70%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                               | 12.7GB / 18.2GB,  728MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  64%|███████████████████████████████████████████████████████████████████████████████████████████████████▌                                                        | 3.15GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  66%|██████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                     | 3.22GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  65%|█████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                      | 3.25GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  95%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊       | 3.22GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  71%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                             | 12.9GB / 18.2GB,  725MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  65%|████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                       | 3.18GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  66%|███████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                    | 3.26GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  66%|██████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                     | 3.29GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  96%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎     | 3.25GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  71%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                            | 13.0GB / 18.2GB,  721MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  65%|█████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                      | 3.22GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  67%|████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                   | 3.30GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  67%|████████████████████████████████████████████████████████████████████████████████████████████████████████                                                    | 3.33GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  97%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉    | 3.29GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  72%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                           | 13.2GB / 18.2GB,  723MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  66%|███████████████████████████████████████████████████████████████████████████████████████████████████████                                                     | 3.25GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  68%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                  | 3.33GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  67%|█████████████████████████████████████████████████████████████████████████████████████████████████████████                                                   | 3.36GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍  | 3.32GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  73%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                          | 13.3GB / 18.2GB,  723MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  67%|████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                   | 3.30GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  69%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                 | 3.36GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  68%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                 | 3.40GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors:  99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 3.36GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (3 / 7)      :  74%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                         | 13.5GB / 18.2GB,  723MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  68%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                  | 3.33GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  69%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                | 3.40GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  69%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                | 3.43GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  74%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                        | 13.6GB / 18.2GB,  716MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  68%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                 | 3.36GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  70%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████                                               | 3.43GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  69%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                               | 3.46GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  75%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                       | 13.7GB / 18.2GB,  709MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  69%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                | 3.40GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  71%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████                                              | 3.46GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  70%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                              | 3.51GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  76%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                      | 13.8GB / 18.2GB,  706MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  70%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                               | 3.44GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  71%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                            | 3.50GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  71%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                             | 3.54GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  76%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                     | 13.9GB / 18.2GB,  704MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  70%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                              | 3.47GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  72%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                           | 3.54GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  72%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                            | 3.57GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  77%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                    | 14.0GB / 18.2GB,  700MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  71%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                             | 3.51GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  73%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                          | 3.57GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  72%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                           | 3.61GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  77%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                   | 14.1GB / 18.2GB,  697MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  72%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                            | 3.54GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  73%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                         | 3.61GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  73%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                          | 3.64GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  78%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                  | 14.2GB / 18.2GB,  694MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  73%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                           | 3.57GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  74%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                        | 3.64GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  74%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                         | 3.67GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  78%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                 | 14.3GB / 18.2GB,  690MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  73%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                         | 3.61GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  75%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                       | 3.67GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  74%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                        | 3.71GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  79%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                | 14.4GB / 18.2GB,  685MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  74%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                        | 3.64GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  76%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                      | 3.71GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  75%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                      | 3.75GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                | 14.5GB / 18.2GB,  677MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  75%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                       | 3.67GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  76%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                     | 3.74GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  76%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                     | 3.78GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                               | 14.6GB / 18.2GB,  671MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  75%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                      | 3.71GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  77%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                    | 3.77GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  76%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                    | 3.82GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  81%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                              | 14.7GB / 18.2GB,  665MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  76%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                     | 3.74GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  78%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                   | 3.81GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  77%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                   | 3.85GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                             | 14.8GB / 18.2GB,  661MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  77%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                    | 3.77GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  78%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                  | 3.84GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  78%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                  | 3.88GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  82%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                            | 14.9GB / 18.2GB,  657MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  77%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                   | 3.81GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  79%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                | 3.88GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  78%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                 | 3.92GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  82%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                           | 15.0GB / 18.2GB,  653MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  78%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                  | 3.85GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                               | 3.91GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  79%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                | 3.95GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                          | 15.1GB / 18.2GB,  650MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  79%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                 | 3.88GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  80%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                              | 3.94GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                               | 3.98GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  83%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                         | 15.2GB / 18.2GB,  647MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  79%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                | 3.92GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                             | 3.98GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  80%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                              | 4.02GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  84%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                         | 15.3GB / 18.2GB,  644MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  80%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                               | 3.95GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  82%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                            | 4.01GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                             | 4.05GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  85%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                        | 15.4GB / 18.2GB,  640MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                              | 3.98GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  83%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                           | 4.05GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  82%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                            | 4.09GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  85%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                       | 15.5GB / 18.2GB,  638MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  82%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                            | 4.03GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                          | 4.09GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                          | 4.14GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  86%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                      | 15.7GB / 18.2GB,  639MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  83%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                           | 4.07GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  84%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                         | 4.12GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  84%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                         | 4.18GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  86%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                     | 15.8GB / 18.2GB,  635MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                          | 4.10GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  85%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                        | 4.15GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  84%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                        | 4.21GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  87%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                    | 15.9GB / 18.2GB,  632MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  84%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                         | 4.14GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  85%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                      | 4.19GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  85%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                       | 4.24GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                   | 16.0GB / 18.2GB,  630MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  85%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                        | 4.17GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  86%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                     | 4.23GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  86%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                      | 4.28GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                  | 16.1GB / 18.2GB,  626MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  85%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                       | 4.20GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  87%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                    | 4.26GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  86%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                     | 4.31GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                 | 16.2GB / 18.2GB,  621MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  86%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                      | 4.24GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                   | 4.29GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  87%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                    | 4.35GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  89%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                | 16.3GB / 18.2GB,  613MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  87%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                    | 4.27GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                  | 4.33GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  88%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                   | 4.38GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏               | 16.4GB / 18.2GB,  608MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  87%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                   | 4.30GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                 | 4.36GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                  | 4.41GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  90%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████               | 16.5GB / 18.2GB,  603MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                  | 4.34GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                | 4.40GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                 | 4.45GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  91%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉              | 16.6GB / 18.2GB,  598MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  89%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                 | 4.37GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊               | 4.43GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏               | 4.49GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊             | 16.7GB / 18.2GB,  595MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  89%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                | 4.40GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  91%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊              | 4.46GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  91%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎              | 4.52GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  92%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋            | 16.8GB / 18.2GB,  590MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋               | 4.45GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  92%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏            | 4.50GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌             | 4.56GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  93%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊           | 16.9GB / 18.2GB,  589MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  91%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊              | 4.48GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  92%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏           | 4.54GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  92%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋            | 4.60GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  93%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌          | 17.0GB / 18.2GB,  586MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊             | 4.51GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  93%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎          | 4.57GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  93%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋           | 4.63GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  94%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍         | 17.1GB / 18.2GB,  582MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  92%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉            | 4.55GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  94%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋         | 4.61GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  93%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋          | 4.66GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  94%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍        | 17.2GB / 18.2GB,  579MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  93%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏          | 4.59GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  95%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋        | 4.65GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  94%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊         | 4.70GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  95%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎       | 17.3GB / 18.2GB,  576MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  94%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎         | 4.62GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  95%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊       | 4.68GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  95%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊        | 4.73GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏      | 17.4GB / 18.2GB,  573MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  94%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎        | 4.66GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  96%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████      | 4.72GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  95%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉       | 4.76GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  96%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏     | 17.6GB / 18.2GB,  570MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  95%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍       | 4.69GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  97%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏    | 4.76GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉      | 4.80GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉     | 17.7GB / 18.2GB,  565MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍      | 4.72GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  98%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏   | 4.79GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉     | 4.83GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  97%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊    | 17.8GB / 18.2GB,  560MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  96%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌     | 4.76GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎  | 4.82GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  97%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████    | 4.87GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  98%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋   | 17.9GB / 18.2GB,  556MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  97%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌    | 4.79GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors:  99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 4.86GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████   | 4.90GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌  | 18.0GB / 18.2GB,  552MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  98%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋   | 4.82GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍| 4.89GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏ | 4.93GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (4 / 7)      :  99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 18.1GB / 18.2GB,  548MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋  | 4.86GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.91GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors:  99%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏| 4.97GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (5 / 7)      :  99%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏| 18.1GB / 18.2GB,  543MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors:  99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊ | 4.89GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.91GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.99GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (6 / 7)      : 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋| 18.2GB / 18.2GB,  535MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊| 4.92GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.91GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.99GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (6 / 7)      : 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉| 18.2GB / 18.2GB,  525MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.93GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.91GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.99GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (7 / 7)      : 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18.2GB / 18.2GB,  512MB/s  

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.93GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.91GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.99GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[A

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.93GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.91GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.99GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[A

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.93GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.91GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.99GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[A

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.93GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.91GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.99GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[A

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.93GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.91GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.99GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[A

  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            [A[A


  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            [A[A[A


  ...0002-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.93GB / 4.93GB            [A[A[A[A


  ...0003-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.91GB / 4.91GB            [A[A[A[A[A


  ...0001-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.99GB / 4.99GB            [A[A[A[A[A[A


  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            [A[A[A[A[A[A[A


  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB            [A[A[A[A[A[A[A[AProcessing Files (7 / 7)      : 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18.2GB / 18.2GB,  446MB/s  
New Data Upload               : |                                                                                                                                                                |  0.00B /  0.00B,  0.00B/s  
  .../stage1/training_args.bin: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9.43kB / 9.43kB            
  ...ce/stage1/tokenizer.model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.69MB / 4.69MB            
  ...0002-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.93GB / 4.93GB            
  ...0003-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.91GB / 4.91GB            
  ...0001-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.99GB / 4.99GB            
  ...ace/stage1/tokenizer.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33.4MB / 33.4MB            
  ...0004-of-00004.safetensors: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3.38GB / 3.38GB