diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..18490970cdb55ea7c9b6d683513bd222d1e5e295 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+global_step_0/tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/global_step_0/README.md b/global_step_0/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..7711c5fdbc3259b38a56eb7bb022bd66baa0142c
--- /dev/null
+++ b/global_step_0/README.md
@@ -0,0 +1,60 @@
+---
+library_name: transformers
+license: other
+tags:
+- llama-factory
+- full
+- generated_from_trainer
+model-index:
+- name: think_sft_nopack_lr1.5e5_ep3
+ results: []
+---
+
+
+
+# think_sft_nopack_lr1.5e5_ep3
+
+This model is a custom Llama 3B model (pretrained on 52B tokens) that was fine-tuned on the open_thoughts_43k_think_format dataset.
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
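+Trained on 43,525 examples from the OpenThoughts math subset in think format (`open-thoughts114k_math_think_format.jsonl`). More information needed on evaluation data.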
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 1.5e-05
+- train_batch_size: 2
+- eval_batch_size: 8
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 4
+- gradient_accumulation_steps: 32
+- total_train_batch_size: 256
+- total_eval_batch_size: 32
+- optimizer: AdamW (OptimizerNames.ADAMW_TORCH) with betas=(0.9,0.999) and epsilon=1e-08; no additional optimizer arguments
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_ratio: 0.1
+- num_epochs: 3.0
+
+### Training results
+
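+Final training loss: 0.4936 after 3.0 epochs (train runtime: 40041.27 s, 3.261 samples/s); see `all_results.json`.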
+
+### Framework versions
+
+- Transformers 4.57.1
+- Pytorch 2.6.0+cu124
+- Datasets 4.0.0
+- Tokenizers 0.22.1
diff --git a/global_step_0/all_results.json b/global_step_0/all_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..361e4e132f240d363d25c252389f9999a100c486
--- /dev/null
+++ b/global_step_0/all_results.json
@@ -0,0 +1,8 @@
+{
+ "epoch": 3.0,
+ "total_flos": 1.1980638081930756e+19,
+ "train_loss": 0.49363853406255476,
+ "train_runtime": 40041.2675,
+ "train_samples_per_second": 3.261,
+ "train_steps_per_second": 0.013
+}
\ No newline at end of file
diff --git a/global_step_0/chat_template.jinja b/global_step_0/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..2413626ec1ea4485c29369803d71586d2a5ed64b
--- /dev/null
+++ b/global_step_0/chat_template.jinja
@@ -0,0 +1,21 @@
+{{- bos_token }}
+{%- if messages[0]['role'] == 'system' %}
+ {%- set system_message = messages[0]['content'] %}
+ {%- set loop_messages = messages[1:] %}
+{%- else %}
+ {%- set system_message = "" %}
+ {%- set loop_messages = messages %}
+{%- endif %}
+{%- if system_message %}
+{{- '<|start_header_id|>system<|end_header_id|>\n\n' + system_message + '<|eot_id|>' }}
+{%- endif %}
+{%- for message in loop_messages %}
+ {%- if message['role'] == 'user' %}
+{{- '<|start_header_id|>user<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }}
+ {%- elif message['role'] == 'assistant' %}
+{{- '<|start_header_id|>assistant<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }}
+ {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+{{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
+{%- endif %}
diff --git a/global_step_0/config.json b/global_step_0/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..44616e007e75e54816a9f0899ea06b59549777bf
--- /dev/null
+++ b/global_step_0/config.json
@@ -0,0 +1,36 @@
+{
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 128000,
+ "dtype": "bfloat16",
+ "eos_token_id": 128009,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 3072,
+ "initializer_range": 0.02,
+ "intermediate_size": 8192,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "num_attention_heads": 24,
+ "num_hidden_layers": 28,
+ "num_key_value_heads": 8,
+ "pad_token_id": 128001,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 32.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": true,
+ "transformers_version": "4.57.1",
+ "use_cache": false,
+ "vocab_size": 128256
+}
diff --git a/global_step_0/generation_config.json b/global_step_0/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..28e1ef5d319e1b40e14cae21d3ec6ee69cc033e8
--- /dev/null
+++ b/global_step_0/generation_config.json
@@ -0,0 +1,13 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 128000,
+ "do_sample": true,
+ "eos_token_id": [
+ 128009,
+ 128001
+ ],
+ "pad_token_id": 128001,
+ "temperature": 0.6,
+ "top_p": 0.9,
+ "transformers_version": "4.57.1"
+}
diff --git a/global_step_0/logs/sft_train_20260305_150038.log b/global_step_0/logs/sft_train_20260305_150038.log
new file mode 100644
index 0000000000000000000000000000000000000000..797680cf615a2eda7d5294c18694c6d678edd4c8
--- /dev/null
+++ b/global_step_0/logs/sft_train_20260305_150038.log
@@ -0,0 +1,1176 @@
+[INFO|2026-03-05 15:00:44] llamafactory.launcher:143 >> Initializing 4 distributed tasks at: 127.0.0.1:53151
+W0305 15:00:45.406000 1741551 site-packages/torch/distributed/run.py:792]
+W0305 15:00:45.406000 1741551 site-packages/torch/distributed/run.py:792] *****************************************
+W0305 15:00:45.406000 1741551 site-packages/torch/distributed/run.py:792] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
+W0305 15:00:45.406000 1741551 site-packages/torch/distributed/run.py:792] *****************************************
+[2026-03-05 15:00:54,415] [INFO] [real_accelerator.py:254:get_accelerator] Setting ds_accelerator to cuda (auto detect)
+[2026-03-05 15:00:56,464] [INFO] [real_accelerator.py:254:get_accelerator] Setting ds_accelerator to cuda (auto detect)
+[2026-03-05 15:00:56,464] [INFO] [real_accelerator.py:254:get_accelerator] Setting ds_accelerator to cuda (auto detect)
+[2026-03-05 15:00:56,609] [INFO] [real_accelerator.py:254:get_accelerator] Setting ds_accelerator to cuda (auto detect)
+/home/salman/anaconda3/envs/reward-signal/lib/python3.11/site-packages/jieba/_compat.py:18: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
+ import pkg_resources
+[2026-03-05 15:00:57,141] [INFO] [comm.py:669:init_distributed] cdb=None
+[INFO|2026-03-05 15:00:58] llamafactory.hparams.parser:423 >> Process rank: 3, world size: 4, device: cuda:3, distributed training: True, compute dtype: torch.bfloat16
+/home/salman/anaconda3/envs/reward-signal/lib/python3.11/site-packages/jieba/_compat.py:18: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
+ import pkg_resources
+/home/salman/anaconda3/envs/reward-signal/lib/python3.11/site-packages/jieba/_compat.py:18: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
+ import pkg_resources
+[2026-03-05 15:00:58,934] [INFO] [comm.py:669:init_distributed] cdb=None
+[2026-03-05 15:00:58,996] [INFO] [comm.py:669:init_distributed] cdb=None
+[2026-03-05 15:00:58,996] [INFO] [comm.py:700:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl
+/home/salman/anaconda3/envs/reward-signal/lib/python3.11/site-packages/jieba/_compat.py:18: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
+ import pkg_resources
+[2026-03-05 15:00:59,183] [INFO] [comm.py:669:init_distributed] cdb=None
+[rank3]:[W305 15:00:59.155040498 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 3] using GPU 3 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
+[INFO|2026-03-05 15:01:01] llamafactory.hparams.parser:423 >> Process rank: 0, world size: 4, device: cuda:0, distributed training: True, compute dtype: torch.bfloat16
+[INFO|tokenization_utils_base.py:2093] 2026-03-05 15:01:01,764 >> loading file tokenizer.json
+[INFO|tokenization_utils_base.py:2093] 2026-03-05 15:01:01,764 >> loading file tokenizer.model
+[INFO|tokenization_utils_base.py:2093] 2026-03-05 15:01:01,764 >> loading file added_tokens.json
+[INFO|tokenization_utils_base.py:2093] 2026-03-05 15:01:01,764 >> loading file special_tokens_map.json
+[INFO|tokenization_utils_base.py:2093] 2026-03-05 15:01:01,764 >> loading file tokenizer_config.json
+[INFO|tokenization_utils_base.py:2093] 2026-03-05 15:01:01,764 >> loading file chat_template.jinja
+[INFO|tokenization_utils_base.py:2364] 2026-03-05 15:01:02,134 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
+[INFO|configuration_utils.py:763] 2026-03-05 15:01:02,134 >> loading configuration file /local2/salman/model/pretrain_model/v2_4_gpu_llama_3b_nemo_52b/config.json
+[INFO|configuration_utils.py:839] 2026-03-05 15:01:02,136 >> Model config LlamaConfig {
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 128000,
+ "dtype": "bfloat16",
+ "eos_token_id": 128001,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 3072,
+ "initializer_range": 0.02,
+ "intermediate_size": 8192,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "num_attention_heads": 24,
+ "num_hidden_layers": 28,
+ "num_key_value_heads": 8,
+ "pad_token_id": 128001,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 32.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": true,
+ "transformers_version": "4.57.1",
+ "use_cache": false,
+ "vocab_size": 128256
+}
+
+[INFO|tokenization_utils_base.py:2093] 2026-03-05 15:01:02,137 >> loading file tokenizer.json
+[INFO|tokenization_utils_base.py:2093] 2026-03-05 15:01:02,137 >> loading file tokenizer.model
+[INFO|tokenization_utils_base.py:2093] 2026-03-05 15:01:02,137 >> loading file added_tokens.json
+[INFO|tokenization_utils_base.py:2093] 2026-03-05 15:01:02,137 >> loading file special_tokens_map.json
+[INFO|tokenization_utils_base.py:2093] 2026-03-05 15:01:02,137 >> loading file tokenizer_config.json
+[INFO|tokenization_utils_base.py:2093] 2026-03-05 15:01:02,137 >> loading file chat_template.jinja
+[INFO|tokenization_utils_base.py:2364] 2026-03-05 15:01:02,481 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
+[INFO|2026-03-05 15:01:02] llamafactory.data.template:143 >> Replace eos token: <|eot_id|>.
+[INFO|2026-03-05 15:01:02] llamafactory.data.template:143 >> Add <|eom_id|> to stop words.
+[INFO|2026-03-05 15:01:02] llamafactory.data.loader:143 >> Loading dataset /home/salman/reward-signal-analysis/data/open_thought_sft_data/think_format/open-thoughts114k_math_think_format.jsonl...
+
Converting format of dataset (num_proc=16): 0%| | 0/43525 [00:00, ? examples/s][INFO|2026-03-05 15:01:02] llamafactory.hparams.parser:423 >> Process rank: 1, world size: 4, device: cuda:1, distributed training: True, compute dtype: torch.bfloat16
+
Converting format of dataset (num_proc=16): 1%| | 336/43525 [00:00<00:47, 905.93 examples/s]
Converting format of dataset (num_proc=16): 24%|██▍ | 10554/43525 [00:00<00:01, 25617.28 examples/s]
Converting format of dataset (num_proc=16): 46%|████▌ | 19979/43525 [00:00<00:00, 43413.40 examples/s]
Converting format of dataset (num_proc=16): 72%|███████▏ | 31463/43525 [00:00<00:00, 38741.76 examples/s][INFO|2026-03-05 15:01:03] llamafactory.hparams.parser:423 >> Process rank: 2, world size: 4, device: cuda:2, distributed training: True, compute dtype: torch.bfloat16
+[rank1]:[W305 15:01:03.435588757 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 1] using GPU 1 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
+
Converting format of dataset (num_proc=16): 85%|████████▍ | 36924/43525 [00:01<00:00, 20118.93 examples/s]
Converting format of dataset (num_proc=16): 93%|█████████▎| 40644/43525 [00:01<00:00, 18445.98 examples/s]
Converting format of dataset (num_proc=16): 100%|██████████| 43525/43525 [00:02<00:00, 18189.21 examples/s]
+[rank2]:[W305 15:01:05.313540243 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 2] using GPU 2 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
+[rank0]:[W305 15:01:06.696130699 ProcessGroupNCCL.cpp:4561] [PG ID 0 PG GUID 0 Rank 0] using GPU 0 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
+NCCL version 2.21.5+cuda12.4
+
Running tokenizer on dataset (num_proc=16): 0%| | 0/43525 [00:00, ? examples/s]
Running tokenizer on dataset (num_proc=16): 2%|▏ | 1000/43525 [00:07<05:09, 137.58 examples/s]
Running tokenizer on dataset (num_proc=16): 5%|▍ | 2000/43525 [00:07<02:07, 324.64 examples/s]
Running tokenizer on dataset (num_proc=16): 7%|▋ | 3000/43525 [00:08<01:19, 511.94 examples/s]
Running tokenizer on dataset (num_proc=16): 9%|▉ | 4000/43525 [00:08<00:49, 793.96 examples/s]
Running tokenizer on dataset (num_proc=16): 11%|█▏ | 5000/43525 [00:08<00:38, 1012.22 examples/s]
Running tokenizer on dataset (num_proc=16): 14%|█▍ | 6000/43525 [00:08<00:27, 1359.19 examples/s]
Running tokenizer on dataset (num_proc=16): 18%|█▊ | 8000/43525 [00:09<00:21, 1656.68 examples/s]
Running tokenizer on dataset (num_proc=16): 21%|██ | 9000/43525 [00:10<00:17, 1950.11 examples/s]
Running tokenizer on dataset (num_proc=16): 23%|██▎ | 10000/43525 [00:10<00:13, 2443.20 examples/s]
Running tokenizer on dataset (num_proc=16): 28%|██▊ | 12000/43525 [00:11<00:17, 1776.95 examples/s]
Running tokenizer on dataset (num_proc=16): 30%|██▉ | 13000/43525 [00:12<00:15, 2025.05 examples/s]
Running tokenizer on dataset (num_proc=16): 32%|███▏ | 14000/43525 [00:12<00:13, 2262.75 examples/s]
Running tokenizer on dataset (num_proc=16): 34%|███▍ | 15000/43525 [00:12<00:11, 2397.61 examples/s]
Running tokenizer on dataset (num_proc=16): 37%|███▋ | 16000/43525 [00:13<00:15, 1726.04 examples/s]
Running tokenizer on dataset (num_proc=16): 39%|███▉ | 17000/43525 [00:14<00:19, 1387.00 examples/s]
Running tokenizer on dataset (num_proc=16): 41%|████ | 17720/43525 [00:15<00:16, 1518.06 examples/s]
Running tokenizer on dataset (num_proc=16): 45%|████▍ | 19440/43525 [00:15<00:11, 2177.47 examples/s]
Running tokenizer on dataset (num_proc=16): 49%|████▉ | 21440/43525 [00:15<00:06, 3184.92 examples/s]
Running tokenizer on dataset (num_proc=16): 56%|█████▌ | 24440/43525 [00:16<00:05, 3446.77 examples/s]
Running tokenizer on dataset (num_proc=16): 58%|█████▊ | 25440/43525 [00:17<00:06, 2711.43 examples/s]
Running tokenizer on dataset (num_proc=16): 61%|██████ | 26440/43525 [00:17<00:06, 2629.01 examples/s]
Running tokenizer on dataset (num_proc=16): 63%|██████▎ | 27440/43525 [00:18<00:08, 1864.46 examples/s]
Running tokenizer on dataset (num_proc=16): 67%|██████▋ | 29161/43525 [00:19<00:07, 1883.59 examples/s]
Running tokenizer on dataset (num_proc=16): 69%|██████▉ | 30161/43525 [00:19<00:06, 2061.89 examples/s]
Running tokenizer on dataset (num_proc=16): 73%|███████▎ | 31881/43525 [00:20<00:03, 2972.89 examples/s]
Running tokenizer on dataset (num_proc=16): 75%|███████▍ | 32601/43525 [00:20<00:03, 2858.47 examples/s]
Running tokenizer on dataset (num_proc=16): 77%|███████▋ | 33321/43525 [00:20<00:03, 3043.33 examples/s]
Running tokenizer on dataset (num_proc=16): 78%|███████▊ | 34041/43525 [00:21<00:03, 2407.83 examples/s]
Running tokenizer on dataset (num_proc=16): 80%|███████▉ | 34762/43525 [00:21<00:05, 1647.55 examples/s]
Running tokenizer on dataset (num_proc=16): 82%|████████▏ | 35762/43525 [00:22<00:03, 2246.58 examples/s]
Running tokenizer on dataset (num_proc=16): 84%|████████▍ | 36482/43525 [00:22<00:03, 2087.69 examples/s]
Running tokenizer on dataset (num_proc=16): 86%|████████▌ | 37482/43525 [00:24<00:05, 1156.79 examples/s]
Running tokenizer on dataset (num_proc=16): 88%|████████▊ | 38203/43525 [00:24<00:03, 1371.08 examples/s]
Running tokenizer on dataset (num_proc=16): 92%|█████████▏| 39924/43525 [00:24<00:01, 2154.90 examples/s]
Running tokenizer on dataset (num_proc=16): 93%|█████████▎| 40644/43525 [00:24<00:01, 2346.41 examples/s]
Running tokenizer on dataset (num_proc=16): 97%|█████████▋| 42085/43525 [00:27<00:01, 990.35 examples/s]
Running tokenizer on dataset (num_proc=16): 98%|█████████▊| 42805/43525 [00:29<00:01, 670.87 examples/s]
Running tokenizer on dataset (num_proc=16): 100%|██████████| 43525/43525 [00:30<00:00, 834.07 examples/s]
Running tokenizer on dataset (num_proc=16): 100%|██████████| 43525/43525 [00:30<00:00, 1432.50 examples/s]
+training example:
+input_ids:
+[128000, 128006, 9125, 128007, 271, 7927, 3560, 439, 459, 18328, 18065, 27461, 24919, 4860, 1555, 264, 37538, 1317, 7422, 1920, 1603, 8405, 279, 1620, 24473, 323, 13687, 10105, 13, 1115, 7612, 23387, 304, 264, 16195, 11008, 315, 6492, 11, 29385, 4954, 11, 27501, 11, 32834, 24280, 11, 22599, 11, 1203, 376, 4628, 11, 323, 20140, 311, 2274, 1664, 12, 25742, 291, 7422, 1920, 13, 5321, 6070, 701, 2077, 1139, 1403, 1925, 14491, 25, 36287, 323, 12761, 13, 763, 279, 36287, 3857, 11, 1176, 1781, 3094, 555, 3094, 4871, 366, 27963, 29, 27516, 27963, 29, 9681, 13, 9062, 3094, 1288, 2997, 11944, 38864, 1778, 439, 22209, 287, 4860, 11, 29385, 4954, 9959, 14955, 11, 87881, 287, 502, 6848, 11, 69963, 279, 13708, 315, 279, 1510, 7504, 11, 74285, 904, 6103, 11, 323, 17951, 5977, 3766, 7504, 13, 763, 279, 12761, 3857, 11, 3196, 389, 5370, 13865, 11, 48539, 811, 11, 323, 63851, 505, 279, 36287, 3857, 11, 60826, 3118, 279, 1620, 6425, 430, 499, 82577, 4495, 13, 578, 6425, 1288, 7293, 264, 20406, 11, 13687, 11, 64694, 7645, 1742, 323, 7872, 5995, 3094, 4460, 311, 5662, 279, 17102, 13, 4800, 11, 1456, 311, 11886, 279, 2768, 3488, 1555, 279, 3485, 17959, 25, 5321, 3289, 1497, 701, 1620, 4320, 304, 279, 3830, 25, 1144, 80175, 90, 7927, 22559, 7966, 128009, 128006, 882, 128007, 271, 22818, 1972, 5219, 18240, 264, 11, 293, 11, 272, 1144, 8, 323, 264, 6928, 1396, 18240, 1144, 13231, 1144, 8, 1778, 430, 279, 48411, 18240, 282, 2120, 8, 284, 865, 61, 18, 489, 264, 865, 61, 17, 489, 293, 865, 489, 272, 1144, 8, 706, 2380, 1972, 20282, 18240, 865, 62, 16, 11, 865, 62, 17, 11, 865, 62, 18, 1144, 705, 323, 279, 4787, 18240, 865, 62, 17, 482, 865, 62, 16, 284, 1144, 13231, 1144, 8, 323, 18240, 865, 62, 18, 871, 1144, 38118, 90, 16, 15523, 17, 26628, 87, 62, 16, 489, 865, 62, 17, 8, 1144, 8, 527, 20097, 11, 1505, 279, 7340, 907, 315, 18240, 1144, 38118, 90, 17, 264, 61, 18, 489, 220, 1544, 272, 482, 220, 24, 264, 293, 15523, 59, 13231, 61, 18, 92, 1144, 570, 128009, 128006, 78191, 128007, 271, 
14023, 771, 397, 33413, 11, 1095, 596, 1456, 311, 22118, 420, 3575, 3094, 555, 3094, 13, 578, 3575, 374, 922, 9455, 279, 7340, 907, 315, 279, 7645, 320, 17, 64, 44301, 489, 220, 1544, 66, 482, 220, 24, 370, 5738, 34586, 44301, 2728, 1063, 4787, 389, 279, 20282, 315, 279, 41999, 48411, 282, 2120, 8, 284, 865, 44301, 489, 3944, 30556, 489, 45341, 489, 272, 13, 578, 20282, 527, 1972, 11, 449, 865, 32907, 482, 865, 32086, 284, 49438, 323, 865, 121412, 871, 320, 87, 32086, 489, 865, 32907, 5738, 17, 13, 4815, 5451, 11, 358, 1205, 311, 19635, 430, 369, 264, 41999, 48411, 449, 20282, 865, 32086, 11, 865, 32907, 11, 865, 121412, 11, 279, 37929, 649, 387, 13605, 1701, 11655, 1955, 596, 55179, 13, 2100, 11, 1095, 596, 3350, 1884, 1523, 13, 4815, 36644, 1955, 596, 55179, 369, 282, 2120, 8, 284, 865, 44301, 489, 3944, 30556, 489, 45341, 489, 272, 512, 16, 13, 8279, 315, 20282, 25, 865, 32086, 489, 865, 32907, 489, 865, 121412, 284, 482, 64, 198, 17, 13, 8279, 315, 3956, 25, 865, 32086, 87, 32907, 489, 865, 32086, 87, 121412, 489, 865, 32907, 87, 121412, 284, 293, 198, 18, 13, 5761, 315, 20282, 25, 865, 32086, 87, 32907, 87, 121412, 284, 482, 66, 271, 8140, 2218, 7645, 374, 320, 17, 64, 44301, 489, 220, 1544, 66, 482, 220, 24, 370, 5738, 34586, 44301, 13, 6914, 596, 1518, 422, 584, 649, 3237, 420, 304, 3878, 315, 279, 20282, 1701, 11655, 1955, 596, 55179, 13, 4815, 5451, 11, 1095, 596, 28779, 264, 11, 293, 11, 272, 304, 3878, 315, 279, 20282, 13, 4815, 3915, 11655, 1955, 512, 64, 284, 29506, 87, 32086, 489, 865, 32907, 489, 865, 121412, 340, 65, 284, 865, 32086, 87, 32907, 489, 865, 32086, 87, 121412, 489, 865, 32907, 87, 121412, 198, 66, 284, 482, 87, 32086, 87, 32907, 87, 121412, 271, 2169, 36368, 1521, 1139, 279, 7645, 1473, 17, 64, 44301, 489, 220, 1544, 66, 482, 220, 24, 370, 720, 28, 220, 17, 66767, 87, 32086, 489, 865, 32907, 489, 865, 121412, 595, 44301, 489, 220, 1544, 4172, 87, 32086, 87, 32907, 87, 121412, 8, 482, 220, 24, 66767, 87, 32086, 489, 865, 32907, 489, 865, 
121412, 29254, 87, 32086, 87, 32907, 489, 865, 32086, 87, 121412, 489, 865, 32907, 87, 121412, 696, 10267, 757, 12849, 1855, 4751, 3094, 555, 3094, 382, 5451, 4751, 25, 220, 17, 66767, 87, 32086, 489, 865, 32907, 489, 865, 121412, 595, 44301, 284, 482, 17, 2120, 32086, 489, 865, 32907, 489, 865, 121412, 8, 44301, 271, 16041, 4751, 25, 220, 1544, 4172, 87, 32086, 87, 32907, 87, 121412, 8, 284, 482, 1544, 87, 32086, 87, 32907, 87, 121412, 271, 38075, 4751, 25, 482, 24, 66767, 87, 32086, 489, 865, 32907, 489, 865, 121412, 29254, 87, 32086, 87, 32907, 489, 865, 32086, 87, 121412, 489, 865, 32907, 87, 121412, 8, 284, 220, 24, 2120, 32086, 489, 865, 32907, 489, 865, 121412, 2432, 87, 32086, 87, 32907, 489, 865, 32086, 87, 121412, 489, 865, 32907, 87, 121412, 696, 4516, 35271, 682, 2380, 1473, 12, 17, 2120, 32086, 489, 865, 32907, 489, 865, 121412, 8, 44301, 482, 1544, 87, 32086, 87, 32907, 87, 121412, 489, 220, 24, 2120, 32086, 489, 865, 32907, 489, 865, 121412, 2432, 87, 32086, 87, 32907, 489, 865, 32086, 87, 121412, 489, 865, 32907, 87, 121412, 696, 81122, 11, 420, 5084, 6485, 13, 10926, 1070, 374, 264, 3967, 9764, 430, 36716, 1521, 3878, 30, 6914, 757, 1781, 13, 4815, 3905, 543, 430, 369, 264, 41999, 48411, 11, 279, 50419, 519, 423, 374, 2728, 555, 423, 284, 220, 972, 69744, 482, 220, 19, 65, 44301, 67, 489, 293, 30556, 66, 30556, 482, 220, 19, 582, 44301, 482, 220, 1544, 64, 30556, 67, 30556, 369, 3944, 44301, 489, 45341, 30556, 489, 21375, 489, 294, 13, 2030, 304, 1057, 1162, 11, 279, 48411, 374, 1647, 292, 320, 21307, 36706, 220, 16, 705, 779, 264, 28, 16, 11, 719, 1618, 279, 37929, 527, 264, 11, 293, 11, 272, 439, 2728, 13, 14144, 11, 3604, 11, 304, 279, 5410, 1376, 11, 279, 50419, 519, 369, 865, 44301, 489, 264, 865, 30556, 489, 293, 865, 489, 272, 1053, 387, 423, 284, 220, 972, 13997, 482, 220, 19, 64, 44301, 66, 489, 264, 30556, 65, 30556, 482, 220, 19, 65, 44301, 482, 220, 1544, 66, 30556, 13, 2030, 358, 2643, 387, 27890, 709, 279, 15150, 13, 6914, 757, 10356, 
1473, 14524, 11, 279, 50419, 519, 315, 264, 41999, 865, 44301, 489, 17585, 30556, 489, 97130, 489, 436, 374, 2728, 555, 1473, 101561, 284, 220, 972, 79, 23866, 482, 220, 19, 79, 44301, 81, 489, 281, 30556, 80, 30556, 482, 220, 19, 80, 44301, 482, 220, 1544, 81, 30556, 271, 9642, 11, 779, 304, 1057, 1162, 11, 449, 281, 284, 264, 11, 2874, 284, 293, 11, 436, 284, 272, 11, 779, 82263, 284, 220, 972, 13997, 482, 220, 19, 64, 44301, 66, 489, 264, 30556, 65, 30556, 482, 220, 19, 65, 44301, 482, 220, 1544, 66, 30556, 271, 4071, 358, 2846, 539, 2771, 422, 279, 50419, 519, 374, 6089, 5552, 1618, 11, 719, 7344, 13, 8876, 279, 48411, 706, 2380, 1972, 20282, 11, 279, 50419, 519, 2011, 387, 2536, 62035, 13, 4452, 11, 279, 3575, 5415, 430, 682, 20282, 527, 1972, 11, 779, 82263, 63247, 220, 15, 13, 2030, 7344, 279, 7645, 584, 2351, 14892, 449, 374, 5552, 311, 279, 50419, 519, 1980, 14524, 11, 1095, 596, 1817, 279, 7645, 2728, 25, 220, 17, 64, 44301, 489, 220, 1544, 66, 482, 220, 24, 370, 13, 1442, 584, 9616, 449, 279, 50419, 519, 15150, 1473, 101561, 284, 220, 972, 13997, 482, 220, 19, 64, 44301, 66, 489, 264, 30556, 65, 30556, 482, 220, 19, 65, 44301, 482, 220, 1544, 66, 30556, 271, 2688, 6089, 8196, 13, 10926, 539, 13, 6914, 596, 1456, 2500, 5603, 382, 93114, 11, 8530, 279, 7645, 320, 17, 64, 44301, 489, 220, 1544, 66, 482, 220, 24, 370, 8, 649, 387, 59624, 304, 3878, 315, 279, 20282, 13, 6914, 596, 1456, 32434, 10831, 279, 11655, 1955, 24282, 1139, 433, 382, 4516, 1095, 596, 28779, 264, 11, 293, 11, 272, 1473, 17, 66767, 87, 32086, 489, 865, 32907, 489, 865, 121412, 595, 44301, 489, 220, 1544, 4172, 87, 32086, 87, 32907, 87, 121412, 8, 482, 220, 24, 66767, 87, 32086, 489, 865, 32907, 489, 865, 121412, 29254, 87, 32086, 87, 32907, 489, 865, 32086, 87, 121412, 489, 865, 32907, 87, 121412, 696, 47354, 1855, 4751, 1473, 5451, 4751, 25, 220, 17, 35399, 16, 30876, 18, 6737, 87, 32086, 489, 865, 32907, 489, 865, 121412, 30876, 18, 284, 482, 17, 2120, 32086, 489, 865, 32907, 489, 865, 
121412, 30876, 18, 271, 16041, 4751, 25, 220, 1544, 35399, 87, 32086, 87, 32907, 87, 121412, 8, 284, 482, 1544, 87, 32086, 87, 32907, 87, 121412, 271, 38075, 4751, 25, 482, 24, 35399, 16, 18201, 87, 32086, 489, 865, 32907, 489, 865, 121412, 2432, 87, 32086, 87, 32907, 489, 865, 32086, 87, 121412, 489, 865, 32907, 87, 121412, 8, 284, 220, 24, 2120, 32086, 489, 865, 32907, 489, 865, 121412, 2432, 87, 32086, 87, 32907, 489, 865, 32086, 87, 121412, 489, 865, 32907, 87, 121412, 696, 4516, 10917, 433, 682, 3871, 1473, 12, 17, 2120, 32086, 489, 865, 32907, 489, 865, 121412, 30876, 18, 482, 220, 1544, 87, 32086, 87, 32907, 87, 121412, 489, 220, 24, 2120, 32086, 489, 865, 32907, 489, 865, 121412, 2432, 87, 32086, 87, 32907, 489, 865, 32086, 87, 121412, 489, 865, 32907, 87, 121412, 696, 81122, 13, 6914, 596, 1518, 422, 584, 649, 8331, 420, 477, 40821, 433, 13, 6914, 596, 79164, 328, 284, 865, 32086, 489, 865, 32907, 489, 865, 121412, 11, 393, 284, 865, 32086, 87, 32907, 489, 865, 32086, 87, 121412, 489, 865, 32907, 87, 121412, 11, 1229, 284, 865, 32086, 87, 32907, 87, 121412, 13, 5112, 1057, 7645, 9221, 1473, 12, 17, 50, 44301, 482, 1544, 48, 489, 220, 24, 50, 393, 271, 4071, 369, 264, 41999, 48411, 11, 279, 5133, 1990, 328, 11, 393, 11, 1229, 374, 2728, 555, 11655, 1955, 596, 55179, 13, 2030, 8530, 584, 649, 29243, 420, 7645, 311, 2555, 775, 382, 93114, 11, 7344, 1701, 55443, 37498, 13, 6914, 596, 12849, 420, 7645, 369, 3230, 20282, 13, 6914, 596, 23289, 430, 865, 32086, 11, 865, 32907, 11, 865, 121412, 527, 7482, 449, 865, 32907, 482, 865, 32086, 284, 49438, 323, 865, 121412, 871, 320, 87, 32086, 489, 865, 32907, 5738, 17, 13, 10926, 584, 649, 1719, 58053, 279, 20282, 304, 3878, 315, 7482, 430, 12602, 279, 2728, 4787, 382, 22818, 430, 865, 32907, 482, 865, 32086, 284, 49438, 11, 1095, 596, 743, 865, 32086, 284, 259, 482, 49438, 14, 17, 323, 865, 32907, 284, 259, 489, 49438, 14, 17, 369, 1063, 259, 13, 5112, 279, 83063, 315, 865, 32086, 323, 865, 32907, 374, 259, 11, 323, 
279, 3044, 865, 121412, 871, 320, 87, 32086, 489, 865, 32907, 5738, 17, 9221, 865, 121412, 871, 259, 13, 4815, 55915, 11, 1095, 757, 743, 1473, 87, 32086, 284, 259, 482, 49438, 14, 17, 271, 87, 32907, 284, 259, 489, 49438, 14, 17, 271, 87, 121412, 284, 259, 489, 274, 11, 1405, 274, 871, 220, 15, 320, 11536, 865, 121412, 871, 259, 696, 4516, 1457, 11, 1057, 20282, 527, 13605, 304, 3878, 315, 259, 11, 49438, 11, 323, 274, 871, 220, 15, 382, 7184, 11, 1095, 596, 12849, 328, 11, 393, 11, 1229, 304, 3878, 315, 259, 11, 49438, 11, 274, 382, 5451, 11, 328, 284, 865, 32086, 489, 865, 32907, 489, 865, 121412, 284, 320, 83, 482, 49438, 14, 17, 8, 489, 320, 83, 489, 49438, 14, 17, 8, 489, 320, 83, 489, 274, 8, 284, 220, 18, 83, 489, 274, 271, 16041, 11, 393, 284, 865, 32086, 87, 32907, 489, 865, 32086, 87, 121412, 489, 865, 32907, 87, 121412, 271, 47354, 1855, 4751, 1473, 87, 32086, 87, 32907, 284, 320, 83, 482, 49438, 14, 17, 2432, 83, 489, 49438, 14, 17, 8, 284, 259, 30556, 482, 320, 34586, 14, 17, 8, 30556, 284, 259, 30556, 482, 49438, 30556, 14, 19, 271, 87, 32086, 87, 121412, 284, 320, 83, 482, 49438, 14, 17, 2432, 83, 489, 274, 8, 284, 259, 1175, 489, 274, 8, 482, 320, 34586, 14, 17, 2432, 83, 489, 274, 8, 284, 259, 30556, 489, 10814, 482, 320, 34586, 259, 5738, 17, 482, 320, 34586, 274, 5738, 17, 271, 87, 32907, 87, 121412, 284, 320, 83, 489, 49438, 14, 17, 2432, 83, 489, 274, 8, 284, 259, 1175, 489, 274, 8, 489, 320, 34586, 14, 17, 2432, 83, 489, 274, 8, 284, 259, 30556, 489, 10814, 489, 320, 34586, 259, 5738, 17, 489, 320, 34586, 274, 5738, 17, 271, 33408, 1521, 3871, 1473, 47, 284, 510, 83, 30556, 482, 49438, 30556, 14, 19, 60, 489, 510, 83, 30556, 489, 10814, 482, 320, 34586, 259, 5738, 17, 482, 320, 34586, 274, 5738, 17, 60, 489, 510, 83, 30556, 489, 10814, 489, 320, 34586, 259, 5738, 17, 489, 320, 34586, 274, 5738, 17, 2595, 10267, 596, 16343, 3878, 1473, 5451, 4751, 25, 259, 30556, 482, 49438, 30556, 14, 19, 271, 16041, 4751, 25, 259, 30556, 489, 10814, 482, 
320, 34586, 259, 5738, 17, 482, 320, 34586, 274, 5738, 17, 271, 38075, 4751, 25, 259, 30556, 489, 10814, 489, 320, 34586, 259, 5738, 17, 489, 320, 34586, 274, 5738, 17, 271, 33408, 1124, 1473, 83, 30556, 482, 49438, 30556, 14, 19, 489, 259, 30556, 489, 10814, 482, 320, 34586, 259, 5738, 17, 482, 320, 34586, 274, 5738, 17, 489, 259, 30556, 489, 10814, 489, 320, 34586, 259, 5738, 17, 489, 320, 34586, 274, 5738, 17, 271, 82214, 1093, 3878, 1473, 83, 30556, 489, 259, 30556, 489, 259, 30556, 284, 220, 18, 83, 30556, 271, 2641, 489, 10814, 284, 220, 17, 2641, 271, 12, 34586, 30556, 14, 19, 271, 2520, 279, 3878, 449, 49438, 259, 14, 17, 25, 482, 320, 34586, 259, 5738, 17, 489, 320, 34586, 259, 5738, 17, 284, 220, 15, 271, 68791, 11, 369, 49438, 274, 14, 17, 25, 482, 320, 34586, 274, 5738, 17, 489, 320, 34586, 274, 5738, 17, 284, 220, 15, 271, 4516, 393, 284, 220, 18, 83, 30556, 489, 220, 17, 2641, 482, 49438, 30556, 14, 19, 271, 7184, 1229, 284, 865, 32086, 87, 32907, 87, 121412, 284, 320, 83, 482, 49438, 14, 17, 2432, 83, 489, 49438, 14, 17, 2432, 83, 489, 274, 8, 284, 510, 83, 30556, 482, 320, 34586, 14, 17, 30876, 17, 9725, 83, 489, 274, 8, 284, 320, 83, 30556, 482, 49438, 30556, 14, 19, 2432, 83, 489, 274, 696, 96255, 420, 704, 1473, 28, 259, 44301, 489, 259, 30556, 274, 482, 320, 34586, 30556, 14, 19, 8, 259, 482, 320, 34586, 30556, 14, 19, 8, 274, 271, 7184, 11, 1095, 596, 20206, 328, 11, 393, 11, 1229, 1139, 279, 7645, 1473, 12, 17, 50, 44301, 482, 1544, 48, 489, 220, 24, 50, 393, 271, 5451, 11, 12849, 328, 44301, 1473, 50, 284, 220, 18, 83, 489, 274, 271, 50, 44301, 284, 320, 18, 83, 489, 274, 30876, 18, 284, 220, 1544, 83, 44301, 489, 220, 1544, 83, 30556, 274, 489, 220, 24, 83, 274, 30556, 489, 274, 44301, 271, 96255, 555, 482, 17, 25, 482, 17, 9, 1544, 83, 44301, 482, 17, 9, 1544, 83, 30556, 274, 482, 17, 9, 24, 83, 274, 30556, 482, 17, 34554, 44301, 284, 482, 4370, 83, 44301, 482, 4370, 83, 30556, 274, 482, 972, 83, 274, 30556, 482, 17, 82, 44301, 271, 5971, 
11, 12849, 482, 1544, 48, 1473, 48, 284, 259, 44301, 489, 259, 30556, 274, 482, 320, 34586, 30556, 14, 19, 8, 259, 482, 320, 34586, 30556, 14, 19, 8, 274, 271, 96255, 555, 482, 1544, 25, 482, 1544, 83, 44301, 482, 1544, 83, 30556, 274, 489, 320, 1544, 34586, 30556, 14, 19, 8, 259, 489, 320, 1544, 34586, 30556, 14, 19, 16871, 271, 38075, 4751, 25, 220, 24, 50, 393, 271, 50, 284, 220, 18, 83, 489, 274, 271, 47, 284, 220, 18, 83, 30556, 489, 220, 17, 2641, 482, 49438, 30556, 14, 19, 271, 4516, 220, 24, 50, 393, 284, 220, 24, 6737, 18, 83, 489, 274, 18201, 18, 83, 30556, 489, 220, 17, 2641, 482, 49438, 30556, 14, 19, 696, 10267, 596, 9407, 420, 2027, 3094, 555, 3094, 382, 5451, 11, 31370, 320, 18, 83, 489, 274, 8, 449, 320, 18, 83, 30556, 489, 220, 17, 2641, 482, 49438, 30556, 14, 19, 7887, 28, 220, 18, 83, 6737, 18, 83, 30556, 8, 489, 220, 18, 83, 6737, 17, 2641, 8, 489, 220, 18, 83, 35399, 34586, 30556, 14, 19, 8, 489, 274, 6737, 18, 83, 30556, 8, 489, 274, 6737, 17, 2641, 8, 489, 274, 35399, 34586, 30556, 14, 19, 696, 28, 220, 24, 83, 44301, 489, 220, 21, 83, 30556, 274, 482, 320, 18, 83, 49438, 30556, 5738, 19, 489, 220, 18, 82, 259, 30556, 489, 220, 17, 83, 274, 30556, 482, 320, 82, 49438, 30556, 5738, 19, 271, 82214, 1093, 3878, 1473, 24, 83, 44301, 489, 320, 21, 83, 30556, 274, 489, 220, 18, 83, 30556, 274, 8, 489, 320, 17, 83, 274, 30556, 8, 489, 10505, 18, 83, 49438, 30556, 14, 19, 482, 274, 49438, 30556, 14, 19, 696, 28, 220, 24, 83, 44301, 489, 220, 24, 83, 30556, 274, 489, 220, 17, 83, 274, 30556, 482, 320, 34586, 30556, 14, 19, 2432, 18, 83, 489, 274, 696, 96255, 420, 555, 220, 24, 1473, 24, 6737, 24, 83, 44301, 489, 220, 24, 83, 30556, 274, 489, 220, 17, 83, 274, 30556, 482, 320, 34586, 30556, 14, 19, 2432, 18, 83, 489, 274, 595, 284, 220, 5932, 83, 44301, 489, 220, 5932, 83, 30556, 274, 489, 220, 972, 83, 274, 30556, 482, 320, 24, 34586, 30556, 14, 19, 2432, 18, 83, 489, 274, 696, 7184, 11, 35271, 682, 2380, 5596, 1473, 5451, 961, 25, 482, 4370, 83, 
44301, 482, 4370, 83, 30556, 274, 482, 972, 83, 274, 30556, 482, 17, 82, 44301, 271, 16041, 961, 25, 482, 1544, 83, 44301, 482, 1544, 83, 30556, 274, 489, 320, 1544, 34586, 30556, 14, 19, 8, 259, 489, 320, 1544, 34586, 30556, 14, 19, 16871, 271, 38075, 961, 25, 220, 5932, 83, 44301, 489, 220, 5932, 83, 30556, 274, 489, 220, 972, 83, 274, 30556, 482, 320, 24, 34586, 30556, 14, 19, 2432, 18, 83, 489, 274, 696, 10267, 596, 923, 1124, 4751, 555, 4751, 382, 2520, 259, 44301, 3878, 1473, 12, 4370, 83, 44301, 482, 1544, 83, 44301, 489, 5932, 83, 44301, 284, 220, 15, 271, 2520, 259, 30556, 274, 3878, 1473, 12, 4370, 83, 30556, 274, 482, 1544, 83, 30556, 274, 489, 5932, 83, 30556, 274, 284, 220, 15, 271, 2520, 259, 274, 30556, 3878, 1473, 12, 972, 83, 274, 30556, 489, 972, 83, 274, 30556, 284, 220, 15, 271, 2520, 274, 44301, 3878, 1473, 12, 17, 82, 44301, 320, 1527, 1176, 961, 8, 489, 2564, 320, 5686, 961, 706, 912, 274, 44301, 4751, 11, 4948, 961, 1101, 7000, 8, 779, 2860, 482, 17, 82, 44301, 271, 7184, 279, 3878, 16239, 49438, 30556, 1473, 3915, 2132, 961, 25, 320, 1544, 34586, 30556, 14, 19, 8, 259, 489, 320, 1544, 34586, 30556, 14, 19, 16871, 271, 3915, 4948, 961, 25, 482, 320, 24, 34586, 30556, 14, 19, 2432, 18, 83, 489, 274, 8, 284, 482, 1544, 34586, 30556, 14, 19, 259, 482, 24, 34586, 30556, 14, 19, 274, 271, 33408, 1521, 3871, 1473, 7, 1544, 34586, 30556, 14, 19, 259, 482, 1544, 34586, 30556, 14, 19, 259, 8, 489, 320, 1544, 34586, 30556, 14, 19, 274, 482, 24, 34586, 30556, 14, 19, 274, 8, 284, 220, 15, 489, 320, 972, 34586, 30556, 14, 19, 16871, 284, 320, 24, 34586, 30556, 14, 17, 16871, 271, 4516, 8244, 11, 35271, 682, 3878, 1473, 12, 17, 82, 44301, 489, 320, 24, 34586, 30556, 14, 17, 16871, 271, 4516, 279, 4553, 7645, 15858, 9803, 311, 1473, 12, 17, 82, 44301, 489, 320, 24, 34586, 30556, 14, 17, 16871, 271, 55915, 11, 279, 4113, 7645, 320, 17, 64, 44301, 489, 220, 1544, 66, 482, 24, 370, 8, 17239, 482, 17, 82, 44301, 489, 320, 24, 34586, 30556, 14, 17, 16871, 271, 
55915, 11, 279, 7645, 584, 1205, 311, 35608, 374, 1473, 7, 17, 64, 44301, 489, 220, 1544, 66, 482, 24, 370, 5738, 34586, 44301, 284, 10505, 17, 82, 44301, 489, 320, 24, 34586, 30556, 14, 17, 16871, 5738, 34586, 44301, 284, 10505, 17, 82, 44301, 5738, 34586, 44301, 489, 320, 24, 34586, 30556, 14, 17, 274, 5738, 34586, 44301, 284, 482, 17, 1161, 14, 34586, 30876, 18, 489, 320, 24, 14, 17, 2432, 82, 14, 34586, 696, 10267, 757, 743, 597, 284, 274, 14, 34586, 13, 8876, 274, 871, 220, 15, 323, 49438, 871, 220, 15, 11, 597, 871, 220, 15, 13, 5112, 1057, 7645, 9221, 1473, 12, 17, 74, 44301, 489, 320, 24, 14, 17, 8, 74, 271, 4516, 1457, 11, 279, 3575, 26338, 311, 9455, 279, 7340, 907, 315, 279, 734, 342, 6097, 8, 284, 482, 17, 74, 44301, 489, 320, 24, 14, 17, 8, 74, 369, 597, 871, 220, 15, 382, 2028, 374, 264, 83768, 3575, 13, 2057, 1505, 279, 7340, 11, 1935, 279, 32905, 315, 342, 6097, 8, 449, 5201, 311, 597, 11, 743, 433, 311, 7315, 11, 323, 11886, 369, 597, 382, 47354, 342, 59436, 74, 7887, 70, 59436, 74, 8, 284, 482, 21, 74, 30556, 489, 220, 24, 14, 17, 271, 1681, 342, 59436, 74, 8, 284, 220, 15, 1473, 12, 21, 74, 30556, 489, 220, 24, 14, 17, 284, 220, 15, 271, 2228, 220, 21, 74, 30556, 284, 220, 24, 14, 17, 271, 2228, 597, 30556, 284, 320, 24, 14, 17, 5738, 21, 284, 220, 24, 14, 717, 284, 220, 18, 14, 19, 271, 2228, 597, 284, 18430, 7, 18, 14, 19, 8, 284, 18430, 7, 18, 5738, 17, 118792, 220, 15, 13, 22455, 271, 12834, 597, 871, 220, 15, 11, 584, 1935, 279, 6928, 3789, 13, 4800, 11, 1817, 422, 420, 374, 264, 7340, 555, 2132, 32905, 1296, 382, 16041, 32905, 1473, 70, 4708, 7, 74, 8, 284, 482, 717, 74, 271, 1688, 597, 284, 18430, 7, 18, 5738, 17, 11, 342, 4708, 7, 74, 8, 284, 482, 717, 6737, 27986, 7, 18, 5738, 17, 8, 284, 482, 21, 27986, 7, 18, 8, 366, 220, 15, 11, 902, 3445, 433, 596, 264, 2254, 7340, 382, 55915, 11, 279, 7340, 907, 315, 342, 6097, 8, 374, 17427, 520, 597, 284, 18430, 7, 18, 5738, 17, 382, 47354, 342, 84173, 7, 18, 5738, 17, 7887, 70, 84173, 7, 18, 
5738, 17, 8, 284, 482, 17, 6737, 27986, 7, 18, 5738, 17, 30876, 18, 489, 320, 24, 14, 17, 18201, 27986, 7, 18, 5738, 17, 696, 47354, 1855, 4751, 1473, 5451, 4751, 25, 482, 17, 6737, 320, 18, 13571, 16, 14, 17, 31185, 17, 883, 61, 18, 284, 482, 17, 6737, 320, 18, 13571, 18, 14, 17, 31185, 23, 883, 284, 482, 17, 6737, 18, 110682, 18, 5738, 23, 284, 482, 320, 18, 110682, 18, 5738, 19, 271, 16041, 4751, 25, 320, 24, 14, 17, 18201, 27986, 7, 18, 5738, 17, 8, 284, 320, 24, 110682, 18, 5738, 19, 271, 33408, 2225, 3878, 1473, 12, 320, 18, 110682, 18, 5738, 19, 489, 320, 24, 110682, 18, 5738, 19, 284, 320, 21, 110682, 18, 5738, 19, 284, 320, 18, 110682, 18, 5738, 17, 271, 55915, 11, 279, 7340, 907, 315, 342, 6097, 8, 374, 320, 18, 110682, 18, 5738, 17, 382, 39, 768, 11, 279, 7340, 907, 315, 320, 17, 64, 44301, 489, 220, 1544, 66, 482, 24, 370, 5738, 34586, 44301, 374, 320, 18, 110682, 18, 5738, 17, 382, 4071, 3868, 11, 1095, 757, 1817, 422, 1070, 527, 904, 17413, 389, 597, 13, 20474, 430, 584, 1047, 274, 284, 597, 34586, 13, 2030, 584, 617, 311, 6106, 430, 279, 20282, 527, 1972, 323, 27651, 865, 121412, 871, 320, 87, 32086, 489, 865, 32907, 5738, 17, 13, 4452, 11, 304, 1057, 1719, 17820, 2065, 11, 584, 743, 865, 121412, 284, 259, 489, 274, 11, 323, 2533, 274, 871, 220, 15, 11, 865, 121412, 871, 259, 284, 320, 87, 32086, 489, 865, 32907, 5738, 17, 11, 902, 374, 2736, 20097, 13, 2100, 439, 1317, 439, 274, 871, 220, 15, 11, 279, 3044, 10187, 13, 2030, 2533, 597, 284, 274, 14, 34586, 871, 220, 15, 11, 323, 49438, 871, 220, 15, 11, 274, 871, 220, 15, 374, 13890, 311, 597, 871, 220, 15, 11, 902, 584, 2736, 6646, 13, 15636, 11, 279, 7340, 374, 13118, 320, 18, 110682, 18, 5738, 17, 382, 4071, 1095, 757, 10356, 420, 1121, 555, 13126, 3230, 5157, 13, 1789, 3187, 11, 1935, 49438, 284, 220, 16, 13, 5112, 279, 7645, 320, 17, 64, 44301, 489, 1544, 66, 482, 24, 370, 5738, 16, 44301, 1288, 387, 31127, 1534, 520, 220, 18, 110682, 18, 14, 17, 13, 6914, 596, 1817, 449, 597, 284, 18430, 7, 18, 
5738, 17, 11, 779, 274, 284, 18430, 7, 18, 5738, 17, 382, 12487, 279, 20282, 527, 865, 32086, 284, 259, 482, 220, 16, 14, 17, 11, 865, 32907, 284, 259, 489, 220, 16, 14, 17, 11, 865, 121412, 284, 259, 489, 18430, 7, 18, 5738, 17, 13, 5112, 328, 284, 220, 18, 83, 489, 18430, 7, 18, 5738, 17, 11, 393, 284, 220, 18, 83, 30556, 489, 220, 17, 83, 6737, 27986, 7, 18, 5738, 17, 8, 482, 220, 16, 14, 19, 284, 220, 18, 83, 30556, 489, 259, 9, 27986, 7, 18, 8, 482, 220, 16, 14, 19, 11, 1229, 284, 320, 83, 30556, 482, 220, 16, 14, 19, 2432, 83, 489, 18430, 7, 18, 5738, 17, 570, 5112, 12849, 264, 11, 293, 11, 272, 505, 11655, 1955, 596, 55179, 13, 5112, 12849, 220, 17, 64, 44301, 489, 1544, 66, 482, 24, 370, 13, 1102, 2643, 387, 66838, 11, 719, 8530, 584, 649, 1817, 382, 93114, 11, 5296, 430, 1057, 96354, 6197, 603, 311, 459, 7645, 18222, 1193, 389, 597, 11, 902, 374, 274, 14, 34586, 11, 323, 1555, 50068, 11, 584, 1766, 279, 7340, 13, 15636, 11, 279, 7340, 907, 374, 320, 18, 110682, 18, 5738, 17, 13, 4452, 11, 1095, 757, 19635, 430, 279, 3575, 5415, 865, 121412, 871, 320, 87, 32086, 489, 865, 32907, 5738, 17, 11, 902, 304, 1057, 1719, 17820, 2065, 374, 274, 871, 220, 15, 13, 15636, 11, 439, 1317, 439, 274, 374, 6928, 11, 902, 433, 374, 11, 584, 527, 17339, 13, 2100, 1070, 527, 912, 4726, 17413, 11, 323, 279, 7340, 374, 13118, 320, 18, 110682, 18, 5738, 17, 382, 14524, 11, 719, 1120, 311, 387, 17879, 11, 1095, 596, 1518, 422, 994, 597, 284, 18430, 7, 18, 5738, 17, 11, 279, 41999, 48411, 13118, 706, 2380, 1972, 20282, 13, 8876, 584, 527, 2728, 430, 682, 20282, 527, 1972, 11, 719, 994, 584, 1719, 58053, 11, 584, 1205, 311, 6106, 430, 279, 41999, 449, 1521, 20282, 374, 2764, 13, 4452, 11, 2533, 584, 527, 2728, 430, 682, 20282, 527, 1972, 323, 27651, 279, 2728, 4787, 11, 1057, 1719, 17820, 2065, 2736, 5097, 430, 1139, 2759, 13, 15636, 11, 439, 1317, 439, 274, 871, 220, 15, 11, 279, 20282, 527, 1972, 323, 27651, 279, 2631, 4787, 13, 15636, 11, 279, 7340, 907, 374, 13118, 320, 18, 
110682, 18, 5738, 17, 382, 45600, 11, 279, 7340, 907, 315, 320, 17, 64, 44301, 489, 1544, 66, 482, 24, 370, 5738, 34586, 44301, 374, 320, 18, 110682, 18, 5738, 17, 382, 334, 19918, 22559, 1035, 59, 80175, 36802, 67, 38118, 90, 18, 59, 27986, 90, 18, 3500, 90, 17, 11498, 524, 27963, 1363, 22818, 1972, 5219, 18240, 64, 11, 293, 11, 272, 58858, 323, 264, 6928, 1396, 1144, 11781, 13231, 58858, 1778, 430, 279, 48411, 18240, 69, 2120, 8, 284, 865, 61, 18, 489, 3944, 61, 17, 489, 45341, 489, 272, 58858, 706, 2380, 1972, 20282, 18240, 87, 62, 16, 11, 865, 62, 17, 11, 865, 62, 18, 58858, 449, 279, 4787, 18240, 87, 62, 17, 482, 865, 62, 16, 284, 1144, 13231, 58858, 323, 18240, 87, 62, 18, 871, 1144, 38118, 90, 16, 15523, 17, 26628, 87, 62, 16, 489, 865, 62, 17, 10929, 705, 584, 1205, 311, 1505, 279, 7340, 907, 315, 1144, 11781, 38118, 90, 17, 64, 61, 18, 489, 220, 1544, 66, 482, 220, 24, 370, 15523, 59, 13231, 61, 18, 11281, 3677, 16834, 11655, 1955, 596, 55179, 369, 279, 48411, 18240, 69, 2120, 10929, 997, 16, 13, 18240, 87, 62, 16, 489, 865, 62, 17, 489, 865, 62, 18, 284, 482, 64, 59, 340, 17, 13, 18240, 87, 62, 16, 87, 62, 17, 489, 865, 62, 16, 87, 62, 18, 489, 865, 62, 17, 87, 62, 18, 284, 293, 59, 340, 18, 13, 18240, 87, 62, 16, 87, 62, 17, 87, 62, 18, 284, 482, 66, 59, 696, 1687, 1719, 58053, 279, 20282, 439, 11263, 512, 12, 18240, 87, 62, 16, 284, 259, 482, 1144, 38118, 36802, 13231, 15523, 17, 11281, 340, 12, 18240, 87, 62, 17, 284, 259, 489, 1144, 38118, 36802, 13231, 15523, 17, 11281, 340, 12, 18240, 87, 62, 18, 284, 259, 489, 274, 58858, 1405, 18240, 82, 871, 220, 15, 59, 696, 3214, 3781, 10831, 1521, 1139, 11655, 1955, 596, 55179, 11, 584, 636, 512, 12, 18240, 50, 284, 865, 62, 16, 489, 865, 62, 17, 489, 865, 62, 18, 284, 220, 18, 83, 489, 274, 59, 340, 12, 18240, 47, 284, 865, 62, 16, 87, 62, 17, 489, 865, 62, 16, 87, 62, 18, 489, 865, 62, 17, 87, 62, 18, 284, 220, 18, 83, 61, 17, 489, 220, 17, 2641, 482, 1144, 38118, 36802, 13231, 61, 17, 15523, 19, 11281, 340, 
12, 18240, 48, 284, 865, 62, 16, 87, 62, 17, 87, 62, 18, 284, 320, 83, 61, 17, 482, 1144, 38118, 36802, 13231, 61, 17, 15523, 19, 32988, 83, 489, 274, 10929, 696, 3214, 3781, 10831, 18240, 64, 284, 482, 50, 59, 705, 18240, 65, 284, 393, 59, 705, 323, 18240, 66, 284, 482, 48, 58858, 1139, 279, 7645, 18240, 17, 64, 61, 18, 489, 220, 1544, 66, 482, 220, 24, 370, 59, 705, 584, 40821, 433, 311, 512, 59, 9837, 12, 17, 82, 61, 18, 489, 1144, 38118, 90, 24, 15523, 17, 11281, 13231, 61, 17, 274, 198, 59, 2595, 1090, 60637, 18240, 74, 284, 1144, 38118, 85486, 15523, 59, 13231, 11281, 705, 279, 7645, 9221, 512, 59, 9837, 59, 38118, 20597, 17, 82, 61, 18, 489, 1144, 38118, 90, 24, 15523, 17, 11281, 13231, 61, 17, 274, 15523, 59, 13231, 61, 18, 92, 284, 482, 17, 74, 61, 18, 489, 1144, 38118, 90, 24, 15523, 17, 92, 74, 198, 59, 2595, 1271, 35608, 1144, 4172, 17, 74, 61, 18, 489, 1144, 38118, 90, 24, 15523, 17, 92, 74, 59, 705, 584, 1935, 279, 32905, 323, 743, 433, 311, 7315, 512, 59, 9837, 70, 59436, 74, 8, 284, 482, 21, 74, 61, 17, 489, 1144, 38118, 90, 24, 15523, 17, 92, 284, 220, 15, 1144, 6517, 552, 597, 61, 17, 284, 1144, 38118, 90, 18, 15523, 19, 92, 1144, 6517, 552, 597, 284, 1144, 38118, 36802, 27986, 90, 18, 3500, 90, 17, 534, 59, 2595, 41229, 279, 2132, 32905, 43496, 264, 7340, 13, 3804, 3781, 10831, 18240, 74, 284, 1144, 38118, 36802, 27986, 90, 18, 3500, 90, 17, 11281, 8, 1203, 1139, 279, 7645, 6835, 512, 59, 9837, 12, 17, 59, 2414, 11781, 38118, 36802, 27986, 90, 18, 3500, 90, 17, 11281, 1315, 30876, 18, 489, 1144, 38118, 90, 24, 15523, 17, 11281, 2414, 11781, 38118, 36802, 27986, 90, 18, 3500, 90, 17, 11281, 1315, 8, 284, 1144, 38118, 90, 18, 59, 27986, 90, 18, 3500, 90, 17, 534, 59, 2595, 45600, 11, 279, 7340, 907, 315, 1144, 11781, 38118, 90, 17, 64, 61, 18, 489, 220, 1544, 66, 482, 220, 24, 370, 15523, 59, 13231, 61, 18, 11281, 8, 374, 1144, 11781, 80175, 36802, 67, 38118, 90, 18, 59, 27986, 90, 18, 3500, 90, 17, 3500, 59, 3677, 791, 1620, 4320, 374, 1144, 
80175, 36802, 67, 38118, 90, 18, 59, 27986, 90, 18, 3500, 90, 17, 3500, 13, 128009]
+inputs:
+<|begin_of_text|><|start_header_id|>system<|end_header_id|>
+
+Your role as an assistant involves thoroughly exploring questions through a systematic long thinking process before providing the final precise and accurate solutions. This requires engaging in a comprehensive cycle of analysis, summarizing, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. Please structure your response into two main sections: Thought and Solution. In the Thought section, first think step by step inside ... tags. Each step should include detailed considerations such as analysing questions, summarizing relevant findings, brainstorming new ideas, verifying the accuracy of the current steps, refining any errors, and revisiting previous steps. In the Solution section, based on various attempts, explorations, and reflections from the Thought section, systematically present the final solution that you deem correct. The solution should remain a logical, accurate, concise expression style and detail necessary step needed to reach the conclusion. Now, try to solve the following question through the above guidelines: Please enclose your final answer in the box: \boxed{Your Answer}.<|eot_id|><|start_header_id|>user<|end_header_id|>
+
+Given real numbers \( a, b, c \) and a positive number \( \lambda \) such that the polynomial \( f(x) = x^3 + a x^2 + b x + c \) has three real roots \( x_1, x_2, x_3 \), and the conditions \( x_2 - x_1 = \lambda \) and \( x_3 > \frac{1}{2}(x_1 + x_2) \) are satisfied, find the maximum value of \( \frac{2 a^3 + 27 c - 9 a b}{\lambda^3} \).<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+
+Okay, let's try to tackle this problem step by step. The problem is about finding the maximum value of the expression (2a³ + 27c - 9ab)/λ³ given some conditions on the roots of the cubic polynomial f(x) = x³ + ax² + bx + c. The roots are real, with x₂ - x₁ = λ and x₃ > (x₁ + x₂)/2.
+
+First, I need to recall that for a cubic polynomial with roots x₁, x₂, x₃, the coefficients can be expressed using Vieta's formulas. So, let's write those down.
+
+Vieta's formulas for f(x) = x³ + ax² + bx + c:
+1. Sum of roots: x₁ + x₂ + x₃ = -a
+2. Sum of products: x₁x₂ + x₁x₃ + x₂x₃ = b
+3. Product of roots: x₁x₂x₃ = -c
+
+Our target expression is (2a³ + 27c - 9ab)/λ³. Let's see if we can express this in terms of the roots using Vieta's formulas.
+
+First, let's substitute a, b, c in terms of the roots.
+
+From Vieta:
+a = -(x₁ + x₂ + x₃)
+b = x₁x₂ + x₁x₃ + x₂x₃
+c = -x₁x₂x₃
+
+Plugging these into the expression:
+
+2a³ + 27c - 9ab
+= 2(-(x₁ + x₂ + x₃))³ + 27(-x₁x₂x₃) - 9(-(x₁ + x₂ + x₃))(x₁x₂ + x₁x₃ + x₂x₃)
+
+Let me compute each term step by step.
+
+First term: 2(-(x₁ + x₂ + x₃))³ = -2(x₁ + x₂ + x₃)³
+
+Second term: 27(-x₁x₂x₃) = -27x₁x₂x₃
+
+Third term: -9(-(x₁ + x₂ + x₃))(x₁x₂ + x₁x₃ + x₂x₃) = 9(x₁ + x₂ + x₃)(x₁x₂ + x₁x₃ + x₂x₃)
+
+So combining all three:
+
+-2(x₁ + x₂ + x₃)³ -27x₁x₂x₃ + 9(x₁ + x₂ + x₃)(x₁x₂ + x₁x₃ + x₂x₃)
+
+Hmm, this seems complex. Maybe there is a known identity that relates these terms? Let me think.
+
+Recall that for a cubic polynomial, the discriminant D is given by D = 18abcd - 4b³d + b²c² - 4ac³ - 27a²d² for ax³ + bx² + cx + d. But in our case, the polynomial is monic (leading coefficient 1), so a=1, but here the coefficients are a, b, c as given. Wait, actually, in the standard form, the discriminant for x³ + a x² + b x + c would be D = 18abc - 4a³c + a²b² - 4b³ - 27c². But I might be mixing up the formula. Let me verify:
+
+Wait, the discriminant of a cubic x³ + px² + qx + r is given by:
+
+Δ = 18pqr - 4p³r + p²q² - 4q³ - 27r²
+
+Yes, so in our case, with p = a, q = b, r = c, so Δ = 18abc - 4a³c + a²b² - 4b³ - 27c²
+
+But I'm not sure if the discriminant is directly related here, but maybe. Since the polynomial has three real roots, the discriminant must be non-negative. However, the problem states that all roots are real, so Δ ≥ 0. But maybe the expression we're dealing with is related to the discriminant?
+
+Wait, let's check the expression given: 2a³ + 27c - 9ab. If we compare with the discriminant formula:
+
+Δ = 18abc - 4a³c + a²b² - 4b³ - 27c²
+
+Not directly obvious. Maybe not. Let's try another approach.
+
+Alternatively, perhaps the expression (2a³ + 27c - 9ab) can be rewritten in terms of the roots. Let's try substituting the Vieta expressions into it.
+
+So let's substitute a, b, c:
+
+2(-(x₁ + x₂ + x₃))³ + 27(-x₁x₂x₃) - 9(-(x₁ + x₂ + x₃))(x₁x₂ + x₁x₃ + x₂x₃)
+
+Compute each term:
+
+First term: 2*(-1)^3*(x₁ + x₂ + x₃)^3 = -2(x₁ + x₂ + x₃)^3
+
+Second term: 27*(-x₁x₂x₃) = -27x₁x₂x₃
+
+Third term: -9*(-1)*(x₁ + x₂ + x₃)(x₁x₂ + x₁x₃ + x₂x₃) = 9(x₁ + x₂ + x₃)(x₁x₂ + x₁x₃ + x₂x₃)
+
+So putting it all together:
+
+-2(x₁ + x₂ + x₃)^3 - 27x₁x₂x₃ + 9(x₁ + x₂ + x₃)(x₁x₂ + x₁x₃ + x₂x₃)
+
+Hmm. Let's see if we can factor this or simplify it. Let's denote S = x₁ + x₂ + x₃, P = x₁x₂ + x₁x₃ + x₂x₃, Q = x₁x₂x₃. Then our expression becomes:
+
+-2S³ -27Q + 9S P
+
+But for a cubic polynomial, the relationship between S, P, Q is given by Vieta's formulas. But perhaps we can relate this expression to something else.
+
+Alternatively, maybe using symmetric sums. Let's compute this expression for specific roots. Let's suppose that x₁, x₂, x₃ are variables with x₂ - x₁ = λ and x₃ > (x₁ + x₂)/2. Maybe we can parametrize the roots in terms of variables that capture the given conditions.
+
+Given that x₂ - x₁ = λ, let's set x₁ = t - λ/2 and x₂ = t + λ/2 for some t. Then the midpoint of x₁ and x₂ is t, and the condition x₃ > (x₁ + x₂)/2 becomes x₃ > t.
+
+Therefore, let me set:
+
+x₁ = t - λ/2
+
+x₂ = t + λ/2
+
+x₃ = t + s, where s > 0 (since x₃ > t)
+
+So now, our roots are expressed in terms of t, λ, and s > 0.
+
+Now, let's compute S, P, Q in terms of t, λ, s.
+
+First, S = x₁ + x₂ + x₃ = (t - λ/2) + (t + λ/2) + (t + s) = 3t + s
+
+Second, P = x₁x₂ + x₁x₃ + x₂x₃
+
+Compute each term:
+
+x₁x₂ = (t - λ/2)(t + λ/2) = t² - (λ/2)² = t² - λ²/4
+
+x₁x₃ = (t - λ/2)(t + s) = t(t + s) - (λ/2)(t + s) = t² + ts - (λ t)/2 - (λ s)/2
+
+x₂x₃ = (t + λ/2)(t + s) = t(t + s) + (λ/2)(t + s) = t² + ts + (λ t)/2 + (λ s)/2
+
+Adding these together:
+
+P = [t² - λ²/4] + [t² + ts - (λ t)/2 - (λ s)/2] + [t² + ts + (λ t)/2 + (λ s)/2]
+
+Let's combine terms:
+
+First term: t² - λ²/4
+
+Second term: t² + ts - (λ t)/2 - (λ s)/2
+
+Third term: t² + ts + (λ t)/2 + (λ s)/2
+
+Adding them:
+
+t² - λ²/4 + t² + ts - (λ t)/2 - (λ s)/2 + t² + ts + (λ t)/2 + (λ s)/2
+
+Combine like terms:
+
+t² + t² + t² = 3t²
+
+ts + ts = 2ts
+
+-λ²/4
+
+For the terms with λ t/2: - (λ t)/2 + (λ t)/2 = 0
+
+Similarly, for λ s/2: - (λ s)/2 + (λ s)/2 = 0
+
+So P = 3t² + 2ts - λ²/4
+
+Now Q = x₁x₂x₃ = (t - λ/2)(t + λ/2)(t + s) = [t² - (λ/2)^2](t + s) = (t² - λ²/4)(t + s)
+
+Multiply this out:
+
+= t³ + t² s - (λ²/4) t - (λ²/4) s
+
+Now, let's plug S, P, Q into the expression:
+
+-2S³ -27Q + 9S P
+
+First, compute S³:
+
+S = 3t + s
+
+S³ = (3t + s)^3 = 27t³ + 27t² s + 9t s² + s³
+
+Multiply by -2: -2*27t³ -2*27t² s -2*9t s² -2*s³ = -54t³ -54t² s -18t s² -2s³
+
+Next, compute -27Q:
+
+Q = t³ + t² s - (λ²/4) t - (λ²/4) s
+
+Multiply by -27: -27t³ -27t² s + (27λ²/4) t + (27λ²/4)s
+
+Third term: 9S P
+
+S = 3t + s
+
+P = 3t² + 2ts - λ²/4
+
+So 9S P = 9*(3t + s)*(3t² + 2ts - λ²/4)
+
+Let's expand this product step by step.
+
+First, multiply (3t + s) with (3t² + 2ts - λ²/4):
+
+= 3t*(3t²) + 3t*(2ts) + 3t*(-λ²/4) + s*(3t²) + s*(2ts) + s*(-λ²/4)
+
+= 9t³ + 6t² s - (3t λ²)/4 + 3s t² + 2t s² - (s λ²)/4
+
+Combine like terms:
+
+9t³ + (6t² s + 3t² s) + (2t s²) + (-3t λ²/4 - s λ²/4)
+
+= 9t³ + 9t² s + 2t s² - (λ²/4)(3t + s)
+
+Multiply this by 9:
+
+9*(9t³ + 9t² s + 2t s² - (λ²/4)(3t + s)) = 81t³ + 81t² s + 18t s² - (9λ²/4)(3t + s)
+
+Now, combining all three parts:
+
+First part: -54t³ -54t² s -18t s² -2s³
+
+Second part: -27t³ -27t² s + (27λ²/4) t + (27λ²/4)s
+
+Third part: 81t³ + 81t² s + 18t s² - (9λ²/4)(3t + s)
+
+Let's add them term by term.
+
+For t³ terms:
+
+-54t³ -27t³ +81t³ = 0
+
+For t² s terms:
+
+-54t² s -27t² s +81t² s = 0
+
+For t s² terms:
+
+-18t s² +18t s² = 0
+
+For s³ terms:
+
+-2s³ (from first part) +... (second part has no s³ term, third part also none) so total -2s³
+
+Now the terms involving λ²:
+
+From second part: (27λ²/4) t + (27λ²/4)s
+
+From third part: - (9λ²/4)(3t + s) = -27λ²/4 t -9λ²/4 s
+
+Adding these together:
+
+(27λ²/4 t -27λ²/4 t) + (27λ²/4 s -9λ²/4 s) = 0 + (18λ²/4)s = (9λ²/2)s
+
+So overall, combining all terms:
+
+-2s³ + (9λ²/2)s
+
+So the entire expression simplifies to:
+
+-2s³ + (9λ²/2)s
+
+Therefore, the original expression (2a³ + 27c -9ab) equals -2s³ + (9λ²/2)s
+
+Therefore, the expression we need to maximize is:
+
+(2a³ + 27c -9ab)/λ³ = (-2s³ + (9λ²/2)s)/λ³ = (-2s³)/λ³ + (9λ²/2 s)/λ³ = -2(s/λ)^3 + (9/2)(s/λ)
+
+Let me set k = s/λ. Since s > 0 and λ > 0, k > 0. Then our expression becomes:
+
+-2k³ + (9/2)k
+
+So now, the problem reduces to finding the maximum value of the function g(k) = -2k³ + (9/2)k for k > 0.
+
+This is a calculus problem. To find the maximum, take the derivative of g(k) with respect to k, set it to zero, and solve for k.
+
+Compute g'(k):
+
+g'(k) = -6k² + 9/2
+
+Set g'(k) = 0:
+
+-6k² + 9/2 = 0
+
+=> 6k² = 9/2
+
+=> k² = (9/2)/6 = 9/12 = 3/4
+
+=> k = sqrt(3/4) = sqrt(3)/2 ≈ 0.866
+
+Since k > 0, we take the positive root. Now, check if this is a maximum by second derivative test.
+
+Second derivative:
+
+g''(k) = -12k
+
+At k = sqrt(3)/2, g''(k) = -12*(sqrt(3)/2) = -6sqrt(3) < 0, which means it's a local maximum.
+
+Therefore, the maximum value of g(k) is achieved at k = sqrt(3)/2.
+
+Compute g(sqrt(3)/2):
+
+g(sqrt(3)/2) = -2*(sqrt(3)/2)^3 + (9/2)*(sqrt(3)/2)
+
+Compute each term:
+
+First term: -2*( (3^(1/2))/2 )^3 = -2*( (3^(3/2))/8 ) = -2*(3√3)/8 = - (3√3)/4
+
+Second term: (9/2)*(sqrt(3)/2) = (9√3)/4
+
+Adding both terms:
+
+- (3√3)/4 + (9√3)/4 = (6√3)/4 = (3√3)/2
+
+Therefore, the maximum value of g(k) is (3√3)/2.
+
+Hence, the maximum value of (2a³ + 27c -9ab)/λ³ is (3√3)/2.
+
+But wait, let me check if there are any constraints on k. Remember that we had s = kλ. But we have to ensure that the roots are real and satisfy x₃ > (x₁ + x₂)/2. However, in our parametrization, we set x₃ = t + s, and since s > 0, x₃ > t = (x₁ + x₂)/2, which is already satisfied. So as long as s > 0, the condition holds. But since k = s/λ > 0, and λ > 0, s > 0 is equivalent to k > 0, which we already considered. Therefore, the maximum is indeed (3√3)/2.
+
+But let me verify this result by considering specific cases. For example, take λ = 1. Then the expression (2a³ +27c -9ab)/1³ should be maximized at 3√3/2. Let's check with k = sqrt(3)/2, so s = sqrt(3)/2.
+
+Then the roots are x₁ = t - 1/2, x₂ = t + 1/2, x₃ = t + sqrt(3)/2. Then S = 3t + sqrt(3)/2, P = 3t² + 2t*(sqrt(3)/2) - 1/4 = 3t² + t*sqrt(3) - 1/4, Q = (t² - 1/4)(t + sqrt(3)/2). Then compute a, b, c from Vieta's formulas. Then compute 2a³ +27c -9ab. It might be tedious, but perhaps we can check.
+
+Alternatively, note that our derivation led us to an expression dependent only on k, which is s/λ, and through substitution, we found the maximum. Therefore, the maximum value is (3√3)/2. However, let me recall that the problem states x₃ > (x₁ + x₂)/2, which in our parametrization is s > 0. Therefore, as long as s is positive, which it is, we are okay. So there are no further constraints, and the maximum is indeed (3√3)/2.
+
+Wait, but just to be thorough, let's see if when k = sqrt(3)/2, the cubic polynomial indeed has three real roots. Since we are given that all roots are real, but when we parametrize, we need to ensure that the cubic with these roots is valid. However, since we are given that all roots are real and satisfy the given conditions, our parametrization already takes that into account. Therefore, as long as s > 0, the roots are real and satisfy the required conditions. Therefore, the maximum value is indeed (3√3)/2.
+
+Thus, the maximum value of (2a³ +27c -9ab)/λ³ is (3√3)/2.
+
+**Final Answer**
+\boxed{\dfrac{3\sqrt{3}}{2}}
+
+
+Given real numbers \(a, b, c\) and a positive number \(\lambda\) such that the polynomial \(f(x) = x^3 + ax^2 + bx + c\) has three real roots \(x_1, x_2, x_3\) with the conditions \(x_2 - x_1 = \lambda\) and \(x_3 > \frac{1}{2}(x_1 + x_2)\), we need to find the maximum value of \(\frac{2a^3 + 27c - 9ab}{\lambda^3}\).
+
+Using Vieta's formulas for the polynomial \(f(x)\):
+1. \(x_1 + x_2 + x_3 = -a\)
+2. \(x_1x_2 + x_1x_3 + x_2x_3 = b\)
+3. \(x_1x_2x_3 = -c\)
+
+We parametrize the roots as follows:
+- \(x_1 = t - \frac{\lambda}{2}\)
+- \(x_2 = t + \frac{\lambda}{2}\)
+- \(x_3 = t + s\) where \(s > 0\)
+
+Substituting these into Vieta's formulas, we get:
+- \(S = x_1 + x_2 + x_3 = 3t + s\)
+- \(P = x_1x_2 + x_1x_3 + x_2x_3 = 3t^2 + 2ts - \frac{\lambda^2}{4}\)
+- \(Q = x_1x_2x_3 = (t^2 - \frac{\lambda^2}{4})(t + s)\)
+
+Substituting \(a = -S\), \(b = P\), and \(c = -Q\) into the expression \(2a^3 + 27c - 9ab\), we simplify it to:
+\[
+-2s^3 + \frac{9}{2}\lambda^2 s
+\]
+
+Introducing \(k = \frac{s}{\lambda}\), the expression becomes:
+\[
+\frac{-2s^3 + \frac{9}{2}\lambda^2 s}{\lambda^3} = -2k^3 + \frac{9}{2}k
+\]
+
+To maximize \(-2k^3 + \frac{9}{2}k\), we take the derivative and set it to zero:
+\[
+g'(k) = -6k^2 + \frac{9}{2} = 0 \implies k^2 = \frac{3}{4} \implies k = \frac{\sqrt{3}}{2}
+\]
+
+Checking the second derivative confirms a maximum. Substituting \(k = \frac{\sqrt{3}}{2}\) back into the expression gives:
+\[
+-2\left(\frac{\sqrt{3}}{2}\right)^3 + \frac{9}{2}\left(\frac{\sqrt{3}}{2}\right) = \frac{3\sqrt{3}}{2}
+\]
+
+Thus, the maximum value of \(\frac{2a^3 + 27c - 9ab}{\lambda^3}\) is \(\boxed{\dfrac{3\sqrt{3}}{2}}\).
+
+The final answer is \boxed{\dfrac{3\sqrt{3}}{2}}.<|eot_id|>
+label_ids:
+[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 14023, 771, 397, 33413, 11, 1095, 596, 1456, 311, 22118, 420, 3575, 3094, 555, 3094, 13, 578, 3575, 374, 922, 9455, 279, 7340, 907, 315, 279, 7645, 320, 17, 64, 44301, 489, 220, 1544, 66, 482, 220, 24, 370, 5738, 34586, 44301, 2728, 1063, 4787, 389, 279, 20282, 315, 279, 41999, 48411, 282, 2120, 8, 284, 865, 44301, 489, 3944, 30556, 489, 45341, 489, 272, 13, 578, 20282, 527, 1972, 11, 449, 865, 32907, 482, 865, 32086, 284, 49438, 323, 865, 121412, 871, 320, 87, 32086, 489, 865, 32907, 5738, 17, 13, 4815, 5451, 11, 358, 1205, 311, 19635, 430, 369, 264, 41999, 48411, 449, 20282, 865, 32086, 11, 865, 32907, 11, 865, 121412, 11, 279, 37929, 649, 387, 13605, 1701, 11655, 1955, 596, 55179, 13, 2100, 11, 1095, 596, 3350, 1884, 1523, 13, 4815, 36644, 1955, 596, 55179, 369, 282, 2120, 8, 284, 865, 44301, 489, 3944, 30556, 489, 45341, 489, 272, 512, 16, 13, 8279, 315, 20282, 25, 865, 32086, 489, 865, 32907, 489, 865, 121412, 284, 482, 64, 198, 17, 13, 8279, 315, 3956, 25, 865, 32086, 87, 32907, 489, 865, 32086, 87, 121412, 489, 865, 32907, 87, 121412, 284, 293, 198, 18, 13, 5761, 315, 20282, 25, 865, 32086, 87, 32907, 87, 121412, 284, 482, 66, 271, 8140, 2218, 7645, 374, 320, 17, 64, 44301, 489, 220, 1544, 66, 482, 220, 24, 370, 5738, 34586, 44301, 13, 6914, 596, 1518, 422, 584, 649, 3237, 420, 304, 3878, 315, 279, 20282, 1701, 11655, 1955, 596, 55179, 13, 4815, 5451, 11, 1095, 596, 28779, 264, 11, 293, 11, 272, 304, 3878, 315, 279, 20282, 13, 4815, 3915, 11655, 1955, 512, 64, 284, 29506, 87, 32086, 489, 865, 32907, 489, 865, 121412, 340, 65, 284, 865, 32086, 87, 32907, 489, 865, 32086, 87, 121412, 489, 865, 32907, 87, 121412, 198, 66, 284, 482, 87, 32086, 87, 32907, 87, 121412, 271, 2169, 36368, 1521, 1139, 279, 7645, 1473, 17, 64, 44301, 489, 220, 1544, 66, 482, 220, 24, 370, 720, 28, 220, 17, 66767, 87, 
32086, 489, 865, 32907, 489, 865, 121412, 595, 44301, 489, 220, 1544, 4172, 87, 32086, 87, 32907, 87, 121412, 8, 482, 220, 24, 66767, 87, 32086, 489, 865, 32907, 489, 865, 121412, 29254, 87, 32086, 87, 32907, 489, 865, 32086, 87, 121412, 489, 865, 32907, 87, 121412, 696, 10267, 757, 12849, 1855, 4751, 3094, 555, 3094, 382, 5451, 4751, 25, 220, 17, 66767, 87, 32086, 489, 865, 32907, 489, 865, 121412, 595, 44301, 284, 482, 17, 2120, 32086, 489, 865, 32907, 489, 865, 121412, 8, 44301, 271, 16041, 4751, 25, 220, 1544, 4172, 87, 32086, 87, 32907, 87, 121412, 8, 284, 482, 1544, 87, 32086, 87, 32907, 87, 121412, 271, 38075, 4751, 25, 482, 24, 66767, 87, 32086, 489, 865, 32907, 489, 865, 121412, 29254, 87, 32086, 87, 32907, 489, 865, 32086, 87, 121412, 489, 865, 32907, 87, 121412, 8, 284, 220, 24, 2120, 32086, 489, 865, 32907, 489, 865, 121412, 2432, 87, 32086, 87, 32907, 489, 865, 32086, 87, 121412, 489, 865, 32907, 87, 121412, 696, 4516, 35271, 682, 2380, 1473, 12, 17, 2120, 32086, 489, 865, 32907, 489, 865, 121412, 8, 44301, 482, 1544, 87, 32086, 87, 32907, 87, 121412, 489, 220, 24, 2120, 32086, 489, 865, 32907, 489, 865, 121412, 2432, 87, 32086, 87, 32907, 489, 865, 32086, 87, 121412, 489, 865, 32907, 87, 121412, 696, 81122, 11, 420, 5084, 6485, 13, 10926, 1070, 374, 264, 3967, 9764, 430, 36716, 1521, 3878, 30, 6914, 757, 1781, 13, 4815, 3905, 543, 430, 369, 264, 41999, 48411, 11, 279, 50419, 519, 423, 374, 2728, 555, 423, 284, 220, 972, 69744, 482, 220, 19, 65, 44301, 67, 489, 293, 30556, 66, 30556, 482, 220, 19, 582, 44301, 482, 220, 1544, 64, 30556, 67, 30556, 369, 3944, 44301, 489, 45341, 30556, 489, 21375, 489, 294, 13, 2030, 304, 1057, 1162, 11, 279, 48411, 374, 1647, 292, 320, 21307, 36706, 220, 16, 705, 779, 264, 28, 16, 11, 719, 1618, 279, 37929, 527, 264, 11, 293, 11, 272, 439, 2728, 13, 14144, 11, 3604, 11, 304, 279, 5410, 1376, 11, 279, 50419, 519, 369, 865, 44301, 489, 264, 865, 30556, 489, 293, 865, 489, 272, 1053, 387, 423, 284, 220, 972, 13997, 482, 
220, 19, 64, 44301, 66, 489, 264, 30556, 65, 30556, 482, 220, 19, 65, 44301, 482, 220, 1544, 66, 30556, 13, 2030, 358, 2643, 387, 27890, 709, 279, 15150, 13, 6914, 757, 10356, 1473, 14524, 11, 279, 50419, 519, 315, 264, 41999, 865, 44301, 489, 17585, 30556, 489, 97130, 489, 436, 374, 2728, 555, 1473, 101561, 284, 220, 972, 79, 23866, 482, 220, 19, 79, 44301, 81, 489, 281, 30556, 80, 30556, 482, 220, 19, 80, 44301, 482, 220, 1544, 81, 30556, 271, 9642, 11, 779, 304, 1057, 1162, 11, 449, 281, 284, 264, 11, 2874, 284, 293, 11, 436, 284, 272, 11, 779, 82263, 284, 220, 972, 13997, 482, 220, 19, 64, 44301, 66, 489, 264, 30556, 65, 30556, 482, 220, 19, 65, 44301, 482, 220, 1544, 66, 30556, 271, 4071, 358, 2846, 539, 2771, 422, 279, 50419, 519, 374, 6089, 5552, 1618, 11, 719, 7344, 13, 8876, 279, 48411, 706, 2380, 1972, 20282, 11, 279, 50419, 519, 2011, 387, 2536, 62035, 13, 4452, 11, 279, 3575, 5415, 430, 682, 20282, 527, 1972, 11, 779, 82263, 63247, 220, 15, 13, 2030, 7344, 279, 7645, 584, 2351, 14892, 449, 374, 5552, 311, 279, 50419, 519, 1980, 14524, 11, 1095, 596, 1817, 279, 7645, 2728, 25, 220, 17, 64, 44301, 489, 220, 1544, 66, 482, 220, 24, 370, 13, 1442, 584, 9616, 449, 279, 50419, 519, 15150, 1473, 101561, 284, 220, 972, 13997, 482, 220, 19, 64, 44301, 66, 489, 264, 30556, 65, 30556, 482, 220, 19, 65, 44301, 482, 220, 1544, 66, 30556, 271, 2688, 6089, 8196, 13, 10926, 539, 13, 6914, 596, 1456, 2500, 5603, 382, 93114, 11, 8530, 279, 7645, 320, 17, 64, 44301, 489, 220, 1544, 66, 482, 220, 24, 370, 8, 649, 387, 59624, 304, 3878, 315, 279, 20282, 13, 6914, 596, 1456, 32434, 10831, 279, 11655, 1955, 24282, 1139, 433, 382, 4516, 1095, 596, 28779, 264, 11, 293, 11, 272, 1473, 17, 66767, 87, 32086, 489, 865, 32907, 489, 865, 121412, 595, 44301, 489, 220, 1544, 4172, 87, 32086, 87, 32907, 87, 121412, 8, 482, 220, 24, 66767, 87, 32086, 489, 865, 32907, 489, 865, 121412, 29254, 87, 32086, 87, 32907, 489, 865, 32086, 87, 121412, 489, 865, 32907, 87, 121412, 696, 47354, 1855, 
4751, 1473, 5451, 4751, 25, 220, 17, 35399, 16, 30876, 18, 6737, 87, 32086, 489, 865, 32907, 489, 865, 121412, 30876, 18, 284, 482, 17, 2120, 32086, 489, 865, 32907, 489, 865, 121412, 30876, 18, 271, 16041, 4751, 25, 220, 1544, 35399, 87, 32086, 87, 32907, 87, 121412, 8, 284, 482, 1544, 87, 32086, 87, 32907, 87, 121412, 271, 38075, 4751, 25, 482, 24, 35399, 16, 18201, 87, 32086, 489, 865, 32907, 489, 865, 121412, 2432, 87, 32086, 87, 32907, 489, 865, 32086, 87, 121412, 489, 865, 32907, 87, 121412, 8, 284, 220, 24, 2120, 32086, 489, 865, 32907, 489, 865, 121412, 2432, 87, 32086, 87, 32907, 489, 865, 32086, 87, 121412, 489, 865, 32907, 87, 121412, 696, 4516, 10917, 433, 682, 3871, 1473, 12, 17, 2120, 32086, 489, 865, 32907, 489, 865, 121412, 30876, 18, 482, 220, 1544, 87, 32086, 87, 32907, 87, 121412, 489, 220, 24, 2120, 32086, 489, 865, 32907, 489, 865, 121412, 2432, 87, 32086, 87, 32907, 489, 865, 32086, 87, 121412, 489, 865, 32907, 87, 121412, 696, 81122, 13, 6914, 596, 1518, 422, 584, 649, 8331, 420, 477, 40821, 433, 13, 6914, 596, 79164, 328, 284, 865, 32086, 489, 865, 32907, 489, 865, 121412, 11, 393, 284, 865, 32086, 87, 32907, 489, 865, 32086, 87, 121412, 489, 865, 32907, 87, 121412, 11, 1229, 284, 865, 32086, 87, 32907, 87, 121412, 13, 5112, 1057, 7645, 9221, 1473, 12, 17, 50, 44301, 482, 1544, 48, 489, 220, 24, 50, 393, 271, 4071, 369, 264, 41999, 48411, 11, 279, 5133, 1990, 328, 11, 393, 11, 1229, 374, 2728, 555, 11655, 1955, 596, 55179, 13, 2030, 8530, 584, 649, 29243, 420, 7645, 311, 2555, 775, 382, 93114, 11, 7344, 1701, 55443, 37498, 13, 6914, 596, 12849, 420, 7645, 369, 3230, 20282, 13, 6914, 596, 23289, 430, 865, 32086, 11, 865, 32907, 11, 865, 121412, 527, 7482, 449, 865, 32907, 482, 865, 32086, 284, 49438, 323, 865, 121412, 871, 320, 87, 32086, 489, 865, 32907, 5738, 17, 13, 10926, 584, 649, 1719, 58053, 279, 20282, 304, 3878, 315, 7482, 430, 12602, 279, 2728, 4787, 382, 22818, 430, 865, 32907, 482, 865, 32086, 284, 49438, 11, 1095, 596, 743, 865, 
32086, 284, 259, 482, 49438, 14, 17, 323, 865, 32907, 284, 259, 489, 49438, 14, 17, 369, 1063, 259, 13, 5112, 279, 83063, 315, 865, 32086, 323, 865, 32907, 374, 259, 11, 323, 279, 3044, 865, 121412, 871, 320, 87, 32086, 489, 865, 32907, 5738, 17, 9221, 865, 121412, 871, 259, 13, 4815, 55915, 11, 1095, 757, 743, 1473, 87, 32086, 284, 259, 482, 49438, 14, 17, 271, 87, 32907, 284, 259, 489, 49438, 14, 17, 271, 87, 121412, 284, 259, 489, 274, 11, 1405, 274, 871, 220, 15, 320, 11536, 865, 121412, 871, 259, 696, 4516, 1457, 11, 1057, 20282, 527, 13605, 304, 3878, 315, 259, 11, 49438, 11, 323, 274, 871, 220, 15, 382, 7184, 11, 1095, 596, 12849, 328, 11, 393, 11, 1229, 304, 3878, 315, 259, 11, 49438, 11, 274, 382, 5451, 11, 328, 284, 865, 32086, 489, 865, 32907, 489, 865, 121412, 284, 320, 83, 482, 49438, 14, 17, 8, 489, 320, 83, 489, 49438, 14, 17, 8, 489, 320, 83, 489, 274, 8, 284, 220, 18, 83, 489, 274, 271, 16041, 11, 393, 284, 865, 32086, 87, 32907, 489, 865, 32086, 87, 121412, 489, 865, 32907, 87, 121412, 271, 47354, 1855, 4751, 1473, 87, 32086, 87, 32907, 284, 320, 83, 482, 49438, 14, 17, 2432, 83, 489, 49438, 14, 17, 8, 284, 259, 30556, 482, 320, 34586, 14, 17, 8, 30556, 284, 259, 30556, 482, 49438, 30556, 14, 19, 271, 87, 32086, 87, 121412, 284, 320, 83, 482, 49438, 14, 17, 2432, 83, 489, 274, 8, 284, 259, 1175, 489, 274, 8, 482, 320, 34586, 14, 17, 2432, 83, 489, 274, 8, 284, 259, 30556, 489, 10814, 482, 320, 34586, 259, 5738, 17, 482, 320, 34586, 274, 5738, 17, 271, 87, 32907, 87, 121412, 284, 320, 83, 489, 49438, 14, 17, 2432, 83, 489, 274, 8, 284, 259, 1175, 489, 274, 8, 489, 320, 34586, 14, 17, 2432, 83, 489, 274, 8, 284, 259, 30556, 489, 10814, 489, 320, 34586, 259, 5738, 17, 489, 320, 34586, 274, 5738, 17, 271, 33408, 1521, 3871, 1473, 47, 284, 510, 83, 30556, 482, 49438, 30556, 14, 19, 60, 489, 510, 83, 30556, 489, 10814, 482, 320, 34586, 259, 5738, 17, 482, 320, 34586, 274, 5738, 17, 60, 489, 510, 83, 30556, 489, 10814, 489, 320, 34586, 259, 5738, 17, 
489, 320, 34586, 274, 5738, 17, 2595, 10267, 596, 16343, 3878, 1473, 5451, 4751, 25, 259, 30556, 482, 49438, 30556, 14, 19, 271, 16041, 4751, 25, 259, 30556, 489, 10814, 482, 320, 34586, 259, 5738, 17, 482, 320, 34586, 274, 5738, 17, 271, 38075, 4751, 25, 259, 30556, 489, 10814, 489, 320, 34586, 259, 5738, 17, 489, 320, 34586, 274, 5738, 17, 271, 33408, 1124, 1473, 83, 30556, 482, 49438, 30556, 14, 19, 489, 259, 30556, 489, 10814, 482, 320, 34586, 259, 5738, 17, 482, 320, 34586, 274, 5738, 17, 489, 259, 30556, 489, 10814, 489, 320, 34586, 259, 5738, 17, 489, 320, 34586, 274, 5738, 17, 271, 82214, 1093, 3878, 1473, 83, 30556, 489, 259, 30556, 489, 259, 30556, 284, 220, 18, 83, 30556, 271, 2641, 489, 10814, 284, 220, 17, 2641, 271, 12, 34586, 30556, 14, 19, 271, 2520, 279, 3878, 449, 49438, 259, 14, 17, 25, 482, 320, 34586, 259, 5738, 17, 489, 320, 34586, 259, 5738, 17, 284, 220, 15, 271, 68791, 11, 369, 49438, 274, 14, 17, 25, 482, 320, 34586, 274, 5738, 17, 489, 320, 34586, 274, 5738, 17, 284, 220, 15, 271, 4516, 393, 284, 220, 18, 83, 30556, 489, 220, 17, 2641, 482, 49438, 30556, 14, 19, 271, 7184, 1229, 284, 865, 32086, 87, 32907, 87, 121412, 284, 320, 83, 482, 49438, 14, 17, 2432, 83, 489, 49438, 14, 17, 2432, 83, 489, 274, 8, 284, 510, 83, 30556, 482, 320, 34586, 14, 17, 30876, 17, 9725, 83, 489, 274, 8, 284, 320, 83, 30556, 482, 49438, 30556, 14, 19, 2432, 83, 489, 274, 696, 96255, 420, 704, 1473, 28, 259, 44301, 489, 259, 30556, 274, 482, 320, 34586, 30556, 14, 19, 8, 259, 482, 320, 34586, 30556, 14, 19, 8, 274, 271, 7184, 11, 1095, 596, 20206, 328, 11, 393, 11, 1229, 1139, 279, 7645, 1473, 12, 17, 50, 44301, 482, 1544, 48, 489, 220, 24, 50, 393, 271, 5451, 11, 12849, 328, 44301, 1473, 50, 284, 220, 18, 83, 489, 274, 271, 50, 44301, 284, 320, 18, 83, 489, 274, 30876, 18, 284, 220, 1544, 83, 44301, 489, 220, 1544, 83, 30556, 274, 489, 220, 24, 83, 274, 30556, 489, 274, 44301, 271, 96255, 555, 482, 17, 25, 482, 17, 9, 1544, 83, 44301, 482, 17, 9, 1544, 83, 
30556, 274, 482, 17, 9, 24, 83, 274, 30556, 482, 17, 34554, 44301, 284, 482, 4370, 83, 44301, 482, 4370, 83, 30556, 274, 482, 972, 83, 274, 30556, 482, 17, 82, 44301, 271, 5971, 11, 12849, 482, 1544, 48, 1473, 48, 284, 259, 44301, 489, 259, 30556, 274, 482, 320, 34586, 30556, 14, 19, 8, 259, 482, 320, 34586, 30556, 14, 19, 8, 274, 271, 96255, 555, 482, 1544, 25, 482, 1544, 83, 44301, 482, 1544, 83, 30556, 274, 489, 320, 1544, 34586, 30556, 14, 19, 8, 259, 489, 320, 1544, 34586, 30556, 14, 19, 16871, 271, 38075, 4751, 25, 220, 24, 50, 393, 271, 50, 284, 220, 18, 83, 489, 274, 271, 47, 284, 220, 18, 83, 30556, 489, 220, 17, 2641, 482, 49438, 30556, 14, 19, 271, 4516, 220, 24, 50, 393, 284, 220, 24, 6737, 18, 83, 489, 274, 18201, 18, 83, 30556, 489, 220, 17, 2641, 482, 49438, 30556, 14, 19, 696, 10267, 596, 9407, 420, 2027, 3094, 555, 3094, 382, 5451, 11, 31370, 320, 18, 83, 489, 274, 8, 449, 320, 18, 83, 30556, 489, 220, 17, 2641, 482, 49438, 30556, 14, 19, 7887, 28, 220, 18, 83, 6737, 18, 83, 30556, 8, 489, 220, 18, 83, 6737, 17, 2641, 8, 489, 220, 18, 83, 35399, 34586, 30556, 14, 19, 8, 489, 274, 6737, 18, 83, 30556, 8, 489, 274, 6737, 17, 2641, 8, 489, 274, 35399, 34586, 30556, 14, 19, 696, 28, 220, 24, 83, 44301, 489, 220, 21, 83, 30556, 274, 482, 320, 18, 83, 49438, 30556, 5738, 19, 489, 220, 18, 82, 259, 30556, 489, 220, 17, 83, 274, 30556, 482, 320, 82, 49438, 30556, 5738, 19, 271, 82214, 1093, 3878, 1473, 24, 83, 44301, 489, 320, 21, 83, 30556, 274, 489, 220, 18, 83, 30556, 274, 8, 489, 320, 17, 83, 274, 30556, 8, 489, 10505, 18, 83, 49438, 30556, 14, 19, 482, 274, 49438, 30556, 14, 19, 696, 28, 220, 24, 83, 44301, 489, 220, 24, 83, 30556, 274, 489, 220, 17, 83, 274, 30556, 482, 320, 34586, 30556, 14, 19, 2432, 18, 83, 489, 274, 696, 96255, 420, 555, 220, 24, 1473, 24, 6737, 24, 83, 44301, 489, 220, 24, 83, 30556, 274, 489, 220, 17, 83, 274, 30556, 482, 320, 34586, 30556, 14, 19, 2432, 18, 83, 489, 274, 595, 284, 220, 5932, 83, 44301, 489, 220, 5932, 83, 
30556, 274, 489, 220, 972, 83, 274, 30556, 482, 320, 24, 34586, 30556, 14, 19, 2432, 18, 83, 489, 274, 696, 7184, 11, 35271, 682, 2380, 5596, 1473, 5451, 961, 25, 482, 4370, 83, 44301, 482, 4370, 83, 30556, 274, 482, 972, 83, 274, 30556, 482, 17, 82, 44301, 271, 16041, 961, 25, 482, 1544, 83, 44301, 482, 1544, 83, 30556, 274, 489, 320, 1544, 34586, 30556, 14, 19, 8, 259, 489, 320, 1544, 34586, 30556, 14, 19, 16871, 271, 38075, 961, 25, 220, 5932, 83, 44301, 489, 220, 5932, 83, 30556, 274, 489, 220, 972, 83, 274, 30556, 482, 320, 24, 34586, 30556, 14, 19, 2432, 18, 83, 489, 274, 696, 10267, 596, 923, 1124, 4751, 555, 4751, 382, 2520, 259, 44301, 3878, 1473, 12, 4370, 83, 44301, 482, 1544, 83, 44301, 489, 5932, 83, 44301, 284, 220, 15, 271, 2520, 259, 30556, 274, 3878, 1473, 12, 4370, 83, 30556, 274, 482, 1544, 83, 30556, 274, 489, 5932, 83, 30556, 274, 284, 220, 15, 271, 2520, 259, 274, 30556, 3878, 1473, 12, 972, 83, 274, 30556, 489, 972, 83, 274, 30556, 284, 220, 15, 271, 2520, 274, 44301, 3878, 1473, 12, 17, 82, 44301, 320, 1527, 1176, 961, 8, 489, 2564, 320, 5686, 961, 706, 912, 274, 44301, 4751, 11, 4948, 961, 1101, 7000, 8, 779, 2860, 482, 17, 82, 44301, 271, 7184, 279, 3878, 16239, 49438, 30556, 1473, 3915, 2132, 961, 25, 320, 1544, 34586, 30556, 14, 19, 8, 259, 489, 320, 1544, 34586, 30556, 14, 19, 16871, 271, 3915, 4948, 961, 25, 482, 320, 24, 34586, 30556, 14, 19, 2432, 18, 83, 489, 274, 8, 284, 482, 1544, 34586, 30556, 14, 19, 259, 482, 24, 34586, 30556, 14, 19, 274, 271, 33408, 1521, 3871, 1473, 7, 1544, 34586, 30556, 14, 19, 259, 482, 1544, 34586, 30556, 14, 19, 259, 8, 489, 320, 1544, 34586, 30556, 14, 19, 274, 482, 24, 34586, 30556, 14, 19, 274, 8, 284, 220, 15, 489, 320, 972, 34586, 30556, 14, 19, 16871, 284, 320, 24, 34586, 30556, 14, 17, 16871, 271, 4516, 8244, 11, 35271, 682, 3878, 1473, 12, 17, 82, 44301, 489, 320, 24, 34586, 30556, 14, 17, 16871, 271, 4516, 279, 4553, 7645, 15858, 9803, 311, 1473, 12, 17, 82, 44301, 489, 320, 24, 34586, 30556, 
14, 17, 16871, 271, 55915, 11, 279, 4113, 7645, 320, 17, 64, 44301, 489, 220, 1544, 66, 482, 24, 370, 8, 17239, 482, 17, 82, 44301, 489, 320, 24, 34586, 30556, 14, 17, 16871, 271, 55915, 11, 279, 7645, 584, 1205, 311, 35608, 374, 1473, 7, 17, 64, 44301, 489, 220, 1544, 66, 482, 24, 370, 5738, 34586, 44301, 284, 10505, 17, 82, 44301, 489, 320, 24, 34586, 30556, 14, 17, 16871, 5738, 34586, 44301, 284, 10505, 17, 82, 44301, 5738, 34586, 44301, 489, 320, 24, 34586, 30556, 14, 17, 274, 5738, 34586, 44301, 284, 482, 17, 1161, 14, 34586, 30876, 18, 489, 320, 24, 14, 17, 2432, 82, 14, 34586, 696, 10267, 757, 743, 597, 284, 274, 14, 34586, 13, 8876, 274, 871, 220, 15, 323, 49438, 871, 220, 15, 11, 597, 871, 220, 15, 13, 5112, 1057, 7645, 9221, 1473, 12, 17, 74, 44301, 489, 320, 24, 14, 17, 8, 74, 271, 4516, 1457, 11, 279, 3575, 26338, 311, 9455, 279, 7340, 907, 315, 279, 734, 342, 6097, 8, 284, 482, 17, 74, 44301, 489, 320, 24, 14, 17, 8, 74, 369, 597, 871, 220, 15, 382, 2028, 374, 264, 83768, 3575, 13, 2057, 1505, 279, 7340, 11, 1935, 279, 32905, 315, 342, 6097, 8, 449, 5201, 311, 597, 11, 743, 433, 311, 7315, 11, 323, 11886, 369, 597, 382, 47354, 342, 59436, 74, 7887, 70, 59436, 74, 8, 284, 482, 21, 74, 30556, 489, 220, 24, 14, 17, 271, 1681, 342, 59436, 74, 8, 284, 220, 15, 1473, 12, 21, 74, 30556, 489, 220, 24, 14, 17, 284, 220, 15, 271, 2228, 220, 21, 74, 30556, 284, 220, 24, 14, 17, 271, 2228, 597, 30556, 284, 320, 24, 14, 17, 5738, 21, 284, 220, 24, 14, 717, 284, 220, 18, 14, 19, 271, 2228, 597, 284, 18430, 7, 18, 14, 19, 8, 284, 18430, 7, 18, 5738, 17, 118792, 220, 15, 13, 22455, 271, 12834, 597, 871, 220, 15, 11, 584, 1935, 279, 6928, 3789, 13, 4800, 11, 1817, 422, 420, 374, 264, 7340, 555, 2132, 32905, 1296, 382, 16041, 32905, 1473, 70, 4708, 7, 74, 8, 284, 482, 717, 74, 271, 1688, 597, 284, 18430, 7, 18, 5738, 17, 11, 342, 4708, 7, 74, 8, 284, 482, 717, 6737, 27986, 7, 18, 5738, 17, 8, 284, 482, 21, 27986, 7, 18, 8, 366, 220, 15, 11, 902, 3445, 433, 596, 264, 
2254, 7340, 382, 55915, 11, 279, 7340, 907, 315, 342, 6097, 8, 374, 17427, 520, 597, 284, 18430, 7, 18, 5738, 17, 382, 47354, 342, 84173, 7, 18, 5738, 17, 7887, 70, 84173, 7, 18, 5738, 17, 8, 284, 482, 17, 6737, 27986, 7, 18, 5738, 17, 30876, 18, 489, 320, 24, 14, 17, 18201, 27986, 7, 18, 5738, 17, 696, 47354, 1855, 4751, 1473, 5451, 4751, 25, 482, 17, 6737, 320, 18, 13571, 16, 14, 17, 31185, 17, 883, 61, 18, 284, 482, 17, 6737, 320, 18, 13571, 18, 14, 17, 31185, 23, 883, 284, 482, 17, 6737, 18, 110682, 18, 5738, 23, 284, 482, 320, 18, 110682, 18, 5738, 19, 271, 16041, 4751, 25, 320, 24, 14, 17, 18201, 27986, 7, 18, 5738, 17, 8, 284, 320, 24, 110682, 18, 5738, 19, 271, 33408, 2225, 3878, 1473, 12, 320, 18, 110682, 18, 5738, 19, 489, 320, 24, 110682, 18, 5738, 19, 284, 320, 21, 110682, 18, 5738, 19, 284, 320, 18, 110682, 18, 5738, 17, 271, 55915, 11, 279, 7340, 907, 315, 342, 6097, 8, 374, 320, 18, 110682, 18, 5738, 17, 382, 39, 768, 11, 279, 7340, 907, 315, 320, 17, 64, 44301, 489, 220, 1544, 66, 482, 24, 370, 5738, 34586, 44301, 374, 320, 18, 110682, 18, 5738, 17, 382, 4071, 3868, 11, 1095, 757, 1817, 422, 1070, 527, 904, 17413, 389, 597, 13, 20474, 430, 584, 1047, 274, 284, 597, 34586, 13, 2030, 584, 617, 311, 6106, 430, 279, 20282, 527, 1972, 323, 27651, 865, 121412, 871, 320, 87, 32086, 489, 865, 32907, 5738, 17, 13, 4452, 11, 304, 1057, 1719, 17820, 2065, 11, 584, 743, 865, 121412, 284, 259, 489, 274, 11, 323, 2533, 274, 871, 220, 15, 11, 865, 121412, 871, 259, 284, 320, 87, 32086, 489, 865, 32907, 5738, 17, 11, 902, 374, 2736, 20097, 13, 2100, 439, 1317, 439, 274, 871, 220, 15, 11, 279, 3044, 10187, 13, 2030, 2533, 597, 284, 274, 14, 34586, 871, 220, 15, 11, 323, 49438, 871, 220, 15, 11, 274, 871, 220, 15, 374, 13890, 311, 597, 871, 220, 15, 11, 902, 584, 2736, 6646, 13, 15636, 11, 279, 7340, 374, 13118, 320, 18, 110682, 18, 5738, 17, 382, 4071, 1095, 757, 10356, 420, 1121, 555, 13126, 3230, 5157, 13, 1789, 3187, 11, 1935, 49438, 284, 220, 16, 13, 5112, 279, 
7645, 320, 17, 64, 44301, 489, 1544, 66, 482, 24, 370, 5738, 16, 44301, 1288, 387, 31127, 1534, 520, 220, 18, 110682, 18, 14, 17, 13, 6914, 596, 1817, 449, 597, 284, 18430, 7, 18, 5738, 17, 11, 779, 274, 284, 18430, 7, 18, 5738, 17, 382, 12487, 279, 20282, 527, 865, 32086, 284, 259, 482, 220, 16, 14, 17, 11, 865, 32907, 284, 259, 489, 220, 16, 14, 17, 11, 865, 121412, 284, 259, 489, 18430, 7, 18, 5738, 17, 13, 5112, 328, 284, 220, 18, 83, 489, 18430, 7, 18, 5738, 17, 11, 393, 284, 220, 18, 83, 30556, 489, 220, 17, 83, 6737, 27986, 7, 18, 5738, 17, 8, 482, 220, 16, 14, 19, 284, 220, 18, 83, 30556, 489, 259, 9, 27986, 7, 18, 8, 482, 220, 16, 14, 19, 11, 1229, 284, 320, 83, 30556, 482, 220, 16, 14, 19, 2432, 83, 489, 18430, 7, 18, 5738, 17, 570, 5112, 12849, 264, 11, 293, 11, 272, 505, 11655, 1955, 596, 55179, 13, 5112, 12849, 220, 17, 64, 44301, 489, 1544, 66, 482, 24, 370, 13, 1102, 2643, 387, 66838, 11, 719, 8530, 584, 649, 1817, 382, 93114, 11, 5296, 430, 1057, 96354, 6197, 603, 311, 459, 7645, 18222, 1193, 389, 597, 11, 902, 374, 274, 14, 34586, 11, 323, 1555, 50068, 11, 584, 1766, 279, 7340, 13, 15636, 11, 279, 7340, 907, 374, 320, 18, 110682, 18, 5738, 17, 13, 4452, 11, 1095, 757, 19635, 430, 279, 3575, 5415, 865, 121412, 871, 320, 87, 32086, 489, 865, 32907, 5738, 17, 11, 902, 304, 1057, 1719, 17820, 2065, 374, 274, 871, 220, 15, 13, 15636, 11, 439, 1317, 439, 274, 374, 6928, 11, 902, 433, 374, 11, 584, 527, 17339, 13, 2100, 1070, 527, 912, 4726, 17413, 11, 323, 279, 7340, 374, 13118, 320, 18, 110682, 18, 5738, 17, 382, 14524, 11, 719, 1120, 311, 387, 17879, 11, 1095, 596, 1518, 422, 994, 597, 284, 18430, 7, 18, 5738, 17, 11, 279, 41999, 48411, 13118, 706, 2380, 1972, 20282, 13, 8876, 584, 527, 2728, 430, 682, 20282, 527, 1972, 11, 719, 994, 584, 1719, 58053, 11, 584, 1205, 311, 6106, 430, 279, 41999, 449, 1521, 20282, 374, 2764, 13, 4452, 11, 2533, 584, 527, 2728, 430, 682, 20282, 527, 1972, 323, 27651, 279, 2728, 4787, 11, 1057, 1719, 17820, 2065, 2736, 
5097, 430, 1139, 2759, 13, 15636, 11, 439, 1317, 439, 274, 871, 220, 15, 11, 279, 20282, 527, 1972, 323, 27651, 279, 2631, 4787, 13, 15636, 11, 279, 7340, 907, 374, 13118, 320, 18, 110682, 18, 5738, 17, 382, 45600, 11, 279, 7340, 907, 315, 320, 17, 64, 44301, 489, 1544, 66, 482, 24, 370, 5738, 34586, 44301, 374, 320, 18, 110682, 18, 5738, 17, 382, 334, 19918, 22559, 1035, 59, 80175, 36802, 67, 38118, 90, 18, 59, 27986, 90, 18, 3500, 90, 17, 11498, 524, 27963, 1363, 22818, 1972, 5219, 18240, 64, 11, 293, 11, 272, 58858, 323, 264, 6928, 1396, 1144, 11781, 13231, 58858, 1778, 430, 279, 48411, 18240, 69, 2120, 8, 284, 865, 61, 18, 489, 3944, 61, 17, 489, 45341, 489, 272, 58858, 706, 2380, 1972, 20282, 18240, 87, 62, 16, 11, 865, 62, 17, 11, 865, 62, 18, 58858, 449, 279, 4787, 18240, 87, 62, 17, 482, 865, 62, 16, 284, 1144, 13231, 58858, 323, 18240, 87, 62, 18, 871, 1144, 38118, 90, 16, 15523, 17, 26628, 87, 62, 16, 489, 865, 62, 17, 10929, 705, 584, 1205, 311, 1505, 279, 7340, 907, 315, 1144, 11781, 38118, 90, 17, 64, 61, 18, 489, 220, 1544, 66, 482, 220, 24, 370, 15523, 59, 13231, 61, 18, 11281, 3677, 16834, 11655, 1955, 596, 55179, 369, 279, 48411, 18240, 69, 2120, 10929, 997, 16, 13, 18240, 87, 62, 16, 489, 865, 62, 17, 489, 865, 62, 18, 284, 482, 64, 59, 340, 17, 13, 18240, 87, 62, 16, 87, 62, 17, 489, 865, 62, 16, 87, 62, 18, 489, 865, 62, 17, 87, 62, 18, 284, 293, 59, 340, 18, 13, 18240, 87, 62, 16, 87, 62, 17, 87, 62, 18, 284, 482, 66, 59, 696, 1687, 1719, 58053, 279, 20282, 439, 11263, 512, 12, 18240, 87, 62, 16, 284, 259, 482, 1144, 38118, 36802, 13231, 15523, 17, 11281, 340, 12, 18240, 87, 62, 17, 284, 259, 489, 1144, 38118, 36802, 13231, 15523, 17, 11281, 340, 12, 18240, 87, 62, 18, 284, 259, 489, 274, 58858, 1405, 18240, 82, 871, 220, 15, 59, 696, 3214, 3781, 10831, 1521, 1139, 11655, 1955, 596, 55179, 11, 584, 636, 512, 12, 18240, 50, 284, 865, 62, 16, 489, 865, 62, 17, 489, 865, 62, 18, 284, 220, 18, 83, 489, 274, 59, 340, 12, 18240, 47, 284, 865, 62, 16, 
87, 62, 17, 489, 865, 62, 16, 87, 62, 18, 489, 865, 62, 17, 87, 62, 18, 284, 220, 18, 83, 61, 17, 489, 220, 17, 2641, 482, 1144, 38118, 36802, 13231, 61, 17, 15523, 19, 11281, 340, 12, 18240, 48, 284, 865, 62, 16, 87, 62, 17, 87, 62, 18, 284, 320, 83, 61, 17, 482, 1144, 38118, 36802, 13231, 61, 17, 15523, 19, 32988, 83, 489, 274, 10929, 696, 3214, 3781, 10831, 18240, 64, 284, 482, 50, 59, 705, 18240, 65, 284, 393, 59, 705, 323, 18240, 66, 284, 482, 48, 58858, 1139, 279, 7645, 18240, 17, 64, 61, 18, 489, 220, 1544, 66, 482, 220, 24, 370, 59, 705, 584, 40821, 433, 311, 512, 59, 9837, 12, 17, 82, 61, 18, 489, 1144, 38118, 90, 24, 15523, 17, 11281, 13231, 61, 17, 274, 198, 59, 2595, 1090, 60637, 18240, 74, 284, 1144, 38118, 85486, 15523, 59, 13231, 11281, 705, 279, 7645, 9221, 512, 59, 9837, 59, 38118, 20597, 17, 82, 61, 18, 489, 1144, 38118, 90, 24, 15523, 17, 11281, 13231, 61, 17, 274, 15523, 59, 13231, 61, 18, 92, 284, 482, 17, 74, 61, 18, 489, 1144, 38118, 90, 24, 15523, 17, 92, 74, 198, 59, 2595, 1271, 35608, 1144, 4172, 17, 74, 61, 18, 489, 1144, 38118, 90, 24, 15523, 17, 92, 74, 59, 705, 584, 1935, 279, 32905, 323, 743, 433, 311, 7315, 512, 59, 9837, 70, 59436, 74, 8, 284, 482, 21, 74, 61, 17, 489, 1144, 38118, 90, 24, 15523, 17, 92, 284, 220, 15, 1144, 6517, 552, 597, 61, 17, 284, 1144, 38118, 90, 18, 15523, 19, 92, 1144, 6517, 552, 597, 284, 1144, 38118, 36802, 27986, 90, 18, 3500, 90, 17, 534, 59, 2595, 41229, 279, 2132, 32905, 43496, 264, 7340, 13, 3804, 3781, 10831, 18240, 74, 284, 1144, 38118, 36802, 27986, 90, 18, 3500, 90, 17, 11281, 8, 1203, 1139, 279, 7645, 6835, 512, 59, 9837, 12, 17, 59, 2414, 11781, 38118, 36802, 27986, 90, 18, 3500, 90, 17, 11281, 1315, 30876, 18, 489, 1144, 38118, 90, 24, 15523, 17, 11281, 2414, 11781, 38118, 36802, 27986, 90, 18, 3500, 90, 17, 11281, 1315, 8, 284, 1144, 38118, 90, 18, 59, 27986, 90, 18, 3500, 90, 17, 534, 59, 2595, 45600, 11, 279, 7340, 907, 315, 1144, 11781, 38118, 90, 17, 64, 61, 18, 489, 220, 1544, 66, 482, 
220, 24, 370, 15523, 59, 13231, 61, 18, 11281, 8, 374, 1144, 11781, 80175, 36802, 67, 38118, 90, 18, 59, 27986, 90, 18, 3500, 90, 17, 3500, 59, 3677, 791, 1620, 4320, 374, 1144, 80175, 36802, 67, 38118, 90, 18, 59, 27986, 90, 18, 3500, 90, 17, 3500, 13, 128009]
+labels:
+
+Okay, let's try to tackle this problem step by step. The problem is about finding the maximum value of the expression (2a³ + 27c - 9ab)/λ³ given some conditions on the roots of the cubic polynomial f(x) = x³ + ax² + bx + c. The roots are real, with x₂ - x₁ = λ and x₃ > (x₁ + x₂)/2.
+
+First, I need to recall that for a cubic polynomial with roots x₁, x₂, x₃, the coefficients can be expressed using Vieta's formulas. So, let's write those down.
+
+Vieta's formulas for f(x) = x³ + ax² + bx + c:
+1. Sum of roots: x₁ + x₂ + x₃ = -a
+2. Sum of products: x₁x₂ + x₁x₃ + x₂x₃ = b
+3. Product of roots: x₁x₂x₃ = -c
+
+Our target expression is (2a³ + 27c - 9ab)/λ³. Let's see if we can express this in terms of the roots using Vieta's formulas.
+
+First, let's substitute a, b, c in terms of the roots.
+
+From Vieta:
+a = -(x₁ + x₂ + x₃)
+b = x₁x₂ + x₁x₃ + x₂x₃
+c = -x₁x₂x₃
+
+Plugging these into the expression:
+
+2a³ + 27c - 9ab
+= 2(-(x₁ + x₂ + x₃))³ + 27(-x₁x₂x₃) - 9(-(x₁ + x₂ + x₃))(x₁x₂ + x₁x₃ + x₂x₃)
+
+Let me compute each term step by step.
+
+First term: 2(-(x₁ + x₂ + x₃))³ = -2(x₁ + x₂ + x₃)³
+
+Second term: 27(-x₁x₂x₃) = -27x₁x₂x₃
+
+Third term: -9(-(x₁ + x₂ + x₃))(x₁x₂ + x₁x₃ + x₂x₃) = 9(x₁ + x₂ + x₃)(x₁x₂ + x₁x₃ + x₂x₃)
+
+So combining all three:
+
+-2(x₁ + x₂ + x₃)³ -27x₁x₂x₃ + 9(x₁ + x₂ + x₃)(x₁x₂ + x₁x₃ + x₂x₃)
+
+Hmm, this seems complex. Maybe there is a known identity that relates these terms? Let me think.
+
+Recall that for a general cubic polynomial ax³ + bx² + cx + d, the discriminant D is given by D = 18abcd - 4b³d + b²c² - 4ac³ - 27a²d². In our case the polynomial is monic (leading coefficient 1), and the letters a, b, c denote the remaining coefficients, so the letters in the general formula must be shifted: the leading coefficient becomes 1 and (b, c, d) become (a, b, c). So for x³ + a x² + b x + c the discriminant would be D = 18abc - 4a³c + a²b² - 4b³ - 27c². But I might be mixing up the formula. Let me verify:
+
+Wait, the discriminant of a cubic x³ + px² + qx + r is given by:
+
+Δ = 18pqr - 4p³r + p²q² - 4q³ - 27r²
+
+Yes, so in our case, with p = a, q = b, r = c, so Δ = 18abc - 4a³c + a²b² - 4b³ - 27c²
+
+I'm not sure whether the discriminant is directly relevant here, but it might be. A cubic with real coefficients has three real roots exactly when its discriminant is non-negative, and the problem states that all roots are real, so Δ ≥ 0. But maybe the expression we're dealing with is related to the discriminant?
+
+Wait, let's check the expression given: 2a³ + 27c - 9ab. If we compare with the discriminant formula:
+
+Δ = 18abc - 4a³c + a²b² - 4b³ - 27c²
+
+Not directly obvious. Maybe not. Let's try another approach.
+
+Alternatively, perhaps the expression (2a³ + 27c - 9ab) can be rewritten in terms of the roots. Let's try substituting the Vieta expressions into it.
+
+So let's substitute a, b, c:
+
+2(-(x₁ + x₂ + x₃))³ + 27(-x₁x₂x₃) - 9(-(x₁ + x₂ + x₃))(x₁x₂ + x₁x₃ + x₂x₃)
+
+Compute each term:
+
+First term: 2*(-1)^3*(x₁ + x₂ + x₃)^3 = -2(x₁ + x₂ + x₃)^3
+
+Second term: 27*(-x₁x₂x₃) = -27x₁x₂x₃
+
+Third term: -9*(-1)*(x₁ + x₂ + x₃)(x₁x₂ + x₁x₃ + x₂x₃) = 9(x₁ + x₂ + x₃)(x₁x₂ + x₁x₃ + x₂x₃)
+
+So putting it all together:
+
+-2(x₁ + x₂ + x₃)^3 - 27x₁x₂x₃ + 9(x₁ + x₂ + x₃)(x₁x₂ + x₁x₃ + x₂x₃)
+
+Hmm. Let's see if we can factor this or simplify it. Let's denote S = x₁ + x₂ + x₃, P = x₁x₂ + x₁x₃ + x₂x₃, Q = x₁x₂x₃. Then our expression becomes:
+
+-2S³ -27Q + 9S P
+
+But for a cubic polynomial, the relationship between S, P, Q is given by Vieta's formulas. But perhaps we can relate this expression to something else.
+
+Alternatively, maybe using symmetric sums. Let's compute this expression for specific roots. Let's suppose that x₁, x₂, x₃ are variables with x₂ - x₁ = λ and x₃ > (x₁ + x₂)/2. Maybe we can parametrize the roots in terms of variables that capture the given conditions.
+
+Given that x₂ - x₁ = λ, let's set x₁ = t - λ/2 and x₂ = t + λ/2 for some t. Then the midpoint of x₁ and x₂ is t, and the condition x₃ > (x₁ + x₂)/2 becomes x₃ > t.
+
+Therefore, let me set:
+
+x₁ = t - λ/2
+
+x₂ = t + λ/2
+
+x₃ = t + s, where s > 0 (since x₃ > t)
+
+So now, our roots are expressed in terms of t, λ, and s > 0.
+
+Now, let's compute S, P, Q in terms of t, λ, s.
+
+First, S = x₁ + x₂ + x₃ = (t - λ/2) + (t + λ/2) + (t + s) = 3t + s
+
+Second, P = x₁x₂ + x₁x₃ + x₂x₃
+
+Compute each term:
+
+x₁x₂ = (t - λ/2)(t + λ/2) = t² - (λ/2)² = t² - λ²/4
+
+x₁x₃ = (t - λ/2)(t + s) = t(t + s) - (λ/2)(t + s) = t² + ts - (λ t)/2 - (λ s)/2
+
+x₂x₃ = (t + λ/2)(t + s) = t(t + s) + (λ/2)(t + s) = t² + ts + (λ t)/2 + (λ s)/2
+
+Adding these together:
+
+P = [t² - λ²/4] + [t² + ts - (λ t)/2 - (λ s)/2] + [t² + ts + (λ t)/2 + (λ s)/2]
+
+Let's combine terms:
+
+First term: t² - λ²/4
+
+Second term: t² + ts - (λ t)/2 - (λ s)/2
+
+Third term: t² + ts + (λ t)/2 + (λ s)/2
+
+Adding them:
+
+t² - λ²/4 + t² + ts - (λ t)/2 - (λ s)/2 + t² + ts + (λ t)/2 + (λ s)/2
+
+Combine like terms:
+
+t² + t² + t² = 3t²
+
+ts + ts = 2ts
+
+-λ²/4
+
+For the terms with λ t/2: - (λ t)/2 + (λ t)/2 = 0
+
+Similarly, for λ s/2: - (λ s)/2 + (λ s)/2 = 0
+
+So P = 3t² + 2ts - λ²/4
+
+Now Q = x₁x₂x₃ = (t - λ/2)(t + λ/2)(t + s) = [t² - (λ/2)^2](t + s) = (t² - λ²/4)(t + s)
+
+Multiply this out:
+
+= t³ + t² s - (λ²/4) t - (λ²/4) s
+
+Now, let's plug S, P, Q into the expression:
+
+-2S³ -27Q + 9S P
+
+First, compute S³:
+
+S = 3t + s
+
+S³ = (3t + s)^3 = 27t³ + 27t² s + 9t s² + s³
+
+Multiply by -2: -2*27t³ -2*27t² s -2*9t s² -2*s³ = -54t³ -54t² s -18t s² -2s³
+
+Next, compute -27Q:
+
+Q = t³ + t² s - (λ²/4) t - (λ²/4) s
+
+Multiply by -27: -27t³ -27t² s + (27λ²/4) t + (27λ²/4)s
+
+Third term: 9S P
+
+S = 3t + s
+
+P = 3t² + 2ts - λ²/4
+
+So 9S P = 9*(3t + s)*(3t² + 2ts - λ²/4)
+
+Let's expand this product step by step.
+
+First, multiply (3t + s) with (3t² + 2ts - λ²/4):
+
+= 3t*(3t²) + 3t*(2ts) + 3t*(-λ²/4) + s*(3t²) + s*(2ts) + s*(-λ²/4)
+
+= 9t³ + 6t² s - (3t λ²)/4 + 3s t² + 2t s² - (s λ²)/4
+
+Combine like terms:
+
+9t³ + (6t² s + 3t² s) + (2t s²) + (-3t λ²/4 - s λ²/4)
+
+= 9t³ + 9t² s + 2t s² - (λ²/4)(3t + s)
+
+Multiply this by 9:
+
+9*(9t³ + 9t² s + 2t s² - (λ²/4)(3t + s)) = 81t³ + 81t² s + 18t s² - (9λ²/4)(3t + s)
+
+Now, combining all three parts:
+
+First part: -54t³ -54t² s -18t s² -2s³
+
+Second part: -27t³ -27t² s + (27λ²/4) t + (27λ²/4)s
+
+Third part: 81t³ + 81t² s + 18t s² - (9λ²/4)(3t + s)
+
+Let's add them term by term.
+
+For t³ terms:
+
+-54t³ -27t³ +81t³ = 0
+
+For t² s terms:
+
+-54t² s -27t² s +81t² s = 0
+
+For t s² terms:
+
+-18t s² +18t s² = 0
+
+For s³ terms:
+
+-2s³ (from the first part; the second and third parts have no s³ term), so the total is -2s³
+
+Now the terms involving λ²:
+
+From second part: (27λ²/4) t + (27λ²/4)s
+
+From third part: - (9λ²/4)(3t + s) = -(27λ²/4)t - (9λ²/4)s
+
+Adding these together:
+
+((27λ²/4)t - (27λ²/4)t) + ((27λ²/4)s - (9λ²/4)s) = 0 + (18λ²/4)s = (9λ²/2)s
+
+So overall, combining all terms:
+
+-2s³ + (9λ²/2)s
+
+So the entire expression simplifies to:
+
+-2s³ + (9λ²/2)s
+
+Therefore, the original expression (2a³ + 27c -9ab) equals -2s³ + (9λ²/2)s
+
+Therefore, the expression we need to maximize is:
+
+(2a³ + 27c -9ab)/λ³ = (-2s³ + (9λ²/2)s)/λ³ = (-2s³)/λ³ + ((9λ²/2)s)/λ³ = -2(s/λ)^3 + (9/2)(s/λ)
+
+Let me set k = s/λ. Since s > 0 and λ > 0, k > 0. Then our expression becomes:
+
+-2k³ + (9/2)k
+
+So now, the problem reduces to finding the maximum value of the function g(k) = -2k³ + (9/2)k for k > 0.
+
+This is a calculus problem. To find the maximum, take the derivative of g(k) with respect to k, set it to zero, and solve for k.
+
+Compute g'(k):
+
+g'(k) = -6k² + 9/2
+
+Set g'(k) = 0:
+
+-6k² + 9/2 = 0
+
+=> 6k² = 9/2
+
+=> k² = (9/2)/6 = 9/12 = 3/4
+
+=> k = sqrt(3/4) = sqrt(3)/2 ≈ 0.866
+
+Since k > 0, we take the positive root. Now, check if this is a maximum by second derivative test.
+
+Second derivative:
+
+g''(k) = -12k
+
+At k = sqrt(3)/2, g''(k) = -12*(sqrt(3)/2) = -6sqrt(3) < 0, which means it's a local maximum.
+
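+Since k = sqrt(3)/2 is the only critical point for k > 0 (g is increasing before it and decreasing after it), this local maximum is the global maximum on k > 0. Therefore, the maximum value of g(k) is achieved at k = sqrt(3)/2.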
+
+Compute g(sqrt(3)/2):
+
+g(sqrt(3)/2) = -2*(sqrt(3)/2)^3 + (9/2)*(sqrt(3)/2)
+
+Compute each term:
+
+First term: -2*( (3^(1/2))/2 )^3 = -2*( (3^(3/2))/8 ) = -2*(3√3)/8 = - (3√3)/4
+
+Second term: (9/2)*(sqrt(3)/2) = (9√3)/4
+
+Adding both terms:
+
+- (3√3)/4 + (9√3)/4 = (6√3)/4 = (3√3)/2
+
+Therefore, the maximum value of g(k) is (3√3)/2.
+
+Hence, the maximum value of (2a³ + 27c -9ab)/λ³ is (3√3)/2.
+
+But wait, let me check if there are any constraints on k. Remember that we had s = kλ. But we have to ensure that the roots are real and satisfy x₃ > (x₁ + x₂)/2. However, in our parametrization, we set x₃ = t + s, and since s > 0, x₃ > t = (x₁ + x₂)/2, which is already satisfied. So as long as s > 0, the condition holds. But since k = s/λ > 0, and λ > 0, s > 0 is equivalent to k > 0, which we already considered. Therefore, the maximum is indeed (3√3)/2.
+
+But let me verify this result by considering specific cases. For example, take λ = 1. Then the expression (2a³ +27c -9ab)/1³ should be maximized at 3√3/2. Let's check with k = sqrt(3)/2, so s = sqrt(3)/2.
+
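+Then the roots are x₁ = t - 1/2, x₂ = t + 1/2, x₃ = t + sqrt(3)/2. Then S = 3t + sqrt(3)/2, P = 3t² + 2t*(sqrt(3)/2) - 1/4 = 3t² + t*sqrt(3) - 1/4, Q = (t² - 1/4)(t + sqrt(3)/2). Then compute a, b, c from Vieta's formulas. Then compute 2a³ +27c -9ab. It is tedious for general t, but the result does not depend on t, so take t = 0: a = -sqrt(3)/2, b = -1/4, c = sqrt(3)/8, giving 2a³ + 27c - 9ab = -3√3/4 + 27√3/8 - 9√3/8 = 12√3/8 = 3√3/2, as expected.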
+
+Alternatively, note that our derivation led us to an expression dependent only on k, which is s/λ, and through substitution, we found the maximum. Therefore, the maximum value is (3√3)/2. However, let me recall that the problem states x₃ > (x₁ + x₂)/2, which in our parametrization is s > 0. Therefore, as long as s is positive, which it is, we are okay. So there are no further constraints, and the maximum is indeed (3√3)/2.
+
+Wait, but just to be thorough, let's see if when k = sqrt(3)/2, the cubic polynomial indeed has three real roots. Since we are given that all roots are real, but when we parametrize, we need to ensure that the cubic with these roots is valid. However, since we are given that all roots are real and satisfy the given conditions, our parametrization already takes that into account. Therefore, as long as s > 0, the roots are real and satisfy the required conditions. Therefore, the maximum value is indeed (3√3)/2.
+
+Thus, the maximum value of (2a³ +27c -9ab)/λ³ is (3√3)/2.
+
+**Final Answer**
+\boxed{\dfrac{3\sqrt{3}}{2}}
+
+
+Given real numbers \(a, b, c\) and a positive number \(\lambda\) such that the polynomial \(f(x) = x^3 + ax^2 + bx + c\) has three real roots \(x_1, x_2, x_3\) with the conditions \(x_2 - x_1 = \lambda\) and \(x_3 > \frac{1}{2}(x_1 + x_2)\), we need to find the maximum value of \(\frac{2a^3 + 27c - 9ab}{\lambda^3}\).
+
+Using Vieta's formulas for the polynomial \(f(x)\):
+1. \(x_1 + x_2 + x_3 = -a\)
+2. \(x_1x_2 + x_1x_3 + x_2x_3 = b\)
+3. \(x_1x_2x_3 = -c\)
+
+We parametrize the roots as follows:
+- \(x_1 = t - \frac{\lambda}{2}\)
+- \(x_2 = t + \frac{\lambda}{2}\)
+- \(x_3 = t + s\) where \(s > 0\)
+
+Substituting these into Vieta's formulas, we get:
+- \(S = x_1 + x_2 + x_3 = 3t + s\)
+- \(P = x_1x_2 + x_1x_3 + x_2x_3 = 3t^2 + 2ts - \frac{\lambda^2}{4}\)
+- \(Q = x_1x_2x_3 = (t^2 - \frac{\lambda^2}{4})(t + s)\)
+
+Substituting \(a = -S\), \(b = P\), and \(c = -Q\) into the expression \(2a^3 + 27c - 9ab\), we simplify it to:
+\[
+-2s^3 + \frac{9}{2}\lambda^2 s
+\]
+
+Introducing \(k = \frac{s}{\lambda}\), the expression becomes:
+\[
+\frac{-2s^3 + \frac{9}{2}\lambda^2 s}{\lambda^3} = -2k^3 + \frac{9}{2}k
+\]
+
+To maximize \(-2k^3 + \frac{9}{2}k\), we take the derivative and set it to zero:
+\[
+g'(k) = -6k^2 + \frac{9}{2} = 0 \implies k^2 = \frac{3}{4} \implies k = \frac{\sqrt{3}}{2}
+\]
+
+Checking the second derivative confirms a maximum. Substituting \(k = \frac{\sqrt{3}}{2}\) back into the expression gives:
+\[
+-2\left(\frac{\sqrt{3}}{2}\right)^3 + \frac{9}{2}\left(\frac{\sqrt{3}}{2}\right) = \frac{3\sqrt{3}}{2}
+\]
+
+Thus, the maximum value of \(\frac{2a^3 + 27c - 9ab}{\lambda^3}\) is \(\boxed{\dfrac{3\sqrt{3}}{2}}\).
+
+The final answer is \boxed{\dfrac{3\sqrt{3}}{2}}.<|eot_id|>
+[INFO|configuration_utils.py:763] 2026-03-05 15:01:44,389 >> loading configuration file /local2/salman/model/pretrain_model/v2_4_gpu_llama_3b_nemo_52b/config.json
+[INFO|configuration_utils.py:839] 2026-03-05 15:01:44,391 >> Model config LlamaConfig {
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 128000,
+ "dtype": "bfloat16",
+ "eos_token_id": 128001,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 3072,
+ "initializer_range": 0.02,
+ "intermediate_size": 8192,
+ "max_position_embeddings": 131072,
+ "mlp_bias": false,
+ "model_type": "llama",
+ "num_attention_heads": 24,
+ "num_hidden_layers": 28,
+ "num_key_value_heads": 8,
+ "pad_token_id": 128001,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": {
+ "factor": 32.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": true,
+ "transformers_version": "4.57.1",
+ "use_cache": false,
+ "vocab_size": 128256
+}
+
+[INFO|2026-03-05 15:01:44] llamafactory.model.model_utils.kv_cache:143 >> KV cache is disabled during training.
+[INFO|2026-03-05 15:01:44] llamafactory.model.model_utils.liger_kernel:143 >> Liger kernel has been applied to the model.
+[WARNING|logging.py:328] 2026-03-05 15:01:44,600 >> `torch_dtype` is deprecated! Use `dtype` instead!
+[INFO|modeling_utils.py:1169] 2026-03-05 15:01:44,600 >> loading weights file /local2/salman/model/pretrain_model/v2_4_gpu_llama_3b_nemo_52b/model.safetensors.index.json
+[INFO|modeling_utils.py:2341] 2026-03-05 15:01:44,600 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16.
+[INFO|configuration_utils.py:986] 2026-03-05 15:01:44,604 >> Generate config GenerationConfig {
+ "bos_token_id": 128000,
+ "eos_token_id": 128001,
+ "pad_token_id": 128001,
+ "use_cache": false
+}
+
+
Loading checkpoint shards: 0%| | 0/2 [00:00, ?it/s]`torch_dtype` is deprecated! Use `dtype` instead!
+`torch_dtype` is deprecated! Use `dtype` instead!
+`torch_dtype` is deprecated! Use `dtype` instead!
+
Loading checkpoint shards: 0%| | 0/2 [00:00, ?it/s]
Loading checkpoint shards: 0%| | 0/2 [00:00, ?it/s]
Loading checkpoint shards: 0%| | 0/2 [00:00, ?it/s]
Loading checkpoint shards: 50%|█████ | 1/2 [00:00<00:00, 2.78it/s]
Loading checkpoint shards: 50%|█████ | 1/2 [00:00<00:00, 3.05it/s]
Loading checkpoint shards: 50%|█████ | 1/2 [00:00<00:00, 2.88it/s]
Loading checkpoint shards: 50%|█████ | 1/2 [00:00<00:00, 2.80it/s]
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 4.26it/s]
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 3.94it/s]
+[INFO|configuration_utils.py:939] 2026-03-05 15:01:45,151 >> loading configuration file /local2/salman/model/pretrain_model/v2_4_gpu_llama_3b_nemo_52b/generation_config.json
+[INFO|configuration_utils.py:986] 2026-03-05 15:01:45,151 >> Generate config GenerationConfig {
+ "bos_token_id": 128000,
+ "do_sample": true,
+ "eos_token_id": [
+ 128001
+ ],
+ "pad_token_id": 128001,
+ "temperature": 0.6,
+ "top_p": 0.9
+}
+
+[INFO|dynamic_module_utils.py:423] 2026-03-05 15:01:45,151 >> Could not locate the custom_generate/generate.py inside /local2/salman/model/pretrain_model/v2_4_gpu_llama_3b_nemo_52b.
+[INFO|2026-03-05 15:01:45] llamafactory.model.model_utils.checkpointing:143 >> Gradient checkpointing enabled.
+[INFO|2026-03-05 15:01:45] llamafactory.model.model_utils.attention:143 >> Using FlashAttention-2 for faster training and inference.
+[INFO|2026-03-05 15:01:45] llamafactory.model.adapter:143 >> Upcasting trainable params to float32.
+[INFO|2026-03-05 15:01:45] llamafactory.model.adapter:143 >> Fine-tuning method: Full
+[INFO|2026-03-05 15:01:45] llamafactory.model.loader:143 >> trainable params: 3,212,749,824 || all params: 3,212,749,824 || trainable%: 100.0000
+[INFO|trainer.py:749] 2026-03-05 15:01:45,205 >> Using auto half precision backend
+[WARNING|trainer.py:982] 2026-03-05 15:01:45,206 >> The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 128009}.
+
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 4.62it/s]
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 4.28it/s]
+
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 4.31it/s]
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 4.01it/s]
+The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 128009}.
+
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 4.21it/s]
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 3.91it/s]
+The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 128009}.
+The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 128009}.
+Gradient accumulation steps mismatch: GradientAccumulationPlugin has 1, DeepSpeed config has 32. Using DeepSpeed's value.
+[2026-03-05 15:01:45,555] [INFO] [logging.py:107:log_dist] [Rank 0] DeepSpeed info: version=0.16.9, git-hash=unknown, git-branch=unknown
+[2026-03-05 15:01:45,555] [INFO] [config.py:735:__init__] Config mesh_device None world_size = 4
+[2026-03-05 15:01:46,373] [INFO] [logging.py:107:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False
+[2026-03-05 15:01:46,374] [INFO] [logging.py:107:log_dist] [Rank 0] Using client Optimizer as basic optimizer
+[2026-03-05 15:01:46,374] [INFO] [logging.py:107:log_dist] [Rank 0] Removing param_group that has no 'params' in the basic Optimizer
+[2026-03-05 15:01:46,383] [INFO] [logging.py:107:log_dist] [Rank 0] DeepSpeed Basic Optimizer = AdamW
+[2026-03-05 15:01:46,384] [INFO] [utils.py:59:is_zero_supported_optimizer] Checking ZeRO support for optimizer=AdamW type=
+[2026-03-05 15:01:46,384] [INFO] [logging.py:107:log_dist] [Rank 0] Creating torch.bfloat16 ZeRO stage 2 optimizer
+[2026-03-05 15:01:46,384] [INFO] [stage_1_and_2.py:150:__init__] Reduce bucket size 500000000
+[2026-03-05 15:01:46,384] [INFO] [stage_1_and_2.py:151:__init__] Allgather bucket size 500000000
+[2026-03-05 15:01:46,384] [INFO] [stage_1_and_2.py:152:__init__] CPU Offload: False
+[2026-03-05 15:01:46,384] [INFO] [stage_1_and_2.py:153:__init__] Round robin gradient partitioning: True
+[2026-03-05 15:01:56,186] [INFO] [utils.py:781:see_memory_usage] Before initializing optimizer states
+[2026-03-05 15:01:56,187] [INFO] [utils.py:782:see_memory_usage] MA 8.98 GB Max_MA 8.98 GB CA 9.03 GB Max_CA 9 GB
+[2026-03-05 15:01:56,187] [INFO] [utils.py:789:see_memory_usage] CPU Virtual Memory: used = 82.93 GB, percent = 5.5%
+[2026-03-05 15:01:56,445] [INFO] [utils.py:781:see_memory_usage] After initializing optimizer states
+[2026-03-05 15:01:56,445] [INFO] [utils.py:782:see_memory_usage] MA 8.98 GB Max_MA 11.97 GB CA 12.01 GB Max_CA 12 GB
+[2026-03-05 15:01:56,445] [INFO] [utils.py:789:see_memory_usage] CPU Virtual Memory: used = 82.97 GB, percent = 5.5%
+[2026-03-05 15:01:56,445] [INFO] [stage_1_and_2.py:557:__init__] optimizer state initialized
+[2026-03-05 15:01:56,681] [INFO] [utils.py:781:see_memory_usage] After initializing ZeRO optimizer
+[2026-03-05 15:01:56,682] [INFO] [utils.py:782:see_memory_usage] MA 8.98 GB Max_MA 8.98 GB CA 12.01 GB Max_CA 12 GB
+[2026-03-05 15:01:56,682] [INFO] [utils.py:789:see_memory_usage] CPU Virtual Memory: used = 82.96 GB, percent = 5.5%
+[2026-03-05 15:01:56,683] [INFO] [logging.py:107:log_dist] [Rank 0] DeepSpeed Final Optimizer = DeepSpeedZeroOptimizer
+[2026-03-05 15:01:56,684] [INFO] [logging.py:107:log_dist] [Rank 0] DeepSpeed using configured LR scheduler = None
+[2026-03-05 15:01:56,684] [INFO] [logging.py:107:log_dist] [Rank 0] DeepSpeed LR Scheduler = None
+[2026-03-05 15:01:56,684] [INFO] [logging.py:107:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0, 0.0], mom=[(0.9, 0.999), (0.9, 0.999)]
+[2026-03-05 15:01:56,684] [INFO] [config.py:1003:print] DeepSpeedEngine configuration:
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] activation_checkpointing_config {
+ "partition_activations": false,
+ "contiguous_memory_optimization": false,
+ "cpu_checkpointing": false,
+ "number_checkpoints": null,
+ "synchronize_checkpoint_boundary": false,
+ "profile": false
+}
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'intra_op_parallelism': 1, 'single_submit': False, 'overlap_events': True, 'use_gds': False}
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] amp_enabled .................. False
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] amp_params ................... False
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] autotuning_config ............ {
+ "enabled": false,
+ "start_step": null,
+ "end_step": null,
+ "metric_path": null,
+ "arg_mappings": null,
+ "metric": "throughput",
+ "model_info": null,
+ "results_dir": "autotuning_results",
+ "exps_dir": "autotuning_exps",
+ "overwrite": true,
+ "fast": true,
+ "start_profile_step": 3,
+ "end_profile_step": 5,
+ "tuner_type": "gridsearch",
+ "tuner_early_stopping": 5,
+ "tuner_num_trials": 50,
+ "model_info_path": null,
+ "mp_size": 1,
+ "max_train_batch_size": null,
+ "min_train_batch_size": 1,
+ "max_train_micro_batch_size_per_gpu": 1.024000e+03,
+ "min_train_micro_batch_size_per_gpu": 1,
+ "num_tuning_micro_batch_sizes": 3
+}
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] bfloat16_enabled ............. True
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] bfloat16_immediate_grad_update True
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] checkpoint_parallel_write_pipeline False
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] checkpoint_tag_validation_enabled True
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] checkpoint_tag_validation_fail False
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] comms_config .................
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] communication_data_type ...... None
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] compile_config ............... deepcompile=False free_activation=False offload_activation=False offload_opt_states=False double_buffer=True symmetric_memory=False debug_log=False offload_parameters=False sync_before_reduce=False sync_after_reduce=False sync_before_allgather=False sync_after_allgather=False
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] compression_config ........... {'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}}
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] curriculum_enabled_legacy .... False
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] curriculum_params_legacy ..... False
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] data_efficiency_config ....... {'enabled': False, 'seed': 1234, 'data_sampling': {'enabled': False, 'num_epochs': 1000, 'num_workers': 0, 'pin_memory': False, 'curriculum_learning': {'enabled': False}, 'dynamic_batching': {'enabled': False, 'lr_scaling_method': 'linear', 'min_batch_size': 1, 'max_batch_size': None, 'sequence_picking_order': 'dataloader', 'verbose': False}}, 'data_routing': {'enabled': False, 'random_ltd': {'enabled': False, 'layer_token_lr_schedule': {'enabled': False}}}}
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] data_efficiency_enabled ...... False
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] dataloader_drop_last ......... False
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] disable_allgather ............ False
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] dump_state ................... False
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] dynamic_loss_scale_args ...... None
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] eigenvalue_enabled ........... False
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] eigenvalue_gas_boundary_resolution 1
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] eigenvalue_layer_name ........ bert.encoder.layer
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] eigenvalue_layer_num ......... 0
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] eigenvalue_max_iter .......... 100
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] eigenvalue_stability ......... 1e-06
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] eigenvalue_tol ............... 0.01
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] eigenvalue_verbose ........... False
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] elasticity_enabled ........... False
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] flops_profiler_config ........ {
+ "enabled": false,
+ "recompute_fwd_factor": 0.0,
+ "profile_step": 1,
+ "module_depth": -1,
+ "top_modules": 1,
+ "detailed": true,
+ "output_file": null
+}
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] fp16_auto_cast ............... None
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] fp16_enabled ................. False
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] fp16_master_weights_and_gradients False
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] global_rank .................. 0
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] grad_accum_dtype ............. None
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] gradient_accumulation_steps .. 32
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] gradient_clipping ............ 1.0
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] gradient_predivide_factor .... 1.0
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] graph_harvesting ............. False
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] hybrid_engine ................ enabled=False max_out_tokens=512 inference_tp_size=1 release_inference_cache=False pin_parameters=True tp_gather_partition_size=8
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] initial_dynamic_scale ........ 1
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] load_universal_checkpoint .... False
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] loss_scale ................... 1.0
+[2026-03-05 15:01:56,685] [INFO] [config.py:1007:print] memory_breakdown ............. False
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] mics_hierarchial_params_gather False
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] mics_shard_size .............. -1
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] monitor_config ............... tensorboard=TensorBoardConfig(enabled=False, output_path='', job_name='DeepSpeedJobName') comet=CometConfig(enabled=False, samples_log_interval=100, project=None, workspace=None, api_key=None, experiment_name=None, experiment_key=None, online=None, mode=None) wandb=WandbConfig(enabled=False, group=None, team=None, project='deepspeed') csv_monitor=CSVConfig(enabled=False, output_path='', job_name='DeepSpeedJobName')
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] nebula_config ................ {
+ "enabled": false,
+ "persistent_storage_path": null,
+ "persistent_time_interval": 100,
+ "num_of_version_in_retention": 2,
+ "enable_nebula_load": true,
+ "load_path": null
+}
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] optimizer_legacy_fusion ...... False
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] optimizer_name ............... None
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] optimizer_params ............. None
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0, 'pipe_partitioned': True, 'grad_partitioned': True}
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] pld_enabled .................. False
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] pld_params ................... False
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] prescale_gradients ........... False
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] scheduler_name ............... None
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] scheduler_params ............. None
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] seq_parallel_communication_data_type torch.float32
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] sparse_attention ............. None
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] sparse_gradients_enabled ..... False
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] steps_per_print .............. inf
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] tensor_parallel_config ....... dtype=torch.float16 autotp_size=0 tp_overlap_comm=False tensor_parallel=TPConfig(tp_size=1, tp_grain_size=1, mpu=None, tp_group=None) injection_policy_tuple=None keep_module_on_host=False replace_with_kernel_inject=False
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] timers_config ................ enabled=True synchronized=True
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] train_batch_size ............. 256
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] train_micro_batch_size_per_gpu 2
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] use_data_before_expert_parallel_ False
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] use_node_local_storage ....... False
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] wall_clock_breakdown ......... False
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] weight_quantization_config ... None
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] world_size ................... 4
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] zero_allow_untested_optimizer True
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] zero_config .................. stage=2 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500000000 use_multi_rank_bucket_allreduce=True allgather_partitions=True allgather_bucket_size=500000000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=None offload_optimizer=None sub_group_size=1000000000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50000000 param_persistence_threshold=100000 model_persistence_threshold=9223372036854775807 max_live_parameters=1000000000 max_reuse_distance=1000000000 gather_16bit_weights_on_model_save=False module_granularity_threshold=0 use_all_reduce_for_fetch_params=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=True zero_hpz_partition_size=1 zero_quantized_weights=False zero_quantized_nontrainable_weights=False zero_quantized_gradients=False zeropp_loco_param=None mics_shard_size=-1 mics_hierarchical_params_gather=False memory_efficient_linear=True pipeline_loading_checkpoint=False override_module_apply=True log_trace_cache_warnings=False
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] zero_enabled ................. True
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] zero_force_ds_cpu_optimizer .. True
+[2026-03-05 15:01:56,686] [INFO] [config.py:1007:print] zero_optimization_stage ...... 2
+[2026-03-05 15:01:56,686] [INFO] [config.py:993:print_user_config] json = {
+ "train_batch_size": 256,
+ "train_micro_batch_size_per_gpu": 2,
+ "gradient_accumulation_steps": 32,
+ "gradient_clipping": 1.0,
+ "zero_allow_untested_optimizer": true,
+ "fp16": {
+ "enabled": false,
+ "loss_scale": 0,
+ "loss_scale_window": 1000,
+ "initial_scale_power": 16,
+ "hysteresis": 2,
+ "min_loss_scale": 1
+ },
+ "bf16": {
+ "enabled": true
+ },
+ "zero_optimization": {
+ "stage": 2,
+ "allgather_partitions": true,
+ "allgather_bucket_size": 5.000000e+08,
+ "overlap_comm": false,
+ "reduce_scatter": true,
+ "reduce_bucket_size": 5.000000e+08,
+ "contiguous_gradients": true,
+ "round_robin_gradients": true
+ },
+ "steps_per_print": inf
+}
+[INFO|trainer.py:2519] 2026-03-05 15:01:56,687 >> ***** Running training *****
+[INFO|trainer.py:2520] 2026-03-05 15:01:56,687 >> Num examples = 43,525
+[INFO|trainer.py:2521] 2026-03-05 15:01:56,687 >> Num Epochs = 3
+[INFO|trainer.py:2522] 2026-03-05 15:01:56,687 >> Instantaneous batch size per device = 2
+[INFO|trainer.py:2525] 2026-03-05 15:01:56,687 >> Total train batch size (w. parallel, distributed & accumulation) = 256
+[INFO|trainer.py:2526] 2026-03-05 15:01:56,687 >> Gradient Accumulation steps = 32
+[INFO|trainer.py:2527] 2026-03-05 15:01:56,687 >> Total optimization steps = 513
+[INFO|trainer.py:2528] 2026-03-05 15:01:56,688 >> Number of trainable parameters = 3,212,749,824
+2026/03/05 15:01:56 INFO mlflow.tracking.fluent: Experiment with name 'llama3b_think_sft_nopack_lr1.5e5_ep3' does not exist. Creating a new experiment.
+
0%| | 0/513 [00:00, ?it/s]
0%| | 1/513 [00:55<7:52:31, 55.37s/it]
0%| | 2/513 [01:49<7:47:34, 54.90s/it]
1%| | 3/513 [02:42<7:37:22, 53.81s/it]
1%| | 4/513 [03:36<7:35:40, 53.71s/it]
1%| | 5/513 [04:28<7:32:09, 53.40s/it]
1%| | 6/513 [05:21<7:29:59, 53.25s/it]
1%|▏ | 7/513 [06:12<7:20:42, 52.26s/it]
2%|▏ | 8/513 [07:05<7:22:03, 52.52s/it]
2%|▏ | 9/513 [07:56<7:18:50, 52.24s/it]
2%|▏ | 10/513 [08:49<7:18:47, 52.34s/it]
{'loss': 0.8486, 'grad_norm': 1.3511555194854736, 'learning_rate': 2.596153846153846e-06, 'epoch': 0.06}
+
2%|▏ | 10/513 [08:49<7:18:47, 52.34s/it]
2%|▏ | 11/513 [09:42<7:19:14, 52.50s/it]
2%|▏ | 12/513 [10:34<7:17:42, 52.42s/it]
3%|▎ | 13/513 [11:27<7:18:56, 52.67s/it]
3%|▎ | 14/513 [12:17<7:10:58, 51.82s/it]
3%|▎ | 15/513 [13:11<7:14:54, 52.40s/it]
3%|▎ | 16/513 [14:02<7:10:22, 51.96s/it]
3%|▎ | 17/513 [14:55<7:11:50, 52.24s/it]
4%|▎ | 18/513 [15:44<7:04:51, 51.50s/it]
4%|▎ | 19/513 [16:36<7:04:43, 51.59s/it]
4%|▍ | 20/513 [17:27<7:03:07, 51.50s/it]
{'loss': 0.7412, 'grad_norm': 0.7383383512496948, 'learning_rate': 5.480769230769231e-06, 'epoch': 0.12}
+
4%|▍ | 20/513 [17:27<7:03:07, 51.50s/it]
4%|▍ | 21/513 [18:19<7:02:58, 51.58s/it]
4%|▍ | 22/513 [19:11<7:02:15, 51.60s/it]
4%|▍ | 23/513 [20:03<7:02:03, 51.68s/it]
5%|▍ | 24/513 [20:54<7:00:11, 51.56s/it]
5%|▍ | 25/513 [21:45<6:57:23, 51.32s/it]
5%|▌ | 26/513 [22:37<6:58:06, 51.51s/it]
5%|▌ | 27/513 [23:30<7:00:22, 51.90s/it]
5%|▌ | 28/513 [24:20<6:56:01, 51.47s/it]
6%|▌ | 29/513 [25:12<6:56:25, 51.62s/it]
6%|▌ | 30/513 [26:04<6:56:01, 51.68s/it]
{'loss': 0.6532, 'grad_norm': 0.47219017148017883, 'learning_rate': 8.365384615384616e-06, 'epoch': 0.18}
+
6%|▌ | 30/513 [26:04<6:56:01, 51.68s/it]
6%|▌ | 31/513 [26:57<6:57:56, 52.03s/it]
6%|▌ | 32/513 [27:50<6:59:25, 52.32s/it]
6%|▋ | 33/513 [28:41<6:56:19, 52.04s/it]
7%|▋ | 34/513 [29:33<6:55:02, 51.99s/it]
7%|▋ | 35/513 [30:25<6:55:25, 52.15s/it]
7%|▋ | 36/513 [31:19<6:56:52, 52.44s/it]
7%|▋ | 37/513 [32:10<6:54:47, 52.28s/it]
7%|▋ | 38/513 [33:02<6:51:42, 52.01s/it]
8%|▊ | 39/513 [33:55<6:54:32, 52.47s/it]
8%|▊ | 40/513 [34:48<6:53:11, 52.41s/it]
{'loss': 0.6102, 'grad_norm': 0.30038249492645264, 'learning_rate': 1.125e-05, 'epoch': 0.24}
+
8%|▊ | 40/513 [34:48<6:53:11, 52.41s/it]
8%|▊ | 41/513 [35:39<6:49:49, 52.10s/it]
8%|▊ | 42/513 [36:31<6:48:16, 52.01s/it]
8%|▊ | 43/513 [37:23<6:48:01, 52.09s/it]
9%|▊ | 44/513 [38:16<6:49:24, 52.38s/it]
9%|▉ | 45/513 [39:07<6:45:45, 52.02s/it]
9%|▉ | 46/513 [39:58<6:42:56, 51.77s/it]
9%|▉ | 47/513 [40:50<6:40:24, 51.56s/it]
9%|▉ | 48/513 [41:42<6:40:56, 51.73s/it]
10%|▉ | 49/513 [42:34<6:40:30, 51.79s/it]
10%|▉ | 50/513 [43:26<6:40:02, 51.84s/it]
{'loss': 0.5784, 'grad_norm': 0.2751595377922058, 'learning_rate': 1.4134615384615384e-05, 'epoch': 0.29}
+
10%|▉ | 50/513 [43:26<6:40:02, 51.84s/it]
10%|▉ | 51/513 [44:16<6:36:23, 51.48s/it]
10%|█ | 52/513 [45:07<6:34:46, 51.38s/it]
10%|█ | 53/513 [45:59<6:33:36, 51.34s/it]
11%|█ | 54/513 [46:50<6:32:49, 51.35s/it]
11%|█ | 55/513 [47:42<6:33:32, 51.56s/it]
11%|█ | 56/513 [48:34<6:34:15, 51.76s/it]
11%|█ | 57/513 [49:26<6:34:10, 51.86s/it]
11%|█▏ | 58/513 [50:19<6:35:09, 52.11s/it]
12%|█▏ | 59/513 [51:10<6:31:10, 51.70s/it]
12%|█▏ | 60/513 [51:59<6:25:01, 51.00s/it]
{'loss': 0.5641, 'grad_norm': 0.26936954259872437, 'learning_rate': 1.4991468156423456e-05, 'epoch': 0.35}
+
12%|█▏ | 60/513 [51:59<6:25:01, 51.00s/it]
12%|█▏ | 61/513 [52:51<6:25:36, 51.19s/it]
12%|█▏ | 62/513 [53:43<6:26:25, 51.41s/it]
12%|█▏ | 63/513 [54:33<6:23:49, 51.18s/it]
12%|█▏ | 64/513 [55:24<6:21:37, 51.00s/it]
13%|█▎ | 65/513 [56:17<6:24:48, 51.54s/it]
13%|█▎ | 66/513 [57:09<6:26:19, 51.86s/it]
13%|█▎ | 67/513 [58:00<6:21:49, 51.37s/it]
13%|█▎ | 68/513 [58:51<6:21:44, 51.47s/it]
13%|█▎ | 69/513 [59:42<6:18:59, 51.22s/it]
14%|█▎ | 70/513 [1:00:35<6:22:41, 51.83s/it]
{'loss': 0.5469, 'grad_norm': 0.25376981496810913, 'learning_rate': 1.494972625749433e-05, 'epoch': 0.41}
+
14%|█▎ | 70/513 [1:00:35<6:22:41, 51.83s/it]
14%|█▍ | 71/513 [1:01:25<6:18:16, 51.35s/it]
14%|█▍ | 72/513 [1:02:18<6:20:53, 51.82s/it]
14%|█▍ | 73/513 [1:03:10<6:20:15, 51.85s/it]
14%|█▍ | 74/513 [1:04:02<6:19:19, 51.84s/it]
15%|█▍ | 75/513 [1:04:52<6:14:55, 51.36s/it]
15%|█▍ | 76/513 [1:05:45<6:17:40, 51.85s/it]
15%|█▌ | 77/513 [1:06:37<6:17:09, 51.90s/it]
15%|█▌ | 78/513 [1:07:29<6:15:25, 51.78s/it]
15%|█▌ | 79/513 [1:08:20<6:14:12, 51.73s/it]
16%|█▌ | 80/513 [1:09:12<6:13:27, 51.75s/it]
{'loss': 0.5424, 'grad_norm': 0.2703434228897095, 'learning_rate': 1.4873400764197756e-05, 'epoch': 0.47}
+
16%|█▌ | 80/513 [1:09:12<6:13:27, 51.75s/it]
16%|█▌ | 81/513 [1:10:04<6:12:05, 51.68s/it]
16%|█▌ | 82/513 [1:10:56<6:13:09, 51.95s/it]
16%|█▌ | 83/513 [1:11:48<6:11:49, 51.88s/it]
16%|█▋ | 84/513 [1:12:41<6:13:14, 52.20s/it]
17%|█▋ | 85/513 [1:13:52<6:53:29, 57.97s/it]
17%|█▋ | 86/513 [1:15:31<8:19:15, 70.15s/it]
17%|█▋ | 87/513 [1:17:07<9:12:44, 77.85s/it]
17%|█▋ | 88/513 [1:18:39<9:41:13, 82.06s/it]
17%|█▋ | 89/513 [1:19:49<9:14:14, 78.43s/it]
18%|█▊ | 90/513 [1:21:01<9:00:15, 76.63s/it]
{'loss': 0.5293, 'grad_norm': 0.3386951684951782, 'learning_rate': 1.4762845999606666e-05, 'epoch': 0.53}
+
18%|█▊ | 90/513 [1:21:01<9:00:15, 76.63s/it]
18%|█▊ | 91/513 [1:22:15<8:52:14, 75.68s/it]
18%|█▊ | 92/513 [1:23:49<9:29:59, 81.23s/it]
18%|█▊ | 93/513 [1:25:24<9:58:02, 85.43s/it]
18%|█▊ | 94/513 [1:26:55<10:07:53, 87.05s/it]
19%|█▊ | 95/513 [1:28:26<10:14:19, 88.18s/it]
19%|█▊ | 96/513 [1:29:58<10:20:43, 89.31s/it]
19%|█▉ | 97/513 [1:31:28<10:22:30, 89.79s/it]
19%|█▉ | 98/513 [1:33:05<10:34:37, 91.75s/it]
19%|█▉ | 99/513 [1:34:34<10:28:46, 91.13s/it]
19%|█▉ | 100/513 [1:36:07<10:30:19, 91.57s/it]
{'loss': 0.5266, 'grad_norm': 0.30952027440071106, 'learning_rate': 1.4618575188100301e-05, 'epoch': 0.59}
+
19%|█▉ | 100/513 [1:36:07<10:30:19, 91.57s/it]
20%|█▉ | 101/513 [1:37:36<10:24:05, 90.89s/it]
20%|█▉ | 102/513 [1:38:59<10:05:56, 88.46s/it]
20%|██ | 103/513 [1:40:36<10:20:55, 90.87s/it]
20%|██ | 104/513 [1:42:13<10:32:44, 92.82s/it]
20%|██ | 105/513 [1:43:48<10:35:14, 93.42s/it]
21%|██ | 106/513 [1:45:21<10:33:23, 93.37s/it]
21%|██ | 107/513 [1:46:56<10:35:55, 93.98s/it]
21%|██ | 108/513 [1:48:31<10:36:15, 94.26s/it]
21%|██ | 109/513 [1:50:05<10:32:57, 94.00s/it]
21%|██▏ | 110/513 [1:51:33<10:19:53, 92.29s/it]
{'loss': 0.522, 'grad_norm': 0.2706937789916992, 'learning_rate': 1.4441258072841264e-05, 'epoch': 0.65}
+
21%|██▏ | 110/513 [1:51:33<10:19:53, 92.29s/it]
22%|██▏ | 111/513 [1:52:56<9:59:08, 89.42s/it]
22%|██▏ | 112/513 [1:54:06<9:18:39, 83.59s/it]
22%|██▏ | 113/513 [1:55:14<8:47:18, 79.10s/it]
22%|██▏ | 114/513 [1:56:25<8:28:12, 76.42s/it]
22%|██▏ | 115/513 [1:57:33<8:11:07, 74.04s/it]
23%|██▎ | 116/513 [1:58:43<8:02:04, 72.86s/it]
23%|██▎ | 117/513 [1:59:56<8:00:35, 72.82s/it]
23%|██▎ | 118/513 [2:01:07<7:55:14, 72.19s/it]
23%|██▎ | 119/513 [2:02:16<7:47:33, 71.20s/it]
23%|██▎ | 120/513 [2:03:27<7:46:30, 71.22s/it]
{'loss': 0.5222, 'grad_norm': 0.286222368478775, 'learning_rate': 1.4231717806651086e-05, 'epoch': 0.71}
+
23%|██▎ | 120/513 [2:03:27<7:46:30, 71.22s/it]
24%|██▎ | 121/513 [2:04:34<7:36:42, 69.90s/it]
24%|██▍ | 122/513 [2:05:43<7:35:23, 69.88s/it]
24%|██▍ | 123/513 [2:06:54<7:34:35, 69.94s/it]
24%|██▍ | 124/513 [2:08:03<7:31:35, 69.65s/it]
24%|██▍ | 125/513 [2:09:13<7:31:32, 69.83s/it]
25%|██▍ | 126/513 [2:10:20<7:25:05, 69.01s/it]
25%|██▍ | 127/513 [2:11:29<7:24:54, 69.16s/it]
25%|██▍ | 128/513 [2:12:40<7:25:45, 69.47s/it]
25%|██▌ | 129/513 [2:13:49<7:24:49, 69.50s/it]
25%|██▌ | 130/513 [2:14:59<7:23:56, 69.55s/it]
{'loss': 0.5106, 'grad_norm': 0.2553636431694031, 'learning_rate': 1.3990927130717711e-05, 'epoch': 0.76}
+
25%|██▌ | 130/513 [2:14:59<7:23:56, 69.55s/it]
26%|██▌ | 131/513 [2:16:11<7:28:43, 70.48s/it]
26%|██▌ | 132/513 [2:17:30<7:43:19, 72.97s/it]
26%|██▌ | 133/513 [2:18:43<7:41:55, 72.94s/it]
26%|██▌ | 134/513 [2:19:55<7:39:41, 72.77s/it]
26%|██▋ | 135/513 [2:21:08<7:37:14, 72.58s/it]
27%|██▋ | 136/513 [2:22:20<7:35:53, 72.56s/it]
27%|██▋ | 137/513 [2:23:31<7:32:04, 72.14s/it]
27%|██▋ | 138/513 [2:24:43<7:30:42, 72.11s/it]
27%|██▋ | 139/513 [2:25:57<7:31:34, 72.45s/it]
27%|██▋ | 140/513 [2:27:07<7:25:59, 71.74s/it]
{'loss': 0.5114, 'grad_norm': 0.2975357472896576, 'learning_rate': 1.3720003858874311e-05, 'epoch': 0.82}
+
27%|██▋ | 140/513 [2:27:07<7:25:59, 71.74s/it]
27%|██▋ | 141/513 [2:28:14<7:17:04, 70.50s/it]
28%|██▊ | 142/513 [2:29:26<7:18:10, 70.86s/it]
28%|██▊ | 143/513 [2:30:37<7:18:02, 71.03s/it]
28%|██▊ | 144/513 [2:31:47<7:14:16, 70.61s/it]
28%|██▊ | 145/513 [2:33:00<7:17:26, 71.32s/it]
28%|██▊ | 146/513 [2:34:11<7:15:00, 71.12s/it]
29%|██▊ | 147/513 [2:35:22<7:15:09, 71.34s/it]
29%|██▉ | 148/513 [2:36:32<7:11:22, 70.91s/it]
29%|██▉ | 149/513 [2:37:43<7:09:05, 70.73s/it]
29%|██▉ | 150/513 [2:38:51<7:04:10, 70.11s/it]
{'loss': 0.5099, 'grad_norm': 0.24958086013793945, 'learning_rate': 1.3420205688412603e-05, 'epoch': 0.88}
+
29%|██▉ | 150/513 [2:38:51<7:04:10, 70.11s/it]
29%|██▉ | 151/513 [2:40:06<7:11:06, 71.46s/it]
30%|██▉ | 152/513 [2:41:20<7:15:12, 72.33s/it]
30%|██▉ | 153/513 [2:42:33<7:13:55, 72.32s/it]
30%|███ | 154/513 [2:43:42<7:07:40, 71.48s/it]
30%|███ | 155/513 [2:44:53<7:05:57, 71.39s/it]
30%|███ | 156/513 [2:46:05<7:06:05, 71.61s/it]
31%|███ | 157/513 [2:47:21<7:12:18, 72.86s/it]
31%|███ | 158/513 [2:48:39<7:20:37, 74.47s/it]
31%|███ | 159/513 [2:49:58<7:25:50, 75.57s/it]
31%|███ | 160/513 [2:51:13<7:24:11, 75.50s/it]
{'loss': 0.5086, 'grad_norm': 0.302441269159317, 'learning_rate': 1.3092924361520291e-05, 'epoch': 0.94}
+
31%|███ | 160/513 [2:51:13<7:24:11, 75.50s/it]
31%|███▏ | 161/513 [2:52:28<7:21:44, 75.30s/it]
32%|███▏ | 162/513 [2:53:39<7:13:40, 74.13s/it]
32%|███▏ | 163/513 [2:54:48<7:02:53, 72.50s/it]
32%|███▏ | 164/513 [2:55:58<6:56:52, 71.67s/it]
32%|███▏ | 165/513 [2:57:09<6:55:59, 71.72s/it]
32%|███▏ | 166/513 [2:58:22<6:57:08, 72.13s/it]
33%|███▎ | 167/513 [2:59:33<6:53:24, 71.69s/it]
33%|███▎ | 168/513 [3:00:45<6:53:11, 71.86s/it]
33%|███▎ | 169/513 [3:01:57<6:51:00, 71.69s/it]
33%|███▎ | 170/513 [3:03:05<6:44:39, 70.78s/it]
{'loss': 0.5061, 'grad_norm': 0.24974007904529572, 'learning_rate': 1.2739679204446694e-05, 'epoch': 1.0}
+
33%|███▎ | 170/513 [3:03:05<6:44:39, 70.78s/it]
33%|███▎ | 171/513 [3:03:08<4:46:16, 50.22s/it]
34%|███▎ | 172/513 [3:04:18<5:19:56, 56.29s/it]
34%|███▎ | 173/513 [3:05:26<5:38:30, 59.74s/it]
34%|███▍ | 174/513 [3:06:35<5:53:28, 62.56s/it]
34%|███▍ | 175/513 [3:07:47<6:08:01, 65.33s/it]
34%|███▍ | 176/513 [3:09:01<6:21:46, 67.97s/it]
35%|███▍ | 177/513 [3:10:14<6:29:54, 69.63s/it]
35%|███▍ | 178/513 [3:11:27<6:33:07, 70.41s/it]
35%|███▍ | 179/513 [3:12:37<6:32:20, 70.48s/it]
35%|███▌ | 180/513 [3:13:45<6:27:03, 69.74s/it]
{'loss': 0.4746, 'grad_norm': 0.35062289237976074, 'learning_rate': 1.236211007438955e-05, 'epoch': 1.05}
+
35%|███▌ | 180/513 [3:13:45<6:27:03, 69.74s/it]
35%|███▌ | 181/513 [3:14:56<6:27:06, 69.96s/it]
35%|███▌ | 182/513 [3:16:05<6:24:37, 69.72s/it]
36%|███▌ | 183/513 [3:17:19<6:30:26, 70.99s/it]
36%|███▌ | 184/513 [3:18:39<6:44:03, 73.69s/it]
36%|███▌ | 185/513 [3:19:52<6:41:07, 73.38s/it]
36%|███▋ | 186/513 [3:21:06<6:42:19, 73.82s/it]
36%|███▋ | 187/513 [3:22:20<6:40:14, 73.66s/it]
37%|███▋ | 188/513 [3:23:32<6:37:14, 73.34s/it]
37%|███▋ | 189/513 [3:24:45<6:34:24, 73.04s/it]
37%|███▋ | 190/513 [3:25:56<6:30:43, 72.58s/it]
{'loss': 0.478, 'grad_norm': 0.28535276651382446, 'learning_rate': 1.1961969746845325e-05, 'epoch': 1.11}
+
37%|███▋ | 190/513 [3:25:56<6:30:43, 72.58s/it]
37%|███▋ | 191/513 [3:27:03<6:21:00, 71.00s/it]
37%|███▋ | 192/513 [3:28:13<6:18:18, 70.71s/it]
38%|███▊ | 193/513 [3:29:23<6:15:40, 70.44s/it]
38%|███▊ | 194/513 [3:30:31<6:10:39, 69.72s/it]
38%|███▊ | 195/513 [3:31:46<6:17:41, 71.26s/it]
38%|███▊ | 196/513 [3:32:54<6:10:47, 70.18s/it]
38%|███▊ | 197/513 [3:34:03<6:07:24, 69.76s/it]
39%|███▊ | 198/513 [3:35:16<6:12:39, 70.98s/it]
39%|███▉ | 199/513 [3:36:29<6:13:39, 71.40s/it]
39%|███▉ | 200/513 [3:37:41<6:13:47, 71.65s/it]
{'loss': 0.4755, 'grad_norm': 0.2474713921546936, 'learning_rate': 1.1541115778763038e-05, 'epoch': 1.17}
+
39%|███▉ | 200/513 [3:37:41<6:13:47, 71.65s/it][INFO|trainer.py:4309] 2026-03-05 18:39:42,672 >> Saving model checkpoint to /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-200
+[INFO|configuration_utils.py:491] 2026-03-05 18:39:42,675 >> Configuration saved in /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-200/config.json
+[INFO|configuration_utils.py:757] 2026-03-05 18:39:42,676 >> Configuration saved in /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-200/generation_config.json
+[INFO|modeling_utils.py:4189] 2026-03-05 18:39:47,807 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 2 checkpoint shards. You can find where each parameters has been saved in the index located at /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-200/model.safetensors.index.json.
+[INFO|tokenization_utils_base.py:2421] 2026-03-05 18:39:47,807 >> chat template saved in /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-200/chat_template.jinja
+[INFO|tokenization_utils_base.py:2590] 2026-03-05 18:39:47,809 >> tokenizer config file saved in /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-200/tokenizer_config.json
+[INFO|tokenization_utils_base.py:2599] 2026-03-05 18:39:47,809 >> Special tokens file saved in /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-200/special_tokens_map.json
+[2026-03-05 18:39:48,359] [INFO] [logging.py:107:log_dist] [Rank 0] [Torch] Checkpoint global_step200 is about to be saved!
+[2026-03-05 18:39:48,364] [INFO] [logging.py:107:log_dist] [Rank 0] Saving model checkpoint: /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-200/global_step200/mp_rank_00_model_states.pt
+[2026-03-05 18:39:48,365] [INFO] [torch_checkpoint_engine.py:21:save] [Torch] Saving /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-200/global_step200/mp_rank_00_model_states.pt...
+[2026-03-05 18:39:54,671] [INFO] [torch_checkpoint_engine.py:23:save] [Torch] Saved /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-200/global_step200/mp_rank_00_model_states.pt.
+[2026-03-05 18:39:54,676] [INFO] [torch_checkpoint_engine.py:21:save] [Torch] Saving /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-200/global_step200/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt...
+[2026-03-05 18:40:04,317] [INFO] [torch_checkpoint_engine.py:23:save] [Torch] Saved /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-200/global_step200/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt.
+[2026-03-05 18:40:04,318] [INFO] [engine.py:3701:_save_zero_checkpoint] zero checkpoint saved /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-200/global_step200/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
+[2026-03-05 18:40:04,318] [INFO] [torch_checkpoint_engine.py:33:commit] [Torch] Checkpoint global_step200 is ready now!
+
39%|███▉ | 201/513 [3:39:24<7:00:57, 80.95s/it]
39%|███▉ | 202/513 [3:40:38<6:49:29, 79.00s/it]
40%|███▉ | 203/513 [3:41:57<6:47:34, 78.89s/it]
40%|███▉ | 204/513 [3:43:10<6:37:12, 77.13s/it]
40%|███▉ | 205/513 [3:44:19<6:23:12, 74.65s/it]
40%|████ | 206/513 [3:45:25<6:09:24, 72.20s/it]
40%|████ | 207/513 [3:46:32<6:00:47, 70.74s/it]
41%|████ | 208/513 [3:47:42<5:57:31, 70.33s/it]
41%|████ | 209/513 [3:48:59<6:06:21, 72.31s/it]
41%|████ | 210/513 [3:50:20<6:18:10, 74.89s/it]
{'loss': 0.4765, 'grad_norm': 0.23004528880119324, 'learning_rate': 1.1101501885274894e-05, 'epoch': 1.23}
+
41%|████ | 210/513 [3:50:20<6:18:10, 74.89s/it]
41%|████ | 211/513 [3:51:38<6:22:25, 75.98s/it]
41%|████▏ | 212/513 [3:52:58<6:26:32, 77.05s/it]
42%|████▏ | 213/513 [3:54:16<6:26:36, 77.32s/it]
42%|████▏ | 214/513 [3:55:36<6:29:09, 78.09s/it]
42%|████▏ | 215/513 [3:56:51<6:23:41, 77.25s/it]
42%|████▏ | 216/513 [3:57:58<6:06:39, 74.07s/it]
42%|████▏ | 217/513 [3:59:10<6:03:28, 73.68s/it]
42%|████▏ | 218/513 [4:00:26<6:05:07, 74.26s/it]
43%|████▎ | 219/513 [4:01:41<6:05:34, 74.61s/it]
43%|████▎ | 220/513 [4:02:56<6:05:02, 74.75s/it]
{'loss': 0.4706, 'grad_norm': 0.23046620190143585, 'learning_rate': 1.0645168870035313e-05, 'epoch': 1.29}
+
43%|████▎ | 220/513 [4:02:56<6:05:02, 74.75s/it]
43%|████▎ | 221/513 [4:04:12<6:04:21, 74.87s/it]
43%|████▎ | 222/513 [4:05:30<6:08:27, 75.97s/it]
43%|████▎ | 223/513 [4:06:44<6:04:43, 75.46s/it]
44%|████▎ | 224/513 [4:08:02<6:06:25, 76.08s/it]
44%|████▍ | 225/513 [4:09:23<6:12:25, 77.59s/it]
44%|████▍ | 226/513 [4:10:40<6:10:17, 77.41s/it]
44%|████▍ | 227/513 [4:11:49<5:56:40, 74.83s/it]
44%|████▍ | 228/513 [4:12:59<5:48:44, 73.42s/it]
45%|████▍ | 229/513 [4:14:13<5:48:23, 73.60s/it]
45%|████▍ | 230/513 [4:15:23<5:42:11, 72.55s/it]
{'loss': 0.4681, 'grad_norm': 0.243893101811409, 'learning_rate': 1.0174235151272025e-05, 'epoch': 1.35}
+
45%|████▍ | 230/513 [4:15:23<5:42:11, 72.55s/it]
45%|████▌ | 231/513 [4:16:34<5:38:50, 72.09s/it]
45%|████▌ | 232/513 [4:17:50<5:42:18, 73.09s/it]
45%|████▌ | 233/513 [4:19:03<5:41:07, 73.10s/it]
46%|████▌ | 234/513 [4:20:20<5:45:50, 74.37s/it]
46%|████▌ | 235/513 [4:21:38<5:50:15, 75.59s/it]
46%|████▌ | 236/513 [4:22:55<5:50:19, 75.88s/it]
46%|████▌ | 237/513 [4:24:18<5:59:04, 78.06s/it]
46%|████▋ | 238/513 [4:25:38<6:00:05, 78.57s/it]
47%|████▋ | 239/513 [4:26:55<5:57:23, 78.26s/it]
47%|████▋ | 240/513 [4:28:11<5:51:55, 77.34s/it]
{'loss': 0.4715, 'grad_norm': 0.2657492160797119, 'learning_rate': 9.690886927529886e-06, 'epoch': 1.41}
+
47%|████▋ | 240/513 [4:28:11<5:51:55, 77.34s/it]
47%|████▋ | 241/513 [4:29:29<5:51:53, 77.62s/it]
47%|████▋ | 242/513 [4:30:46<5:49:27, 77.37s/it]
47%|████▋ | 243/513 [4:32:00<5:43:59, 76.44s/it]
48%|████▊ | 244/513 [4:33:17<5:43:03, 76.52s/it]
48%|████▊ | 245/513 [4:34:34<5:42:26, 76.67s/it]
48%|████▊ | 246/513 [4:35:47<5:36:16, 75.57s/it]
48%|████▊ | 247/513 [4:37:01<5:33:14, 75.17s/it]
48%|████▊ | 248/513 [4:38:13<5:28:04, 74.28s/it]
49%|████▊ | 249/513 [4:39:31<5:31:48, 75.41s/it]
49%|████▊ | 250/513 [4:40:41<5:22:51, 73.66s/it]
{'loss': 0.4711, 'grad_norm': 0.24003422260284424, 'learning_rate': 9.197368028760536e-06, 'epoch': 1.46}
+
49%|████▊ | 250/513 [4:40:41<5:22:51, 73.66s/it]
49%|████▉ | 251/513 [4:41:55<5:22:39, 73.89s/it]
49%|████▉ | 252/513 [4:43:05<5:15:36, 72.55s/it]
49%|████▉ | 253/513 [4:44:25<5:24:30, 74.89s/it]
50%|████▉ | 254/513 [4:45:42<5:25:41, 75.45s/it]
50%|████▉ | 255/513 [4:46:59<5:27:25, 76.15s/it]
50%|████▉ | 256/513 [4:48:16<5:27:05, 76.36s/it]
50%|█████ | 257/513 [4:49:31<5:24:14, 76.00s/it]
50%|█████ | 258/513 [4:50:48<5:24:10, 76.28s/it]
50%|█████ | 259/513 [4:52:10<5:29:57, 77.94s/it]
51%|█████ | 260/513 [4:53:35<5:37:18, 80.00s/it]
{'loss': 0.4685, 'grad_norm': 0.238833948969841, 'learning_rate': 8.695969499871911e-06, 'epoch': 1.52}
+
51%|█████ | 260/513 [4:53:35<5:37:18, 80.00s/it]
51%|█████ | 261/513 [4:54:58<5:39:18, 80.79s/it]
51%|█████ | 262/513 [4:56:22<5:42:59, 81.99s/it]
51%|█████▏ | 263/513 [4:57:49<5:47:14, 83.34s/it]
51%|█████▏ | 264/513 [4:59:13<5:46:35, 83.52s/it]
52%|█████▏ | 265/513 [5:00:35<5:43:30, 83.10s/it]
52%|█████▏ | 266/513 [5:01:49<5:30:51, 80.37s/it]
52%|█████▏ | 267/513 [5:03:09<5:29:11, 80.29s/it]
52%|█████▏ | 268/513 [5:04:28<5:26:09, 79.88s/it]
52%|█████▏ | 269/513 [5:05:46<5:22:13, 79.24s/it]
53%|█████▎ | 270/513 [5:07:00<5:14:23, 77.63s/it]
{'loss': 0.4688, 'grad_norm': 0.237404927611351, 'learning_rate': 8.18901896509343e-06, 'epoch': 1.58}
+
53%|█████▎ | 270/513 [5:07:00<5:14:23, 77.63s/it]
53%|█████▎ | 271/513 [5:08:13<5:08:19, 76.44s/it]
53%|█████▎ | 272/513 [5:09:26<5:03:01, 75.44s/it]
53%|█████▎ | 273/513 [5:10:37<4:56:30, 74.13s/it]
53%|█████▎ | 274/513 [5:11:55<4:59:53, 75.29s/it]
54%|█████▎ | 275/513 [5:13:09<4:56:13, 74.68s/it]
54%|█████▍ | 276/513 [5:14:21<4:51:50, 73.89s/it]
54%|█████▍ | 277/513 [5:15:31<4:45:59, 72.71s/it]
54%|█████▍ | 278/513 [5:16:42<4:43:26, 72.37s/it]
54%|█████▍ | 279/513 [5:17:51<4:37:24, 71.13s/it]
55%|█████▍ | 280/513 [5:18:58<4:31:44, 69.97s/it]
{'loss': 0.4722, 'grad_norm': 0.22758300602436066, 'learning_rate': 7.678869822530362e-06, 'epoch': 1.64}
+
55%|█████▍ | 280/513 [5:18:58<4:31:44, 69.97s/it]
55%|█████▍ | 281/513 [5:20:12<4:35:18, 71.20s/it]
55%|█████▍ | 282/513 [5:21:22<4:33:14, 70.97s/it]
55%|█████▌ | 283/513 [5:22:33<4:31:39, 70.87s/it]
55%|█████▌ | 284/513 [5:23:36<4:21:30, 68.52s/it]
56%|█████▌ | 285/513 [5:24:35<4:09:22, 65.63s/it]
56%|█████▌ | 286/513 [5:25:51<4:20:45, 68.92s/it]
56%|█████▌ | 287/513 [5:27:07<4:27:15, 70.95s/it]
56%|█████▌ | 288/513 [5:28:24<4:33:03, 72.81s/it]
56%|█████▋ | 289/513 [5:29:39<4:33:49, 73.35s/it]
57%|█████▋ | 290/513 [5:30:51<4:31:33, 73.07s/it]
{'loss': 0.4649, 'grad_norm': 0.22680319845676422, 'learning_rate': 7.167890319069035e-06, 'epoch': 1.7}
+
57%|█████▋ | 290/513 [5:30:51<4:31:33, 73.07s/it]
57%|█████▋ | 291/513 [5:32:04<4:29:32, 72.85s/it]
57%|█████▋ | 292/513 [5:33:11<4:22:30, 71.27s/it]
57%|█████▋ | 293/513 [5:34:27<4:25:53, 72.52s/it]
57%|█████▋ | 294/513 [5:35:35<4:20:19, 71.32s/it]
58%|█████▊ | 295/513 [5:36:51<4:24:13, 72.72s/it]
58%|█████▊ | 296/513 [5:38:02<4:20:26, 72.01s/it]
58%|█████▊ | 297/513 [5:39:10<4:14:54, 70.81s/it]
58%|█████▊ | 298/513 [5:40:24<4:17:11, 71.77s/it]
58%|█████▊ | 299/513 [5:41:36<4:16:37, 71.95s/it]
58%|█████▊ | 300/513 [5:42:50<4:17:46, 72.61s/it]
{'loss': 0.4692, 'grad_norm': 0.2401188611984253, 'learning_rate': 6.658452556350092e-06, 'epoch': 1.76}
+
58%|█████▊ | 300/513 [5:42:50<4:17:46, 72.61s/it]
59%|█████▊ | 301/513 [5:44:01<4:14:16, 71.96s/it]
59%|█████▉ | 302/513 [5:45:14<4:14:21, 72.33s/it]
59%|█████▉ | 303/513 [5:46:22<4:09:20, 71.24s/it]
59%|█████▉ | 304/513 [5:47:33<4:06:58, 70.90s/it]
59%|█████▉ | 305/513 [5:48:46<4:08:04, 71.56s/it]
60%|█████▉ | 306/513 [5:49:56<4:05:37, 71.20s/it]
60%|█████▉ | 307/513 [5:51:07<4:04:30, 71.22s/it]
60%|██████ | 308/513 [5:52:18<4:02:48, 71.07s/it]
60%|██████ | 309/513 [5:53:27<3:59:22, 70.41s/it]
60%|██████ | 310/513 [5:54:40<4:01:06, 71.26s/it]
{'loss': 0.4653, 'grad_norm': 0.2211555689573288, 'learning_rate': 6.152921478846986e-06, 'epoch': 1.82}
+
60%|██████ | 310/513 [5:54:40<4:01:06, 71.26s/it]
61%|██████ | 311/513 [5:55:52<4:00:23, 71.40s/it]
61%|██████ | 312/513 [5:57:06<4:01:45, 72.17s/it]
61%|██████ | 313/513 [5:58:28<4:10:55, 75.28s/it]
61%|██████ | 314/513 [5:59:44<4:10:04, 75.40s/it]
61%|██████▏ | 315/513 [6:00:55<4:04:29, 74.09s/it]
62%|██████▏ | 316/513 [6:02:12<4:06:28, 75.07s/it]
62%|██████▏ | 317/513 [6:03:24<4:01:30, 73.93s/it]
62%|██████▏ | 318/513 [6:04:31<3:54:07, 72.04s/it]
62%|██████▏ | 319/513 [6:05:40<3:49:51, 71.09s/it]
62%|██████▏ | 320/513 [6:06:49<3:46:01, 70.27s/it]
{'loss': 0.4664, 'grad_norm': 0.24088308215141296, 'learning_rate': 5.65364389516988e-06, 'epoch': 1.88}
+
62%|██████▏ | 320/513 [6:06:49<3:46:01, 70.27s/it]
63%|██████▎ | 321/513 [6:07:58<3:44:27, 70.15s/it]
63%|██████▎ | 322/513 [6:09:07<3:42:09, 69.79s/it]
63%|██████▎ | 323/513 [6:10:18<3:41:37, 69.99s/it]
63%|██████▎ | 324/513 [6:11:27<3:40:09, 69.89s/it]
63%|██████▎ | 325/513 [6:12:39<3:41:00, 70.53s/it]
64%|██████▎ | 326/513 [6:13:49<3:39:18, 70.37s/it]
64%|██████▎ | 327/513 [6:15:01<3:39:05, 70.67s/it]
64%|██████▍ | 328/513 [6:16:11<3:37:20, 70.49s/it]
64%|██████▍ | 329/513 [6:17:22<3:36:42, 70.67s/it]
64%|██████▍ | 330/513 [6:18:35<3:37:31, 71.32s/it]
{'loss': 0.4621, 'grad_norm': 0.21008798480033875, 'learning_rate': 5.162937583561072e-06, 'epoch': 1.94}
+
64%|██████▍ | 330/513 [6:18:35<3:37:31, 71.32s/it]
65%|██████▍ | 331/513 [6:19:46<3:36:22, 71.33s/it]
65%|██████▍ | 332/513 [6:20:56<3:33:23, 70.74s/it]
65%|██████▍ | 333/513 [6:22:02<3:28:02, 69.35s/it]
65%|██████▌ | 334/513 [6:23:15<3:30:45, 70.65s/it]
65%|██████▌ | 335/513 [6:24:24<3:27:55, 70.09s/it]
65%|██████▌ | 336/513 [6:25:35<3:27:27, 70.33s/it]
66%|██████▌ | 337/513 [6:26:47<3:28:04, 70.94s/it]
66%|██████▌ | 338/513 [6:28:00<3:28:34, 71.51s/it]
66%|██████▌ | 339/513 [6:29:20<3:34:32, 73.98s/it]
66%|██████▋ | 340/513 [6:30:39<3:37:52, 75.57s/it]
{'loss': 0.4675, 'grad_norm': 0.2156449556350708, 'learning_rate': 4.683080532156986e-06, 'epoch': 1.99}
+
66%|██████▋ | 340/513 [6:30:39<3:37:52, 75.57s/it]
66%|██████▋ | 341/513 [6:31:58<3:39:38, 76.62s/it]
67%|██████▋ | 342/513 [6:32:01<2:35:06, 54.43s/it]
67%|██████▋ | 343/513 [6:33:14<2:50:14, 60.09s/it]
67%|██████▋ | 344/513 [6:34:23<2:56:50, 62.79s/it]
67%|██████▋ | 345/513 [6:35:36<3:03:46, 65.64s/it]
67%|██████▋ | 346/513 [6:36:48<3:08:42, 67.80s/it]
68%|██████▊ | 347/513 [6:37:59<3:09:30, 68.50s/it]
68%|██████▊ | 348/513 [6:39:07<3:08:06, 68.41s/it]
68%|██████▊ | 349/513 [6:40:22<3:12:57, 70.59s/it]
68%|██████▊ | 350/513 [6:41:31<3:10:29, 70.12s/it]
{'loss': 0.44, 'grad_norm': 0.2731837034225464, 'learning_rate': 4.216300363966383e-06, 'epoch': 2.05}
+
68%|██████▊ | 350/513 [6:41:31<3:10:29, 70.12s/it]
68%|██████▊ | 351/513 [6:42:42<3:09:28, 70.17s/it]
69%|██████▊ | 352/513 [6:43:55<3:10:41, 71.07s/it]
69%|██████▉ | 353/513 [6:45:04<3:08:03, 70.52s/it]
69%|██████▉ | 354/513 [6:46:20<3:11:00, 72.08s/it]
69%|██████▉ | 355/513 [6:48:06<3:36:42, 82.29s/it]
69%|██████▉ | 356/513 [6:49:51<3:53:13, 89.13s/it]
70%|██████▉ | 357/513 [6:51:36<4:04:24, 94.00s/it]
70%|██████▉ | 358/513 [6:53:13<4:04:47, 94.76s/it]
70%|██████▉ | 359/513 [6:54:52<4:06:36, 96.08s/it]
70%|███████ | 360/513 [6:56:27<4:04:18, 95.80s/it]
{'loss': 0.4447, 'grad_norm': 0.2207324057817459, 'learning_rate': 3.7647639956567304e-06, 'epoch': 2.11}
+
70%|███████ | 360/513 [6:56:27<4:04:18, 95.80s/it]
70%|███████ | 361/513 [6:58:07<4:05:31, 96.92s/it]
71%|███████ | 362/513 [6:59:46<4:05:43, 97.64s/it]
71%|███████ | 363/513 [7:01:26<4:05:39, 98.27s/it]
71%|███████ | 364/513 [7:03:06<4:05:30, 98.86s/it]
71%|███████ | 365/513 [7:04:49<4:06:41, 100.01s/it]
71%|███████▏ | 366/513 [7:06:27<4:03:31, 99.40s/it]
72%|███████▏ | 367/513 [7:08:02<3:58:36, 98.05s/it]
72%|███████▏ | 368/513 [7:09:39<3:56:06, 97.70s/it]
72%|███████▏ | 369/513 [7:11:17<3:55:01, 97.93s/it]
72%|███████▏ | 370/513 [7:12:51<3:50:49, 96.85s/it]
{'loss': 0.4391, 'grad_norm': 0.21577142179012299, 'learning_rate': 3.3305675781554655e-06, 'epoch': 2.16}
+
72%|███████▏ | 370/513 [7:12:51<3:50:49, 96.85s/it]
72%|███████▏ | 371/513 [7:14:26<3:47:24, 96.09s/it]
73%|███████▎ | 372/513 [7:16:03<3:46:28, 96.37s/it]
73%|███████▎ | 373/513 [7:17:40<3:45:18, 96.56s/it]
73%|███████▎ | 374/513 [7:19:19<3:45:54, 97.51s/it]
73%|███████▎ | 375/513 [7:21:03<3:48:29, 99.34s/it]
73%|███████▎ | 376/513 [7:22:47<3:50:01, 100.74s/it]
73%|███████▎ | 377/513 [7:24:27<3:47:46, 100.49s/it]
74%|███████▎ | 378/513 [7:26:02<3:42:32, 98.91s/it]
74%|███████▍ | 379/513 [7:27:39<3:39:13, 98.16s/it]
74%|███████▍ | 380/513 [7:29:12<3:34:33, 96.79s/it]
{'loss': 0.4419, 'grad_norm': 0.22381627559661865, 'learning_rate': 2.915726765764453e-06, 'epoch': 2.22}
+
74%|███████▍ | 380/513 [7:29:12<3:34:33, 96.79s/it]
74%|███████▍ | 381/513 [7:30:56<3:37:20, 98.79s/it]
74%|███████▍ | 382/513 [7:32:34<3:35:42, 98.80s/it]
75%|███████▍ | 383/513 [7:34:13<3:34:07, 98.83s/it]
75%|███████▍ | 384/513 [7:35:52<3:32:39, 98.91s/it]
75%|███████▌ | 385/513 [7:37:30<3:29:50, 98.37s/it]
75%|███████▌ | 386/513 [7:39:05<3:26:32, 97.58s/it]
75%|███████▌ | 387/513 [7:40:43<3:25:14, 97.73s/it]
76%|███████▌ | 388/513 [7:42:22<3:24:05, 97.96s/it]
76%|███████▌ | 389/513 [7:44:03<3:24:06, 98.77s/it]
76%|███████▌ | 390/513 [7:45:42<3:22:50, 98.95s/it]
{'loss': 0.4377, 'grad_norm': 0.2167045623064041, 'learning_rate': 2.522167358961046e-06, 'epoch': 2.28}
+
76%|███████▌ | 390/513 [7:45:42<3:22:50, 98.95s/it]
76%|███████▌ | 391/513 [7:47:18<3:19:30, 98.12s/it]
76%|███████▋ | 392/513 [7:48:59<3:19:23, 98.87s/it]
77%|███████▋ | 393/513 [7:50:44<3:21:25, 100.71s/it]
77%|███████▋ | 394/513 [7:52:26<3:20:24, 101.04s/it]
77%|███████▋ | 395/513 [7:54:08<3:19:38, 101.51s/it]
77%|███████▋ | 396/513 [7:55:48<3:16:44, 100.90s/it]
77%|███████▋ | 397/513 [7:57:21<3:10:55, 98.75s/it]
78%|███████▊ | 398/513 [7:58:55<3:06:12, 97.15s/it]
78%|███████▊ | 399/513 [8:00:31<3:04:07, 96.91s/it]
78%|███████▊ | 400/513 [8:02:07<3:01:47, 96.52s/it]
{'loss': 0.4387, 'grad_norm': 0.2239835262298584, 'learning_rate': 2.151716364324264e-06, 'epoch': 2.34}
+
78%|███████▊ | 400/513 [8:02:07<3:01:47, 96.52s/it][INFO|trainer.py:4309] 2026-03-05 23:04:07,534 >> Saving model checkpoint to /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-400
+[INFO|configuration_utils.py:491] 2026-03-05 23:04:07,536 >> Configuration saved in /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-400/config.json
+[INFO|configuration_utils.py:757] 2026-03-05 23:04:07,536 >> Configuration saved in /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-400/generation_config.json
+[INFO|modeling_utils.py:4189] 2026-03-05 23:04:11,928 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 2 checkpoint shards. You can find where each parameters has been saved in the index located at /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-400/model.safetensors.index.json.
+[INFO|tokenization_utils_base.py:2421] 2026-03-05 23:04:11,928 >> chat template saved in /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-400/chat_template.jinja
+[INFO|tokenization_utils_base.py:2590] 2026-03-05 23:04:11,930 >> tokenizer config file saved in /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-400/tokenizer_config.json
+[INFO|tokenization_utils_base.py:2599] 2026-03-05 23:04:11,930 >> Special tokens file saved in /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-400/special_tokens_map.json
+[2026-03-05 23:04:12,412] [INFO] [logging.py:107:log_dist] [Rank 0] [Torch] Checkpoint global_step400 is about to be saved!
+[2026-03-05 23:04:12,421] [INFO] [logging.py:107:log_dist] [Rank 0] Saving model checkpoint: /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-400/global_step400/mp_rank_00_model_states.pt
+[2026-03-05 23:04:12,421] [INFO] [torch_checkpoint_engine.py:21:save] [Torch] Saving /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-400/global_step400/mp_rank_00_model_states.pt...
+[2026-03-05 23:04:18,139] [INFO] [torch_checkpoint_engine.py:23:save] [Torch] Saved /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-400/global_step400/mp_rank_00_model_states.pt.
+[2026-03-05 23:04:18,156] [INFO] [torch_checkpoint_engine.py:21:save] [Torch] Saving /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-400/global_step400/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt...
+[2026-03-05 23:04:27,597] [INFO] [torch_checkpoint_engine.py:23:save] [Torch] Saved /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-400/global_step400/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt.
+[2026-03-05 23:04:27,598] [INFO] [engine.py:3701:_save_zero_checkpoint] zero checkpoint saved /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-400/global_step400/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
+[2026-03-05 23:04:27,598] [INFO] [torch_checkpoint_engine.py:33:commit] [Torch] Checkpoint global_step400 is ready now!
+
78%|███████▊ | 401/513 [8:04:13<3:17:04, 105.58s/it]
78%|███████▊ | 402/513 [8:05:50<3:10:30, 102.98s/it]
79%|███████▊ | 403/513 [8:07:31<3:07:34, 102.31s/it]
79%|███████▉ | 404/513 [8:09:05<3:01:13, 99.75s/it]
79%|███████▉ | 405/513 [8:10:39<2:56:36, 98.11s/it]
79%|███████▉ | 406/513 [8:12:16<2:54:03, 97.60s/it]
79%|███████▉ | 407/513 [8:13:54<2:52:58, 97.91s/it]
80%|███████▉ | 408/513 [8:15:33<2:52:00, 98.29s/it]
80%|███████▉ | 409/513 [8:17:12<2:50:37, 98.44s/it]
80%|███████▉ | 410/513 [8:18:53<2:50:01, 99.05s/it]
{'loss': 0.4426, 'grad_norm': 0.2177765816450119, 'learning_rate': 1.806093513088348e-06, 'epoch': 2.4}
+
80%|███████▉ | 410/513 [8:18:53<2:50:01, 99.05s/it]
80%|████████ | 411/513 [8:20:36<2:50:30, 100.29s/it]
80%|████████ | 412/513 [8:22:19<2:50:16, 101.16s/it]
81%|████████ | 413/513 [8:24:01<2:49:09, 101.50s/it]
81%|████████ | 414/513 [8:25:42<2:47:04, 101.25s/it]
81%|████████ | 415/513 [8:27:22<2:44:35, 100.77s/it]
81%|████████ | 416/513 [8:28:58<2:40:49, 99.47s/it]
81%|████████▏ | 417/513 [8:30:39<2:39:41, 99.81s/it]
81%|████████▏ | 418/513 [8:32:15<2:36:23, 98.78s/it]
82%|████████▏ | 419/513 [8:33:58<2:36:53, 100.14s/it]
82%|████████▏ | 420/513 [8:35:37<2:34:34, 99.72s/it]
{'loss': 0.442, 'grad_norm': 0.21108600497245789, 'learning_rate': 1.486903277696733e-06, 'epoch': 2.46}
+
82%|████████▏ | 420/513 [8:35:37<2:34:34, 99.72s/it]
82%|████████▏ | 421/513 [8:37:15<2:32:06, 99.20s/it]
82%|████████▏ | 422/513 [8:38:53<2:29:38, 98.67s/it]
82%|████████▏ | 423/513 [8:40:29<2:26:47, 97.86s/it]
83%|████████▎ | 424/513 [8:42:08<2:25:41, 98.22s/it]
83%|████████▎ | 425/513 [8:43:43<2:22:43, 97.31s/it]
83%|████████▎ | 426/513 [8:45:17<2:19:56, 96.51s/it]
83%|████████▎ | 427/513 [8:46:56<2:19:22, 97.23s/it]
83%|████████▎ | 428/513 [8:48:38<2:19:39, 98.58s/it]
84%|████████▎ | 429/513 [8:50:21<2:19:38, 99.75s/it]
84%|████████▍ | 430/513 [8:51:59<2:17:23, 99.32s/it]
{'loss': 0.4449, 'grad_norm': 0.20833276212215424, 'learning_rate': 1.1956274234177322e-06, 'epoch': 2.52}
+
84%|████████▍ | 430/513 [8:51:59<2:17:23, 99.32s/it]
84%|████████▍ | 431/513 [8:53:40<2:16:20, 99.76s/it]
84%|████████▍ | 432/513 [8:55:21<2:15:23, 100.30s/it]
84%|████████▍ | 433/513 [8:57:04<2:14:44, 101.05s/it]
85%|████████▍ | 434/513 [8:58:41<2:11:28, 99.85s/it]
85%|████████▍ | 435/513 [9:00:21<2:09:56, 99.96s/it]
85%|████████▍ | 436/513 [9:01:55<2:05:51, 98.08s/it]
85%|████████▌ | 437/513 [9:03:30<2:02:56, 97.07s/it]
85%|████████▌ | 438/513 [9:05:10<2:02:39, 98.13s/it]
86%|████████▌ | 439/513 [9:06:52<2:02:14, 99.12s/it]
86%|████████▌ | 440/513 [9:08:26<1:59:00, 97.81s/it]
{'loss': 0.4391, 'grad_norm': 0.20782434940338135, 'learning_rate': 9.336181295993204e-07, 'epoch': 2.58}
+
86%|████████▌ | 440/513 [9:08:26<1:59:00, 97.81s/it]
86%|████████▌ | 441/513 [9:10:05<1:57:27, 97.89s/it]
86%|████████▌ | 442/513 [9:11:42<1:55:36, 97.70s/it]
86%|████████▋ | 443/513 [9:13:22<1:54:48, 98.41s/it]
87%|████████▋ | 444/513 [9:14:56<1:51:50, 97.25s/it]
87%|████████▋ | 445/513 [9:16:34<1:50:23, 97.40s/it]
87%|████████▋ | 446/513 [9:18:11<1:48:42, 97.35s/it]
87%|████████▋ | 447/513 [9:19:53<1:48:35, 98.72s/it]
87%|████████▋ | 448/513 [9:21:30<1:46:20, 98.17s/it]
88%|████████▊ | 449/513 [9:23:11<1:45:34, 98.97s/it]
88%|████████▊ | 450/513 [9:24:54<1:45:01, 100.03s/it]
{'loss': 0.4374, 'grad_norm': 0.20101866126060486, 'learning_rate': 7.02091712495907e-07, 'epoch': 2.64}
+
88%|████████▊ | 450/513 [9:24:54<1:45:01, 100.03s/it]
88%|████████▊ | 451/513 [9:26:33<1:43:13, 99.90s/it]
88%|████████▊ | 452/513 [9:28:08<1:39:56, 98.30s/it]
88%|████████▊ | 453/513 [9:29:46<1:38:21, 98.36s/it]
88%|████████▊ | 454/513 [9:31:28<1:37:36, 99.27s/it]
89%|████████▊ | 455/513 [9:33:02<1:34:30, 97.76s/it]
89%|████████▉ | 456/513 [9:34:28<1:29:28, 94.19s/it]
89%|████████▉ | 457/513 [9:36:01<1:27:46, 94.04s/it]
89%|████████▉ | 458/513 [9:37:43<1:28:13, 96.24s/it]
89%|████████▉ | 459/513 [9:39:25<1:28:08, 97.93s/it]
90%|████████▉ | 460/513 [9:41:06<1:27:17, 98.83s/it]
{'loss': 0.4403, 'grad_norm': 0.1978382021188736, 'learning_rate': 5.021229788074589e-07, 'epoch': 2.69}
+
90%|████████▉ | 460/513 [9:41:06<1:27:17, 98.83s/it]
90%|████████▉ | 461/513 [9:42:44<1:25:31, 98.69s/it]
90%|█████████ | 462/513 [9:44:22<1:23:49, 98.62s/it]
90%|█████████ | 463/513 [9:45:58<1:21:23, 97.67s/it]
90%|█████████ | 464/513 [9:47:35<1:19:37, 97.49s/it]
91%|█████████ | 465/513 [9:49:13<1:18:01, 97.53s/it]
91%|█████████ | 466/513 [9:50:51<1:16:38, 97.85s/it]
91%|█████████ | 467/513 [9:52:31<1:15:31, 98.52s/it]
91%|█████████ | 468/513 [9:54:11<1:14:09, 98.88s/it]
91%|█████████▏| 469/513 [9:55:46<1:11:35, 97.63s/it]
92%|█████████▏| 470/513 [9:57:23<1:10:00, 97.68s/it]
{'loss': 0.44, 'grad_norm': 0.20072239637374878, 'learning_rate': 3.3464023614327683e-07, 'epoch': 2.75}
+
92%|█████████▏| 470/513 [9:57:23<1:10:00, 97.68s/it]
92%|█████████▏| 471/513 [9:59:03<1:08:42, 98.16s/it]
92%|█████████▏| 472/513 [10:00:40<1:06:58, 98.01s/it]
92%|█████████▏| 473/513 [10:02:19<1:05:29, 98.23s/it]
92%|█████████▏| 474/513 [10:03:57<1:03:47, 98.15s/it]
93%|█████████▎| 475/513 [10:05:34<1:01:55, 97.78s/it]
93%|█████████▎| 476/513 [10:07:12<1:00:19, 97.82s/it]
93%|█████████▎| 477/513 [10:08:55<59:42, 99.51s/it]
93%|█████████▎| 478/513 [10:10:33<57:40, 98.88s/it]
93%|█████████▎| 479/513 [10:12:12<56:09, 99.10s/it]
94%|█████████▎| 480/513 [10:13:50<54:13, 98.58s/it]
{'loss': 0.4358, 'grad_norm': 0.2036609798669815, 'learning_rate': 2.0042098357321209e-07, 'epoch': 2.81}
+
94%|█████████▎| 480/513 [10:13:50<54:13, 98.58s/it]
94%|█████████▍| 481/513 [10:15:30<52:54, 99.20s/it]
94%|█████████▍| 482/513 [10:17:07<50:52, 98.48s/it]
94%|█████████▍| 483/513 [10:18:44<49:01, 98.05s/it]
94%|█████████▍| 484/513 [10:20:21<47:15, 97.79s/it]
95%|█████████▍| 485/513 [10:22:03<46:08, 98.87s/it]
95%|█████████▍| 486/513 [10:23:44<44:46, 99.49s/it]
95%|█████████▍| 487/513 [10:25:24<43:16, 99.85s/it]
95%|█████████▌| 488/513 [10:27:05<41:39, 99.99s/it]
95%|█████████▌| 489/513 [10:28:48<40:21, 100.90s/it]
96%|█████████▌| 490/513 [10:30:28<38:36, 100.70s/it]
{'loss': 0.4384, 'grad_norm': 0.20166757702827454, 'learning_rate': 1.0008830227189431e-07, 'epoch': 2.87}
+
96%|█████████▌| 490/513 [10:30:28<38:36, 100.70s/it]
96%|█████████▌| 491/513 [10:32:11<37:09, 101.36s/it]
96%|█████████▌| 492/513 [10:33:51<35:18, 100.89s/it]
96%|█████████▌| 493/513 [10:35:28<33:16, 99.81s/it]
96%|█████████▋| 494/513 [10:37:07<31:32, 99.60s/it]
96%|█████████▋| 495/513 [10:38:47<29:53, 99.66s/it]
97%|█████████▋| 496/513 [10:40:29<28:26, 100.40s/it]
97%|█████████▋| 497/513 [10:42:11<26:55, 100.98s/it]
97%|█████████▋| 498/513 [10:43:54<25:21, 101.40s/it]
97%|█████████▋| 499/513 [10:45:33<23:31, 100.84s/it]
97%|█████████▋| 500/513 [10:47:13<21:45, 100.40s/it]
{'loss': 0.4438, 'grad_norm': 0.20334972441196442, 'learning_rate': 3.410796301156205e-08, 'epoch': 2.93}
+
97%|█████████▋| 500/513 [10:47:13<21:45, 100.40s/it]
98%|█████████▊| 501/513 [10:48:51<19:57, 99.77s/it]
98%|█████████▊| 502/513 [10:50:27<18:05, 98.71s/it]
98%|█████████▊| 503/513 [10:51:57<16:01, 96.15s/it]
98%|█████████▊| 504/513 [10:53:36<14:32, 96.93s/it]
98%|█████████▊| 505/513 [10:55:18<13:08, 98.51s/it]
99%|█████████▊| 506/513 [10:56:58<11:33, 99.01s/it]
99%|█████████▉| 507/513 [10:58:40<09:58, 99.72s/it]
99%|█████████▉| 508/513 [11:00:08<08:01, 96.33s/it]
99%|█████████▉| 509/513 [11:01:53<06:35, 98.91s/it]
99%|█████████▉| 510/513 [11:03:34<04:58, 99.46s/it]
{'loss': 0.4378, 'grad_norm': 0.20352092385292053, 'learning_rate': 2.7862639312792317e-09, 'epoch': 2.99}
+
99%|█████████▉| 510/513 [11:03:34<04:58, 99.46s/it]
100%|█████████▉| 511/513 [11:05:14<03:19, 99.72s/it]
100%|█████████▉| 512/513 [11:06:53<01:39, 99.55s/it]
100%|██████████| 513/513 [11:06:57<00:00, 70.84s/it][INFO|trainer.py:4309] 2026-03-06 02:08:58,011 >> Saving model checkpoint to /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-513
+[INFO|configuration_utils.py:491] 2026-03-06 02:08:58,013 >> Configuration saved in /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-513/config.json
+[INFO|configuration_utils.py:757] 2026-03-06 02:08:58,014 >> Configuration saved in /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-513/generation_config.json
+[INFO|modeling_utils.py:4189] 2026-03-06 02:09:02,393 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 2 checkpoint shards. You can find where each parameters has been saved in the index located at /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-513/model.safetensors.index.json.
+[INFO|tokenization_utils_base.py:2421] 2026-03-06 02:09:02,393 >> chat template saved in /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-513/chat_template.jinja
+[INFO|tokenization_utils_base.py:2590] 2026-03-06 02:09:02,395 >> tokenizer config file saved in /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-513/tokenizer_config.json
+[INFO|tokenization_utils_base.py:2599] 2026-03-06 02:09:02,395 >> Special tokens file saved in /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-513/special_tokens_map.json
+[2026-03-06 02:09:02,887] [INFO] [logging.py:107:log_dist] [Rank 0] [Torch] Checkpoint global_step513 is about to be saved!
+[2026-03-06 02:09:02,900] [INFO] [logging.py:107:log_dist] [Rank 0] Saving model checkpoint: /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-513/global_step513/mp_rank_00_model_states.pt
+[2026-03-06 02:09:02,900] [INFO] [torch_checkpoint_engine.py:21:save] [Torch] Saving /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-513/global_step513/mp_rank_00_model_states.pt...
+[2026-03-06 02:09:08,710] [INFO] [torch_checkpoint_engine.py:23:save] [Torch] Saved /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-513/global_step513/mp_rank_00_model_states.pt.
+[2026-03-06 02:09:08,719] [INFO] [torch_checkpoint_engine.py:21:save] [Torch] Saving /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-513/global_step513/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt...
+[2026-03-06 02:09:17,788] [INFO] [torch_checkpoint_engine.py:23:save] [Torch] Saved /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-513/global_step513/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt.
+[2026-03-06 02:09:17,788] [INFO] [engine.py:3701:_save_zero_checkpoint] zero checkpoint saved /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/checkpoint-513/global_step513/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
+[2026-03-06 02:09:17,788] [INFO] [torch_checkpoint_engine.py:33:commit] [Torch] Checkpoint global_step513 is ready now!
+[INFO|trainer.py:2810] 2026-03-06 02:09:17,955 >>
+
+Training completed. Do not forget to share your model on huggingface.co/models =)
+
+
+
{'train_runtime': 40041.2675, 'train_samples_per_second': 3.261, 'train_steps_per_second': 0.013, 'train_loss': 0.49363853406255476, 'epoch': 3.0}
+
100%|██████████| 513/513 [11:07:21<00:00, 70.84s/it]
100%|██████████| 513/513 [11:07:22<00:00, 78.06s/it]
+[INFO|trainer.py:4309] 2026-03-06 02:09:21,898 >> Saving model checkpoint to /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3
+[INFO|configuration_utils.py:491] 2026-03-06 02:09:21,900 >> Configuration saved in /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/config.json
+[INFO|configuration_utils.py:757] 2026-03-06 02:09:21,900 >> Configuration saved in /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/generation_config.json
+[INFO|modeling_utils.py:4189] 2026-03-06 02:09:26,695 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 2 checkpoint shards. You can find where each parameters has been saved in the index located at /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/model.safetensors.index.json.
+[INFO|tokenization_utils_base.py:2421] 2026-03-06 02:09:26,695 >> chat template saved in /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/chat_template.jinja
+[INFO|tokenization_utils_base.py:2590] 2026-03-06 02:09:26,697 >> tokenizer config file saved in /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/tokenizer_config.json
+[INFO|tokenization_utils_base.py:2599] 2026-03-06 02:09:26,697 >> Special tokens file saved in /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/special_tokens_map.json
+***** train metrics *****
+ epoch = 3.0
+ total_flos = 11157838704GF
+ train_loss = 0.4936
+ train_runtime = 11:07:21.26
+ train_samples_per_second = 3.261
+ train_steps_per_second = 0.013
+Figure saved at: /local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/training_loss.png
+[WARNING|2026-03-06 02:09:27] llamafactory.extras.ploting:148 >> No metric eval_loss to plot.
+[WARNING|2026-03-06 02:09:27] llamafactory.extras.ploting:148 >> No metric eval_accuracy to plot.
+[INFO|modelcard.py:456] 2026-03-06 02:09:27,263 >> Dropping the following result as it does not have all the necessary fields:
+{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/meta.yaml b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/meta.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c2e35c74b79744fd7ec39024868f294cce1123f8
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/meta.yaml
@@ -0,0 +1,14 @@
+artifact_uri: file:///local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/artifacts
+end_time: 1772791757971
+entry_point_name: ''
+experiment_id: '356092632336622637'
+lifecycle_stage: active
+run_id: c370ae36b3594e5b8e4483476b3515b7
+run_name: llama3b_think_sft_nopack_lr1.5e5_ep3
+source_name: ''
+source_type: 4
+source_version: ''
+start_time: 1772751716706
+status: 3
+tags: []
+user_id: salman
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/epoch b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/epoch
new file mode 100644
index 0000000000000000000000000000000000000000..97af6927422df272d54f9427901853e6fae9eacf
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/epoch
@@ -0,0 +1,52 @@
+1772752246047 0.05881271825032163 10
+1772752764661 0.11762543650064326 20
+1772753281010 0.1764381547509649 30
+1772753804852 0.23525087300128653 40
+1772754322796 0.29406359125160814 50
+1772754836358 0.3528763095019298 60
+1772755352360 0.4116890277522514 70
+1772755869425 0.47050174600257305 80
+1772756578289 0.5293144642528946 90
+1772757484272 0.5881271825032163 100
+1772758410313 0.6469399007535379 110
+1772759124014 0.7057526190038595 120
+1772759815988 0.7645653372541812 130
+1772760543838 0.8233780555045028 140
+1772761248573 0.8821907737548245 150
+1772761990136 0.9410034920051461 160
+1772762702603 0.9998162102554677 170
+1772763342537 1.0529314464252895 180
+1772764073344 1.1117441646756112 190
+1772764778282 1.1705568829259327 200
+1772765536908 1.2293696011762543 210
+1772766293667 1.288182319426576 220
+1772767040309 1.3469950376768978 230
+1772767807864 1.4058077559272193 240
+1772768557956 1.4646204741775408 250
+1772769332250 1.5234331924278626 260
+1772770136860 1.5822459106781843 270
+1772770855053 1.6410586289285058 280
+1772771568547 1.6998713471788274 290
+1772772287333 1.758684065429149 300
+1772772997323 1.8174967836794707 310
+1772773725742 1.8763095019297924 320
+1772774432045 1.935122220180114 330
+1772775156419 1.9939349384304355 340
+1772775808686 2.0470501746002574 350
+1772776704525 2.105862892850579 360
+1772777688582 2.1646756111009005 370
+1772778669427 2.2234883293512224 380
+1772779659140 2.282301047601544 390
+1772780643974 2.3411137658518655 400
+1772781649888 2.399926484102187 410
+1772782654359 2.4587392023525085 420
+1772783636091 2.5175519206028305 430
+1772784623680 2.576364638853152 440
+1772785610745 2.6351773571034736 450
+1772786582766 2.6939900753537955 460
+1772787560653 2.7528027936041166 470
+1772788546976 2.8116155118544386 480
+1772789545218 2.87042823010476 490
+1772790549864 2.9292409483550816 500
+1772791531194 2.9880536666054036 510
+1772791757957 3.0 513
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/grad_norm b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/grad_norm
new file mode 100644
index 0000000000000000000000000000000000000000..df72ee82e7695b2415f4cf800da3ff756244a06f
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/grad_norm
@@ -0,0 +1,51 @@
+1772752246047 1.3511555194854736 10
+1772752764661 0.7383383512496948 20
+1772753281010 0.47219017148017883 30
+1772753804852 0.30038249492645264 40
+1772754322796 0.2751595377922058 50
+1772754836358 0.26936954259872437 60
+1772755352360 0.25376981496810913 70
+1772755869425 0.2703434228897095 80
+1772756578289 0.3386951684951782 90
+1772757484272 0.30952027440071106 100
+1772758410313 0.2706937789916992 110
+1772759124014 0.286222368478775 120
+1772759815988 0.2553636431694031 130
+1772760543838 0.2975357472896576 140
+1772761248573 0.24958086013793945 150
+1772761990136 0.302441269159317 160
+1772762702603 0.24974007904529572 170
+1772763342537 0.35062289237976074 180
+1772764073344 0.28535276651382446 190
+1772764778282 0.2474713921546936 200
+1772765536908 0.23004528880119324 210
+1772766293667 0.23046620190143585 220
+1772767040309 0.243893101811409 230
+1772767807864 0.2657492160797119 240
+1772768557956 0.24003422260284424 250
+1772769332250 0.238833948969841 260
+1772770136860 0.237404927611351 270
+1772770855053 0.22758300602436066 280
+1772771568547 0.22680319845676422 290
+1772772287333 0.2401188611984253 300
+1772772997323 0.2211555689573288 310
+1772773725742 0.24088308215141296 320
+1772774432045 0.21008798480033875 330
+1772775156419 0.2156449556350708 340
+1772775808686 0.2731837034225464 350
+1772776704525 0.2207324057817459 360
+1772777688582 0.21577142179012299 370
+1772778669427 0.22381627559661865 380
+1772779659140 0.2167045623064041 390
+1772780643974 0.2239835262298584 400
+1772781649888 0.2177765816450119 410
+1772782654359 0.21108600497245789 420
+1772783636091 0.20833276212215424 430
+1772784623680 0.20782434940338135 440
+1772785610745 0.20101866126060486 450
+1772786582766 0.1978382021188736 460
+1772787560653 0.20072239637374878 470
+1772788546976 0.2036609798669815 480
+1772789545218 0.20166757702827454 490
+1772790549864 0.20334972441196442 500
+1772791531194 0.20352092385292053 510
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/learning_rate b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/learning_rate
new file mode 100644
index 0000000000000000000000000000000000000000..378f0b61caa000a20eff691f4b0d44b9a032ccd6
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/learning_rate
@@ -0,0 +1,51 @@
+1772752246047 2.596153846153846e-06 10
+1772752764661 5.480769230769231e-06 20
+1772753281010 8.365384615384616e-06 30
+1772753804852 1.125e-05 40
+1772754322796 1.4134615384615384e-05 50
+1772754836358 1.4991468156423456e-05 60
+1772755352360 1.494972625749433e-05 70
+1772755869425 1.4873400764197756e-05 80
+1772756578289 1.4762845999606666e-05 90
+1772757484272 1.4618575188100301e-05 100
+1772758410313 1.4441258072841264e-05 110
+1772759124014 1.4231717806651086e-05 120
+1772759815988 1.3990927130717711e-05 130
+1772760543838 1.3720003858874311e-05 140
+1772761248573 1.3420205688412603e-05 150
+1772761990136 1.3092924361520291e-05 160
+1772762702603 1.2739679204446694e-05 170
+1772763342537 1.236211007438955e-05 180
+1772764073344 1.1961969746845325e-05 190
+1772764778282 1.1541115778763038e-05 200
+1772765536908 1.1101501885274894e-05 210
+1772766293667 1.0645168870035313e-05 220
+1772767040309 1.0174235151272025e-05 230
+1772767807864 9.690886927529886e-06 240
+1772768557956 9.197368028760536e-06 250
+1772769332250 8.695969499871911e-06 260
+1772770136860 8.18901896509343e-06 270
+1772770855053 7.678869822530362e-06 280
+1772771568547 7.167890319069035e-06 290
+1772772287333 6.658452556350092e-06 300
+1772772997323 6.152921478846986e-06 310
+1772773725742 5.65364389516988e-06 320
+1772774432045 5.162937583561072e-06 330
+1772775156419 4.683080532156986e-06 340
+1772775808686 4.216300363966383e-06 350
+1772776704525 3.7647639956567304e-06 360
+1772777688582 3.3305675781554655e-06 370
+1772778669427 2.915726765764453e-06 380
+1772779659140 2.522167358961046e-06 390
+1772780643974 2.151716364324264e-06 400
+1772781649888 1.806093513088348e-06 410
+1772782654359 1.486903277696733e-06 420
+1772783636091 1.1956274234177322e-06 430
+1772784623680 9.336181295993204e-07 440
+1772785610745 7.02091712495907e-07 450
+1772786582766 5.021229788074589e-07 460
+1772787560653 3.3464023614327683e-07 470
+1772788546976 2.0042098357321209e-07 480
+1772789545218 1.0008830227189431e-07 490
+1772790549864 3.410796301156205e-08 500
+1772791531194 2.7862639312792317e-09 510
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/loss b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/loss
new file mode 100644
index 0000000000000000000000000000000000000000..71f1daff0141a49f6122575354fb5e356362a096
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/loss
@@ -0,0 +1,51 @@
+1772752246047 0.8486 10
+1772752764661 0.7412 20
+1772753281010 0.6532 30
+1772753804852 0.6102 40
+1772754322796 0.5784 50
+1772754836358 0.5641 60
+1772755352360 0.5469 70
+1772755869425 0.5424 80
+1772756578289 0.5293 90
+1772757484272 0.5266 100
+1772758410313 0.522 110
+1772759124014 0.5222 120
+1772759815988 0.5106 130
+1772760543838 0.5114 140
+1772761248573 0.5099 150
+1772761990136 0.5086 160
+1772762702603 0.5061 170
+1772763342537 0.4746 180
+1772764073344 0.478 190
+1772764778282 0.4755 200
+1772765536908 0.4765 210
+1772766293667 0.4706 220
+1772767040309 0.4681 230
+1772767807864 0.4715 240
+1772768557956 0.4711 250
+1772769332250 0.4685 260
+1772770136860 0.4688 270
+1772770855053 0.4722 280
+1772771568547 0.4649 290
+1772772287333 0.4692 300
+1772772997323 0.4653 310
+1772773725742 0.4664 320
+1772774432045 0.4621 330
+1772775156419 0.4675 340
+1772775808686 0.44 350
+1772776704525 0.4447 360
+1772777688582 0.4391 370
+1772778669427 0.4419 380
+1772779659140 0.4377 390
+1772780643974 0.4387 400
+1772781649888 0.4426 410
+1772782654359 0.442 420
+1772783636091 0.4449 430
+1772784623680 0.4391 440
+1772785610745 0.4374 450
+1772786582766 0.4403 460
+1772787560653 0.44 470
+1772788546976 0.4358 480
+1772789545218 0.4384 490
+1772790549864 0.4438 500
+1772791531194 0.4378 510
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/total_flos b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/total_flos
new file mode 100644
index 0000000000000000000000000000000000000000..9a802873e27ec4d9646414f92508dca5f1d7f0e3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/total_flos
@@ -0,0 +1 @@
+1772791757957 1.1980638081930756e+19 513
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/train_loss b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/train_loss
new file mode 100644
index 0000000000000000000000000000000000000000..3bbb73714175b21eef7cfbdb6eae39fbfdd30604
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/train_loss
@@ -0,0 +1 @@
+1772791757957 0.49363853406255476 513
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/train_runtime b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/train_runtime
new file mode 100644
index 0000000000000000000000000000000000000000..ddd75666b9e8558533c6490b184e7157dda0bd77
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/train_runtime
@@ -0,0 +1 @@
+1772791757957 40041.2675 513
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/train_samples_per_second b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/train_samples_per_second
new file mode 100644
index 0000000000000000000000000000000000000000..dbc59d8a207474395bbdf872cc548d3c14a08285
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/train_samples_per_second
@@ -0,0 +1 @@
+1772791757957 3.261 513
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/train_steps_per_second b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/train_steps_per_second
new file mode 100644
index 0000000000000000000000000000000000000000..d9172f42961505aefb9b26fd11e2fec900f727e5
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/metrics/train_steps_per_second
@@ -0,0 +1 @@
+1772791757957 0.013 513
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/_name_or_path b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/_name_or_path
new file mode 100644
index 0000000000000000000000000000000000000000..d8e3ea0bc188cdd969fb8d4e7cf8f2f2c4523f88
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/_name_or_path
@@ -0,0 +1 @@
+/local2/salman/model/pretrain_model/v2_4_gpu_llama_3b_nemo_52b
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/accelerator_config b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/accelerator_config
new file mode 100644
index 0000000000000000000000000000000000000000..b1e7502bd7285c87efc171b09bce733551c8d179
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/accelerator_config
@@ -0,0 +1 @@
+{'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/adafactor b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/adafactor
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/adafactor
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/adam_beta1 b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/adam_beta1
new file mode 100644
index 0000000000000000000000000000000000000000..9a7d84f2a96bb56f53bfc3a42ac10d06459e55c3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/adam_beta1
@@ -0,0 +1 @@
+0.9
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/adam_beta2 b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/adam_beta2
new file mode 100644
index 0000000000000000000000000000000000000000..79cbfdf0652c46b13ed8946e54aa94ff7bdd44ab
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/adam_beta2
@@ -0,0 +1 @@
+0.999
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/adam_epsilon b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/adam_epsilon
new file mode 100644
index 0000000000000000000000000000000000000000..851199be9c9a0b8c721d7f305f5af1759637102d
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/adam_epsilon
@@ -0,0 +1 @@
+1e-08
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/add_cross_attention b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/add_cross_attention
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/add_cross_attention
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/architectures b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/architectures
new file mode 100644
index 0000000000000000000000000000000000000000..fe06827e8a90199228b9e7009f79062405f3d52f
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/architectures
@@ -0,0 +1 @@
+['LlamaForCausalLM']
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/attention_bias b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/attention_bias
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/attention_bias
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/attention_dropout b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/attention_dropout
new file mode 100644
index 0000000000000000000000000000000000000000..171538eb0b00f4eddffa17929796de55b838f34b
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/attention_dropout
@@ -0,0 +1 @@
+0.0
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/auto_find_batch_size b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/auto_find_batch_size
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/auto_find_batch_size
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/average_tokens_across_devices b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/average_tokens_across_devices
new file mode 100644
index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/average_tokens_across_devices
@@ -0,0 +1 @@
+True
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/bad_words_ids b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/bad_words_ids
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/bad_words_ids
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/batch_eval_metrics b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/batch_eval_metrics
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/batch_eval_metrics
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/begin_suppress_tokens b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/begin_suppress_tokens
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/begin_suppress_tokens
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/bf16 b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/bf16
new file mode 100644
index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/bf16
@@ -0,0 +1 @@
+True
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/bf16_full_eval b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/bf16_full_eval
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/bf16_full_eval
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/bos_token_id b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/bos_token_id
new file mode 100644
index 0000000000000000000000000000000000000000..5499007cbac38bc897e3c2766b82a647ac28c735
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/bos_token_id
@@ -0,0 +1 @@
+128000
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/chunk_size_feed_forward b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/chunk_size_feed_forward
new file mode 100644
index 0000000000000000000000000000000000000000..c227083464fb9af8955c90d2924774ee50abb547
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/chunk_size_feed_forward
@@ -0,0 +1 @@
+0
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/cross_attention_hidden_size b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/cross_attention_hidden_size
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/cross_attention_hidden_size
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/data_seed b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/data_seed
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/data_seed
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/dataloader_drop_last b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/dataloader_drop_last
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/dataloader_drop_last
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/dataloader_num_workers b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/dataloader_num_workers
new file mode 100644
index 0000000000000000000000000000000000000000..bf0d87ab1b2b0ec1a11a3973d2845b42413d9767
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/dataloader_num_workers
@@ -0,0 +1 @@
+4
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/dataloader_persistent_workers b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/dataloader_persistent_workers
new file mode 100644
index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/dataloader_persistent_workers
@@ -0,0 +1 @@
+True
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/dataloader_pin_memory b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/dataloader_pin_memory
new file mode 100644
index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/dataloader_pin_memory
@@ -0,0 +1 @@
+True
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/dataloader_prefetch_factor b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/dataloader_prefetch_factor
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/dataloader_prefetch_factor
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ddp_backend b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ddp_backend
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ddp_backend
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ddp_broadcast_buffers b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ddp_broadcast_buffers
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ddp_broadcast_buffers
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ddp_bucket_cap_mb b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ddp_bucket_cap_mb
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ddp_bucket_cap_mb
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ddp_find_unused_parameters b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ddp_find_unused_parameters
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ddp_find_unused_parameters
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ddp_timeout b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ddp_timeout
new file mode 100644
index 0000000000000000000000000000000000000000..ea953a778190bc7131bea1a3f79bd88ff34fc5d1
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ddp_timeout
@@ -0,0 +1 @@
+180000000
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/debug b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/debug
new file mode 100644
index 0000000000000000000000000000000000000000..0637a088a01e8ddab3bf3fa98dbe804cbde1a0dc
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/debug
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/decoder_start_token_id b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/decoder_start_token_id
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/decoder_start_token_id
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/deepspeed b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/deepspeed
new file mode 100644
index 0000000000000000000000000000000000000000..c76c24a25f92f79e456f4367b44d5089cf94aac6
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/deepspeed
@@ -0,0 +1 @@
+examples/deepspeed/ds_z2_config.json
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/disable_tqdm b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/disable_tqdm
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/disable_tqdm
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/diversity_penalty b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/diversity_penalty
new file mode 100644
index 0000000000000000000000000000000000000000..171538eb0b00f4eddffa17929796de55b838f34b
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/diversity_penalty
@@ -0,0 +1 @@
+0.0
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/do_eval b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/do_eval
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/do_eval
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/do_predict b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/do_predict
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/do_predict
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/do_sample b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/do_sample
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/do_sample
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/do_train b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/do_train
new file mode 100644
index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/do_train
@@ -0,0 +1 @@
+True
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/dtype b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/dtype
new file mode 100644
index 0000000000000000000000000000000000000000..8481ec0098496c454d11e66437510c620f01aa78
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/dtype
@@ -0,0 +1 @@
+bfloat16
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/early_stopping b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/early_stopping
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/early_stopping
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/encoder_no_repeat_ngram_size b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/encoder_no_repeat_ngram_size
new file mode 100644
index 0000000000000000000000000000000000000000..c227083464fb9af8955c90d2924774ee50abb547
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/encoder_no_repeat_ngram_size
@@ -0,0 +1 @@
+0
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/eos_token_id b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/eos_token_id
new file mode 100644
index 0000000000000000000000000000000000000000..1fc3affa551d3f978dc4cd9d6dd031cb9031bcd5
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/eos_token_id
@@ -0,0 +1 @@
+128009
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/eval_accumulation_steps b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/eval_accumulation_steps
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/eval_accumulation_steps
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/eval_delay b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/eval_delay
new file mode 100644
index 0000000000000000000000000000000000000000..c227083464fb9af8955c90d2924774ee50abb547
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/eval_delay
@@ -0,0 +1 @@
+0
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/eval_do_concat_batches b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/eval_do_concat_batches
new file mode 100644
index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/eval_do_concat_batches
@@ -0,0 +1 @@
+True
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/eval_on_start b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/eval_on_start
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/eval_on_start
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/eval_steps b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/eval_steps
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/eval_steps
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/eval_strategy b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/eval_strategy
new file mode 100644
index 0000000000000000000000000000000000000000..54299a48fb3ae76c848b3acc12248574d05d81b8
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/eval_strategy
@@ -0,0 +1 @@
+no
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/eval_use_gather_object b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/eval_use_gather_object
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/eval_use_gather_object
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/exponential_decay_length_penalty b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/exponential_decay_length_penalty
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/exponential_decay_length_penalty
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/finetuning_task b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/finetuning_task
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/finetuning_task
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/forced_bos_token_id b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/forced_bos_token_id
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/forced_bos_token_id
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/forced_eos_token_id b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/forced_eos_token_id
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/forced_eos_token_id
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/fp16 b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/fp16
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/fp16
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/fp16_backend b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/fp16_backend
new file mode 100644
index 0000000000000000000000000000000000000000..4d18c3e59ecf5c28b46b06ce26f2406b2d449870
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/fp16_backend
@@ -0,0 +1 @@
+auto
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/fp16_full_eval b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/fp16_full_eval
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/fp16_full_eval
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/fp16_opt_level b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/fp16_opt_level
new file mode 100644
index 0000000000000000000000000000000000000000..a9ada426ac8819467c6dc392dcbea40183a3e16e
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/fp16_opt_level
@@ -0,0 +1 @@
+O1
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/fsdp b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/fsdp
new file mode 100644
index 0000000000000000000000000000000000000000..0637a088a01e8ddab3bf3fa98dbe804cbde1a0dc
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/fsdp
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/fsdp_config b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/fsdp_config
new file mode 100644
index 0000000000000000000000000000000000000000..9d33480169a14dfac929530aefc3cd1f5776a983
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/fsdp_config
@@ -0,0 +1 @@
+{'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/fsdp_min_num_params b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/fsdp_min_num_params
new file mode 100644
index 0000000000000000000000000000000000000000..c227083464fb9af8955c90d2924774ee50abb547
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/fsdp_min_num_params
@@ -0,0 +1 @@
+0
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/fsdp_transformer_layer_cls_to_wrap b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/fsdp_transformer_layer_cls_to_wrap
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/fsdp_transformer_layer_cls_to_wrap
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/full_determinism b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/full_determinism
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/full_determinism
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/generation_config b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/generation_config
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/generation_config
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/generation_max_length b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/generation_max_length
new file mode 100644
index 0000000000000000000000000000000000000000..e0c3f84a6747696c58b1a32f81129c66498e094a
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/generation_max_length
@@ -0,0 +1 @@
+8192
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/generation_num_beams b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/generation_num_beams
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/generation_num_beams
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/gradient_accumulation_steps b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/gradient_accumulation_steps
new file mode 100644
index 0000000000000000000000000000000000000000..1758dddccea2b3b02d21228a0d06a45a35c0d861
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/gradient_accumulation_steps
@@ -0,0 +1 @@
+32
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/gradient_checkpointing b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/gradient_checkpointing
new file mode 100644
index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/gradient_checkpointing
@@ -0,0 +1 @@
+True
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/gradient_checkpointing_kwargs b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/gradient_checkpointing_kwargs
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/gradient_checkpointing_kwargs
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/greater_is_better b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/greater_is_better
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/greater_is_better
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/group_by_length b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/group_by_length
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/group_by_length
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/half_precision_backend b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/half_precision_backend
new file mode 100644
index 0000000000000000000000000000000000000000..4d18c3e59ecf5c28b46b06ce26f2406b2d449870
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/half_precision_backend
@@ -0,0 +1 @@
+auto
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/head_dim b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/head_dim
new file mode 100644
index 0000000000000000000000000000000000000000..b854a292176003137b48b2f2eb6267c6c3085c9b
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/head_dim
@@ -0,0 +1 @@
+128
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/hidden_act b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/hidden_act
new file mode 100644
index 0000000000000000000000000000000000000000..84972cd9564e61cac416981cb71bb1e176046f68
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/hidden_act
@@ -0,0 +1 @@
+silu
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/hidden_size b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/hidden_size
new file mode 100644
index 0000000000000000000000000000000000000000..489250e329290c73b6a137d9af9a29e421e6e1d7
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/hidden_size
@@ -0,0 +1 @@
+3072
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/hub_always_push b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/hub_always_push
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/hub_always_push
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/hub_model_id b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/hub_model_id
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/hub_model_id
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/hub_private_repo b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/hub_private_repo
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/hub_private_repo
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/hub_revision b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/hub_revision
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/hub_revision
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/hub_strategy b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/hub_strategy
new file mode 100644
index 0000000000000000000000000000000000000000..8532b12ca8add8fe61b84623fab9d559a366ce3c
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/hub_strategy
@@ -0,0 +1 @@
+every_save
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/hub_token b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/hub_token
new file mode 100644
index 0000000000000000000000000000000000000000..0a574a354979ef783f5f4fe08c3595f79596ff41
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/hub_token
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/id2label b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/id2label
new file mode 100644
index 0000000000000000000000000000000000000000..74c276dcae370126a18f5657c0e1ed72e72325e9
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/id2label
@@ -0,0 +1 @@
+{0: 'LABEL_0', 1: 'LABEL_1'}
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ignore_data_skip b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ignore_data_skip
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ignore_data_skip
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/include_for_metrics b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/include_for_metrics
new file mode 100644
index 0000000000000000000000000000000000000000..0637a088a01e8ddab3bf3fa98dbe804cbde1a0dc
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/include_for_metrics
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/include_inputs_for_metrics b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/include_inputs_for_metrics
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/include_inputs_for_metrics
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/include_num_input_tokens_seen b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/include_num_input_tokens_seen
new file mode 100644
index 0000000000000000000000000000000000000000..54299a48fb3ae76c848b3acc12248574d05d81b8
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/include_num_input_tokens_seen
@@ -0,0 +1 @@
+no
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/include_tokens_per_second b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/include_tokens_per_second
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/include_tokens_per_second
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/initializer_range b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/initializer_range
new file mode 100644
index 0000000000000000000000000000000000000000..79dd775c1e90ab736c362ede2f2332678eccf47e
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/initializer_range
@@ -0,0 +1 @@
+0.02
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/intermediate_size b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/intermediate_size
new file mode 100644
index 0000000000000000000000000000000000000000..e0c3f84a6747696c58b1a32f81129c66498e094a
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/intermediate_size
@@ -0,0 +1 @@
+8192
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/is_decoder b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/is_decoder
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/is_decoder
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/is_encoder_decoder b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/is_encoder_decoder
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/is_encoder_decoder
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/jit_mode_eval b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/jit_mode_eval
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/jit_mode_eval
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/label2id b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/label2id
new file mode 100644
index 0000000000000000000000000000000000000000..0589857be5c3ad7b568bf7c79a4172a5aa887693
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/label2id
@@ -0,0 +1 @@
+{'LABEL_0': 0, 'LABEL_1': 1}
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/label_names b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/label_names
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/label_names
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/label_smoothing_factor b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/label_smoothing_factor
new file mode 100644
index 0000000000000000000000000000000000000000..171538eb0b00f4eddffa17929796de55b838f34b
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/label_smoothing_factor
@@ -0,0 +1 @@
+0.0
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/learning_rate b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/learning_rate
new file mode 100644
index 0000000000000000000000000000000000000000..851afc92400e328f36a83152a5c0a3ae9da45fca
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/learning_rate
@@ -0,0 +1 @@
+1.5e-05
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/length_column_name b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/length_column_name
new file mode 100644
index 0000000000000000000000000000000000000000..c2e7ec839dabf14d5d59f187c6b8fdb3460872aa
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/length_column_name
@@ -0,0 +1 @@
+length
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/length_penalty b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/length_penalty
new file mode 100644
index 0000000000000000000000000000000000000000..9f8e9b69a33f4e8067d5b21661a35d8856758aba
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/length_penalty
@@ -0,0 +1 @@
+1.0
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/liger_kernel_config b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/liger_kernel_config
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/liger_kernel_config
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/load_best_model_at_end b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/load_best_model_at_end
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/load_best_model_at_end
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/local_rank b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/local_rank
new file mode 100644
index 0000000000000000000000000000000000000000..c227083464fb9af8955c90d2924774ee50abb547
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/local_rank
@@ -0,0 +1 @@
+0
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/log_level b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/log_level
new file mode 100644
index 0000000000000000000000000000000000000000..ecf328558d66d304c19bdd373f647085a3f0880d
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/log_level
@@ -0,0 +1 @@
+passive
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/log_level_replica b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/log_level_replica
new file mode 100644
index 0000000000000000000000000000000000000000..14b472df8d4481c6fea79c066ae4650980f02b7c
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/log_level_replica
@@ -0,0 +1 @@
+warning
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/log_on_each_node b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/log_on_each_node
new file mode 100644
index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/log_on_each_node
@@ -0,0 +1 @@
+True
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/logging_dir b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/logging_dir
new file mode 100644
index 0000000000000000000000000000000000000000..1d04fea0ac87c57b36d52dc646937dfe05738ef6
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/logging_dir
@@ -0,0 +1 @@
+/local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/runs/Mar05_15-00-58_raven
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/logging_first_step b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/logging_first_step
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/logging_first_step
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/logging_nan_inf_filter b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/logging_nan_inf_filter
new file mode 100644
index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/logging_nan_inf_filter
@@ -0,0 +1 @@
+True
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/logging_steps b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/logging_steps
new file mode 100644
index 0000000000000000000000000000000000000000..9a037142aa3c1b4c490e1a38251620f113465330
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/logging_steps
@@ -0,0 +1 @@
+10
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/logging_strategy b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/logging_strategy
new file mode 100644
index 0000000000000000000000000000000000000000..17f15e19cf5e8064aff8d528657b70e9611eb59e
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/logging_strategy
@@ -0,0 +1 @@
+steps
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/lr_scheduler_kwargs b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/lr_scheduler_kwargs
new file mode 100644
index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/lr_scheduler_kwargs
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/lr_scheduler_type b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/lr_scheduler_type
new file mode 100644
index 0000000000000000000000000000000000000000..84aa3999b5b7cae7f78b1f77e04d182643005a92
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/lr_scheduler_type
@@ -0,0 +1 @@
+cosine
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/max_grad_norm b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/max_grad_norm
new file mode 100644
index 0000000000000000000000000000000000000000..9f8e9b69a33f4e8067d5b21661a35d8856758aba
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/max_grad_norm
@@ -0,0 +1 @@
+1.0
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/max_length b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/max_length
new file mode 100644
index 0000000000000000000000000000000000000000..2edeafb09db0093bae6ff060e2dcd2166f5c9387
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/max_length
@@ -0,0 +1 @@
+20
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/max_position_embeddings b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/max_position_embeddings
new file mode 100644
index 0000000000000000000000000000000000000000..41fc5b8c922eda0b84dd1d03152fd870cd1a8295
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/max_position_embeddings
@@ -0,0 +1 @@
+131072
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/max_steps b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/max_steps
new file mode 100644
index 0000000000000000000000000000000000000000..d7d17fcbef95ca19081c4cc5e97cbc592cc7081f
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/max_steps
@@ -0,0 +1 @@
+-1
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/metric_for_best_model b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/metric_for_best_model
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/metric_for_best_model
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/min_length b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/min_length
new file mode 100644
index 0000000000000000000000000000000000000000..c227083464fb9af8955c90d2924774ee50abb547
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/min_length
@@ -0,0 +1 @@
+0
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/mlp_bias b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/mlp_bias
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/mlp_bias
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/model_type b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/model_type
new file mode 100644
index 0000000000000000000000000000000000000000..056bf100b8c479097594926c1ab454fe1ee0f39c
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/model_type
@@ -0,0 +1 @@
+llama
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/mp_parameters b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/mp_parameters
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/neftune_noise_alpha b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/neftune_noise_alpha
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/neftune_noise_alpha
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/no_cuda b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/no_cuda
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/no_cuda
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/no_repeat_ngram_size b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/no_repeat_ngram_size
new file mode 100644
index 0000000000000000000000000000000000000000..c227083464fb9af8955c90d2924774ee50abb547
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/no_repeat_ngram_size
@@ -0,0 +1 @@
+0
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/num_attention_heads b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/num_attention_heads
new file mode 100644
index 0000000000000000000000000000000000000000..cabf43b5ddf813cbe89697372a21373f14921884
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/num_attention_heads
@@ -0,0 +1 @@
+24
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/num_beam_groups b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/num_beam_groups
new file mode 100644
index 0000000000000000000000000000000000000000..56a6051ca2b02b04ef92d5150c9ef600403cb1de
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/num_beam_groups
@@ -0,0 +1 @@
+1
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/num_beams b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/num_beams
new file mode 100644
index 0000000000000000000000000000000000000000..56a6051ca2b02b04ef92d5150c9ef600403cb1de
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/num_beams
@@ -0,0 +1 @@
+1
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/num_hidden_layers b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/num_hidden_layers
new file mode 100644
index 0000000000000000000000000000000000000000..368f89ceef179cc546403ac0d5ef1d0e4b340447
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/num_hidden_layers
@@ -0,0 +1 @@
+28
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/num_key_value_heads b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/num_key_value_heads
new file mode 100644
index 0000000000000000000000000000000000000000..301160a93062df23030a69f4b5e4d9bf71866ee9
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/num_key_value_heads
@@ -0,0 +1 @@
+8
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/num_return_sequences b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/num_return_sequences
new file mode 100644
index 0000000000000000000000000000000000000000..56a6051ca2b02b04ef92d5150c9ef600403cb1de
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/num_return_sequences
@@ -0,0 +1 @@
+1
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/num_train_epochs b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/num_train_epochs
new file mode 100644
index 0000000000000000000000000000000000000000..f398a20612afa114338cf6fec4d5378e51473059
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/num_train_epochs
@@ -0,0 +1 @@
+3.0
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/optim b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/optim
new file mode 100644
index 0000000000000000000000000000000000000000..2fd30f30cf2a0413799ab7959d66333f63162f20
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/optim
@@ -0,0 +1 @@
+adamw_torch
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/optim_args b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/optim_args
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/optim_args
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/optim_target_modules b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/optim_target_modules
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/optim_target_modules
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/output_attentions b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/output_attentions
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/output_attentions
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/output_dir b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/output_dir
new file mode 100644
index 0000000000000000000000000000000000000000..c30e0dc20233bf05b963489ad59af173f7461fed
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/output_dir
@@ -0,0 +1 @@
+/local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/output_hidden_states b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/output_hidden_states
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/output_hidden_states
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/output_scores b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/output_scores
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/output_scores
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/overwrite_output_dir b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/overwrite_output_dir
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/overwrite_output_dir
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/pad_token_id b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/pad_token_id
new file mode 100644
index 0000000000000000000000000000000000000000..f3b3deeda7ab5834f5e3e5a2ed8083a147e11f2a
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/pad_token_id
@@ -0,0 +1 @@
+128001
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/parallelism_config b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/parallelism_config
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/parallelism_config
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/past_index b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/past_index
new file mode 100644
index 0000000000000000000000000000000000000000..d7d17fcbef95ca19081c4cc5e97cbc592cc7081f
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/past_index
@@ -0,0 +1 @@
+-1
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/per_device_eval_batch_size b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/per_device_eval_batch_size
new file mode 100644
index 0000000000000000000000000000000000000000..301160a93062df23030a69f4b5e4d9bf71866ee9
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/per_device_eval_batch_size
@@ -0,0 +1 @@
+8
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/per_device_train_batch_size b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/per_device_train_batch_size
new file mode 100644
index 0000000000000000000000000000000000000000..d8263ee9860594d2806b0dfd1bfd17528b0ba2a4
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/per_device_train_batch_size
@@ -0,0 +1 @@
+2
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/per_gpu_eval_batch_size b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/per_gpu_eval_batch_size
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/per_gpu_eval_batch_size
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/per_gpu_train_batch_size b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/per_gpu_train_batch_size
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/per_gpu_train_batch_size
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/placement_strategy b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/placement_strategy
new file mode 100644
index 0000000000000000000000000000000000000000..0d156a4c857f8ef50c1dc4c08dc2d14fbf2e8bf1
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/placement_strategy
@@ -0,0 +1 @@
+PACK
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/predict_with_generate b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/predict_with_generate
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/predict_with_generate
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/prediction_loss_only b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/prediction_loss_only
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/prediction_loss_only
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/prefix b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/prefix
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/prefix
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/pretraining_tp b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/pretraining_tp
new file mode 100644
index 0000000000000000000000000000000000000000..56a6051ca2b02b04ef92d5150c9ef600403cb1de
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/pretraining_tp
@@ -0,0 +1 @@
+1
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/problem_type b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/problem_type
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/problem_type
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/project b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/project
new file mode 100644
index 0000000000000000000000000000000000000000..58df447a1503b0f78c9105cdd52cf573b95f390f
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/project
@@ -0,0 +1 @@
+huggingface
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/pruned_heads b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/pruned_heads
new file mode 100644
index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/pruned_heads
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/push_to_hub b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/push_to_hub
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/push_to_hub
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/push_to_hub_model_id b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/push_to_hub_model_id
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/push_to_hub_model_id
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/push_to_hub_organization b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/push_to_hub_organization
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/push_to_hub_organization
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/push_to_hub_token b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/push_to_hub_token
new file mode 100644
index 0000000000000000000000000000000000000000..36e61093756f7c43b24cd50fc63164c08bcf50f1
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/push_to_hub_token
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ray_init_kwargs b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ray_init_kwargs
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ray_init_kwargs
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ray_num_workers b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ray_num_workers
new file mode 100644
index 0000000000000000000000000000000000000000..56a6051ca2b02b04ef92d5150c9ef600403cb1de
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ray_num_workers
@@ -0,0 +1 @@
+1
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ray_run_name b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ray_run_name
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ray_run_name
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ray_scope b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ray_scope
new file mode 100644
index 0000000000000000000000000000000000000000..1c1206e8bf4337e96dad9a6d139628852077558d
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ray_scope
@@ -0,0 +1 @@
+last
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ray_storage_filesystem b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ray_storage_filesystem
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ray_storage_filesystem
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ray_storage_path b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ray_storage_path
new file mode 100644
index 0000000000000000000000000000000000000000..c6962e171e535e015188e2c019a938766fa0d574
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/ray_storage_path
@@ -0,0 +1 @@
+./saves
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/remove_invalid_values b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/remove_invalid_values
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/remove_invalid_values
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/remove_unused_columns b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/remove_unused_columns
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/remove_unused_columns
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/repetition_penalty b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/repetition_penalty
new file mode 100644
index 0000000000000000000000000000000000000000..9f8e9b69a33f4e8067d5b21661a35d8856758aba
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/repetition_penalty
@@ -0,0 +1 @@
+1.0
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/report_to b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/report_to
new file mode 100644
index 0000000000000000000000000000000000000000..95988dbfb90ccf107f671f17824204c92d3d1674
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/report_to
@@ -0,0 +1 @@
+['mlflow']
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/resources_per_worker b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/resources_per_worker
new file mode 100644
index 0000000000000000000000000000000000000000..37cf4fc6d29e1d007e764540c6445ce887916e44
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/resources_per_worker
@@ -0,0 +1 @@
+{'GPU': 1}
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/restore_callback_states_from_checkpoint b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/restore_callback_states_from_checkpoint
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/restore_callback_states_from_checkpoint
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/resume_from_checkpoint b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/resume_from_checkpoint
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/resume_from_checkpoint
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/return_dict b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/return_dict
new file mode 100644
index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/return_dict
@@ -0,0 +1 @@
+True
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/return_dict_in_generate b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/return_dict_in_generate
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/return_dict_in_generate
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/rms_norm_eps b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/rms_norm_eps
new file mode 100644
index 0000000000000000000000000000000000000000..5868ff147459cee04c24f2de58e75969024870b8
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/rms_norm_eps
@@ -0,0 +1 @@
+1e-05
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/rope_scaling b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/rope_scaling
new file mode 100644
index 0000000000000000000000000000000000000000..9215f7897f1cde9615d8a2cbc581da576c91844b
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/rope_scaling
@@ -0,0 +1 @@
+{'factor': 32.0, 'high_freq_factor': 4.0, 'low_freq_factor': 1.0, 'original_max_position_embeddings': 8192, 'rope_type': 'llama3'}
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/rope_theta b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/rope_theta
new file mode 100644
index 0000000000000000000000000000000000000000..0dc280f5f9cfaea3b39e34bef5251cb1bbaf3bf8
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/rope_theta
@@ -0,0 +1 @@
+500000.0
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/run_name b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/run_name
new file mode 100644
index 0000000000000000000000000000000000000000..0f23e0877477455534bae4babc7610a7dc7cea17
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/run_name
@@ -0,0 +1 @@
+llama3b_think_sft_nopack_lr1.5e5_ep3
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/save_on_each_node b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/save_on_each_node
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/save_on_each_node
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/save_only_model b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/save_only_model
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/save_only_model
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/save_safetensors b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/save_safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/save_safetensors
@@ -0,0 +1 @@
+True
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/save_steps b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/save_steps
new file mode 100644
index 0000000000000000000000000000000000000000..ae4ee13c08e7628701b925b8962108bd7643bf6e
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/save_steps
@@ -0,0 +1 @@
+200
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/save_strategy b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/save_strategy
new file mode 100644
index 0000000000000000000000000000000000000000..17f15e19cf5e8064aff8d528657b70e9611eb59e
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/save_strategy
@@ -0,0 +1 @@
+steps
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/save_total_limit b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/save_total_limit
new file mode 100644
index 0000000000000000000000000000000000000000..7813681f5b41c028345ca62a2be376bae70b7f61
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/save_total_limit
@@ -0,0 +1 @@
+5
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/seed b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/seed
new file mode 100644
index 0000000000000000000000000000000000000000..f70d7bba4ae1f07682e0358bd7a2068094fc023b
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/seed
@@ -0,0 +1 @@
+42
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/sep_token_id b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/sep_token_id
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/sep_token_id
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/skip_memory_metrics b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/skip_memory_metrics
new file mode 100644
index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/skip_memory_metrics
@@ -0,0 +1 @@
+True
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/sortish_sampler b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/sortish_sampler
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/sortish_sampler
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/suppress_tokens b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/suppress_tokens
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/suppress_tokens
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/task_specific_params b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/task_specific_params
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/task_specific_params
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/temperature b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/temperature
new file mode 100644
index 0000000000000000000000000000000000000000..9f8e9b69a33f4e8067d5b21661a35d8856758aba
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/temperature
@@ -0,0 +1 @@
+1.0
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/tf32 b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/tf32
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/tf32
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/tf_legacy_loss b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/tf_legacy_loss
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/tf_legacy_loss
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/tie_encoder_decoder b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/tie_encoder_decoder
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/tie_encoder_decoder
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/tie_word_embeddings b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/tie_word_embeddings
new file mode 100644
index 0000000000000000000000000000000000000000..4791ed5559bd77f54e1520025768e2b368705876
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/tie_word_embeddings
@@ -0,0 +1 @@
+True
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/tokenizer_class b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/tokenizer_class
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/tokenizer_class
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/top_k b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/top_k
new file mode 100644
index 0000000000000000000000000000000000000000..c5b431b6cba29540b4b284840ff229bce0460886
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/top_k
@@ -0,0 +1 @@
+50
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/top_p b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/top_p
new file mode 100644
index 0000000000000000000000000000000000000000..9f8e9b69a33f4e8067d5b21661a35d8856758aba
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/top_p
@@ -0,0 +1 @@
+1.0
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/torch_compile b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/torch_compile
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/torch_compile
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/torch_compile_backend b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/torch_compile_backend
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/torch_compile_backend
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/torch_compile_mode b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/torch_compile_mode
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/torch_compile_mode
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/torch_empty_cache_steps b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/torch_empty_cache_steps
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/torch_empty_cache_steps
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/torchdynamo b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/torchdynamo
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/torchdynamo
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/torchscript b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/torchscript
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/torchscript
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/tpu_metrics_debug b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/tpu_metrics_debug
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/tpu_metrics_debug
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/tpu_num_cores b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/tpu_num_cores
new file mode 100644
index 0000000000000000000000000000000000000000..4af18322e32f3dd19579c80e26e4a306ad11e049
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/tpu_num_cores
@@ -0,0 +1 @@
+None
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/trackio_space_id b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/trackio_space_id
new file mode 100644
index 0000000000000000000000000000000000000000..ce8d952594b1b4638b748fcc5486c6c20e791dcc
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/trackio_space_id
@@ -0,0 +1 @@
+trackio
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/transformers_version b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/transformers_version
new file mode 100644
index 0000000000000000000000000000000000000000..e5a11132710d05d4da277d6ae5402768cc434018
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/transformers_version
@@ -0,0 +1 @@
+4.57.1
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/typical_p b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/typical_p
new file mode 100644
index 0000000000000000000000000000000000000000..9f8e9b69a33f4e8067d5b21661a35d8856758aba
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/typical_p
@@ -0,0 +1 @@
+1.0
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/use_bfloat16 b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/use_bfloat16
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/use_bfloat16
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/use_cache b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/use_cache
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/use_cache
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/use_cpu b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/use_cpu
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/use_cpu
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/use_legacy_prediction_loop b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/use_legacy_prediction_loop
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/use_legacy_prediction_loop
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/use_liger_kernel b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/use_liger_kernel
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/use_liger_kernel
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/use_mps_device b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/use_mps_device
new file mode 100644
index 0000000000000000000000000000000000000000..c1f22fbc23bb6ee67824843d6685826db10313d3
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/use_mps_device
@@ -0,0 +1 @@
+False
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/vocab_size b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/vocab_size
new file mode 100644
index 0000000000000000000000000000000000000000..34c44b19378193a3b4fa853df4426fc426c15535
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/vocab_size
@@ -0,0 +1 @@
+128256
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/warmup_ratio b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/warmup_ratio
new file mode 100644
index 0000000000000000000000000000000000000000..ceab6e11ece0bcec917c12e11d350946f085d549
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/warmup_ratio
@@ -0,0 +1 @@
+0.1
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/warmup_steps b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/warmup_steps
new file mode 100644
index 0000000000000000000000000000000000000000..c227083464fb9af8955c90d2924774ee50abb547
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/warmup_steps
@@ -0,0 +1 @@
+0
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/weight_decay b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/weight_decay
new file mode 100644
index 0000000000000000000000000000000000000000..d1c6331b3109accd73f01907062e6c174e28200a
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/params/weight_decay
@@ -0,0 +1 @@
+0.01
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/tags/mlflow.runName b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/tags/mlflow.runName
new file mode 100644
index 0000000000000000000000000000000000000000..0f23e0877477455534bae4babc7610a7dc7cea17
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/tags/mlflow.runName
@@ -0,0 +1 @@
+llama3b_think_sft_nopack_lr1.5e5_ep3
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/tags/mlflow.source.git.commit b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/tags/mlflow.source.git.commit
new file mode 100644
index 0000000000000000000000000000000000000000..873686f91a08caa39c5cde9e33e5231dc5ae03d8
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/tags/mlflow.source.git.commit
@@ -0,0 +1 @@
+25f2ae49189f9a73cdd23bd5845e544915a0d04d
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/tags/mlflow.source.name b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/tags/mlflow.source.name
new file mode 100644
index 0000000000000000000000000000000000000000..fd98106671732f814b803d95444e53a6abcae75d
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/tags/mlflow.source.name
@@ -0,0 +1 @@
+/home/salman/reward-signal-analysis/LLaMA-Factory/src/llamafactory/launcher.py
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/tags/mlflow.source.type b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/tags/mlflow.source.type
new file mode 100644
index 0000000000000000000000000000000000000000..0c2c1fe9dc63b7040bb81006635e50fd528f056f
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/tags/mlflow.source.type
@@ -0,0 +1 @@
+LOCAL
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/tags/mlflow.user b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/tags/mlflow.user
new file mode 100644
index 0000000000000000000000000000000000000000..ed08908948e0c35bdf8cbdcc82956d2ad0b81915
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/c370ae36b3594e5b8e4483476b3515b7/tags/mlflow.user
@@ -0,0 +1 @@
+salman
\ No newline at end of file
diff --git a/global_step_0/mlflow/356092632336622637/meta.yaml b/global_step_0/mlflow/356092632336622637/meta.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f786490c4d5776f1c2bb3ab1faa0ac1d57ab00ff
--- /dev/null
+++ b/global_step_0/mlflow/356092632336622637/meta.yaml
@@ -0,0 +1,6 @@
+artifact_location: file:///local2/salman/model/sft_model_llama_mid_train/think_sft_nopack_lr1.5e5_ep3/mlflow/356092632336622637
+creation_time: 1772751716696
+experiment_id: '356092632336622637'
+last_update_time: 1772751716696
+lifecycle_stage: active
+name: llama3b_think_sft_nopack_lr1.5e5_ep3
diff --git a/global_step_0/model-00001-of-00002.safetensors b/global_step_0/model-00001-of-00002.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4a218d0c87ffcbc29804e0963ce8c5052cd944d5
--- /dev/null
+++ b/global_step_0/model-00001-of-00002.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a8958e10d9375f6edb777844678f32b35d6e200642d892b10cf23fba9d56b880
+size 4965799096
diff --git a/global_step_0/model-00002-of-00002.safetensors b/global_step_0/model-00002-of-00002.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..38fbe7a316db54d2943b4a5a55e17c3f8e3d4de5
--- /dev/null
+++ b/global_step_0/model-00002-of-00002.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:552e715adc327a92f6466821e15c47bb806883b331891b871056fbbd7a783271
+size 2247734992
diff --git a/global_step_0/model.safetensors.index.json b/global_step_0/model.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..887d493f4ade1ceaea9675859e66b09891033710
--- /dev/null
+++ b/global_step_0/model.safetensors.index.json
@@ -0,0 +1,263 @@
+{
+ "metadata": {
+ "total_parameters": 3212749824,
+ "total_size": 7213504512
+ },
+ "weight_map": {
+ "lm_head.weight": "model-00002-of-00002.safetensors",
+ "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+ "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+ "model.norm.weight": "model-00002-of-00002.safetensors"
+ }
+}
diff --git a/global_step_0/special_tokens_map.json b/global_step_0/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..9d6f1d09511c78a15675d7e3bfece8089df89a1d
--- /dev/null
+++ b/global_step_0/special_tokens_map.json
@@ -0,0 +1,32 @@
+{
+ "additional_special_tokens": [
+ {
+ "content": "<|eom_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ ],
+ "bos_token": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|end_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/global_step_0/tokenizer.json b/global_step_0/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2
--- /dev/null
+++ b/global_step_0/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
+size 17209920
diff --git a/global_step_0/tokenizer_config.json b/global_step_0/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..ca4f78cbcbcdef77e48625636066f7d40adec5dd
--- /dev/null
+++ b/global_step_0/tokenizer_config.json
@@ -0,0 +1,2068 @@
+{
+ "added_tokens_decoder": {
+ "128000": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128001": {
+ "content": "<|end_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128002": {
+ "content": "<|reserved_special_token_0|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128003": {
+ "content": "<|reserved_special_token_1|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128004": {
+ "content": "<|finetune_right_pad_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128005": {
+ "content": "<|reserved_special_token_2|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128006": {
+ "content": "<|start_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128007": {
+ "content": "<|end_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128008": {
+ "content": "<|eom_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128009": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128010": {
+ "content": "<|python_tag|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128011": {
+ "content": "<|reserved_special_token_3|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128012": {
+ "content": "<|reserved_special_token_4|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128013": {
+ "content": "<|reserved_special_token_5|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128014": {
+ "content": "<|reserved_special_token_6|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128015": {
+ "content": "<|reserved_special_token_7|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128016": {
+ "content": "<|reserved_special_token_8|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128017": {
+ "content": "<|reserved_special_token_9|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128018": {
+ "content": "<|reserved_special_token_10|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128019": {
+ "content": "<|reserved_special_token_11|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128020": {
+ "content": "<|reserved_special_token_12|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128021": {
+ "content": "<|reserved_special_token_13|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128022": {
+ "content": "<|reserved_special_token_14|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128023": {
+ "content": "<|reserved_special_token_15|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128024": {
+ "content": "<|reserved_special_token_16|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128025": {
+ "content": "<|reserved_special_token_17|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128026": {
+ "content": "<|reserved_special_token_18|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128027": {
+ "content": "<|reserved_special_token_19|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128028": {
+ "content": "<|reserved_special_token_20|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128029": {
+ "content": "<|reserved_special_token_21|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128030": {
+ "content": "<|reserved_special_token_22|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128031": {
+ "content": "<|reserved_special_token_23|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128032": {
+ "content": "<|reserved_special_token_24|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128033": {
+ "content": "<|reserved_special_token_25|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128034": {
+ "content": "<|reserved_special_token_26|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128035": {
+ "content": "<|reserved_special_token_27|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128036": {
+ "content": "<|reserved_special_token_28|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128037": {
+ "content": "<|reserved_special_token_29|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128038": {
+ "content": "<|reserved_special_token_30|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128039": {
+ "content": "<|reserved_special_token_31|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128040": {
+ "content": "<|reserved_special_token_32|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128041": {
+ "content": "<|reserved_special_token_33|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128042": {
+ "content": "<|reserved_special_token_34|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128043": {
+ "content": "<|reserved_special_token_35|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128044": {
+ "content": "<|reserved_special_token_36|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128045": {
+ "content": "<|reserved_special_token_37|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128046": {
+ "content": "<|reserved_special_token_38|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128047": {
+ "content": "<|reserved_special_token_39|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128048": {
+ "content": "<|reserved_special_token_40|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128049": {
+ "content": "<|reserved_special_token_41|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128050": {
+ "content": "<|reserved_special_token_42|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128051": {
+ "content": "<|reserved_special_token_43|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128052": {
+ "content": "<|reserved_special_token_44|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128053": {
+ "content": "<|reserved_special_token_45|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128054": {
+ "content": "<|reserved_special_token_46|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128055": {
+ "content": "<|reserved_special_token_47|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128056": {
+ "content": "<|reserved_special_token_48|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128057": {
+ "content": "<|reserved_special_token_49|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128058": {
+ "content": "<|reserved_special_token_50|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128059": {
+ "content": "<|reserved_special_token_51|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128060": {
+ "content": "<|reserved_special_token_52|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128061": {
+ "content": "<|reserved_special_token_53|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128062": {
+ "content": "<|reserved_special_token_54|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128063": {
+ "content": "<|reserved_special_token_55|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128064": {
+ "content": "<|reserved_special_token_56|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128065": {
+ "content": "<|reserved_special_token_57|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128066": {
+ "content": "<|reserved_special_token_58|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128067": {
+ "content": "<|reserved_special_token_59|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128068": {
+ "content": "<|reserved_special_token_60|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128069": {
+ "content": "<|reserved_special_token_61|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128070": {
+ "content": "<|reserved_special_token_62|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128071": {
+ "content": "<|reserved_special_token_63|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128072": {
+ "content": "<|reserved_special_token_64|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128073": {
+ "content": "<|reserved_special_token_65|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128074": {
+ "content": "<|reserved_special_token_66|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128075": {
+ "content": "<|reserved_special_token_67|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128076": {
+ "content": "<|reserved_special_token_68|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128077": {
+ "content": "<|reserved_special_token_69|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128078": {
+ "content": "<|reserved_special_token_70|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128079": {
+ "content": "<|reserved_special_token_71|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128080": {
+ "content": "<|reserved_special_token_72|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128081": {
+ "content": "<|reserved_special_token_73|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128082": {
+ "content": "<|reserved_special_token_74|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128083": {
+ "content": "<|reserved_special_token_75|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128084": {
+ "content": "<|reserved_special_token_76|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128085": {
+ "content": "<|reserved_special_token_77|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128086": {
+ "content": "<|reserved_special_token_78|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128087": {
+ "content": "<|reserved_special_token_79|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128088": {
+ "content": "<|reserved_special_token_80|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128089": {
+ "content": "<|reserved_special_token_81|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128090": {
+ "content": "<|reserved_special_token_82|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128091": {
+ "content": "<|reserved_special_token_83|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128092": {
+ "content": "<|reserved_special_token_84|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128093": {
+ "content": "<|reserved_special_token_85|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128094": {
+ "content": "<|reserved_special_token_86|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128095": {
+ "content": "<|reserved_special_token_87|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128096": {
+ "content": "<|reserved_special_token_88|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128097": {
+ "content": "<|reserved_special_token_89|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128098": {
+ "content": "<|reserved_special_token_90|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128099": {
+ "content": "<|reserved_special_token_91|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128100": {
+ "content": "<|reserved_special_token_92|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128101": {
+ "content": "<|reserved_special_token_93|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128102": {
+ "content": "<|reserved_special_token_94|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128103": {
+ "content": "<|reserved_special_token_95|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128104": {
+ "content": "<|reserved_special_token_96|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128105": {
+ "content": "<|reserved_special_token_97|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128106": {
+ "content": "<|reserved_special_token_98|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128107": {
+ "content": "<|reserved_special_token_99|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128108": {
+ "content": "<|reserved_special_token_100|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128109": {
+ "content": "<|reserved_special_token_101|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128110": {
+ "content": "<|reserved_special_token_102|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128111": {
+ "content": "<|reserved_special_token_103|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128112": {
+ "content": "<|reserved_special_token_104|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128113": {
+ "content": "<|reserved_special_token_105|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128114": {
+ "content": "<|reserved_special_token_106|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128115": {
+ "content": "<|reserved_special_token_107|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128116": {
+ "content": "<|reserved_special_token_108|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128117": {
+ "content": "<|reserved_special_token_109|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128118": {
+ "content": "<|reserved_special_token_110|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128119": {
+ "content": "<|reserved_special_token_111|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128120": {
+ "content": "<|reserved_special_token_112|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128121": {
+ "content": "<|reserved_special_token_113|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128122": {
+ "content": "<|reserved_special_token_114|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128123": {
+ "content": "<|reserved_special_token_115|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128124": {
+ "content": "<|reserved_special_token_116|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128125": {
+ "content": "<|reserved_special_token_117|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128126": {
+ "content": "<|reserved_special_token_118|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128127": {
+ "content": "<|reserved_special_token_119|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128128": {
+ "content": "<|reserved_special_token_120|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128129": {
+ "content": "<|reserved_special_token_121|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128130": {
+ "content": "<|reserved_special_token_122|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128131": {
+ "content": "<|reserved_special_token_123|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128132": {
+ "content": "<|reserved_special_token_124|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128133": {
+ "content": "<|reserved_special_token_125|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128134": {
+ "content": "<|reserved_special_token_126|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128135": {
+ "content": "<|reserved_special_token_127|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128136": {
+ "content": "<|reserved_special_token_128|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128137": {
+ "content": "<|reserved_special_token_129|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128138": {
+ "content": "<|reserved_special_token_130|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128139": {
+ "content": "<|reserved_special_token_131|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128140": {
+ "content": "<|reserved_special_token_132|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128141": {
+ "content": "<|reserved_special_token_133|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128142": {
+ "content": "<|reserved_special_token_134|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128143": {
+ "content": "<|reserved_special_token_135|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128144": {
+ "content": "<|reserved_special_token_136|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128145": {
+ "content": "<|reserved_special_token_137|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128146": {
+ "content": "<|reserved_special_token_138|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128147": {
+ "content": "<|reserved_special_token_139|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128148": {
+ "content": "<|reserved_special_token_140|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128149": {
+ "content": "<|reserved_special_token_141|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128150": {
+ "content": "<|reserved_special_token_142|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128151": {
+ "content": "<|reserved_special_token_143|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128152": {
+ "content": "<|reserved_special_token_144|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128153": {
+ "content": "<|reserved_special_token_145|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128154": {
+ "content": "<|reserved_special_token_146|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128155": {
+ "content": "<|reserved_special_token_147|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128156": {
+ "content": "<|reserved_special_token_148|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128157": {
+ "content": "<|reserved_special_token_149|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128158": {
+ "content": "<|reserved_special_token_150|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128159": {
+ "content": "<|reserved_special_token_151|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128160": {
+ "content": "<|reserved_special_token_152|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128161": {
+ "content": "<|reserved_special_token_153|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128162": {
+ "content": "<|reserved_special_token_154|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128163": {
+ "content": "<|reserved_special_token_155|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128164": {
+ "content": "<|reserved_special_token_156|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128165": {
+ "content": "<|reserved_special_token_157|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128166": {
+ "content": "<|reserved_special_token_158|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128167": {
+ "content": "<|reserved_special_token_159|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128168": {
+ "content": "<|reserved_special_token_160|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128169": {
+ "content": "<|reserved_special_token_161|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128170": {
+ "content": "<|reserved_special_token_162|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128171": {
+ "content": "<|reserved_special_token_163|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128172": {
+ "content": "<|reserved_special_token_164|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128173": {
+ "content": "<|reserved_special_token_165|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128174": {
+ "content": "<|reserved_special_token_166|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128175": {
+ "content": "<|reserved_special_token_167|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128176": {
+ "content": "<|reserved_special_token_168|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128177": {
+ "content": "<|reserved_special_token_169|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128178": {
+ "content": "<|reserved_special_token_170|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128179": {
+ "content": "<|reserved_special_token_171|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128180": {
+ "content": "<|reserved_special_token_172|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128181": {
+ "content": "<|reserved_special_token_173|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128182": {
+ "content": "<|reserved_special_token_174|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128183": {
+ "content": "<|reserved_special_token_175|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128184": {
+ "content": "<|reserved_special_token_176|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128185": {
+ "content": "<|reserved_special_token_177|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128186": {
+ "content": "<|reserved_special_token_178|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128187": {
+ "content": "<|reserved_special_token_179|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128188": {
+ "content": "<|reserved_special_token_180|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128189": {
+ "content": "<|reserved_special_token_181|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128190": {
+ "content": "<|reserved_special_token_182|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128191": {
+ "content": "<|reserved_special_token_183|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128192": {
+ "content": "<|reserved_special_token_184|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128193": {
+ "content": "<|reserved_special_token_185|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128194": {
+ "content": "<|reserved_special_token_186|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128195": {
+ "content": "<|reserved_special_token_187|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128196": {
+ "content": "<|reserved_special_token_188|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128197": {
+ "content": "<|reserved_special_token_189|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128198": {
+ "content": "<|reserved_special_token_190|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128199": {
+ "content": "<|reserved_special_token_191|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128200": {
+ "content": "<|reserved_special_token_192|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128201": {
+ "content": "<|reserved_special_token_193|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128202": {
+ "content": "<|reserved_special_token_194|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128203": {
+ "content": "<|reserved_special_token_195|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128204": {
+ "content": "<|reserved_special_token_196|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128205": {
+ "content": "<|reserved_special_token_197|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128206": {
+ "content": "<|reserved_special_token_198|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128207": {
+ "content": "<|reserved_special_token_199|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128208": {
+ "content": "<|reserved_special_token_200|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128209": {
+ "content": "<|reserved_special_token_201|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128210": {
+ "content": "<|reserved_special_token_202|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128211": {
+ "content": "<|reserved_special_token_203|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128212": {
+ "content": "<|reserved_special_token_204|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128213": {
+ "content": "<|reserved_special_token_205|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128214": {
+ "content": "<|reserved_special_token_206|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128215": {
+ "content": "<|reserved_special_token_207|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128216": {
+ "content": "<|reserved_special_token_208|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128217": {
+ "content": "<|reserved_special_token_209|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128218": {
+ "content": "<|reserved_special_token_210|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128219": {
+ "content": "<|reserved_special_token_211|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128220": {
+ "content": "<|reserved_special_token_212|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128221": {
+ "content": "<|reserved_special_token_213|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128222": {
+ "content": "<|reserved_special_token_214|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128223": {
+ "content": "<|reserved_special_token_215|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128224": {
+ "content": "<|reserved_special_token_216|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128225": {
+ "content": "<|reserved_special_token_217|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128226": {
+ "content": "<|reserved_special_token_218|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128227": {
+ "content": "<|reserved_special_token_219|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128228": {
+ "content": "<|reserved_special_token_220|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128229": {
+ "content": "<|reserved_special_token_221|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128230": {
+ "content": "<|reserved_special_token_222|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128231": {
+ "content": "<|reserved_special_token_223|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128232": {
+ "content": "<|reserved_special_token_224|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128233": {
+ "content": "<|reserved_special_token_225|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128234": {
+ "content": "<|reserved_special_token_226|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128235": {
+ "content": "<|reserved_special_token_227|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128236": {
+ "content": "<|reserved_special_token_228|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128237": {
+ "content": "<|reserved_special_token_229|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128238": {
+ "content": "<|reserved_special_token_230|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128239": {
+ "content": "<|reserved_special_token_231|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128240": {
+ "content": "<|reserved_special_token_232|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128241": {
+ "content": "<|reserved_special_token_233|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128242": {
+ "content": "<|reserved_special_token_234|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128243": {
+ "content": "<|reserved_special_token_235|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128244": {
+ "content": "<|reserved_special_token_236|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128245": {
+ "content": "<|reserved_special_token_237|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128246": {
+ "content": "<|reserved_special_token_238|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128247": {
+ "content": "<|reserved_special_token_239|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128248": {
+ "content": "<|reserved_special_token_240|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128249": {
+ "content": "<|reserved_special_token_241|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128250": {
+ "content": "<|reserved_special_token_242|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128251": {
+ "content": "<|reserved_special_token_243|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128252": {
+ "content": "<|reserved_special_token_244|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128253": {
+ "content": "<|reserved_special_token_245|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128254": {
+ "content": "<|reserved_special_token_246|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128255": {
+ "content": "<|reserved_special_token_247|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "<|eom_id|>"
+ ],
+ "bos_token": "<|begin_of_text|>",
+ "clean_up_tokenization_spaces": true,
+ "eos_token": "<|eot_id|>",
+ "extra_special_tokens": {},
+ "model_input_names": [
+ "input_ids",
+ "attention_mask"
+ ],
+ "model_max_length": 131072,
+ "pad_token": "<|end_of_text|>",
+ "padding_side": "right",
+ "split_special_tokens": false,
+ "tokenizer_class": "PreTrainedTokenizerFast"
+}
diff --git a/global_step_0/train_results.json b/global_step_0/train_results.json
new file mode 100644
index 0000000000000000000000000000000000000000..361e4e132f240d363d25c252389f9999a100c486
--- /dev/null
+++ b/global_step_0/train_results.json
@@ -0,0 +1,8 @@
+{
+ "epoch": 3.0,
+ "total_flos": 1.1980638081930756e+19,
+ "train_loss": 0.49363853406255476,
+ "train_runtime": 40041.2675,
+ "train_samples_per_second": 3.261,
+ "train_steps_per_second": 0.013
+}
\ No newline at end of file
diff --git a/global_step_0/trainer_log.jsonl b/global_step_0/trainer_log.jsonl
new file mode 100644
index 0000000000000000000000000000000000000000..d7510fa335f5a5beee10778a5977bd69b542abd9
--- /dev/null
+++ b/global_step_0/trainer_log.jsonl
@@ -0,0 +1,52 @@
+{"current_steps": 10, "total_steps": 513, "loss": 0.8486, "lr": 2.596153846153846e-06, "epoch": 0.05881271825032163, "percentage": 1.95, "elapsed_time": "0:08:49", "remaining_time": "7:23:45"}
+{"current_steps": 20, "total_steps": 513, "loss": 0.7412, "lr": 5.480769230769231e-06, "epoch": 0.11762543650064326, "percentage": 3.9, "elapsed_time": "0:17:27", "remaining_time": "7:10:32"}
+{"current_steps": 30, "total_steps": 513, "loss": 0.6532, "lr": 8.365384615384616e-06, "epoch": 0.1764381547509649, "percentage": 5.85, "elapsed_time": "0:26:04", "remaining_time": "6:59:45"}
+{"current_steps": 40, "total_steps": 513, "loss": 0.6102, "lr": 1.125e-05, "epoch": 0.23525087300128653, "percentage": 7.8, "elapsed_time": "0:34:48", "remaining_time": "6:51:32"}
+{"current_steps": 50, "total_steps": 513, "loss": 0.5784, "lr": 1.4134615384615384e-05, "epoch": 0.29406359125160814, "percentage": 9.75, "elapsed_time": "0:43:26", "remaining_time": "6:42:12"}
+{"current_steps": 60, "total_steps": 513, "loss": 0.5641, "lr": 1.4991468156423456e-05, "epoch": 0.3528763095019298, "percentage": 11.7, "elapsed_time": "0:51:59", "remaining_time": "6:32:33"}
+{"current_steps": 70, "total_steps": 513, "loss": 0.5469, "lr": 1.494972625749433e-05, "epoch": 0.4116890277522514, "percentage": 13.65, "elapsed_time": "1:00:35", "remaining_time": "6:23:28"}
+{"current_steps": 80, "total_steps": 513, "loss": 0.5424, "lr": 1.4873400764197756e-05, "epoch": 0.47050174600257305, "percentage": 15.59, "elapsed_time": "1:09:12", "remaining_time": "6:14:36"}
+{"current_steps": 90, "total_steps": 513, "loss": 0.5293, "lr": 1.4762845999606666e-05, "epoch": 0.5293144642528946, "percentage": 17.54, "elapsed_time": "1:21:01", "remaining_time": "6:20:49"}
+{"current_steps": 100, "total_steps": 513, "loss": 0.5266, "lr": 1.4618575188100301e-05, "epoch": 0.5881271825032163, "percentage": 19.49, "elapsed_time": "1:36:07", "remaining_time": "6:37:00"}
+{"current_steps": 110, "total_steps": 513, "loss": 0.522, "lr": 1.4441258072841264e-05, "epoch": 0.6469399007535379, "percentage": 21.44, "elapsed_time": "1:51:33", "remaining_time": "6:48:42"}
+{"current_steps": 120, "total_steps": 513, "loss": 0.5222, "lr": 1.4231717806651086e-05, "epoch": 0.7057526190038595, "percentage": 23.39, "elapsed_time": "2:03:27", "remaining_time": "6:44:18"}
+{"current_steps": 130, "total_steps": 513, "loss": 0.5106, "lr": 1.3990927130717711e-05, "epoch": 0.7645653372541812, "percentage": 25.34, "elapsed_time": "2:14:59", "remaining_time": "6:37:41"}
+{"current_steps": 140, "total_steps": 513, "loss": 0.5114, "lr": 1.3720003858874311e-05, "epoch": 0.8233780555045028, "percentage": 27.29, "elapsed_time": "2:27:07", "remaining_time": "6:31:58"}
+{"current_steps": 150, "total_steps": 513, "loss": 0.5099, "lr": 1.3420205688412603e-05, "epoch": 0.8821907737548245, "percentage": 29.24, "elapsed_time": "2:38:51", "remaining_time": "6:24:27"}
+{"current_steps": 160, "total_steps": 513, "loss": 0.5086, "lr": 1.3092924361520291e-05, "epoch": 0.9410034920051461, "percentage": 31.19, "elapsed_time": "2:51:13", "remaining_time": "6:17:45"}
+{"current_steps": 170, "total_steps": 513, "loss": 0.5061, "lr": 1.2739679204446694e-05, "epoch": 0.9998162102554677, "percentage": 33.14, "elapsed_time": "3:03:05", "remaining_time": "6:09:25"}
+{"current_steps": 180, "total_steps": 513, "loss": 0.4746, "lr": 1.236211007438955e-05, "epoch": 1.0529314464252895, "percentage": 35.09, "elapsed_time": "3:13:45", "remaining_time": "5:58:27"}
+{"current_steps": 190, "total_steps": 513, "loss": 0.478, "lr": 1.1961969746845325e-05, "epoch": 1.1117441646756112, "percentage": 37.04, "elapsed_time": "3:25:56", "remaining_time": "5:50:06"}
+{"current_steps": 200, "total_steps": 513, "loss": 0.4755, "lr": 1.1541115778763038e-05, "epoch": 1.1705568829259327, "percentage": 38.99, "elapsed_time": "3:37:41", "remaining_time": "5:40:41"}
+{"current_steps": 210, "total_steps": 513, "loss": 0.4765, "lr": 1.1101501885274894e-05, "epoch": 1.2293696011762543, "percentage": 40.94, "elapsed_time": "3:50:20", "remaining_time": "5:32:20"}
+{"current_steps": 220, "total_steps": 513, "loss": 0.4706, "lr": 1.0645168870035313e-05, "epoch": 1.288182319426576, "percentage": 42.88, "elapsed_time": "4:02:56", "remaining_time": "5:23:33"}
+{"current_steps": 230, "total_steps": 513, "loss": 0.4681, "lr": 1.0174235151272025e-05, "epoch": 1.3469950376768978, "percentage": 44.83, "elapsed_time": "4:15:23", "remaining_time": "5:14:14"}
+{"current_steps": 240, "total_steps": 513, "loss": 0.4715, "lr": 9.690886927529886e-06, "epoch": 1.4058077559272193, "percentage": 46.78, "elapsed_time": "4:28:11", "remaining_time": "5:05:03"}
+{"current_steps": 250, "total_steps": 513, "loss": 0.4711, "lr": 9.197368028760536e-06, "epoch": 1.4646204741775408, "percentage": 48.73, "elapsed_time": "4:40:41", "remaining_time": "4:55:16"}
+{"current_steps": 260, "total_steps": 513, "loss": 0.4685, "lr": 8.695969499871911e-06, "epoch": 1.5234331924278626, "percentage": 50.68, "elapsed_time": "4:53:35", "remaining_time": "4:45:41"}
+{"current_steps": 270, "total_steps": 513, "loss": 0.4688, "lr": 8.18901896509343e-06, "epoch": 1.5822459106781843, "percentage": 52.63, "elapsed_time": "5:07:00", "remaining_time": "4:36:18"}
+{"current_steps": 280, "total_steps": 513, "loss": 0.4722, "lr": 7.678869822530362e-06, "epoch": 1.6410586289285058, "percentage": 54.58, "elapsed_time": "5:18:58", "remaining_time": "4:25:25"}
+{"current_steps": 290, "total_steps": 513, "loss": 0.4649, "lr": 7.167890319069035e-06, "epoch": 1.6998713471788274, "percentage": 56.53, "elapsed_time": "5:30:51", "remaining_time": "4:14:25"}
+{"current_steps": 300, "total_steps": 513, "loss": 0.4692, "lr": 6.658452556350092e-06, "epoch": 1.758684065429149, "percentage": 58.48, "elapsed_time": "5:42:50", "remaining_time": "4:03:25"}
+{"current_steps": 310, "total_steps": 513, "loss": 0.4653, "lr": 6.152921478846986e-06, "epoch": 1.8174967836794707, "percentage": 60.43, "elapsed_time": "5:54:40", "remaining_time": "3:52:15"}
+{"current_steps": 320, "total_steps": 513, "loss": 0.4664, "lr": 5.65364389516988e-06, "epoch": 1.8763095019297924, "percentage": 62.38, "elapsed_time": "6:06:49", "remaining_time": "3:41:14"}
+{"current_steps": 330, "total_steps": 513, "loss": 0.4621, "lr": 5.162937583561072e-06, "epoch": 1.935122220180114, "percentage": 64.33, "elapsed_time": "6:18:35", "remaining_time": "3:29:56"}
+{"current_steps": 340, "total_steps": 513, "loss": 0.4675, "lr": 4.683080532156986e-06, "epoch": 1.9939349384304355, "percentage": 66.28, "elapsed_time": "6:30:39", "remaining_time": "3:18:46"}
+{"current_steps": 350, "total_steps": 513, "loss": 0.44, "lr": 4.216300363966383e-06, "epoch": 2.0470501746002574, "percentage": 68.23, "elapsed_time": "6:41:31", "remaining_time": "3:06:59"}
+{"current_steps": 360, "total_steps": 513, "loss": 0.4447, "lr": 3.7647639956567304e-06, "epoch": 2.105862892850579, "percentage": 70.18, "elapsed_time": "6:56:27", "remaining_time": "2:56:59"}
+{"current_steps": 370, "total_steps": 513, "loss": 0.4391, "lr": 3.3305675781554655e-06, "epoch": 2.1646756111009005, "percentage": 72.12, "elapsed_time": "7:12:51", "remaining_time": "2:47:17"}
+{"current_steps": 380, "total_steps": 513, "loss": 0.4419, "lr": 2.915726765764453e-06, "epoch": 2.2234883293512224, "percentage": 74.07, "elapsed_time": "7:29:12", "remaining_time": "2:37:13"}
+{"current_steps": 390, "total_steps": 513, "loss": 0.4377, "lr": 2.522167358961046e-06, "epoch": 2.282301047601544, "percentage": 76.02, "elapsed_time": "7:45:42", "remaining_time": "2:26:52"}
+{"current_steps": 400, "total_steps": 513, "loss": 0.4387, "lr": 2.151716364324264e-06, "epoch": 2.3411137658518655, "percentage": 77.97, "elapsed_time": "8:02:07", "remaining_time": "2:16:11"}
+{"current_steps": 410, "total_steps": 513, "loss": 0.4426, "lr": 1.806093513088348e-06, "epoch": 2.399926484102187, "percentage": 79.92, "elapsed_time": "8:18:53", "remaining_time": "2:05:19"}
+{"current_steps": 420, "total_steps": 513, "loss": 0.442, "lr": 1.486903277696733e-06, "epoch": 2.4587392023525085, "percentage": 81.87, "elapsed_time": "8:35:37", "remaining_time": "1:54:10"}
+{"current_steps": 430, "total_steps": 513, "loss": 0.4449, "lr": 1.1956274234177322e-06, "epoch": 2.5175519206028305, "percentage": 83.82, "elapsed_time": "8:51:59", "remaining_time": "1:42:41"}
+{"current_steps": 440, "total_steps": 513, "loss": 0.4391, "lr": 9.336181295993204e-07, "epoch": 2.576364638853152, "percentage": 85.77, "elapsed_time": "9:08:26", "remaining_time": "1:30:59"}
+{"current_steps": 450, "total_steps": 513, "loss": 0.4374, "lr": 7.02091712495907e-07, "epoch": 2.6351773571034736, "percentage": 87.72, "elapsed_time": "9:24:54", "remaining_time": "1:19:05"}
+{"current_steps": 460, "total_steps": 513, "loss": 0.4403, "lr": 5.021229788074589e-07, "epoch": 2.6939900753537955, "percentage": 89.67, "elapsed_time": "9:41:06", "remaining_time": "1:06:57"}
+{"current_steps": 470, "total_steps": 513, "loss": 0.44, "lr": 3.3464023614327683e-07, "epoch": 2.7528027936041166, "percentage": 91.62, "elapsed_time": "9:57:23", "remaining_time": "0:54:39"}
+{"current_steps": 480, "total_steps": 513, "loss": 0.4358, "lr": 2.0042098357321209e-07, "epoch": 2.8116155118544386, "percentage": 93.57, "elapsed_time": "10:13:50", "remaining_time": "0:42:12"}
+{"current_steps": 490, "total_steps": 513, "loss": 0.4384, "lr": 1.0008830227189431e-07, "epoch": 2.87042823010476, "percentage": 95.52, "elapsed_time": "10:30:28", "remaining_time": "0:29:35"}
+{"current_steps": 500, "total_steps": 513, "loss": 0.4438, "lr": 3.410796301156205e-08, "epoch": 2.9292409483550816, "percentage": 97.47, "elapsed_time": "10:47:13", "remaining_time": "0:16:49"}
+{"current_steps": 510, "total_steps": 513, "loss": 0.4378, "lr": 2.7862639312792317e-09, "epoch": 2.9880536666054036, "percentage": 99.42, "elapsed_time": "11:03:34", "remaining_time": "0:03:54"}
+{"current_steps": 513, "total_steps": 513, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "11:07:21", "remaining_time": "0:00:00"}
diff --git a/global_step_0/trainer_state.json b/global_step_0/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..6b5a94f48182bc4b5c93c421caa831865f6e9b46
--- /dev/null
+++ b/global_step_0/trainer_state.json
@@ -0,0 +1,400 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 3.0,
+ "eval_steps": 500,
+ "global_step": 513,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.05881271825032163,
+ "grad_norm": 1.3511555194854736,
+ "learning_rate": 2.596153846153846e-06,
+ "loss": 0.8486,
+ "step": 10
+ },
+ {
+ "epoch": 0.11762543650064326,
+ "grad_norm": 0.7383383512496948,
+ "learning_rate": 5.480769230769231e-06,
+ "loss": 0.7412,
+ "step": 20
+ },
+ {
+ "epoch": 0.1764381547509649,
+ "grad_norm": 0.47219017148017883,
+ "learning_rate": 8.365384615384616e-06,
+ "loss": 0.6532,
+ "step": 30
+ },
+ {
+ "epoch": 0.23525087300128653,
+ "grad_norm": 0.30038249492645264,
+ "learning_rate": 1.125e-05,
+ "loss": 0.6102,
+ "step": 40
+ },
+ {
+ "epoch": 0.29406359125160814,
+ "grad_norm": 0.2751595377922058,
+ "learning_rate": 1.4134615384615384e-05,
+ "loss": 0.5784,
+ "step": 50
+ },
+ {
+ "epoch": 0.3528763095019298,
+ "grad_norm": 0.26936954259872437,
+ "learning_rate": 1.4991468156423456e-05,
+ "loss": 0.5641,
+ "step": 60
+ },
+ {
+ "epoch": 0.4116890277522514,
+ "grad_norm": 0.25376981496810913,
+ "learning_rate": 1.494972625749433e-05,
+ "loss": 0.5469,
+ "step": 70
+ },
+ {
+ "epoch": 0.47050174600257305,
+ "grad_norm": 0.2703434228897095,
+ "learning_rate": 1.4873400764197756e-05,
+ "loss": 0.5424,
+ "step": 80
+ },
+ {
+ "epoch": 0.5293144642528946,
+ "grad_norm": 0.3386951684951782,
+ "learning_rate": 1.4762845999606666e-05,
+ "loss": 0.5293,
+ "step": 90
+ },
+ {
+ "epoch": 0.5881271825032163,
+ "grad_norm": 0.30952027440071106,
+ "learning_rate": 1.4618575188100301e-05,
+ "loss": 0.5266,
+ "step": 100
+ },
+ {
+ "epoch": 0.6469399007535379,
+ "grad_norm": 0.2706937789916992,
+ "learning_rate": 1.4441258072841264e-05,
+ "loss": 0.522,
+ "step": 110
+ },
+ {
+ "epoch": 0.7057526190038595,
+ "grad_norm": 0.286222368478775,
+ "learning_rate": 1.4231717806651086e-05,
+ "loss": 0.5222,
+ "step": 120
+ },
+ {
+ "epoch": 0.7645653372541812,
+ "grad_norm": 0.2553636431694031,
+ "learning_rate": 1.3990927130717711e-05,
+ "loss": 0.5106,
+ "step": 130
+ },
+ {
+ "epoch": 0.8233780555045028,
+ "grad_norm": 0.2975357472896576,
+ "learning_rate": 1.3720003858874311e-05,
+ "loss": 0.5114,
+ "step": 140
+ },
+ {
+ "epoch": 0.8821907737548245,
+ "grad_norm": 0.24958086013793945,
+ "learning_rate": 1.3420205688412603e-05,
+ "loss": 0.5099,
+ "step": 150
+ },
+ {
+ "epoch": 0.9410034920051461,
+ "grad_norm": 0.302441269159317,
+ "learning_rate": 1.3092924361520291e-05,
+ "loss": 0.5086,
+ "step": 160
+ },
+ {
+ "epoch": 0.9998162102554677,
+ "grad_norm": 0.24974007904529572,
+ "learning_rate": 1.2739679204446694e-05,
+ "loss": 0.5061,
+ "step": 170
+ },
+ {
+ "epoch": 1.0529314464252895,
+ "grad_norm": 0.35062289237976074,
+ "learning_rate": 1.236211007438955e-05,
+ "loss": 0.4746,
+ "step": 180
+ },
+ {
+ "epoch": 1.1117441646756112,
+ "grad_norm": 0.28535276651382446,
+ "learning_rate": 1.1961969746845325e-05,
+ "loss": 0.478,
+ "step": 190
+ },
+ {
+ "epoch": 1.1705568829259327,
+ "grad_norm": 0.2474713921546936,
+ "learning_rate": 1.1541115778763038e-05,
+ "loss": 0.4755,
+ "step": 200
+ },
+ {
+ "epoch": 1.2293696011762543,
+ "grad_norm": 0.23004528880119324,
+ "learning_rate": 1.1101501885274894e-05,
+ "loss": 0.4765,
+ "step": 210
+ },
+ {
+ "epoch": 1.288182319426576,
+ "grad_norm": 0.23046620190143585,
+ "learning_rate": 1.0645168870035313e-05,
+ "loss": 0.4706,
+ "step": 220
+ },
+ {
+ "epoch": 1.3469950376768978,
+ "grad_norm": 0.243893101811409,
+ "learning_rate": 1.0174235151272025e-05,
+ "loss": 0.4681,
+ "step": 230
+ },
+ {
+ "epoch": 1.4058077559272193,
+ "grad_norm": 0.2657492160797119,
+ "learning_rate": 9.690886927529886e-06,
+ "loss": 0.4715,
+ "step": 240
+ },
+ {
+ "epoch": 1.4646204741775408,
+ "grad_norm": 0.24003422260284424,
+ "learning_rate": 9.197368028760536e-06,
+ "loss": 0.4711,
+ "step": 250
+ },
+ {
+ "epoch": 1.5234331924278626,
+ "grad_norm": 0.238833948969841,
+ "learning_rate": 8.695969499871911e-06,
+ "loss": 0.4685,
+ "step": 260
+ },
+ {
+ "epoch": 1.5822459106781843,
+ "grad_norm": 0.237404927611351,
+ "learning_rate": 8.18901896509343e-06,
+ "loss": 0.4688,
+ "step": 270
+ },
+ {
+ "epoch": 1.6410586289285058,
+ "grad_norm": 0.22758300602436066,
+ "learning_rate": 7.678869822530362e-06,
+ "loss": 0.4722,
+ "step": 280
+ },
+ {
+ "epoch": 1.6998713471788274,
+ "grad_norm": 0.22680319845676422,
+ "learning_rate": 7.167890319069035e-06,
+ "loss": 0.4649,
+ "step": 290
+ },
+ {
+ "epoch": 1.758684065429149,
+ "grad_norm": 0.2401188611984253,
+ "learning_rate": 6.658452556350092e-06,
+ "loss": 0.4692,
+ "step": 300
+ },
+ {
+ "epoch": 1.8174967836794707,
+ "grad_norm": 0.2211555689573288,
+ "learning_rate": 6.152921478846986e-06,
+ "loss": 0.4653,
+ "step": 310
+ },
+ {
+ "epoch": 1.8763095019297924,
+ "grad_norm": 0.24088308215141296,
+ "learning_rate": 5.65364389516988e-06,
+ "loss": 0.4664,
+ "step": 320
+ },
+ {
+ "epoch": 1.935122220180114,
+ "grad_norm": 0.21008798480033875,
+ "learning_rate": 5.162937583561072e-06,
+ "loss": 0.4621,
+ "step": 330
+ },
+ {
+ "epoch": 1.9939349384304355,
+ "grad_norm": 0.2156449556350708,
+ "learning_rate": 4.683080532156986e-06,
+ "loss": 0.4675,
+ "step": 340
+ },
+ {
+ "epoch": 2.0470501746002574,
+ "grad_norm": 0.2731837034225464,
+ "learning_rate": 4.216300363966383e-06,
+ "loss": 0.44,
+ "step": 350
+ },
+ {
+ "epoch": 2.105862892850579,
+ "grad_norm": 0.2207324057817459,
+ "learning_rate": 3.7647639956567304e-06,
+ "loss": 0.4447,
+ "step": 360
+ },
+ {
+ "epoch": 2.1646756111009005,
+ "grad_norm": 0.21577142179012299,
+ "learning_rate": 3.3305675781554655e-06,
+ "loss": 0.4391,
+ "step": 370
+ },
+ {
+ "epoch": 2.2234883293512224,
+ "grad_norm": 0.22381627559661865,
+ "learning_rate": 2.915726765764453e-06,
+ "loss": 0.4419,
+ "step": 380
+ },
+ {
+ "epoch": 2.282301047601544,
+ "grad_norm": 0.2167045623064041,
+ "learning_rate": 2.522167358961046e-06,
+ "loss": 0.4377,
+ "step": 390
+ },
+ {
+ "epoch": 2.3411137658518655,
+ "grad_norm": 0.2239835262298584,
+ "learning_rate": 2.151716364324264e-06,
+ "loss": 0.4387,
+ "step": 400
+ },
+ {
+ "epoch": 2.399926484102187,
+ "grad_norm": 0.2177765816450119,
+ "learning_rate": 1.806093513088348e-06,
+ "loss": 0.4426,
+ "step": 410
+ },
+ {
+ "epoch": 2.4587392023525085,
+ "grad_norm": 0.21108600497245789,
+ "learning_rate": 1.486903277696733e-06,
+ "loss": 0.442,
+ "step": 420
+ },
+ {
+ "epoch": 2.5175519206028305,
+ "grad_norm": 0.20833276212215424,
+ "learning_rate": 1.1956274234177322e-06,
+ "loss": 0.4449,
+ "step": 430
+ },
+ {
+ "epoch": 2.576364638853152,
+ "grad_norm": 0.20782434940338135,
+ "learning_rate": 9.336181295993204e-07,
+ "loss": 0.4391,
+ "step": 440
+ },
+ {
+ "epoch": 2.6351773571034736,
+ "grad_norm": 0.20101866126060486,
+ "learning_rate": 7.02091712495907e-07,
+ "loss": 0.4374,
+ "step": 450
+ },
+ {
+ "epoch": 2.6939900753537955,
+ "grad_norm": 0.1978382021188736,
+ "learning_rate": 5.021229788074589e-07,
+ "loss": 0.4403,
+ "step": 460
+ },
+ {
+ "epoch": 2.7528027936041166,
+ "grad_norm": 0.20072239637374878,
+ "learning_rate": 3.3464023614327683e-07,
+ "loss": 0.44,
+ "step": 470
+ },
+ {
+ "epoch": 2.8116155118544386,
+ "grad_norm": 0.2036609798669815,
+ "learning_rate": 2.0042098357321209e-07,
+ "loss": 0.4358,
+ "step": 480
+ },
+ {
+ "epoch": 2.87042823010476,
+ "grad_norm": 0.20166757702827454,
+ "learning_rate": 1.0008830227189431e-07,
+ "loss": 0.4384,
+ "step": 490
+ },
+ {
+ "epoch": 2.9292409483550816,
+ "grad_norm": 0.20334972441196442,
+ "learning_rate": 3.410796301156205e-08,
+ "loss": 0.4438,
+ "step": 500
+ },
+ {
+ "epoch": 2.9880536666054036,
+ "grad_norm": 0.20352092385292053,
+ "learning_rate": 2.7862639312792317e-09,
+ "loss": 0.4378,
+ "step": 510
+ },
+ {
+ "epoch": 3.0,
+ "step": 513,
+ "total_flos": 1.1980638081930756e+19,
+ "train_loss": 0.49363853406255476,
+ "train_runtime": 40041.2675,
+ "train_samples_per_second": 3.261,
+ "train_steps_per_second": 0.013
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 513,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 3,
+ "save_steps": 200,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": true
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 1.1980638081930756e+19,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/global_step_0/training_args.bin b/global_step_0/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a0751554beb923f03f8962dcba69f3ba8297e954
--- /dev/null
+++ b/global_step_0/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:149830d4281846c68c4643d93a8b6007f2239eca267a1d2803fe599ad5194580
+size 7608
diff --git a/global_step_0/training_loss.png b/global_step_0/training_loss.png
new file mode 100644
index 0000000000000000000000000000000000000000..aa2945bbc426b07aedad3fde8d37b9cd365fb5fd
Binary files /dev/null and b/global_step_0/training_loss.png differ