Upload folder using huggingface_hub

Browse files

Files changed (13) hide show

25_128_e3_3e-5/README.md +63 -0
25_128_e3_3e-5/adapter_config.json +39 -0
25_128_e3_3e-5/adapter_model.safetensors +3 -0
25_128_e3_3e-5/all_results.json +9 -0
25_128_e3_3e-5/config.json +32 -0
25_128_e3_3e-5/merges.txt +0 -0
25_128_e3_3e-5/special_tokens_map.json +45 -0
25_128_e3_3e-5/tokenizer.json +0 -0
25_128_e3_3e-5/tokenizer_config.json +188 -0
25_128_e3_3e-5/train_results.json +9 -0
25_128_e3_3e-5/trainer_state.json +1226 -0
25_128_e3_3e-5/training_args.bin +3 -0
25_128_e3_3e-5/vocab.json +0 -0

25_128_e3_3e-5/README.md ADDED Viewed

	@@ -0,0 +1,63 @@

+---
+library_name: peft
+license: apache-2.0
+base_model: ibm-granite/granite-3.3-8b-base
+tags:
+- alignment-handbook
+- generated_from_trainer
+datasets:
+- data/knowledge_lora_training_data_2000
+model-index:
+- name: 25_128_e3_3e-5
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# 25_128_e3_3e-5
+This model is a fine-tuned version of [ibm-granite/granite-3.3-8b-base](https://huggingface.co/ibm-granite/granite-3.3-8b-base) on the data/knowledge_lora_training_data_2000 dataset.
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 3e-05
+- train_batch_size: 2
+- eval_batch_size: 8
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 8
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 32
+- total_eval_batch_size: 64
+- optimizer: Use adamw_torch with betas=(0.9,0.95) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_ratio: 0.05
+- num_epochs: 3.0
+### Training results
+### Framework versions
+- PEFT 0.15.2
+- Transformers 4.52.4
+- Pytorch 2.7.0+cu126
+- Datasets 3.6.0
+- Tokenizers 0.21.2

25_128_e3_3e-5/adapter_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "ibm-granite/granite-3.3-8b-base",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 256,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 128,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "up_proj",
+    "gate_proj",
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj",
+    "down_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}

25_128_e3_3e-5/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:646c9ad9b4d4ade658ce1b8b0436581e9d47e65130b1f23a7356577c712418f9
+size 791751704

25_128_e3_3e-5/all_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 3.0,
+    "total_flos": 1.2069706775889183e+18,
+    "train_loss": 0.5579275732626588,
+    "train_runtime": 555.4859,
+    "train_samples": 8994,
+    "train_samples_per_second": 48.574,
+    "train_steps_per_second": 1.523
+}

25_128_e3_3e-5/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "architectures": [
+    "GraniteForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0,
+  "attention_multiplier": 0.0078125,
+  "bos_token_id": 0,
+  "embedding_multiplier": 12.0,
+  "eos_token_id": 0,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12800,
+  "logits_scaling": 16.0,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "granite",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 40,
+  "num_key_value_heads": 8,
+  "pad_token_id": 0,
+  "residual_multiplier": 0.22,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 10000000.0,
+  "tie_word_embeddings": true,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.52.4",
+  "use_cache": true,
+  "vocab_size": 49152
+}

25_128_e3_3e-5/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

25_128_e3_3e-5/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,45 @@

+{
+  "additional_special_tokens": [
+    "<|endoftext|>",
+    "<fim_prefix>",
+    "<fim_middle>",
+    "<fim_suffix>",
+    "<fim_pad>",
+    "<filename>",
+    "<gh_stars>",
+    "<issue_start>",
+    "<issue_comment>",
+    "<issue_closed>",
+    "<jupyter_start>",
+    "<jupyter_text>",
+    "<jupyter_code>",
+    "<jupyter_output>",
+    "<empty_output>",
+    "<commit_before>",
+    "<commit_msg>",
+    "<commit_after>",
+    "<reponame>"
+  ],
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<reponame>",
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

25_128_e3_3e-5/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

25_128_e3_3e-5/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,188 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<fim_prefix>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<fim_middle>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<fim_suffix>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<fim_pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "<filename>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": "<gh_stars>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": "<issue_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "8": {
+      "content": "<issue_comment>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "9": {
+      "content": "<issue_closed>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "10": {
+      "content": "<jupyter_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "11": {
+      "content": "<jupyter_text>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "12": {
+      "content": "<jupyter_code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "13": {
+      "content": "<jupyter_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "14": {
+      "content": "<empty_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "15": {
+      "content": "<commit_before>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "16": {
+      "content": "<commit_msg>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "17": {
+      "content": "<commit_after>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "18": {
+      "content": "<reponame>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|endoftext|>",
+    "<fim_prefix>",
+    "<fim_middle>",
+    "<fim_suffix>",
+    "<fim_pad>",
+    "<filename>",
+    "<gh_stars>",
+    "<issue_start>",
+    "<issue_comment>",
+    "<issue_closed>",
+    "<jupyter_start>",
+    "<jupyter_text>",
+    "<jupyter_code>",
+    "<jupyter_output>",
+    "<empty_output>",
+    "<commit_before>",
+    "<commit_msg>",
+    "<commit_after>",
+    "<reponame>"
+  ],
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "extra_special_tokens": {},
+  "model_max_length": 8192,
+  "pad_token": "<reponame>",
+  "padding_side": "left",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>",
+  "vocab_size": 49152
+}

25_128_e3_3e-5/train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 3.0,
+    "total_flos": 1.2069706775889183e+18,
+    "train_loss": 0.5579275732626588,
+    "train_runtime": 555.4859,
+    "train_samples": 8994,
+    "train_samples_per_second": 48.574,
+    "train_steps_per_second": 1.523
+}

25_128_e3_3e-5/trainer_state.json ADDED Viewed

	@@ -0,0 +1,1226 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 846,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.017761989342806393,
+      "grad_norm": 1.3231275081634521,
+      "learning_rate": 2.7906976744186046e-06,
+      "loss": 1.315,
+      "step": 5
+    },
+    {
+      "epoch": 0.035523978685612786,
+      "grad_norm": 0.701774537563324,
+      "learning_rate": 6.279069767441861e-06,
+      "loss": 1.3216,
+      "step": 10
+    },
+    {
+      "epoch": 0.05328596802841918,
+      "grad_norm": 0.6771687269210815,
+      "learning_rate": 9.767441860465117e-06,
+      "loss": 1.2848,
+      "step": 15
+    },
+    {
+      "epoch": 0.07104795737122557,
+      "grad_norm": 0.5326308012008667,
+      "learning_rate": 1.3255813953488373e-05,
+      "loss": 1.3087,
+      "step": 20
+    },
+    {
+      "epoch": 0.08880994671403197,
+      "grad_norm": 0.5053356885910034,
+      "learning_rate": 1.674418604651163e-05,
+      "loss": 1.2309,
+      "step": 25
+    },
+    {
+      "epoch": 0.10657193605683836,
+      "grad_norm": 0.5353951454162598,
+      "learning_rate": 2.0232558139534883e-05,
+      "loss": 1.2124,
+      "step": 30
+    },
+    {
+      "epoch": 0.12433392539964476,
+      "grad_norm": 0.4567808210849762,
+      "learning_rate": 2.3720930232558138e-05,
+      "loss": 1.2386,
+      "step": 35
+    },
+    {
+      "epoch": 0.14209591474245115,
+      "grad_norm": 0.5020460486412048,
+      "learning_rate": 2.7209302325581395e-05,
+      "loss": 1.1801,
+      "step": 40
+    },
+    {
+      "epoch": 0.15985790408525755,
+      "grad_norm": 0.44761934876441956,
+      "learning_rate": 2.999988520331045e-05,
+      "loss": 1.1471,
+      "step": 45
+    },
+    {
+      "epoch": 0.17761989342806395,
+      "grad_norm": 0.4367366135120392,
+      "learning_rate": 2.9995867503669133e-05,
+      "loss": 1.1527,
+      "step": 50
+    },
+    {
+      "epoch": 0.19538188277087035,
+      "grad_norm": 0.5834628343582153,
+      "learning_rate": 2.998611172653345e-05,
+      "loss": 1.1714,
+      "step": 55
+    },
+    {
+      "epoch": 0.21314387211367672,
+      "grad_norm": 0.48230427503585815,
+      "learning_rate": 2.997062160489219e-05,
+      "loss": 1.1064,
+      "step": 60
+    },
+    {
+      "epoch": 0.23090586145648312,
+      "grad_norm": 0.46752408146858215,
+      "learning_rate": 2.9949403065946194e-05,
+      "loss": 1.1452,
+      "step": 65
+    },
+    {
+      "epoch": 0.24866785079928952,
+      "grad_norm": 0.6091781258583069,
+      "learning_rate": 2.992246422884032e-05,
+      "loss": 1.1293,
+      "step": 70
+    },
+    {
+      "epoch": 0.2664298401420959,
+      "grad_norm": 0.48189669847488403,
+      "learning_rate": 2.9889815401556762e-05,
+      "loss": 1.0857,
+      "step": 75
+    },
+    {
+      "epoch": 0.2841918294849023,
+      "grad_norm": 0.5191056132316589,
+      "learning_rate": 2.985146907697069e-05,
+      "loss": 1.0908,
+      "step": 80
+    },
+    {
+      "epoch": 0.3019538188277087,
+      "grad_norm": 0.5504825115203857,
+      "learning_rate": 2.9807439928069994e-05,
+      "loss": 1.0999,
+      "step": 85
+    },
+    {
+      "epoch": 0.3197158081705151,
+      "grad_norm": 0.5600771903991699,
+      "learning_rate": 2.975774480234071e-05,
+      "loss": 1.0105,
+      "step": 90
+    },
+    {
+      "epoch": 0.33747779751332146,
+      "grad_norm": 0.5626690983772278,
+      "learning_rate": 2.970240271532045e-05,
+      "loss": 1.0566,
+      "step": 95
+    },
+    {
+      "epoch": 0.3552397868561279,
+      "grad_norm": 0.5404466390609741,
+      "learning_rate": 2.964143484332221e-05,
+      "loss": 1.0193,
+      "step": 100
+    },
+    {
+      "epoch": 0.37300177619893427,
+      "grad_norm": 0.5624865889549255,
+      "learning_rate": 2.957486451533141e-05,
+      "loss": 1.0111,
+      "step": 105
+    },
+    {
+      "epoch": 0.3907637655417407,
+      "grad_norm": 0.5766105651855469,
+      "learning_rate": 2.9502717204079163e-05,
+      "loss": 0.9937,
+      "step": 110
+    },
+    {
+      "epoch": 0.40852575488454707,
+      "grad_norm": 0.5540288090705872,
+      "learning_rate": 2.942502051629532e-05,
+      "loss": 0.9447,
+      "step": 115
+    },
+    {
+      "epoch": 0.42628774422735344,
+      "grad_norm": 0.6186960935592651,
+      "learning_rate": 2.9341804182144896e-05,
+      "loss": 0.9791,
+      "step": 120
+    },
+    {
+      "epoch": 0.44404973357015987,
+      "grad_norm": 0.5866276025772095,
+      "learning_rate": 2.9253100043852037e-05,
+      "loss": 0.9629,
+      "step": 125
+    },
+    {
+      "epoch": 0.46181172291296624,
+      "grad_norm": 0.6540477275848389,
+      "learning_rate": 2.915894204351575e-05,
+      "loss": 0.9535,
+      "step": 130
+    },
+    {
+      "epoch": 0.47957371225577267,
+      "grad_norm": 0.6920923590660095,
+      "learning_rate": 2.9059366210122185e-05,
+      "loss": 0.9492,
+      "step": 135
+    },
+    {
+      "epoch": 0.49733570159857904,
+      "grad_norm": 0.7717536687850952,
+      "learning_rate": 2.895441064575837e-05,
+      "loss": 0.8635,
+      "step": 140
+    },
+    {
+      "epoch": 0.5150976909413855,
+      "grad_norm": 0.8334298133850098,
+      "learning_rate": 2.884411551103266e-05,
+      "loss": 0.8679,
+      "step": 145
+    },
+    {
+      "epoch": 0.5328596802841918,
+      "grad_norm": 0.6875845789909363,
+      "learning_rate": 2.872852300970753e-05,
+      "loss": 0.9134,
+      "step": 150
+    },
+    {
+      "epoch": 0.5506216696269982,
+      "grad_norm": 0.7502499222755432,
+      "learning_rate": 2.8607677372550573e-05,
+      "loss": 0.8622,
+      "step": 155
+    },
+    {
+      "epoch": 0.5683836589698046,
+      "grad_norm": 0.8312597870826721,
+      "learning_rate": 2.8481624840409862e-05,
+      "loss": 0.8886,
+      "step": 160
+    },
+    {
+      "epoch": 0.5861456483126111,
+      "grad_norm": 0.8703055381774902,
+      "learning_rate": 2.835041364652015e-05,
+      "loss": 0.921,
+      "step": 165
+    },
+    {
+      "epoch": 0.6039076376554174,
+      "grad_norm": 1.0531996488571167,
+      "learning_rate": 2.821409399804675e-05,
+      "loss": 0.8508,
+      "step": 170
+    },
+    {
+      "epoch": 0.6216696269982238,
+      "grad_norm": 0.7947110533714294,
+      "learning_rate": 2.807271805687404e-05,
+      "loss": 0.849,
+      "step": 175
+    },
+    {
+      "epoch": 0.6394316163410302,
+      "grad_norm": 0.8096781969070435,
+      "learning_rate": 2.792633991964606e-05,
+      "loss": 0.8264,
+      "step": 180
+    },
+    {
+      "epoch": 0.6571936056838366,
+      "grad_norm": 0.8436487913131714,
+      "learning_rate": 2.7775015597066726e-05,
+      "loss": 0.811,
+      "step": 185
+    },
+    {
+      "epoch": 0.6749555950266429,
+      "grad_norm": 0.9463394284248352,
+      "learning_rate": 2.7618802992467718e-05,
+      "loss": 0.7928,
+      "step": 190
+    },
+    {
+      "epoch": 0.6927175843694494,
+      "grad_norm": 0.8976422548294067,
+      "learning_rate": 2.7457761879652084e-05,
+      "loss": 0.7814,
+      "step": 195
+    },
+    {
+      "epoch": 0.7104795737122558,
+      "grad_norm": 0.9050946831703186,
+      "learning_rate": 2.7291953880022184e-05,
+      "loss": 0.8017,
+      "step": 200
+    },
+    {
+      "epoch": 0.7282415630550622,
+      "grad_norm": 0.8991579413414001,
+      "learning_rate": 2.7121442439000614e-05,
+      "loss": 0.7883,
+      "step": 205
+    },
+    {
+      "epoch": 0.7460035523978685,
+      "grad_norm": 0.9045212864875793,
+      "learning_rate": 2.6946292801753227e-05,
+      "loss": 0.709,
+      "step": 210
+    },
+    {
+      "epoch": 0.7637655417406749,
+      "grad_norm": 0.9687134027481079,
+      "learning_rate": 2.6766571988223436e-05,
+      "loss": 0.7271,
+      "step": 215
+    },
+    {
+      "epoch": 0.7815275310834814,
+      "grad_norm": 0.8315659165382385,
+      "learning_rate": 2.6582348767487466e-05,
+      "loss": 0.7252,
+      "step": 220
+    },
+    {
+      "epoch": 0.7992895204262878,
+      "grad_norm": 0.9432047009468079,
+      "learning_rate": 2.639369363144027e-05,
+      "loss": 0.732,
+      "step": 225
+    },
+    {
+      "epoch": 0.8170515097690941,
+      "grad_norm": 0.9895256161689758,
+      "learning_rate": 2.6200678767822256e-05,
+      "loss": 0.7338,
+      "step": 230
+    },
+    {
+      "epoch": 0.8348134991119005,
+      "grad_norm": 1.087295651435852,
+      "learning_rate": 2.600337803259704e-05,
+      "loss": 0.6926,
+      "step": 235
+    },
+    {
+      "epoch": 0.8525754884547069,
+      "grad_norm": 0.8864995241165161,
+      "learning_rate": 2.5801866921690965e-05,
+      "loss": 0.6809,
+      "step": 240
+    },
+    {
+      "epoch": 0.8703374777975134,
+      "grad_norm": 0.9925051927566528,
+      "learning_rate": 2.559622254210502e-05,
+      "loss": 0.7017,
+      "step": 245
+    },
+    {
+      "epoch": 0.8880994671403197,
+      "grad_norm": 1.123909592628479,
+      "learning_rate": 2.5386523582410305e-05,
+      "loss": 0.669,
+      "step": 250
+    },
+    {
+      "epoch": 0.9058614564831261,
+      "grad_norm": 1.0412914752960205,
+      "learning_rate": 2.5172850282638344e-05,
+      "loss": 0.6782,
+      "step": 255
+    },
+    {
+      "epoch": 0.9236234458259325,
+      "grad_norm": 0.9874035716056824,
+      "learning_rate": 2.495528440357772e-05,
+      "loss": 0.664,
+      "step": 260
+    },
+    {
+      "epoch": 0.9413854351687388,
+      "grad_norm": 1.0234025716781616,
+      "learning_rate": 2.4733909195488806e-05,
+      "loss": 0.6844,
+      "step": 265
+    },
+    {
+      "epoch": 0.9591474245115453,
+      "grad_norm": 0.9540771245956421,
+      "learning_rate": 2.4508809366248555e-05,
+      "loss": 0.6767,
+      "step": 270
+    },
+    {
+      "epoch": 0.9769094138543517,
+      "grad_norm": 0.9452133774757385,
+      "learning_rate": 2.428007104893755e-05,
+      "loss": 0.6391,
+      "step": 275
+    },
+    {
+      "epoch": 0.9946714031971581,
+      "grad_norm": 0.9797477126121521,
+      "learning_rate": 2.4047781768881715e-05,
+      "loss": 0.6235,
+      "step": 280
+    },
+    {
+      "epoch": 1.0106571936056838,
+      "grad_norm": 1.0686089992523193,
+      "learning_rate": 2.3812030410161258e-05,
+      "loss": 0.5832,
+      "step": 285
+    },
+    {
+      "epoch": 1.0284191829484903,
+      "grad_norm": 0.9488253593444824,
+      "learning_rate": 2.3572907181599742e-05,
+      "loss": 0.5822,
+      "step": 290
+    },
+    {
+      "epoch": 1.0461811722912966,
+      "grad_norm": 1.0656824111938477,
+      "learning_rate": 2.3330503582246204e-05,
+      "loss": 0.5604,
+      "step": 295
+    },
+    {
+      "epoch": 1.063943161634103,
+      "grad_norm": 1.0964164733886719,
+      "learning_rate": 2.308491236636363e-05,
+      "loss": 0.5829,
+      "step": 300
+    },
+    {
+      "epoch": 1.0817051509769093,
+      "grad_norm": 1.0837876796722412,
+      "learning_rate": 2.2836227507937067e-05,
+      "loss": 0.5631,
+      "step": 305
+    },
+    {
+      "epoch": 1.0994671403197158,
+      "grad_norm": 1.1674449443817139,
+      "learning_rate": 2.2584544164715078e-05,
+      "loss": 0.5655,
+      "step": 310
+    },
+    {
+      "epoch": 1.1172291296625223,
+      "grad_norm": 1.0909093618392944,
+      "learning_rate": 2.2329958641798192e-05,
+      "loss": 0.5455,
+      "step": 315
+    },
+    {
+      "epoch": 1.1349911190053286,
+      "grad_norm": 1.0680902004241943,
+      "learning_rate": 2.207256835478834e-05,
+      "loss": 0.5415,
+      "step": 320
+    },
+    {
+      "epoch": 1.152753108348135,
+      "grad_norm": 1.130262851715088,
+      "learning_rate": 2.1812471792513386e-05,
+      "loss": 0.5076,
+      "step": 325
+    },
+    {
+      "epoch": 1.1705150976909413,
+      "grad_norm": 1.3649592399597168,
+      "learning_rate": 2.154976847934096e-05,
+      "loss": 0.5141,
+      "step": 330
+    },
+    {
+      "epoch": 1.1882770870337478,
+      "grad_norm": 1.0753296613693237,
+      "learning_rate": 2.128455893709606e-05,
+      "loss": 0.4977,
+      "step": 335
+    },
+    {
+      "epoch": 1.206039076376554,
+      "grad_norm": 1.146614670753479,
+      "learning_rate": 2.1016944646597012e-05,
+      "loss": 0.5256,
+      "step": 340
+    },
+    {
+      "epoch": 1.2238010657193605,
+      "grad_norm": 1.0820695161819458,
+      "learning_rate": 2.074702800882442e-05,
+      "loss": 0.52,
+      "step": 345
+    },
+    {
+      "epoch": 1.241563055062167,
+      "grad_norm": 1.0902920961380005,
+      "learning_rate": 2.047491230573808e-05,
+      "loss": 0.5706,
+      "step": 350
+    },
+    {
+      "epoch": 1.2593250444049733,
+      "grad_norm": 1.0888701677322388,
+      "learning_rate": 2.0200701660756745e-05,
+      "loss": 0.5051,
+      "step": 355
+    },
+    {
+      "epoch": 1.2770870337477798,
+      "grad_norm": 1.1430457830429077,
+      "learning_rate": 1.992450099891596e-05,
+      "loss": 0.5092,
+      "step": 360
+    },
+    {
+      "epoch": 1.294849023090586,
+      "grad_norm": 1.441771149635315,
+      "learning_rate": 1.9646416006719095e-05,
+      "loss": 0.5154,
+      "step": 365
+    },
+    {
+      "epoch": 1.3126110124333925,
+      "grad_norm": 1.199514389038086,
+      "learning_rate": 1.9366553091697083e-05,
+      "loss": 0.5809,
+      "step": 370
+    },
+    {
+      "epoch": 1.330373001776199,
+      "grad_norm": 1.2213622331619263,
+      "learning_rate": 1.9085019341692223e-05,
+      "loss": 0.5116,
+      "step": 375
+    },
+    {
+      "epoch": 1.3481349911190053,
+      "grad_norm": 1.081356406211853,
+      "learning_rate": 1.8801922483881632e-05,
+      "loss": 0.4906,
+      "step": 380
+    },
+    {
+      "epoch": 1.3658969804618117,
+      "grad_norm": 1.151087760925293,
+      "learning_rate": 1.8517370843556146e-05,
+      "loss": 0.4389,
+      "step": 385
+    },
+    {
+      "epoch": 1.383658969804618,
+      "grad_norm": 1.05121910572052,
+      "learning_rate": 1.823147330267027e-05,
+      "loss": 0.4825,
+      "step": 390
+    },
+    {
+      "epoch": 1.4014209591474245,
+      "grad_norm": 1.185630440711975,
+      "learning_rate": 1.7944339258179173e-05,
+      "loss": 0.4623,
+      "step": 395
+    },
+    {
+      "epoch": 1.419182948490231,
+      "grad_norm": 1.810645580291748,
+      "learning_rate": 1.765607858017861e-05,
+      "loss": 0.4714,
+      "step": 400
+    },
+    {
+      "epoch": 1.4369449378330372,
+      "grad_norm": 1.272497296333313,
+      "learning_rate": 1.7366801569863798e-05,
+      "loss": 0.4916,
+      "step": 405
+    },
+    {
+      "epoch": 1.4547069271758437,
+      "grad_norm": 1.321583867073059,
+      "learning_rate": 1.707661891732333e-05,
+      "loss": 0.432,
+      "step": 410
+    },
+    {
+      "epoch": 1.47246891651865,
+      "grad_norm": 1.1273342370986938,
+      "learning_rate": 1.6785641659184292e-05,
+      "loss": 0.4509,
+      "step": 415
+    },
+    {
+      "epoch": 1.4902309058614565,
+      "grad_norm": 1.0571781396865845,
+      "learning_rate": 1.6493981136124767e-05,
+      "loss": 0.4251,
+      "step": 420
+    },
+    {
+      "epoch": 1.507992895204263,
+      "grad_norm": 1.2970434427261353,
+      "learning_rate": 1.6201748950269993e-05,
+      "loss": 0.4495,
+      "step": 425
+    },
+    {
+      "epoch": 1.5257548845470694,
+      "grad_norm": 1.1398073434829712,
+      "learning_rate": 1.590905692248853e-05,
+      "loss": 0.4211,
+      "step": 430
+    },
+    {
+      "epoch": 1.5435168738898757,
+      "grad_norm": 1.2868608236312866,
+      "learning_rate": 1.561601704960462e-05,
+      "loss": 0.444,
+      "step": 435
+    },
+    {
+      "epoch": 1.561278863232682,
+      "grad_norm": 1.1401524543762207,
+      "learning_rate": 1.5322741461543334e-05,
+      "loss": 0.4599,
+      "step": 440
+    },
+    {
+      "epoch": 1.5790408525754884,
+      "grad_norm": 1.1858832836151123,
+      "learning_rate": 1.5029342378424729e-05,
+      "loss": 0.4511,
+      "step": 445
+    },
+    {
+      "epoch": 1.596802841918295,
+      "grad_norm": 3.851574420928955,
+      "learning_rate": 1.4735932067623478e-05,
+      "loss": 0.428,
+      "step": 450
+    },
+    {
+      "epoch": 1.6145648312611014,
+      "grad_norm": 1.306156039237976,
+      "learning_rate": 1.4442622800810476e-05,
+      "loss": 0.4302,
+      "step": 455
+    },
+    {
+      "epoch": 1.6323268206039077,
+      "grad_norm": 1.6123629808425903,
+      "learning_rate": 1.4149526810992737e-05,
+      "loss": 0.4882,
+      "step": 460
+    },
+    {
+      "epoch": 1.650088809946714,
+      "grad_norm": 1.4006987810134888,
+      "learning_rate": 1.3856756249568131e-05,
+      "loss": 0.449,
+      "step": 465
+    },
+    {
+      "epoch": 1.6678507992895204,
+      "grad_norm": 1.1376513242721558,
+      "learning_rate": 1.3564423143411341e-05,
+      "loss": 0.3954,
+      "step": 470
+    },
+    {
+      "epoch": 1.6856127886323269,
+      "grad_norm": 1.3232742547988892,
+      "learning_rate": 1.3272639352007442e-05,
+      "loss": 0.4156,
+      "step": 475
+    },
+    {
+      "epoch": 1.7033747779751334,
+      "grad_norm": 1.4243301153182983,
+      "learning_rate": 1.2981516524649575e-05,
+      "loss": 0.3988,
+      "step": 480
+    },
+    {
+      "epoch": 1.7211367673179396,
+      "grad_norm": 1.2779264450073242,
+      "learning_rate": 1.269116605771699e-05,
+      "loss": 0.4164,
+      "step": 485
+    },
+    {
+      "epoch": 1.738898756660746,
+      "grad_norm": 1.2109707593917847,
+      "learning_rate": 1.240169905204993e-05,
+      "loss": 0.457,
+      "step": 490
+    },
+    {
+      "epoch": 1.7566607460035524,
+      "grad_norm": 1.2897119522094727,
+      "learning_rate": 1.2113226270437551e-05,
+      "loss": 0.4535,
+      "step": 495
+    },
+    {
+      "epoch": 1.7744227353463589,
+      "grad_norm": 1.2221955060958862,
+      "learning_rate": 1.182585809523523e-05,
+      "loss": 0.3886,
+      "step": 500
+    },
+    {
+      "epoch": 1.7921847246891653,
+      "grad_norm": 1.1318753957748413,
+      "learning_rate": 1.1539704486127423e-05,
+      "loss": 0.3635,
+      "step": 505
+    },
+    {
+      "epoch": 1.8099467140319716,
+      "grad_norm": 1.1975198984146118,
+      "learning_rate": 1.1254874938052231e-05,
+      "loss": 0.4042,
+      "step": 510
+    },
+    {
+      "epoch": 1.8277087033747779,
+      "grad_norm": 1.1606299877166748,
+      "learning_rate": 1.097147843930388e-05,
+      "loss": 0.3812,
+      "step": 515
+    },
+    {
+      "epoch": 1.8454706927175843,
+      "grad_norm": 1.1118510961532593,
+      "learning_rate": 1.068962342982898e-05,
+      "loss": 0.3869,
+      "step": 520
+    },
+    {
+      "epoch": 1.8632326820603908,
+      "grad_norm": 1.0953401327133179,
+      "learning_rate": 1.040941775973265e-05,
+      "loss": 0.3903,
+      "step": 525
+    },
+    {
+      "epoch": 1.8809946714031973,
+      "grad_norm": 1.3009462356567383,
+      "learning_rate": 1.0130968648010307e-05,
+      "loss": 0.3621,
+      "step": 530
+    },
+    {
+      "epoch": 1.8987566607460036,
+      "grad_norm": 1.1924844980239868,
+      "learning_rate": 9.854382641521017e-06,
+      "loss": 0.4229,
+      "step": 535
+    },
+    {
+      "epoch": 1.9165186500888098,
+      "grad_norm": 1.3684569597244263,
+      "learning_rate": 9.579765574217943e-06,
+      "loss": 0.378,
+      "step": 540
+    },
+    {
+      "epoch": 1.9342806394316163,
+      "grad_norm": 1.234144926071167,
+      "learning_rate": 9.307222526651649e-06,
+      "loss": 0.3585,
+      "step": 545
+    },
+    {
+      "epoch": 1.9520426287744228,
+      "grad_norm": 1.1517488956451416,
+      "learning_rate": 9.036857785761664e-06,
+      "loss": 0.3664,
+      "step": 550
+    },
+    {
+      "epoch": 1.9698046181172293,
+      "grad_norm": 1.2272323369979858,
+      "learning_rate": 8.768774804971705e-06,
+      "loss": 0.3907,
+      "step": 555
+    },
+    {
+      "epoch": 1.9875666074600356,
+      "grad_norm": 1.1386855840682983,
+      "learning_rate": 8.503076164603873e-06,
+      "loss": 0.3457,
+      "step": 560
+    },
+    {
+      "epoch": 2.003552397868561,
+      "grad_norm": 1.290396809577942,
+      "learning_rate": 8.239863532626914e-06,
+      "loss": 0.3526,
+      "step": 565
+    },
+    {
+      "epoch": 2.0213143872113677,
+      "grad_norm": 1.2628382444381714,
+      "learning_rate": 7.979237625753575e-06,
+      "loss": 0.2756,
+      "step": 570
+    },
+    {
+      "epoch": 2.039076376554174,
+      "grad_norm": 1.2747254371643066,
+      "learning_rate": 7.721298170901968e-06,
+      "loss": 0.3108,
+      "step": 575
+    },
+    {
+      "epoch": 2.0568383658969807,
+      "grad_norm": 1.2029812335968018,
+      "learning_rate": 7.46614386703569e-06,
+      "loss": 0.3084,
+      "step": 580
+    },
+    {
+      "epoch": 2.0746003552397867,
+      "grad_norm": 1.3086997270584106,
+      "learning_rate": 7.213872347397252e-06,
+      "loss": 0.3092,
+      "step": 585
+    },
+    {
+      "epoch": 2.092362344582593,
+      "grad_norm": 1.2387416362762451,
+      "learning_rate": 6.964580142149306e-06,
+      "loss": 0.3155,
+      "step": 590
+    },
+    {
+      "epoch": 2.1101243339253997,
+      "grad_norm": 1.3628038167953491,
+      "learning_rate": 6.718362641437983e-06,
+      "loss": 0.2964,
+      "step": 595
+    },
+    {
+      "epoch": 2.127886323268206,
+      "grad_norm": 1.1450494527816772,
+      "learning_rate": 6.475314058892414e-06,
+      "loss": 0.3173,
+      "step": 600
+    },
+    {
+      "epoch": 2.1456483126110126,
+      "grad_norm": 1.2115252017974854,
+      "learning_rate": 6.235527395574473e-06,
+      "loss": 0.2965,
+      "step": 605
+    },
+    {
+      "epoch": 2.1634103019538187,
+      "grad_norm": 1.2262935638427734,
+      "learning_rate": 5.999094404392479e-06,
+      "loss": 0.2663,
+      "step": 610
+    },
+    {
+      "epoch": 2.181172291296625,
+      "grad_norm": 1.1969001293182373,
+      "learning_rate": 5.766105554992511e-06,
+      "loss": 0.3013,
+      "step": 615
+    },
+    {
+      "epoch": 2.1989342806394316,
+      "grad_norm": 1.1103568077087402,
+      "learning_rate": 5.536649999140728e-06,
+      "loss": 0.2982,
+      "step": 620
+    },
+    {
+      "epoch": 2.216696269982238,
+      "grad_norm": 1.3679698705673218,
+      "learning_rate": 5.310815536610015e-06,
+      "loss": 0.2713,
+      "step": 625
+    },
+    {
+      "epoch": 2.2344582593250446,
+      "grad_norm": 1.2028425931930542,
+      "learning_rate": 5.088688581583921e-06,
+      "loss": 0.2822,
+      "step": 630
+    },
+    {
+      "epoch": 2.2522202486678506,
+      "grad_norm": 1.2171648740768433,
+      "learning_rate": 4.8703541295907885e-06,
+      "loss": 0.2772,
+      "step": 635
+    },
+    {
+      "epoch": 2.269982238010657,
+      "grad_norm": 1.2585508823394775,
+      "learning_rate": 4.655895724980761e-06,
+      "loss": 0.2765,
+      "step": 640
+    },
+    {
+      "epoch": 2.2877442273534636,
+      "grad_norm": 1.300795316696167,
+      "learning_rate": 4.445395428958e-06,
+      "loss": 0.2563,
+      "step": 645
+    },
+    {
+      "epoch": 2.30550621669627,
+      "grad_norm": 1.1971309185028076,
+      "learning_rate": 4.238933788180502e-06,
+      "loss": 0.3082,
+      "step": 650
+    },
+    {
+      "epoch": 2.323268206039076,
+      "grad_norm": 1.1617590188980103,
+      "learning_rate": 4.0365898039393985e-06,
+      "loss": 0.305,
+      "step": 655
+    },
+    {
+      "epoch": 2.3410301953818826,
+      "grad_norm": 1.2532880306243896,
+      "learning_rate": 3.8384409019295945e-06,
+      "loss": 0.2928,
+      "step": 660
+    },
+    {
+      "epoch": 2.358792184724689,
+      "grad_norm": 1.252830147743225,
+      "learning_rate": 3.6445629026232736e-06,
+      "loss": 0.2775,
+      "step": 665
+    },
+    {
+      "epoch": 2.3765541740674956,
+      "grad_norm": 1.2107690572738647,
+      "learning_rate": 3.455029992257705e-06,
+      "loss": 0.2773,
+      "step": 670
+    },
+    {
+      "epoch": 2.394316163410302,
+      "grad_norm": 1.2647984027862549,
+      "learning_rate": 3.269914694448282e-06,
+      "loss": 0.2908,
+      "step": 675
+    },
+    {
+      "epoch": 2.412078152753108,
+      "grad_norm": 1.391261100769043,
+      "learning_rate": 3.0892878424378313e-06,
+      "loss": 0.2658,
+      "step": 680
+    },
+    {
+      "epoch": 2.4298401420959146,
+      "grad_norm": 1.3045754432678223,
+      "learning_rate": 2.913218551992684e-06,
+      "loss": 0.2931,
+      "step": 685
+    },
+    {
+      "epoch": 2.447602131438721,
+      "grad_norm": 1.2543214559555054,
+      "learning_rate": 2.7417741949559634e-06,
+      "loss": 0.2609,
+      "step": 690
+    },
+    {
+      "epoch": 2.4653641207815276,
+      "grad_norm": 1.2797883749008179,
+      "learning_rate": 2.5750203734681555e-06,
+      "loss": 0.2745,
+      "step": 695
+    },
+    {
+      "epoch": 2.483126110124334,
+      "grad_norm": 1.2370916604995728,
+      "learning_rate": 2.413020894864844e-06,
+      "loss": 0.2903,
+      "step": 700
+    },
+    {
+      "epoch": 2.50088809946714,
+      "grad_norm": 1.0997259616851807,
+      "learning_rate": 2.2558377472612236e-06,
+      "loss": 0.2752,
+      "step": 705
+    },
+    {
+      "epoch": 2.5186500888099466,
+      "grad_norm": 1.2443797588348389,
+      "learning_rate": 2.103531075832727e-06,
+      "loss": 0.2867,
+      "step": 710
+    },
+    {
+      "epoch": 2.536412078152753,
+      "grad_norm": 1.3215539455413818,
+      "learning_rate": 1.956159159800835e-06,
+      "loss": 0.2617,
+      "step": 715
+    },
+    {
+      "epoch": 2.5541740674955595,
+      "grad_norm": 1.339474081993103,
+      "learning_rate": 1.813778390132893e-06,
+      "loss": 0.253,
+      "step": 720
+    },
+    {
+      "epoch": 2.571936056838366,
+      "grad_norm": 1.1505285501480103,
+      "learning_rate": 1.6764432479644442e-06,
+      "loss": 0.2703,
+      "step": 725
+    },
+    {
+      "epoch": 2.589698046181172,
+      "grad_norm": 1.2844033241271973,
+      "learning_rate": 1.5442062837523546e-06,
+      "loss": 0.3137,
+      "step": 730
+    },
+    {
+      "epoch": 2.6074600355239785,
+      "grad_norm": 1.2572717666625977,
+      "learning_rate": 1.4171180971667063e-06,
+      "loss": 0.2562,
+      "step": 735
+    },
+    {
+      "epoch": 2.625222024866785,
+      "grad_norm": 1.2211933135986328,
+      "learning_rate": 1.2952273177291374e-06,
+      "loss": 0.2752,
+      "step": 740
+    },
+    {
+      "epoch": 2.6429840142095915,
+      "grad_norm": 1.2600083351135254,
+      "learning_rate": 1.1785805862050524e-06,
+      "loss": 0.2903,
+      "step": 745
+    },
+    {
+      "epoch": 2.660746003552398,
+      "grad_norm": 1.0863158702850342,
+      "learning_rate": 1.0672225367568194e-06,
+      "loss": 0.2876,
+      "step": 750
+    },
+    {
+      "epoch": 2.678507992895204,
+      "grad_norm": 1.1689404249191284,
+      "learning_rate": 9.611957798647641e-07,
+      "loss": 0.2589,
+      "step": 755
+    },
+    {
+      "epoch": 2.6962699822380105,
+      "grad_norm": 1.2573766708374023,
+      "learning_rate": 8.605408860225544e-07,
+      "loss": 0.2966,
+      "step": 760
+    },
+    {
+      "epoch": 2.714031971580817,
+      "grad_norm": 1.3061907291412354,
+      "learning_rate": 7.652963702131266e-07,
+      "loss": 0.2846,
+      "step": 765
+    },
+    {
+      "epoch": 2.7317939609236235,
+      "grad_norm": 1.1642416715621948,
+      "learning_rate": 6.754986771711724e-07,
+      "loss": 0.2613,
+      "step": 770
+    },
+    {
+      "epoch": 2.74955595026643,
+      "grad_norm": 1.2969051599502563,
+      "learning_rate": 5.911821674377776e-07,
+      "loss": 0.2812,
+      "step": 775
+    },
+    {
+      "epoch": 2.767317939609236,
+      "grad_norm": 1.124867558479309,
+      "learning_rate": 5.123791042125742e-07,
+      "loss": 0.2726,
+      "step": 780
+    },
+    {
+      "epoch": 2.7850799289520425,
+      "grad_norm": 1.2344129085540771,
+      "learning_rate": 4.391196410084186e-07,
+      "loss": 0.2638,
+      "step": 785
+    },
+    {
+      "epoch": 2.802841918294849,
+      "grad_norm": 1.174248456954956,
+      "learning_rate": 3.714318101133368e-07,
+      "loss": 0.2661,
+      "step": 790
+    },
+    {
+      "epoch": 2.8206039076376554,
+      "grad_norm": 1.3222744464874268,
+      "learning_rate": 3.093415118641407e-07,
+      "loss": 0.2746,
+      "step": 795
+    },
+    {
+      "epoch": 2.838365896980462,
+      "grad_norm": 1.2366849184036255,
+      "learning_rate": 2.5287250473581434e-07,
+      "loss": 0.2642,
+      "step": 800
+    },
+    {
+      "epoch": 2.856127886323268,
+      "grad_norm": 1.3464922904968262,
+      "learning_rate": 2.020463962504876e-07,
+      "loss": 0.2775,
+      "step": 805
+    },
+    {
+      "epoch": 2.8738898756660745,
+      "grad_norm": 1.2246798276901245,
+      "learning_rate": 1.5688263470944654e-07,
+      "loss": 0.2996,
+      "step": 810
+    },
+    {
+      "epoch": 2.891651865008881,
+      "grad_norm": 1.2219206094741821,
+      "learning_rate": 1.1739850175135457e-07,
+      "loss": 0.241,
+      "step": 815
+    },
+    {
+      "epoch": 2.9094138543516874,
+      "grad_norm": 1.1566717624664307,
+      "learning_rate": 8.360910573954216e-08,
+      "loss": 0.2725,
+      "step": 820
+    },
+    {
+      "epoch": 2.927175843694494,
+      "grad_norm": 1.202154278755188,
+      "learning_rate": 5.5527375980882756e-08,
+      "loss": 0.2525,
+      "step": 825
+    },
+    {
+      "epoch": 2.9449378330373,
+      "grad_norm": 1.2614991664886475,
+      "learning_rate": 3.316405777847697e-08,
+      "loss": 0.2747,
+      "step": 830
+    },
+    {
+      "epoch": 2.9626998223801064,
+      "grad_norm": 1.2126753330230713,
+      "learning_rate": 1.6527708320018266e-08,
+      "loss": 0.2731,
+      "step": 835
+    },
+    {
+      "epoch": 2.980461811722913,
+      "grad_norm": 1.1563338041305542,
+      "learning_rate": 5.624693403442072e-09,
+      "loss": 0.2974,
+      "step": 840
+    },
+    {
+      "epoch": 2.9982238010657194,
+      "grad_norm": 1.399437427520752,
+      "learning_rate": 4.591850010926546e-10,
+      "loss": 0.3024,
+      "step": 845
+    },
+    {
+      "epoch": 3.0,
+      "step": 846,
+      "total_flos": 1.2069706775889183e+18,
+      "train_loss": 0.5579275732626588,
+      "train_runtime": 555.4859,
+      "train_samples_per_second": 48.574,
+      "train_steps_per_second": 1.523
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 846,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 2000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.2069706775889183e+18,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

25_128_e3_3e-5/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c9d0ab9cf9195ce3cfc7a0930282cca9f2b58d50a2b084396d0c1924102167e9
+size 8145

25_128_e3_3e-5/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff