diff --git a/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/chat_template.jinja b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..9dd80303a0d84568dfc4f6dc7a1a27925674f1ae --- /dev/null +++ b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/chat_template.jinja @@ -0,0 +1 @@ +{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ content }}{% elif message['role'] == 'assistant' %}{{ content }}{% endif %}{% endfor %} \ No newline at end of file diff --git a/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/config.json b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/config.json new file mode 100644 index 0000000000000000000000000000000000000000..12fc3105b9e8c137abb64ad038516a4bd39e6e98 --- /dev/null +++ b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/config.json @@ -0,0 +1,43 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 2, + "eos_token_id": 3, + "hidden_act": "silu", + "hidden_size": 768, + "initializer_range": 0.02, + "intermediate_size": 3072, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 2048, + "max_window_layers": 24, + "model_type": "qwen2", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.55.0", + "use_cache": false, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 2200 +} diff --git a/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/generation_config.json b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..97a3695efd589e30031248c57e3fe1d9a23e03d1 --- /dev/null +++ b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 2, + "eos_token_id": 3, + "transformers_version": "4.55.0", + "use_cache": false +} diff --git a/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/model.safetensors b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..697d215cdd231cbedc92639e90c647ef080fa94b --- /dev/null +++ b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:902ad9b7f01908657ba18c0f3cec4bb5ecec73fff8c02674e109e90ecf0ec438 +size 412699256 diff --git a/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/optimizer.pt b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b490943f77e29b9b90c5ca2525ecd4c39963e35 --- /dev/null +++ b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67df0f6fff82ff9bd0b8865b8c14f19446ba403cf0bc3aefdbd7c5ba2541fa77 +size 825489611 diff --git a/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/rng_state_0.pth b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..9269d2c338d41e7e53c36da59ce359217c84194e --- /dev/null +++ b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f91f87bd9a92e8739289f46162ef308559319763234c64c5b77db9ec569f87f +size 15429 diff --git a/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/rng_state_1.pth b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..85504fcf5b95eeaceb18fd0eb6e5f135811dc600 --- /dev/null +++ b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7d39b8b541e3fd455fdaac992f8e531a8b6ecbd342d84bd34ce103caf8c4829 +size 15429 diff --git a/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/rng_state_2.pth b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..b27a42d52c6b2c4abe7662f4fdaa41c28a9cc5e8 --- /dev/null +++ b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ff092a382271ec0416bca1eaebe1589a5c027148f661f515a99be89a41550e9 +size 15429 diff --git a/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/rng_state_3.pth b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..336e2fc455a1af65baad649e25993201963256e1 --- /dev/null +++ b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ee31a8654359133d1c40cfb2da8b7350b44cc7eec9bf923c997643420f7061d +size 15429 diff --git a/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/scheduler.pt b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f6d8c5f39891a0b1a5a398a2a8a93bf3c2fce6d --- /dev/null +++ b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95e4bd41eb41853bfd74e259d5aaa6bbc0629c3c4e70811a127b634509017e3d +size 1465 diff --git a/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/special_tokens_map.json b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..35b1d92a95d1a1edf383bdfa7a4999f33281428b --- /dev/null +++ b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/special_tokens_map.json @@ -0,0 +1,38 @@ +{ + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "" + ], + "bos_token": { + "content": "[BOS]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "[EOS]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "[UNK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/tokenizer.json b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..e78dccaf95b5f11c8def0a9e3d38bb0990b9846e --- /dev/null +++ b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/tokenizer.json @@ -0,0 +1,10104 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "[UNK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "[PAD]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "[BOS]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 3, + "content": "[EOS]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 4, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 5, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 6, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 7, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 8, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 9, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2196, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2197, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2198, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2199, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2200, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "\\\\[A-Za-z]+|\\\\.|\\\\begin\\s*\\{[A-Za-z*]+\\}|\\\\end\\s*\\{[A-Za-z*]+\\}|<=|>=|!=|:=|->|->>|=>|<=>|::=|==|\\\\to|\\\\mapsto|\\\\implies|\\\\iff|[±≈≅≃≡∼∝∞√∑∏∫∮∇∂∆∈∉⊂⊆⊄⊇⊃∩∪∧∨¬⇒⇔←→↦⟶⟨⟩⋯…]|[_^]|[{}()\\\\[\\\\]]|(?": 4, + "": 5, + "": 6, + "": 7, + "": 8, + "": 9, + "!": 10, + "\"": 11, + "#": 12, + "$": 13, + "%": 14, + "&": 15, + "'": 16, + "(": 17, + ")": 18, + "*": 19, + "+": 20, + ",": 21, + "-": 22, + ".": 23, + "/": 24, + "0": 25, + "1": 26, + "2": 27, + "3": 28, + "4": 29, + "5": 30, + "6": 31, + "7": 32, + "8": 33, + "9": 34, + ":": 35, + ";": 36, + "<": 37, + "=": 38, + ">": 39, + "?": 40, + "@": 41, + "A": 42, + "B": 43, + "C": 44, + "D": 45, + "E": 46, + "F": 47, + "G": 48, + "H": 49, + "I": 50, + "J": 51, + "K": 52, + "L": 53, + "M": 54, + "N": 55, + "O": 56, + "P": 57, + "Q": 58, + "R": 59, + "S": 60, + "T": 61, + "U": 62, + "V": 63, + "W": 64, + "X": 65, + "Y": 66, + "Z": 67, + "[": 68, + "\\": 69, + "]": 70, + "^": 71, + "_": 72, + "`": 73, + "a": 74, + "b": 75, + "c": 76, + "d": 77, + "e": 78, + "f": 79, + "g": 80, + "h": 81, + "i": 82, + "j": 83, + "k": 84, + "l": 85, + "m": 86, + "n": 87, + "o": 88, + "p": 89, + "q": 90, + "r": 91, + "s": 92, + "t": 93, + "u": 94, + "v": 95, + "w": 96, + "x": 97, + "y": 98, + "z": 99, + "{": 100, + "|": 101, + "}": 102, + "~": 103, + "¡": 104, + "¢": 105, + "£": 106, + "¤": 107, + "¥": 108, + "¦": 109, + "§": 110, + "¨": 111, + "©": 112, + "ª": 113, + "«": 114, + "¬": 115, + "®": 116, + "¯": 117, + "°": 118, + "±": 119, + "²": 120, + "³": 121, + "´": 122, + "µ": 123, + "¶": 124, + "·": 125, + "¸": 126, + "¹": 127, + "º": 128, + "»": 129, + "¼": 130, + "½": 131, + "¾": 132, + "¿": 133, + "À": 134, + "Á": 135, + "Â": 136, + "Ã": 137, + "Ä": 138, + "Å": 139, + "Æ": 140, + "Ç": 141, + "È": 142, + "É": 143, + "Ê": 144, + "Ë": 145, + "Ì": 146, + "Í": 147, + "Î": 148, + "Ï": 149, + "Ð": 150, + "Ñ": 151, + "Ò": 152, + "Ó": 153, + "Ô": 154, + "Õ": 155, + "Ö": 156, + "×": 157, + "Ø": 158, + "Ù": 159, + "Ú": 160, + "Û": 161, + "Ü": 162, + "Ý": 163, + "Þ": 164, + "ß": 165, + "à": 166, + "á": 167, + "â": 168, + "ã": 169, + "ä": 170, + "å": 171, + "æ": 172, + "ç": 173, + "è": 174, + "é": 175, + "ê": 176, + "ë": 177, + "ì": 178, + "í": 179, + "î": 180, + "ï": 181, + "ð": 182, + "ñ": 183, + "ò": 184, + "ó": 185, + "ô": 186, + "õ": 187, + "ö": 188, + "÷": 189, + "ø": 190, + "ù": 191, + "ú": 192, + "û": 193, + "ü": 194, + "ý": 195, + "þ": 196, + "ÿ": 197, + "Ā": 198, + "ā": 199, + "Ă": 200, + "ă": 201, + "Ą": 202, + "ą": 203, + "Ć": 204, + "ć": 205, + "Ĉ": 206, + "ĉ": 207, + "Ċ": 208, + "ċ": 209, + "Č": 210, + "č": 211, + "Ď": 212, + "ď": 213, + "Đ": 214, + "đ": 215, + "Ē": 216, + "ē": 217, + "Ĕ": 218, + "ĕ": 219, + "Ė": 220, + "ė": 221, + "Ę": 222, + "ę": 223, + "Ě": 224, + "ě": 225, + "Ĝ": 226, + "ĝ": 227, + "Ğ": 228, + "ğ": 229, + "Ġ": 230, + "ġ": 231, + "Ģ": 232, + "ģ": 233, + "Ĥ": 234, + "ĥ": 235, + "Ħ": 236, + "ħ": 237, + "Ĩ": 238, + "ĩ": 239, + "Ī": 240, + "ī": 241, + "Ĭ": 242, + "ĭ": 243, + "Į": 244, + "į": 245, + "İ": 246, + "ı": 247, + "IJ": 248, + "ij": 249, + "Ĵ": 250, + "ĵ": 251, + "Ķ": 252, + "ķ": 253, + "ĸ": 254, + "Ĺ": 255, + "ĺ": 256, + "Ļ": 257, + "ļ": 258, + "Ľ": 259, + "ľ": 260, + "Ŀ": 261, + "ŀ": 262, + "Ł": 263, + "ł": 264, + "Ń": 265, + "er": 266, + "Ġ+": 267, + "va": 268, + "oo": 269, + "ĠC": 270, + "ĠT": 271, + "fi": 272, + "ul": 273, + "Ġd": 274, + "ua": 275, + "qua": 276, + "vi": 277, + "ad": 278, + "ne": 279, + "Ġm": 280, + "io": 281, + "ĠTh": 282, + "ĠThe": 283, + "ch": 284, + "equa": 285, + "Ġequa": 286, + "ĠF": 287, + "ult": 288, + "iva": 289, + "ovi": 290, + "ovie": 291, + "Ġad": 292, + "Ġadult": 293, + "ĠD": 294, + "ool": 295, + "chool": 296, + "Ġequal": 297, + "ion": 298, + "to": 299, + "Ġ2": 300, + "it": 301, + "ĠM": 302, + "efi": 303, + "ĠDefi": 304, + "ĠDefine": 305, + "Ġ<": 306, + "ar": 307, + "Ġ1": 308, + "al": 309, + "Ġ4": 310, + "tiva": 311, + "ĠFe": 312, + "tival": 313, + "tion": 314, + "Ġ3": 315, + "ĠB": 316, + "Ġto": 317, + "Ġde": 318, + "en": 319, + "re": 320, + "ri": 321, + "ĠR": 322, + "Ġx": 323, + "tal": 324, + "Ġtotal": 325, + "lu": 326, + "Ġc": 327, + "ĠCit": 328, + "ĠCity": 329, + "Ġi": 330, + "ĠS": 331, + "nt": 332, + "ve": 333, + "ro": 334, + "Ġmovie": 335, + "at": 336, + "ĠMovie": 337, + "ay": 338, + "nd": 339, + "ic": 340, + "id": 341, + "Ġw": 342, + "ll": 343, + "Ġ=": 344, + "Ġb": 345, + "Ġ": 2196, + "": 2197, + "": 2198, + "": 2199, + "": 2200 + }, + "merges": [ + [ + "e", + "r" + ], + [ + "Ġ", + "+" + ], + [ + "v", + "a" + ], + [ + "o", + "o" + ], + [ + "Ġ", + "C" + ], + [ + "Ġ", + "T" + ], + [ + "f", + "i" + ], + [ + "u", + "l" + ], + [ + "Ġ", + "d" + ], + [ + "u", + "a" + ], + [ + "q", + "ua" + ], + [ + "v", + "i" + ], + [ + "a", + "d" + ], + [ + "n", + "e" + ], + [ + "Ġ", + "m" + ], + [ + "i", + "o" + ], + [ + "ĠT", + "h" + ], + [ + "ĠTh", + "e" + ], + [ + "c", + "h" + ], + [ + "e", + "qua" + ], + [ + "Ġ", + "equa" + ], + [ + "Ġ", + "F" + ], + [ + "ul", + "t" + ], + [ + "i", + "va" + ], + [ + "o", + "vi" + ], + [ + "ovi", + "e" + ], + [ + "Ġ", + "ad" + ], + [ + "Ġad", + "ult" + ], + [ + "Ġ", + "D" + ], + [ + "oo", + "l" + ], + [ + "ch", + "ool" + ], + [ + "Ġequa", + "l" + ], + [ + "io", + "n" + ], + [ + "t", + "o" + ], + [ + "Ġ", + "2" + ], + [ + "i", + "t" + ], + [ + "Ġ", + "M" + ], + [ + "e", + "fi" + ], + [ + "ĠD", + "efi" + ], + [ + "ĠDefi", + "ne" + ], + [ + "Ġ", + "<" + ], + [ + "a", + "r" + ], + [ + "Ġ", + "1" + ], + [ + "a", + "l" + ], + [ + "Ġ", + "4" + ], + [ + "t", + "iva" + ], + [ + "ĠF", + "e" + ], + [ + "tiva", + "l" + ], + [ + "t", + "ion" + ], + [ + "Ġ", + "3" + ], + [ + "Ġ", + "B" + ], + [ + "Ġ", + "to" + ], + [ + "Ġd", + "e" + ], + [ + "e", + "n" + ], + [ + "r", + "e" + ], + [ + "r", + "i" + ], + [ + "Ġ", + "R" + ], + [ + "Ġ", + "x" + ], + [ + "t", + "al" + ], + [ + "Ġto", + "tal" + ], + [ + "l", + "u" + ], + [ + "Ġ", + "c" + ], + [ + "ĠC", + "it" + ], + [ + "ĠCit", + "y" + ], + [ + "Ġ", + "i" + ], + [ + "Ġ", + "S" + ], + [ + "n", + "t" + ], + [ + "v", + "e" + ], + [ + "r", + "o" + ], + [ + "Ġm", + "ovie" + ], + [ + "a", + "t" + ], + [ + "ĠM", + "ovie" + ], + [ + "a", + "y" + ], + [ + "n", + "d" + ], + [ + "i", + "c" + ], + [ + "i", + "d" + ], + [ + "Ġ", + "w" + ], + [ + "l", + "l" + ], + [ + "Ġ", + "=" + ], + [ + "Ġ", + "b" + ], + [ + "Ġ<", + "/" + ], + [ + "e", + "d" + ], + [ + "i", + "m" + ], + [ + "g", + "e" + ], + [ + "v", + "er" + ], + [ + "Ġ", + "W" + ], + [ + "t", + "e" + ], + [ + "Ġ", + "V" + ], + [ + "u", + "t" + ], + [ + "Ġ", + "f" + ], + [ + "Ġ", + "h" + ], + [ + "o", + "lu" + ], + [ + "u", + "e" + ], + [ + "q", + "ue" + ], + [ + "olu", + "tion" + ], + [ + "Ġ", + "H" + ], + [ + "a", + "nd" + ], + [ + "Ġ", + "and" + ], + [ + "e", + "m" + ], + [ + "l", + "em" + ], + [ + "l", + "i" + ], + [ + "g", + "h" + ], + [ + "o", + "w" + ], + [ + "o", + "nt" + ], + [ + "o", + "r" + ], + [ + "O", + "S" + ], + [ + "w", + "er" + ], + [ + "Ġ", + "u" + ], + [ + "Ġ", + "it" + ], + [ + "Ġ", + "O" + ], + [ + "e", + "t" + ], + [ + "Ġ", + "G" + ], + [ + "al", + "m" + ], + [ + "m", + "a" + ], + [ + "u", + "b" + ], + [ + "ĠW", + "e" + ], + [ + "r", + "a" + ], + [ + "at", + "er" + ], + [ + "Ġw", + "e" + ], + [ + "n", + "im" + ], + [ + "a", + "nim" + ], + [ + "Ġ", + "anim" + ], + [ + "Ġanim", + "al" + ], + [ + "Ġ", + "6" + ], + [ + "id", + "e" + ], + [ + "Ġ", + "*" + ], + [ + "ĠB", + "e" + ], + [ + "i", + "ver" + ], + [ + "ĠR", + "iver" + ], + [ + "a", + "k" + ], + [ + "d", + "ge" + ], + [ + "ri", + "dge" + ], + [ + "ĠO", + "ak" + ], + [ + "o", + "re" + ], + [ + "ll", + "e" + ], + [ + "a", + "i" + ], + [ + "n", + "a" + ], + [ + "m", + "i" + ], + [ + "ĠC", + "l" + ], + [ + "ĠC", + "i" + ], + [ + "va", + "l" + ], + [ + "to", + "n" + ], + [ + "l", + "d" + ], + [ + "ĠB", + "ay" + ], + [ + "ĠH", + "a" + ], + [ + "e", + "ar" + ], + [ + "Ġ", + "k" + ], + [ + "Ġ", + "8" + ], + [ + "Ġ", + "P" + ], + [ + "Ġ", + "A" + ], + [ + "Ġ", + "J" + ], + [ + "Ġ", + "Z" + ], + [ + "Ġ", + "E" + ], + [ + "h", + "a" + ], + [ + "t", + "ha" + ], + [ + "Ġi", + "n" + ], + [ + "e", + "at" + ], + [ + "b", + "eat" + ], + [ + "m", + "ed" + ], + [ + "o", + "li" + ], + [ + "o", + "med" + ], + [ + "Ġm", + "et" + ], + [ + "Ġc", + "omed" + ], + [ + "Ġmet", + "ro" + ], + [ + "Ġcomed", + "y" + ], + [ + "o", + "lem" + ], + [ + "er", + "io" + ], + [ + "Ġd", + "ra" + ], + [ + "olem", + "n" + ], + [ + "erio", + "d" + ], + [ + "Ġdra", + "ma" + ], + [ + "c", + "t" + ], + [ + "h", + "ri" + ], + [ + "i", + "ve" + ], + [ + "t", + "hri" + ], + [ + "Ġ", + "thri" + ], + [ + "Ġde", + "te" + ], + [ + "Ġi", + "nt" + ], + [ + "ll", + "er" + ], + [ + "ct", + "ive" + ], + [ + "Ġthri", + "ller" + ], + [ + "Ġdete", + "ctive" + ], + [ + "Ġint", + "en" + ], + [ + "c", + "i" + ], + [ + "t", + "ic" + ], + [ + "u", + "ri" + ], + [ + "ut", + "uri" + ], + [ + "Ġf", + "uturi" + ], + [ + "Ġ", + "ro" + ], + [ + "Ġc", + "alm" + ], + [ + "Ġro", + "ad" + ], + [ + "e", + "lem" + ], + [ + "t", + "ar" + ], + [ + "Ġ", + "elem" + ], + [ + "en", + "tar" + ], + [ + "Ġelem", + "entar" + ], + [ + "Ġelementar", + "y" + ], + [ + "i", + "a" + ], + [ + "n", + "ar" + ], + [ + "i", + "nar" + ], + [ + "ul", + "inar" + ], + [ + "Ġc", + "ulinar" + ], + [ + "ia", + "n" + ], + [ + "Ġculinar", + "ian" + ], + [ + "i", + "gh" + ], + [ + "l", + "ic" + ], + [ + "Ġh", + "igh" + ], + [ + "ub", + "lic" + ], + [ + "d", + "l" + ], + [ + "r", + "iva" + ], + [ + "Ġm", + "id" + ], + [ + "dl", + "e" + ], + [ + "riva", + "te" + ], + [ + "Ġmid", + "dle" + ], + [ + "g", + "ion" + ], + [ + "Ġ", + "re" + ], + [ + "Ġm", + "ed" + ], + [ + "ic", + "al" + ], + [ + "gion", + "al" + ], + [ + "Ġre", + "gional" + ], + [ + "Ġmed", + "ical" + ], + [ + "f", + "f" + ], + [ + "ff", + "er" + ], + [ + "al", + "l" + ], + [ + "o", + "ut" + ], + [ + "Ġ", + "j" + ], + [ + "B", + "OS" + ], + [ + "E", + "OS" + ], + [ + "Ġ", + "[" + ], + [ + "t", + "im" + ], + [ + "Ġ", + "tim" + ], + [ + "Ġtim", + "e" + ], + [ + "Ġ", + "5" + ], + [ + "lu", + "e" + ], + [ + "Ġf", + "o" + ], + [ + "Ġ1", + "2" + ], + [ + "Ġ", + "g" + ], + [ + "a", + "ve" + ], + [ + "Ġd", + "o" + ], + [ + "Ġh", + "ave" + ], + [ + "Ġ", + "0" + ], + [ + "Ġ", + "y" + ], + [ + "Ġ", + "l" + ], + [ + "Ġ", + "I" + ], + [ + "Ġ", + "-" + ], + [ + "Ġd", + "i" + ], + [ + "l", + "ve" + ], + [ + "n", + "g" + ], + [ + "i", + "ng" + ], + [ + "n", + "ow" + ], + [ + "o", + "lve" + ], + [ + "t", + "er" + ], + [ + "Ġ", + "ter" + ], + [ + "Ġequa", + "tion" + ], + [ + "Ġk", + "now" + ], + [ + "Ġter", + "m" + ], + [ + "e", + "y" + ], + [ + "ĠC", + "ed" + ], + [ + "ĠV", + "all" + ], + [ + "ĠCed", + "ar" + ], + [ + "ĠVall", + "ey" + ], + [ + "i", + "ne" + ], + [ + "ĠR", + "id" + ], + [ + "ĠP", + "ine" + ], + [ + "ĠRid", + "ge" + ], + [ + "e", + "k" + ], + [ + "ĠC", + "re" + ], + [ + "ĠM", + "a" + ], + [ + "ĠCre", + "ek" + ], + [ + "l", + "y" + ], + [ + "ĠF", + "ore" + ], + [ + "ver", + "ly" + ], + [ + "ĠBe", + "verly" + ], + [ + "ĠOak", + "ridge" + ], + [ + "l", + "or" + ], + [ + "ĠT", + "ay" + ], + [ + "ĠTay", + "lor" + ], + [ + "h", + "w" + ], + [ + "t", + "hw" + ], + [ + "oo", + "d" + ], + [ + "or", + "thw" + ], + [ + "orthw", + "ood" + ], + [ + "ª", + "ve" + ], + [ + "Ã", + "ªve" + ], + [ + "vi", + "lle" + ], + [ + "ĠR", + "êve" + ], + [ + "ĠBe", + "lle" + ], + [ + "ĠBelle", + "ville" + ], + [ + "m", + "ont" + ], + [ + "r", + "mont" + ], + [ + "ai", + "rmont" + ], + [ + "ĠCl", + "airmont" + ], + [ + "u", + "mi" + ], + [ + "¨", + "re" + ], + [ + "Ã", + "¨re" + ], + [ + "ĠV", + "alm" + ], + [ + "umi", + "ère" + ], + [ + "ĠValm", + "ont" + ], + [ + "a", + "h" + ], + [ + "ar", + "a" + ], + [ + "ĠS", + "ah" + ], + [ + "ĠSah", + "ara" + ], + [ + "R", + "iva" + ], + [ + "ĠS", + "ai" + ], + [ + "Riva", + "ge" + ], + [ + "ĠSai", + "nt" + ], + [ + "a", + "na" + ], + [ + "o", + "ld" + ], + [ + "ĠB", + "ana" + ], + [ + "ĠG", + "old" + ], + [ + "ĠBana", + "na" + ], + [ + "ĠGold", + "en" + ], + [ + "n", + "ch" + ], + [ + "a", + "nch" + ], + [ + "u", + "nd" + ], + [ + "ĠB", + "und" + ], + [ + "ĠR", + "anch" + ], + [ + "ĠBund", + "l" + ], + [ + "ĠBundl", + "e" + ], + [ + "n", + "Ã" + ], + [ + "©", + "ma" + ], + [ + "ĠM", + "ont" + ], + [ + "re", + "val" + ], + [ + "ĠCi", + "nÃ" + ], + [ + "ĠMont", + "reval" + ], + [ + "ĠCinÃ", + "©ma" + ], + [ + "d", + "i" + ], + [ + "er", + "di" + ], + [ + "ĠV", + "erdi" + ], + [ + "qua", + "ri" + ], + [ + "ĠM", + "ay" + ], + [ + "ĠA", + "quari" + ], + [ + "ĠMay", + "er" + ], + [ + "ĠAquari", + "u" + ], + [ + "ĠAquariu", + "m" + ], + [ + "l", + "ton" + ], + [ + "ĠF", + "ar" + ], + [ + "mi", + "lton" + ], + [ + "ĠHa", + "milton" + ], + [ + "ĠFar", + "m" + ], + [ + "w", + "ater" + ], + [ + "ĠCl", + "ear" + ], + [ + "ĠClear", + "water" + ], + [ + "c", + "u" + ], + [ + "e", + "ffer" + ], + [ + "r", + "cu" + ], + [ + "ĠCi", + "rcu" + ], + [ + "ĠJ", + "effer" + ], + [ + "ĠS", + "out" + ], + [ + "ĠZ", + "oo" + ], + [ + "ĠSout", + "h" + ], + [ + "f", + "or" + ], + [ + "t", + "for" + ], + [ + "ĠB", + "ri" + ], + [ + "gh", + "tfor" + ], + [ + "ĠBri", + "ghtfor" + ], + [ + "ĠBrightfor", + "d" + ], + [ + "ĠRiver", + "ton" + ], + [ + "b", + "ridge" + ], + [ + "ĠOak", + "bridge" + ], + [ + "v", + "en" + ], + [ + "tha", + "ven" + ], + [ + "ver", + "val" + ], + [ + "ĠE", + "verval" + ], + [ + "ĠEverval", + "e" + ], + [ + "ĠR", + "ub" + ], + [ + "ĠRub", + "y" + ], + [ + "h", + "ore" + ], + [ + "ĠS", + "hore" + ], + [ + "li", + "ne" + ], + [ + "ĠShore", + "line" + ], + [ + "b", + "u" + ], + [ + "k", + "e" + ], + [ + "r", + "y" + ], + [ + "w", + "ke" + ], + [ + "ĠHa", + "wke" + ], + [ + "bu", + "ry" + ], + [ + "e", + "ld" + ], + [ + "l", + "en" + ], + [ + "fi", + "eld" + ], + [ + "ĠG", + "len" + ], + [ + "ĠGlen", + "field" + ], + [ + "Ġ", + "(" + ], + [ + "Ġ", + "9" + ], + [ + "Ġ", + "7" + ], + [ + "Ġ1", + "0" + ], + [ + "c", + "oo" + ], + [ + "Ġ", + "ra" + ], + [ + "coo", + "n" + ], + [ + "Ġra", + "coon" + ], + [ + "ar", + "ro" + ], + [ + "arro", + "t" + ], + [ + "Ġb", + "lue" + ], + [ + "Ġj", + "ay" + ], + [ + "Ġfo", + "x" + ], + [ + "a", + "g" + ], + [ + "e", + "ag" + ], + [ + "Ġ", + "eag" + ], + [ + "Ġeag", + "l" + ], + [ + "Ġeagl", + "e" + ], + [ + "l", + "f" + ], + [ + "o", + "lf" + ], + [ + "Ġw", + "olf" + ], + [ + "Ġde", + "er" + ], + [ + "Ġb", + "ear" + ], + [ + "Ġ", + "ow" + ], + [ + "Ġow", + "l" + ], + [ + "Ġc", + "ro" + ], + [ + "Ġcro", + "w" + ], + [ + "Ġ1", + "6" + ], + [ + "h", + "at" + ], + [ + "ĠW", + "hat" + ], + [ + "n", + "y" + ], + [ + "a", + "ny" + ], + [ + "Ġm", + "any" + ], + [ + "ĠH", + "ow" + ], + [ + "Ġdo", + "e" + ], + [ + "Ġ", + "v" + ], + [ + "Ġ", + "z" + ], + [ + "Ġ", + "q" + ], + [ + "Ġ", + "Y" + ], + [ + "Ġ", + "K" + ], + [ + "Ġ", + "r" + ], + [ + "Ġ", + "Q" + ], + [ + "Ġ", + "U" + ], + [ + "e", + "x" + ], + [ + "Ġ", + "tha" + ], + [ + "Ġ", + "ex" + ], + [ + "re", + "ater" + ], + [ + "Ġg", + "reater" + ], + [ + "Ġtha", + "n" + ], + [ + "Ġex", + "i" + ], + [ + "c", + "e" + ], + [ + "e", + "en" + ], + [ + "w", + "een" + ], + [ + "en", + "ce" + ], + [ + "Ġb", + "et" + ], + [ + "ffer", + "ence" + ], + [ + "Ġdi", + "fference" + ], + [ + "Ġbet", + "ween" + ], + [ + "'", + "t" + ], + [ + "c", + "ult" + ], + [ + "d", + "e" + ], + [ + "f", + "y" + ], + [ + "f", + "fi" + ], + [ + "h", + "ro" + ], + [ + "i", + "vi" + ], + [ + "i", + "ll" + ], + [ + "l", + "at" + ], + [ + "o", + "t" + ], + [ + "o", + "ne" + ], + [ + "o", + "ve" + ], + [ + "o", + "lat" + ], + [ + "p", + "s" + ], + [ + "t", + "hro" + ], + [ + "u", + "gh" + ], + [ + "Ġ", + "/" + ], + [ + "Ġ", + "va" + ], + [ + "Ġ", + "fi" + ], + [ + "Ġ", + "que" + ], + [ + "Ġ", + "all" + ], + [ + "Ġ", + "out" + ], + [ + "Ġ", + "one" + ], + [ + "Ġ", + "thro" + ], + [ + "ĠD", + "ivi" + ], + [ + "it", + "h" + ], + [ + "ĠM", + "ove" + ], + [ + "ĠS", + "im" + ], + [ + "ĠS", + "olution" + ], + [ + "Ġw", + "ill" + ], + [ + "Ġw", + "ith" + ], + [ + "ll", + "ow" + ], + [ + "Ġb", + "y" + ], + [ + "Ġb", + "ut" + ], + [ + "Ġb", + "ot" + ], + [ + "li", + "fy" + ], + [ + "Ġfo", + "llow" + ], + [ + "Ġdo", + "n" + ], + [ + "Ġy", + "et" + ], + [ + "Ġl", + "ater" + ], + [ + "Ġdi", + "ffi" + ], + [ + "olat", + "e" + ], + [ + "Ġva", + "lue" + ], + [ + "Ġfi", + "nd" + ], + [ + "Ġthro", + "ugh" + ], + [ + "ĠDivi", + "de" + ], + [ + "Ġbot", + "h" + ], + [ + "lify", + "ing" + ], + [ + "Ġfollow", + "ing" + ], + [ + "Ġdiffi", + "cult" + ], + [ + "Ġ2", + "4" + ], + [ + "Ġ1", + "8" + ], + [ + "Ġ2", + "0" + ], + [ + "Ġ1", + "1" + ], + [ + "Ġ1", + "4" + ], + [ + "Ġ", + "X" + ], + [ + "Ġ1", + "5" + ], + [ + "Ġ1", + "3" + ], + [ + "Ġ3", + "2" + ], + [ + "Ġ3", + "6" + ], + [ + "Ġ2", + "1" + ], + [ + "Ġ2", + "8" + ], + [ + "Ġ1", + "7" + ], + [ + "Ġ2", + "2" + ], + [ + "Ġ1", + "9" + ], + [ + "Ġ3", + "0" + ], + [ + "Ġ2", + "7" + ], + [ + "Ġ4", + "8" + ], + [ + "Ġ4", + "0" + ], + [ + "Ġ2", + "6" + ], + [ + "Ġ2", + "3" + ], + [ + "Ġ2", + "5" + ], + [ + "Ġ3", + "3" + ], + [ + "Ġ3", + "4" + ], + [ + "Ġ4", + "2" + ], + [ + "Ġ4", + "4" + ], + [ + "Ġ6", + "0" + ], + [ + "Ġ2", + "9" + ], + [ + "Ġ3", + "9" + ], + [ + "Ġ3", + "8" + ], + [ + "Ġ3", + "1" + ], + [ + "Ġ6", + "4" + ], + [ + "Ġ7", + "2" + ], + [ + "Ġ5", + "2" + ], + [ + "Ġ5", + "6" + ], + [ + "Ġ3", + "5" + ], + [ + "Ġ4", + "5" + ], + [ + "Ġ5", + "4" + ], + [ + "Ġ3", + "7" + ], + [ + "Ġ8", + "0" + ], + [ + "Ġ4", + "6" + ], + [ + "Ġ5", + "0" + ], + [ + "Ġ5", + "1" + ], + [ + "Ġ9", + "6" + ], + [ + "Ġ6", + "8" + ], + [ + "Ġ4", + "1" + ], + [ + "Ġ8", + "4" + ], + [ + "Ġ4", + "3" + ], + [ + "Ġ6", + "6" + ], + [ + "Ġ5", + "7" + ], + [ + "Ġ7", + "6" + ], + [ + "Ġ6", + "3" + ], + [ + "Ġ4", + "9" + ], + [ + "Ġ5", + "8" + ], + [ + "Ġ4", + "7" + ], + [ + "Ġ10", + "8" + ], + [ + "Ġ12", + "0" + ], + [ + "Ġ8", + "8" + ], + [ + "Ġ6", + "2" + ], + [ + "Ġ7", + "8" + ], + [ + "Ġ5", + "5" + ], + [ + "Ġ9", + "0" + ], + [ + "Ġ5", + "3" + ], + [ + "Ġ7", + "0" + ], + [ + "Ġ7", + "5" + ], + [ + "Ġ10", + "0" + ], + [ + "Ġ14", + "4" + ], + [ + "Ġ6", + "9" + ], + [ + "Ġ11", + "2" + ], + [ + "Ġ8", + "1" + ], + [ + "Ġ5", + "9" + ], + [ + "Ġ9", + "2" + ], + [ + "Ġ10", + "4" + ], + [ + "Ġ7", + "4" + ], + [ + "Ġ6", + "1" + ], + [ + "Ġ12", + "8" + ], + [ + "Ġ6", + "5" + ], + [ + "Ġ6", + "7" + ], + [ + "Ġ13", + "2" + ], + [ + "Ġ8", + "2" + ], + [ + "Ġ10", + "2" + ], + [ + "Ġ8", + "7" + ], + [ + "1", + "2" + ], + [ + "Ġ9", + "9" + ], + [ + "Ġ16", + "0" + ], + [ + "Ġ7", + "3" + ], + [ + "Ġ8", + "6" + ], + [ + "Ġ11", + "6" + ], + [ + "Ġ7", + "1" + ], + [ + "Ġ19", + "2" + ], + [ + "Ġ7", + "7" + ], + [ + "Ġ9", + "3" + ], + [ + "Ġ13", + "6" + ], + [ + "Ġ12", + "4" + ], + [ + "Ġ11", + "4" + ], + [ + "Ġ9", + "8" + ], + [ + "Ġ15", + "6" + ], + [ + "Ġ16", + "8" + ], + [ + "Ġ7", + "9" + ], + [ + "Ġ18", + "0" + ], + [ + "Ġ10", + "5" + ], + [ + "1", + "0" + ], + [ + "Ġ8", + "3" + ], + [ + "Ġ12", + "6" + ], + [ + "Ġ8", + "5" + ], + [ + "Ġ9", + "4" + ], + [ + "Ġ14", + "0" + ], + [ + "Ġ11", + "1" + ], + [ + "Ġ15", + "2" + ], + [ + "Ġ11", + "7" + ], + [ + "1", + "6" + ], + [ + "Ġ9", + "1" + ], + [ + "Ġ14", + "8" + ], + [ + "Ġ11", + "0" + ], + [ + "Ġ10", + "6" + ], + [ + "Ġ24", + "0" + ], + [ + "Ġ21", + "6" + ], + [ + "Ġ17", + "6" + ], + [ + "Ġ8", + "9" + ], + [ + "Ġ12", + "3" + ], + [ + "Ġ13", + "5" + ], + [ + "Ġ9", + "7" + ], + [ + "Ġ9", + "5" + ], + [ + "Ġ15", + "0" + ], + [ + "Ġ13", + "8" + ], + [ + "Ġ16", + "4" + ], + [ + "Ġ16", + "2" + ], + [ + "Ġ20", + "4" + ], + [ + "Ġ12", + "2" + ], + [ + "Ġ10", + "3" + ], + [ + "Ġ13", + "0" + ], + [ + "Ġ10", + "1" + ], + [ + "Ġ11", + "8" + ], + [ + "Ġ20", + "0" + ], + [ + "Ġ28", + "8" + ], + [ + "Ġ12", + "9" + ], + [ + "Ġ14", + "7" + ], + [ + "Ġ18", + "4" + ], + [ + "Ġ20", + "8" + ], + [ + "Ġ17", + "2" + ], + [ + "1", + "1" + ], + [ + "Ġ10", + "9" + ], + [ + "Ġ10", + "7" + ], + [ + "Ġ22", + "4" + ], + [ + "Ġ11", + "5" + ], + [ + "Ġ19", + "6" + ], + [ + "1", + "5" + ], + [ + "Ġ15", + "3" + ], + [ + "Ġ22", + "8" + ], + [ + "Ġ25", + "6" + ], + [ + "1", + "4" + ], + [ + "Ġ13", + "4" + ], + [ + "2", + "0" + ], + [ + "Ġ11", + "3" + ], + [ + "Ġ12", + "1" + ], + [ + "1", + "8" + ], + [ + "2", + "4" + ], + [ + "Ġ25", + "2" + ], + [ + "Ġ14", + "6" + ], + [ + "Ġ14", + "1" + ], + [ + "1", + "3" + ], + [ + "Ġ11", + "9" + ], + [ + "Ġ17", + "4" + ], + [ + "Ġ26", + "4" + ], + [ + "Ġ18", + "8" + ], + [ + "Ġ19", + "8" + ], + [ + "Ġ17", + "1" + ], + [ + "Ġ16", + "5" + ], + [ + "Ġ14", + "2" + ], + [ + "Ġ12", + "5" + ], + [ + "Ġ15", + "4" + ], + [ + "Ġ15", + "9" + ], + [ + "Ġ13", + "1" + ], + [ + "Ġ22", + "0" + ], + [ + "Ġ12", + "7" + ], + [ + "Ġ13", + "3" + ], + [ + "Ġ18", + "6" + ], + [ + "Ġ21", + "2" + ], + [ + "Ġ32", + "0" + ], + [ + "Ġ21", + "0" + ], + [ + "Ġ33", + "6" + ], + [ + "Ġ17", + "0" + ], + [ + "Ġ23", + "2" + ], + [ + "Ġ27", + "2" + ], + [ + "Ġ14", + "5" + ], + [ + "Ġ19", + "5" + ], + [ + "Ġ15", + "8" + ], + [ + "Ġ13", + "9" + ], + [ + "Ġ38", + "4" + ], + [ + "Ġ30", + "0" + ], + [ + "Ġ18", + "3" + ], + [ + "Ġ18", + "9" + ], + [ + "Ġ13", + "7" + ], + [ + "Ġ36", + "0" + ], + [ + "Ġ32", + "4" + ], + [ + "Ġ16", + "6" + ], + [ + "Ġ27", + "6" + ], + [ + "Ġ17", + "7" + ], + [ + "Ġ24", + "4" + ], + [ + "Ġ31", + "2" + ], + [ + "Ġ14", + "3" + ], + [ + "1", + "7" + ], + [ + "Ġ24", + "8" + ], + [ + "Ġ26", + "0" + ], + [ + "Ġ30", + "4" + ], + [ + "Ġ16", + "3" + ], + [ + "Ġ18", + "2" + ], + [ + "Ġ28", + "0" + ], + [ + "Ġ15", + "1" + ], + [ + "Ġ23", + "4" + ], + [ + "Ġ23", + "6" + ], + [ + "Ġ14", + "9" + ], + [ + "Ġ22", + "2" + ], + [ + "Ġ19", + "4" + ], + [ + "2", + "1" + ], + [ + "Ġ15", + "5" + ], + [ + "Ġ20", + "7" + ], + [ + "Ġ17", + "8" + ], + [ + "Ġ43", + "2" + ], + [ + "3", + "2" + ], + [ + "Ġ22", + "5" + ], + [ + "1", + "9" + ], + [ + "Ġ15", + "7" + ], + [ + "Ġ16", + "1" + ], + [ + "Ġ20", + "1" + ], + [ + "Ġ24", + "3" + ], + [ + "3", + "6" + ], + [ + "Ġ27", + "0" + ], + [ + "Ġ48", + "0" + ], + [ + "Ġ19", + "0" + ], + [ + "2", + "8" + ], + [ + "Ġ21", + "9" + ], + [ + "Ġ16", + "9" + ], + [ + "Ġ26", + "8" + ], + [ + "Ġ29", + "6" + ], + [ + "Ġ29", + "2" + ], + [ + "2", + "2" + ], + [ + "Ġ24", + "6" + ], + [ + "Ġ17", + "5" + ], + [ + "Ġ35", + "2" + ], + [ + "Ġ20", + "2" + ], + [ + "Ġ16", + "7" + ], + [ + "Ġ25", + "8" + ], + [ + "Ġ23", + "1" + ], + [ + "3", + "0" + ], + [ + "Ġ21", + "3" + ], + [ + "Ġ40", + "0" + ], + [ + "2", + "7" + ], + [ + "Ġ34", + "8" + ], + [ + "Ġ39", + "6" + ], + [ + "Ġ19", + "3" + ], + [ + "Ġ18", + "1" + ], + [ + "Ġ20", + "6" + ], + [ + "Ġ17", + "3" + ], + [ + "Ġ30", + "8" + ], + [ + "Ġ17", + "9" + ], + [ + "Ġ21", + "8" + ], + [ + "Ġ25", + "5" + ], + [ + "Ġ28", + "4" + ], + [ + "4", + "8" + ], + [ + "Ġ32", + "8" + ], + [ + "Ġ18", + "7" + ], + [ + "Ġ40", + "8" + ], + [ + "Ġ57", + "6" + ], + [ + "Ġ18", + "5" + ], + [ + "Ġ34", + "0" + ], + [ + "4", + "0" + ], + [ + "Ġ24", + "2" + ], + [ + "Ġ29", + "4" + ], + [ + "Ġ37", + "2" + ], + [ + "Ġ36", + "8" + ], + [ + "Ġ22", + "6" + ], + [ + "Ġ42", + "0" + ], + [ + "Ġ21", + "4" + ], + [ + "Ġ19", + "9" + ], + [ + "Ġ23", + "7" + ], + [ + "Ġ23", + "0" + ], + [ + "Ġ30", + "6" + ], + [ + "2", + "5" + ], + [ + "2", + "6" + ], + [ + "Ġ19", + "7" + ], + [ + "Ġ26", + "1" + ], + [ + "Ġ31", + "6" + ], + [ + "Ġ34", + "4" + ], + [ + "Ġ41", + "6" + ], + [ + "Ġ44", + "8" + ], + [ + "Ġ20", + "3" + ], + [ + "2", + "3" + ], + [ + "Ġ19", + "1" + ], + [ + "Ġ29", + "1" + ], + [ + "Ġ20", + "5" + ], + [ + "Ġ24", + "9" + ], + [ + "Ġ29", + "7" + ], + [ + "Ġ28", + "2" + ], + [ + "Ġ21", + "1" + ], + [ + "Ġ26", + "7" + ], + [ + "Ġ21", + "7" + ], + [ + "Ġ25", + "0" + ], + [ + "Ġ39", + "2" + ], + [ + "Ġ50", + "4" + ], + [ + "Ġ23", + "8" + ], + [ + "Ġ52", + "8" + ], + [ + "Ġ45", + "6" + ], + [ + "Ġ33", + "2" + ], + [ + "Ġ27", + "3" + ], + [ + "Ġ20", + "9" + ], + [ + "Ġ31", + "5" + ], + [ + "Ġ46", + "8" + ], + [ + "Ġ36", + "4" + ], + [ + "Ġ29", + "0" + ], + [ + "Ġ27", + "9" + ], + [ + "Ġ44", + "4" + ], + [ + "Ġ33", + "0" + ], + [ + "Ġ38", + "8" + ], + [ + "Ġ22", + "7" + ], + [ + "Ġ51", + "2" + ], + [ + "Ġ34", + "2" + ], + [ + "Ġ25", + "4" + ], + [ + "Ġ35", + "6" + ], + [ + "Ġ22", + "1" + ], + [ + "Ġ54", + "0" + ], + [ + "Ġ24", + "1" + ], + [ + "Ġ21", + "5" + ], + [ + "Ġ22", + "3" + ], + [ + "Ġ28", + "5" + ], + [ + "Ġ37", + "6" + ], + [ + "3", + "3" + ], + [ + "Ġ26", + "2" + ], + [ + "Ġ31", + "8" + ], + [ + "Ġ26", + "6" + ], + [ + "Ġ46", + "4" + ], + [ + "Ġ38", + "0" + ], + [ + "Ġ22", + "9" + ], + [ + "Ġ27", + "4" + ], + [ + "Ġ44", + "0" + ], + [ + "Ġ25", + "9" + ], + [ + "Ġ72", + "0" + ], + [ + "Ġ37", + "8" + ], + [ + "Ġ24", + "5" + ], + [ + "Ġ23", + "5" + ], + [ + "Ġ30", + "3" + ], + [ + "Ġ24", + "7" + ], + [ + "2", + "9" + ], + [ + "Ġ64", + "0" + ], + [ + "Ġ49", + "2" + ], + [ + "Ġ39", + "0" + ], + [ + "Ġ62", + "4" + ], + [ + "3", + "4" + ], + [ + "Ġ49", + "6" + ], + [ + "4", + "4" + ], + [ + "Ġ67", + "2" + ], + [ + "Ġ42", + "4" + ], + [ + "Ġ60", + "0" + ], + [ + "Ġ33", + "3" + ], + [ + "Ġ54", + "4" + ], + [ + "Ġ32", + "7" + ], + [ + "Ġ25", + "7" + ], + [ + "6", + "0" + ], + [ + "Ġ23", + "3" + ], + [ + "3", + "9" + ], + [ + "Ġ40", + "4" + ], + [ + "Ġ76", + "8" + ], + [ + "Ġ35", + "4" + ], + [ + "4", + "2" + ], + [ + "Ġ35", + "1" + ], + [ + "Ġ43", + "6" + ], + [ + "3", + "1" + ], + [ + "Ġ27", + "8" + ], + [ + "Ġ30", + "9" + ], + [ + "Ġ64", + "8" + ], + [ + "Ġ33", + "9" + ], + [ + "Ġ25", + "3" + ], + [ + "Ġ56", + "0" + ], + [ + "Ġ36", + "6" + ], + [ + "Ġ51", + "6" + ], + [ + "6", + "4" + ], + [ + "Ġ32", + "2" + ], + [ + "Ġ28", + "6" + ], + [ + "Ġ36", + "3" + ], + [ + "Ġ32", + "1" + ], + [ + "Ġ28", + "9" + ], + [ + "Ġ38", + "7" + ], + [ + "Ġ55", + "2" + ], + [ + "Ġ29", + "8" + ], + [ + "Ġ45", + "0" + ], + [ + "Ġ41", + "2" + ], + [ + "Ġ23", + "9" + ], + [ + "Ġ26", + "5" + ], + [ + "Ġ30", + "2" + ], + [ + "Ġ27", + "5" + ], + [ + "3", + "5" + ], + [ + "Ġ48", + "4" + ], + [ + "Ġ26", + "3" + ], + [ + "Ġ58", + "8" + ], + [ + "Ġ25", + "1" + ], + [ + "Ġ31", + "0" + ], + [ + "Ġ32", + "6" + ], + [ + "7", + "2" + ], + [ + "5", + "2" + ], + [ + "Ġ34", + "5" + ], + [ + "4", + "5" + ], + [ + "3", + "8" + ], + [ + "Ġ52", + "0" + ], + [ + "Ġ40", + "5" + ], + [ + "Ġ33", + "8" + ], + [ + "Ġ27", + "1" + ], + [ + "Ġ36", + "9" + ], + [ + "Ġ41", + "4" + ], + [ + "Ġ40", + "2" + ], + [ + "Ġ61", + "2" + ], + [ + "Ġ46", + "0" + ], + [ + "Ġ31", + "4" + ], + [ + "Ġ45", + "2" + ], + [ + "Ġ48", + "8" + ], + [ + "Ġ47", + "2" + ], + [ + "Ġ59", + "2" + ], + [ + "Ġ86", + "4" + ], + [ + "5", + "6" + ], + [ + "Ġ29", + "5" + ], + [ + "Ġ42", + "8" + ], + [ + "Ġ60", + "8" + ], + [ + "Ġ43", + "5" + ], + [ + "Ġ28", + "3" + ], + [ + "Ġ32", + "3" + ], + [ + "Ġ37", + "5" + ], + [ + "Ġ48", + "6" + ], + [ + "Ġ35", + "7" + ], + [ + "Ġ29", + "3" + ], + [ + "3", + "7" + ], + [ + "Ġ27", + "7" + ], + [ + "Ġ56", + "4" + ], + [ + "5", + "4" + ], + [ + "Ġ36", + "2" + ], + [ + "Ġ43", + "8" + ], + [ + "Ġ30", + "1" + ], + [ + "Ġ26", + "9" + ], + [ + "Ġ38", + "6" + ], + [ + "Ġ50", + "0" + ], + [ + "Ġ58", + "0" + ], + [ + "Ġ30", + "7" + ], + [ + "Ġ44", + "1" + ], + [ + "Ġ96", + "0" + ], + [ + "Ġ28", + "1" + ], + [ + "Ġ32", + "5" + ], + [ + "Ġ66", + "0" + ], + [ + "Ġ28", + "7" + ], + [ + "Ġ30", + "5" + ], + [ + "Ġ29", + "9" + ], + [ + "Ġ47", + "6" + ], + [ + "Ġ35", + "0" + ], + [ + "Ġ70", + "4" + ], + [ + "Ġ33", + "4" + ], + [ + "Ġ39", + "9" + ], + [ + "Ġ39", + "3" + ], + [ + "Ġ68", + "4" + ], + [ + "Ġ42", + "3" + ], + [ + "Ġ42", + "6" + ], + [ + "Ġ65", + "6" + ], + [ + "9", + "6" + ], + [ + "5", + "1" + ], + [ + "Ġ34", + "6" + ], + [ + "Ġ58", + "4" + ], + [ + "Ġ37", + "0" + ], + [ + "8", + "0" + ], + [ + "Ġ81", + "6" + ], + [ + "Ġ45", + "9" + ], + [ + "Ġ80", + "0" + ], + [ + "Ġ53", + "6" + ], + [ + "Ġ38", + "1" + ], + [ + "Ġ31", + "3" + ], + [ + "Ġ46", + "2" + ], + [ + "Ġ33", + "7" + ], + [ + "Ġ53", + "2" + ], + [ + "Ġ50", + "8" + ], + [ + "Ġ48", + "3" + ], + [ + "5", + "0" + ], + [ + "Ġ31", + "9" + ], + [ + "4", + "1" + ], + [ + "Ġ41", + "1" + ], + [ + "Ġ52", + "4" + ], + [ + "Ġ63", + "6" + ], + [ + "Ġ75", + "6" + ], + [ + "Ġ43", + "4" + ], + [ + "Ġ34", + "3" + ], + [ + "Ġ38", + "5" + ], + [ + "Ġ69", + "6" + ], + [ + "Ġ79", + "2" + ], + [ + "Ġ51", + "0" + ], + [ + "Ġ31", + "1" + ], + [ + "Ġ35", + "8" + ], + [ + "Ġ41", + "7" + ], + [ + "Ġ68", + "8" + ], + [ + "Ġ33", + "1" + ], + [ + "Ġ31", + "7" + ], + [ + "Ġ37", + "4" + ], + [ + "Ġ32", + "9" + ], + [ + "Ġ36", + "1" + ], + [ + "Ġ49", + "5" + ], + [ + "Ġ61", + "6" + ], + [ + "Ġ78", + "4" + ], + [ + "Ġ35", + "5" + ], + [ + "Ġ39", + "4" + ], + [ + "Ġ73", + "6" + ], + [ + "Ġ56", + "8" + ], + [ + "4", + "6" + ], + [ + "Ġ78", + "0" + ], + [ + "Ġ41", + "0" + ], + [ + "Ġ54", + "8" + ], + [ + "6", + "8" + ], + [ + "4", + "3" + ], + [ + "8", + "4" + ], + [ + "Ġ34", + "1" + ], + [ + "Ġ9", + "12" + ], + [ + "Ġ38", + "2" + ], + [ + "Ġ47", + "4" + ], + [ + "4", + "9" + ], + [ + "Ġ39", + "8" + ], + [ + "Ġ84", + "0" + ], + [ + "Ġ52", + "2" + ], + [ + "Ġ49", + "8" + ], + [ + "Ġ42", + "9" + ], + [ + "Ġ51", + "3" + ], + [ + "Ġ48", + "2" + ], + [ + "Ġ68", + "0" + ], + [ + "Ġ44", + "7" + ], + [ + "Ġ83", + "2" + ], + [ + "Ġ74", + "4" + ], + [ + "Ġ70", + "8" + ], + [ + "Ġ41", + "8" + ], + [ + "Ġ40", + "6" + ], + [ + "Ġ55", + "6" + ], + [ + "Ġ33", + "5" + ], + [ + "Ġ73", + "2" + ], + [ + "Ġ89", + "6" + ], + [ + "Ġ40", + "3" + ], + [ + "Ġ59", + "4" + ], + [ + "Ġ57", + "2" + ], + [ + "Ġ64", + "4" + ], + [ + "Ġ34", + "7" + ], + [ + "Ġ35", + "3" + ], + [ + "6", + "3" + ], + [ + "Ġ36", + "5" + ], + [ + "Ġ46", + "5" + ], + [ + "Ġ47", + "7" + ], + [ + "Ġ45", + "3" + ], + [ + "Ġ50", + "7" + ], + [ + "Ġ57", + "9" + ], + [ + "Ġ36", + "7" + ], + [ + "Ġ58", + "2" + ], + [ + "Ġ63", + "0" + ], + [ + "Ġ42", + "2" + ], + [ + "Ġ90", + "0" + ], + [ + "Ġ59", + "6" + ], + [ + "Ġ54", + "6" + ], + [ + "5", + "7" + ], + [ + "Ġ39", + "1" + ], + [ + "Ġ63", + "2" + ], + [ + "Ġ47", + "1" + ], + [ + "Ġ53", + "1" + ], + [ + "Ġ34", + "9" + ], + [ + "Ġ44", + "2" + ], + [ + "Ġ60", + "4" + ], + [ + "Ġ65", + "2" + ], + [ + "7", + "6" + ], + [ + "Ġ75", + "2" + ], + [ + "Ġ48", + "9" + ], + [ + "Ġ37", + "1" + ], + [ + "Ġ58", + "5" + ], + [ + "Ġ67", + "6" + ], + [ + "Ġ88", + "0" + ], + [ + "6", + "6" + ], + [ + "Ġ38", + "9" + ], + [ + "Ġ66", + "4" + ], + [ + "Ġ55", + "8" + ], + [ + "Ġ53", + "4" + ], + [ + "Ġ62", + "0" + ], + [ + "Ġ62", + "8" + ], + [ + "Ġ43", + "3" + ], + [ + "Ġ35", + "9" + ], + [ + "10", + "8" + ], + [ + "Ġ37", + "3" + ], + [ + "4", + "7" + ], + [ + "Ġ80", + "4" + ], + [ + "Ġ39", + "5" + ], + [ + "Ġ56", + "7" + ], + [ + "Ġ82", + "8" + ], + [ + "Ġ57", + "8" + ], + [ + "Ġ57", + "0" + ], + [ + "Ġ40", + "1" + ], + [ + "Ġ72", + "4" + ], + [ + "Ġ37", + "9" + ], + [ + "Ġ93", + "6" + ], + [ + "Ġ49", + "0" + ], + [ + "Ġ54", + "3" + ], + [ + "Ġ40", + "9" + ], + [ + "Ġ45", + "1" + ], + [ + "Ġ45", + "4" + ], + [ + "Ġ44", + "6" + ], + [ + "Ġ39", + "7" + ], + [ + "Ġ54", + "9" + ], + [ + "Ġ43", + "0" + ], + [ + "Ġ67", + "5" + ], + [ + "Ġ52", + "5" + ], + [ + "Ġ88", + "8" + ], + [ + "Ġ72", + "8" + ], + [ + "Ġ77", + "2" + ], + [ + "Ġ45", + "8" + ], + [ + "Ġ37", + "7" + ], + [ + "Ġ76", + "0" + ], + [ + "Ġ97", + "2" + ], + [ + "14", + "4" + ], + [ + "Ġ70", + "0" + ], + [ + "Ġ40", + "7" + ], + [ + "Ġ84", + "8" + ], + [ + "Ġ48", + "1" + ], + [ + "5", + "5" + ], + [ + "Ġ47", + "0" + ], + [ + "Ġ77", + "6" + ], + [ + "Ġ41", + "9" + ], + [ + "Ġ38", + "3" + ], + [ + "Ġ51", + "4" + ], + [ + "Ġ7", + "12" + ], + [ + "Ġ42", + "1" + ], + [ + "Ġ60", + "3" + ], + [ + "12", + "0" + ], + [ + "Ġ50", + "1" + ], + [ + "Ġ87", + "6" + ], + [ + "5", + "8" + ], + [ + "Ġ50", + "6" + ], + [ + "Ġ55", + "5" + ], + [ + "Ġ46", + "6" + ], + [ + "Ġ64", + "2" + ], + [ + "Ġ43", + "7" + ], + [ + "Ġ53", + "0" + ], + [ + "Ġ85", + "2" + ], + [ + "Ġ51", + "9" + ], + [ + "5", + "3" + ], + [ + "8", + "8" + ], + [ + "Ġ41", + "5" + ], + [ + "Ġ42", + "5" + ], + [ + "7", + "5" + ], + [ + "Ġ66", + "8" + ], + [ + "Ġ43", + "9" + ], + [ + "Ġ66", + "6" + ], + [ + "Ġ49", + "4" + ], + [ + "Ġ74", + "0" + ], + [ + "Ġ60", + "6" + ], + [ + "Ġ44", + "5" + ], + [ + "Ġ44", + "3" + ], + [ + "Ġ70", + "2" + ], + [ + "Ġ92", + "8" + ], + [ + "Ġ51", + "8" + ], + [ + "Ġ61", + "8" + ], + [ + "Ġ92", + "4" + ], + [ + "Ġ44", + "9" + ], + [ + "6", + "2" + ], + [ + "Ġ56", + "1" + ], + [ + "Ġ42", + "7" + ], + [ + "Ġ51", + "5" + ], + [ + "8", + "1" + ], + [ + "9", + "0" + ], + [ + "10", + "0" + ], + [ + "Ġ61", + "5" + ], + [ + "1", + "12" + ], + [ + "12", + "8" + ], + [ + "Ġ97", + "6" + ], + [ + "Ġ54", + "2" + ], + [ + "Ġ41", + "3" + ], + [ + "Ġ69", + "2" + ], + [ + "7", + "8" + ], + [ + "Ġ48", + "5" + ], + [ + "Ġ45", + "5" + ], + [ + "Ġ62", + "7" + ], + [ + "Ġ65", + "4" + ], + [ + "Ġ59", + "1" + ], + [ + "6", + "9" + ], + [ + "Ġ53", + "7" + ], + [ + "Ġ62", + "1" + ], + [ + "Ġ65", + "1" + ], + [ + "Ġ48", + "7" + ], + [ + "Ġ72", + "3" + ], + [ + "Ġ86", + "8" + ], + [ + "Ġ47", + "8" + ], + [ + "6", + "5" + ], + [ + "Ġ45", + "7" + ], + [ + "Ġ71", + "6" + ], + [ + "Ġ81", + "0" + ], + [ + "Ġ57", + "7" + ], + [ + "Ġ67", + "8" + ], + [ + "Ġ80", + "8" + ], + [ + "5", + "9" + ], + [ + "Ġ50", + "2" + ], + [ + "Ġ72", + "9" + ], + [ + "7", + "0" + ], + [ + "Ġ65", + "7" + ], + [ + "Ġ98", + "4" + ], + [ + "Ġ94", + "4" + ], + [ + "Ġ46", + "3" + ], + [ + "Ġ72", + "6" + ], + [ + "Ġ46", + "9" + ], + [ + "Ġ74", + "8" + ], + [ + "Ġ82", + "0" + ], + [ + "Ġ46", + "7" + ], + [ + "Ġ99", + "2" + ], + [ + "Ġ73", + "8" + ], + [ + "Ġ63", + "9" + ], + [ + "6", + "1" + ], + [ + "Ġ50", + "5" + ], + [ + "Ġ69", + "0" + ], + [ + "Ġ77", + "4" + ], + [ + "Ġ78", + "8" + ], + [ + "Ġ94", + "8" + ], + [ + "Ġ55", + "0" + ], + [ + "Ġ56", + "2" + ], + [ + "Ġ77", + "1" + ], + [ + "6", + "7" + ], + [ + "Ġ65", + "0" + ], + [ + "Ġ82", + "4" + ], + [ + "Ġ49", + "9" + ], + [ + "Ġ59", + "7" + ], + [ + "Ġ57", + "3" + ], + [ + "Ġ43", + "1" + ], + [ + "Ġ47", + "5" + ], + [ + "10", + "4" + ], + [ + "Ġ96", + "4" + ], + [ + "Ġ69", + "3" + ], + [ + "Ġ79", + "6" + ], + [ + "Ġ60", + "2" + ], + [ + "Ġ60", + "9" + ], + [ + "Ġ87", + "2" + ], + [ + "Ġ46", + "1" + ], + [ + "7", + "4" + ], + [ + "Ġ72", + "2" + ], + [ + "Ġ49", + "3" + ], + [ + "Ġ52", + "6" + ], + [ + "19", + "2" + ], + [ + "Ġ66", + "3" + ], + [ + "9", + "2" + ], + [ + "Ġ76", + "4" + ], + [ + "Ġ49", + "7" + ], + [ + "Ġ75", + "0" + ], + [ + "Ġ83", + "6" + ], + [ + "Ġ77", + "0" + ], + [ + "Ġ52", + "9" + ], + [ + "Ġ64", + "5" + ], + [ + "Ġ58", + "6" + ], + [ + "Ġ90", + "4" + ], + [ + "Ġ62", + "6" + ], + [ + "Ġ54", + "7" + ], + [ + "Ġ59", + "0" + ], + [ + "Ġ55", + "4" + ], + [ + "Ġ76", + "5" + ], + [ + "Ġ51", + "1" + ], + [ + "Ġ49", + "1" + ], + [ + "Ġ71", + "4" + ], + [ + "Ġ51", + "7" + ], + [ + "Ġ99", + "6" + ], + [ + "Ġ47", + "3" + ], + [ + "Ġ92", + "0" + ], + [ + "Ġ64", + "3" + ], + [ + "Ġ81", + "2" + ], + [ + "Ġ63", + "3" + ], + [ + "Ġ52", + "3" + ], + [ + "Ġ67", + "4" + ], + [ + "Ġ53", + "8" + ], + [ + "Ġ88", + "4" + ], + [ + "Ġ86", + "7" + ], + [ + "Ġ85", + "6" + ], + [ + "Ġ58", + "1" + ], + [ + "Ġ54", + "5" + ], + [ + "Ġ61", + "0" + ], + [ + "Ġ96", + "8" + ], + [ + "Ġ58", + "3" + ], + [ + "Ġ73", + "5" + ], + [ + "Ġ71", + "1" + ], + [ + "Ġ84", + "4" + ], + [ + "Ġ68", + "7" + ], + [ + "9", + "9" + ], + [ + "16", + "0" + ], + [ + "7", + "3" + ], + [ + "Ġ87", + "0" + ], + [ + "Ġ56", + "3" + ], + [ + "Ġ78", + "3" + ], + [ + "Ġ47", + "9" + ], + [ + "Ġ59", + "8" + ], + [ + "Ġ74", + "7" + ], + [ + "13", + "2" + ], + [ + "Ġ50", + "9" + ], + [ + "Ġ81", + "9" + ], + [ + "Ġ54", + "1" + ], + [ + "Ġ80", + "1" + ], + [ + "Ġ57", + "4" + ], + [ + "Ġ64", + "6" + ], + [ + "Ġ88", + "2" + ], + [ + "Ġ59", + "5" + ], + [ + "Ġ50", + "3" + ], + [ + "Ġ53", + "5" + ], + [ + "Ġ56", + "6" + ], + [ + "Ġ9", + "16" + ], + [ + "Ġ52", + "1" + ], + [ + "Ġ61", + "4" + ], + [ + "Ġ98", + "0" + ], + [ + "Ġ100", + "0" + ], + [ + "Ġ78", + "6" + ], + [ + "Ġ86", + "0" + ], + [ + "8", + "7" + ], + [ + "Ġ95", + "2" + ], + [ + "Ġ53", + "3" + ], + [ + "Ġ70", + "5" + ], + [ + "Ġ68", + "1" + ], + [ + "Ġ58", + "9" + ], + [ + "Ġ52", + "7" + ], + [ + "Ġ76", + "2" + ], + [ + "Ġ79", + "8" + ], + [ + "Ġ66", + "9" + ], + [ + "Ġ96", + "3" + ], + [ + "Ġ53", + "9" + ], + [ + "8", + "2" + ], + [ + "Ġ75", + "9" + ], + [ + "10", + "2" + ], + [ + "Ġ55", + "3" + ], + [ + "Ġ89", + "2" + ], + [ + "7", + "1" + ], + [ + "Ġ91", + "8" + ], + [ + "Ġ60", + "1" + ], + [ + "Ġ87", + "3" + ], + [ + "Ġ84", + "6" + ], + [ + "Ġ55", + "1" + ], + [ + "Ġ86", + "6" + ], + [ + "Ġ62", + "5" + ], + [ + "Ġ64", + "9" + ], + [ + "Ġ58", + "7" + ], + [ + "Ġ64", + "1" + ], + [ + "Ġ79", + "5" + ], + [ + "Ġ65", + "8" + ], + [ + "Ġ63", + "4" + ], + [ + "Ġ83", + "4" + ], + [ + "Ġ69", + "9" + ], + [ + "Ġ82", + "2" + ], + [ + "Ġ61", + "1" + ], + [ + "7", + "7" + ], + [ + "Ġ72", + "1" + ], + [ + "Ġ94", + "0" + ], + [ + "Ġ89", + "1" + ], + [ + "Ġ90", + "8" + ], + [ + "Ġ67", + "3" + ], + [ + "Ġ76", + "9" + ], + [ + "Ġ57", + "1" + ], + [ + "Ġ74", + "1" + ], + [ + "Ġ94", + "5" + ], + [ + "Ġ68", + "2" + ], + [ + "Ġ56", + "5" + ], + [ + "Ġ62", + "2" + ], + [ + "Ġ73", + "0" + ], + [ + "18", + "0" + ], + [ + "Ġ66", + "2" + ], + [ + "Ġ77", + "7" + ], + [ + "Ġ68", + "6" + ], + [ + "Ġ70", + "6" + ], + [ + "15", + "6" + ], + [ + "Ġ57", + "5" + ], + [ + "Ġ96", + "6" + ], + [ + "9", + "8" + ], + [ + "Ġ99", + "0" + ], + [ + "1", + "16" + ], + [ + "9", + "3" + ], + [ + "12", + "4" + ], + [ + "10", + "5" + ], + [ + "Ġ85", + "5" + ], + [ + "8", + "5" + ], + [ + "Ġ83", + "7" + ], + [ + "7", + "9" + ], + [ + "Ġ96", + "2" + ], + [ + "Ġ67", + "0" + ], + [ + "16", + "8" + ], + [ + "Ġ55", + "9" + ], + [ + "Ġ63", + "8" + ], + [ + "Ġ61", + "3" + ], + [ + "8", + "6" + ], + [ + "Ġ59", + "3" + ], + [ + "8", + "3" + ], + [ + "Ġ80", + "2" + ], + [ + "Ġ71", + "7" + ], + [ + "13", + "6" + ], + [ + "Ġ60", + "5" + ], + [ + "Ġ93", + "2" + ], + [ + "Ġ56", + "9" + ], + [ + "Ġ85", + "8" + ], + [ + "Ġ55", + "7" + ], + [ + "Ġ90", + "3" + ], + [ + "Ġ84", + "3" + ], + [ + "Ġ98", + "8" + ], + [ + "Ġ75", + "8" + ], + [ + "Ġ67", + "9" + ], + [ + "Ġ71", + "0" + ], + [ + "Ġ63", + "1" + ], + [ + "Ġ95", + "6" + ], + [ + "Ġ65", + "5" + ], + [ + "Ġ97", + "5" + ], + [ + "Ġ95", + "4" + ], + [ + "Ġ82", + "5" + ], + [ + "12", + "6" + ], + [ + "Ġ75", + "3" + ], + [ + "Ġ70", + "7" + ], + [ + "Ġ78", + "2" + ], + [ + "Ġ60", + "7" + ], + [ + "Ġ78", + "9" + ], + [ + "24", + "0" + ], + [ + "Ġ59", + "9" + ], + [ + "Ġ66", + "1" + ], + [ + "Ġ91", + "5" + ], + [ + "Ġ64", + "7" + ], + [ + "Ġ93", + "0" + ], + [ + "Ġ69", + "8" + ], + [ + "11", + "4" + ], + [ + "Ġ77", + "8" + ], + [ + "Ġ61", + "7" + ], + [ + "Ġ67", + "7" + ], + [ + "Ġ61", + "9" + ], + [ + "Ġ73", + "4" + ], + [ + "Ġ89", + "4" + ], + [ + "Ġ81", + "8" + ], + [ + "2", + "16" + ], + [ + "11", + "1" + ], + [ + "9", + "7" + ], + [ + "14", + "0" + ], + [ + "Ġ63", + "7" + ], + [ + "Ġ62", + "3" + ], + [ + "Ġ65", + "3" + ], + [ + "Ġ65", + "9" + ], + [ + "Ġ81", + "3" + ], + [ + "Ġ80", + "3" + ], + [ + "Ġ72", + "5" + ], + [ + "Ġ86", + "5" + ], + [ + "Ġ79", + "4" + ], + [ + "Ġ63", + "5" + ], + [ + "Ġ97", + "8" + ], + [ + "Ġ90", + "6" + ], + [ + "Ġ62", + "9" + ], + [ + "Ġ80", + "7" + ], + [ + "11", + "7" + ], + [ + "Ġ74", + "2" + ], + [ + "14", + "8" + ], + [ + "Ġ77", + "5" + ], + [ + "Ġ68", + "3" + ], + [ + "Ġ92", + "7" + ], + [ + "Ġ73", + "9" + ], + [ + "Ġ72", + "7" + ], + [ + "Ġ84", + "2" + ], + [ + "Ġ69", + "4" + ], + [ + "Ġ68", + "5" + ], + [ + "Ġ90", + "9" + ], + [ + "13", + "5" + ], + [ + "Ġ89", + "7" + ], + [ + "Ġ86", + "1" + ], + [ + "17", + "6" + ], + [ + "9", + "1" + ], + [ + "Ġ66", + "5" + ], + [ + "9", + "4" + ], + [ + "Ġ98", + "1" + ], + [ + "Ġ71", + "5" + ], + [ + "Ġ75", + "4" + ], + [ + "Ġ94", + "2" + ], + [ + "Ġ87", + "9" + ], + [ + "Ġ90", + "2" + ], + [ + "Ġ88", + "5" + ], + [ + "Ġ96", + "1" + ], + [ + "Ġ83", + "1" + ], + [ + "Ġ77", + "3" + ], + [ + "Ġ66", + "7" + ], + [ + "Ġ96", + "9" + ], + [ + "Ġ93", + "9" + ], + [ + "1", + "10" + ], + [ + "Ġ78", + "7" + ], + [ + "Ġ71", + "8" + ], + [ + "Ġ69", + "1" + ], + [ + "Ġ73", + "1" + ], + [ + "15", + "2" + ], + [ + "Ġ80", + "6" + ], + [ + "Ġ84", + "9" + ], + [ + "8", + "9" + ], + [ + "Ġ74", + "6" + ], + [ + "Ġ69", + "7" + ], + [ + "Ġ76", + "6" + ], + [ + "Ġ89", + "8" + ], + [ + "Ġ68", + "9" + ], + [ + "Ġ78", + "1" + ], + [ + "Ġ83", + "5" + ], + [ + "28", + "8" + ], + [ + "Ġ73", + "7" + ], + [ + "Ġ78", + "5" + ], + [ + "Ġ83", + "0" + ], + [ + "Ġ85", + "0" + ], + [ + "Ġ89", + "9" + ], + [ + "Ġ73", + "3" + ], + [ + "Ġ70", + "3" + ], + [ + "Ġ79", + "3" + ], + [ + "12", + "3" + ], + [ + "Ġ70", + "9" + ], + [ + "Ġ81", + "4" + ], + [ + "Ġ91", + "4" + ], + [ + "Ġ99", + "9" + ], + [ + "Ġ92", + "1" + ], + [ + "14", + "7" + ], + [ + "9", + "5" + ], + [ + "Ġ74", + "5" + ], + [ + "Ġ79", + "0" + ], + [ + "16", + "2" + ], + [ + "Ġ75", + "7" + ], + [ + "Ġ87", + "4" + ], + [ + "Ġ69", + "5" + ], + [ + "Ġ95", + "7" + ], + [ + "20", + "8" + ], + [ + "Ġ67", + "1" + ], + [ + "Ġ76", + "3" + ], + [ + "Ġ81", + "7" + ], + [ + "15", + "0" + ], + [ + "Ġ97", + "0" + ], + [ + "Ġ88", + "3" + ], + [ + "20", + "4" + ], + [ + "Ġ77", + "9" + ], + [ + "Ġ84", + "1" + ], + [ + "Ġ82", + "6" + ], + [ + "Ġ87", + "1" + ], + [ + "Ġ83", + "8" + ], + [ + "Ġ86", + "9" + ], + [ + "10", + "1" + ], + [ + "Ġ76", + "1" + ], + [ + "25", + "6" + ], + [ + "Ġ75", + "5" + ], + [ + "10", + "3" + ], + [ + "Ġ89", + "0" + ], + [ + "Ġ83", + "3" + ], + [ + "Ġ99", + "3" + ], + [ + "Ġ98", + "7" + ], + [ + "Ġ70", + "1" + ], + [ + "Ġ97", + "4" + ], + [ + "Ġ93", + "3" + ], + [ + "10", + "6" + ], + [ + "Ġ80", + "5" + ], + [ + "16", + "4" + ], + [ + "2", + "24" + ], + [ + "Ġ95", + "1" + ], + [ + "Ġ96", + "5" + ], + [ + "Ġ87", + "8" + ], + [ + "Ġ93", + "8" + ], + [ + "Ġ81", + "1" + ], + [ + "10", + "9" + ], + [ + "Ġ9", + "10" + ], + [ + "Ġ74", + "3" + ], + [ + "12", + "2" + ], + [ + "Ġ79", + "9" + ], + [ + "13", + "0" + ], + [ + "Ġ75", + "1" + ], + [ + "Ġ71", + "3" + ], + [ + "19", + "6" + ], + [ + "20", + "0" + ], + [ + "Ġ85", + "4" + ], + [ + "12", + "9" + ], + [ + "Ġ88", + "6" + ], + [ + "Ġ90", + "1" + ], + [ + "15", + "3" + ], + [ + "13", + "8" + ], + [ + "Ġ87", + "5" + ], + [ + "Ġ86", + "2" + ], + [ + "Ġ99", + "4" + ], + [ + "Ġ96", + "7" + ], + [ + "Ġ71", + "9" + ], + [ + "12", + "1" + ], + [ + "Ġ95", + "0" + ], + [ + "11", + "5" + ], + [ + "Ġ82", + "3" + ], + [ + "Ġ91", + "3" + ], + [ + "Ġ81", + "5" + ], + [ + "Ġ92", + "6" + ], + [ + "Ġ74", + "9" + ], + [ + "Ġ76", + "7" + ], + [ + "18", + "4" + ], + [ + "Ġ84", + "5" + ], + [ + "Ġ98", + "2" + ], + [ + "25", + "2" + ], + [ + "11", + "3" + ], + [ + "14", + "6" + ], + [ + "Ġ79", + "1" + ], + [ + "Ġ92", + "2" + ], + [ + "Ġ97", + "3" + ], + [ + "Ġ98", + "6" + ], + [ + "Ġ82", + "1" + ], + [ + "Ġ88", + "9" + ], + [ + "Ġ93", + "1" + ], + [ + "Ġ82", + "7" + ], + [ + "Ġ80", + "9" + ], + [ + "2", + "28" + ], + [ + "Ġ84", + "7" + ], + [ + "Ġ94", + "6" + ], + [ + "Ġ85", + "9" + ], + [ + "Ġ85", + "1" + ], + [ + "Ġ82", + "9" + ], + [ + "Ġ97", + "9" + ], + [ + "13", + "4" + ], + [ + "10", + "7" + ], + [ + "11", + "8" + ], + [ + "Ġ95", + "8" + ], + [ + "13", + "1" + ], + [ + "Ġ94", + "7" + ], + [ + "17", + "2" + ], + [ + "Ġ90", + "5" + ], + [ + "Ġ87", + "7" + ], + [ + "Ġ79", + "7" + ], + [ + "Ġ93", + "7" + ], + [ + "Ġ89", + "5" + ], + [ + "Ġ83", + "9" + ], + [ + "Ġ90", + "7" + ], + [ + "3", + "20" + ], + [ + "Ġ88", + "1" + ], + [ + "Ġ88", + "7" + ], + [ + "14", + "5" + ], + [ + "11", + "9" + ], + [ + "19", + "5" + ], + [ + "Ġ97", + "1" + ], + [ + "18", + "9" + ], + [ + "Ġ93", + "4" + ], + [ + "14", + "1" + ], + [ + "38", + "4" + ], + [ + "Ġ99", + "5" + ], + [ + "Ġ97", + "7" + ], + [ + "Ġ85", + "3" + ], + [ + "Ġ91", + "7" + ], + [ + "Ġ95", + "5" + ], + [ + "Ġ94", + "9" + ], + [ + "12", + "5" + ], + [ + "Ġ94", + "1" + ], + [ + "Ġ85", + "7" + ], + [ + "19", + "8" + ], + [ + "Ġ91", + "9" + ], + [ + "Ġ98", + "5" + ], + [ + "18", + "8" + ], + [ + "Ġ86", + "3" + ], + [ + "Ġ99", + "8" + ], + [ + "Ġ92", + "3" + ], + [ + "16", + "5" + ], + [ + "17", + "1" + ], + [ + "Ġ92", + "5" + ], + [ + "Ġ93", + "5" + ], + [ + "13", + "3" + ], + [ + "Ġ89", + "3" + ], + [ + "12", + "7" + ], + [ + "Ġ95", + "3" + ], + [ + "Ġ98", + "3" + ], + [ + "Ġ91", + "1" + ], + [ + "15", + "9" + ], + [ + "Ġ92", + "9" + ], + [ + "2", + "20" + ], + [ + "Ġ99", + "1" + ], + [ + "18", + "3" + ], + [ + "16", + "3" + ], + [ + "3", + "36" + ], + [ + "27", + "2" + ], + [ + "19", + "4" + ], + [ + "3", + "24" + ], + [ + "26", + "4" + ], + [ + "Ġ94", + "3" + ], + [ + "2", + "12" + ], + [ + "16", + "1" + ], + [ + "14", + "2" + ], + [ + "17", + "4" + ], + [ + "13", + "9" + ], + [ + "2", + "10" + ], + [ + "18", + "6" + ], + [ + "24", + "3" + ], + [ + "30", + "0" + ], + [ + "Ġ99", + "7" + ], + [ + "24", + "4" + ], + [ + "15", + "4" + ], + [ + "14", + "3" + ], + [ + "13", + "7" + ], + [ + "17", + "0" + ], + [ + "4", + "32" + ], + [ + "26", + "0" + ], + [ + "22", + "5" + ], + [ + "2", + "32" + ], + [ + "19", + "3" + ], + [ + "14", + "9" + ], + [ + "Ġ95", + "9" + ], + [ + "17", + "7" + ], + [ + "24", + "8" + ], + [ + "30", + "4" + ], + [ + "Ġ98", + "9" + ], + [ + "36", + "0" + ], + [ + "18", + "2" + ], + [ + "15", + "8" + ], + [ + "16", + "6" + ], + [ + "3", + "12" + ], + [ + "27", + "6" + ], + [ + "20", + "1" + ], + [ + "15", + "1" + ], + [ + "23", + "4" + ], + [ + "15", + "7" + ], + [ + "21", + "9" + ], + [ + "22", + "2" + ], + [ + "2", + "36" + ], + [ + "16", + "9" + ], + [ + "15", + "5" + ], + [ + "20", + "7" + ], + [ + "18", + "1" + ], + [ + "27", + "0" + ], + [ + "19", + "0" + ], + [ + "48", + "0" + ], + [ + "28", + "0" + ], + [ + "2", + "18" + ], + [ + "17", + "8" + ], + [ + "29", + "2" + ], + [ + "57", + "6" + ], + [ + "17", + "5" + ], + [ + "16", + "7" + ], + [ + "35", + "2" + ], + [ + "24", + "2" + ], + [ + "24", + "6" + ], + [ + "23", + "1" + ], + [ + "2", + "13" + ], + [ + "19", + "7" + ], + [ + "23", + "7" + ], + [ + "4", + "48" + ], + [ + "40", + "0" + ], + [ + "25", + "5" + ], + [ + "17", + "3" + ], + [ + "19", + "9" + ], + [ + "29", + "6" + ], + [ + "29", + "7" + ], + [ + "24", + "1" + ], + [ + "17", + "9" + ], + [ + "26", + "8" + ], + [ + "20", + "2" + ], + [ + "2", + "17" + ], + [ + "18", + "5" + ], + [ + "24", + "9" + ], + [ + "39", + "6" + ], + [ + "18", + "7" + ], + [ + "28", + "4" + ], + [ + "30", + "6" + ], + [ + "30", + "8" + ], + [ + "25", + "8" + ], + [ + "29", + "1" + ], + [ + "29", + "4" + ], + [ + "19", + "1" + ], + [ + "22", + "7" + ], + [ + "20", + "9" + ], + [ + "40", + "8" + ], + [ + "4", + "16" + ], + [ + "29", + "0" + ], + [ + "20", + "3" + ], + [ + "36", + "8" + ], + [ + "20", + "5" + ], + [ + "20", + "6" + ], + [ + "27", + "9" + ], + [ + "5", + "12" + ], + [ + "25", + "9" + ], + [ + "2", + "11" + ], + [ + "3", + "72" + ], + [ + "24", + "5" + ], + [ + "2", + "30" + ], + [ + "4", + "20" + ], + [ + "28", + "9" + ], + [ + "25", + "7" + ], + [ + "26", + "1" + ], + [ + "2", + "14" + ], + [ + "3", + "48" + ], + [ + "5", + "28" + ], + [ + "38", + "8" + ], + [ + "3", + "21" + ], + [ + "7", + "68" + ], + [ + "32", + "8" + ], + [ + "39", + "2" + ], + [ + "3", + "16" + ], + [ + "22", + "6" + ], + [ + "3", + "15" + ], + [ + "46", + "8" + ], + [ + "3", + "40" + ], + [ + "6", + "40" + ], + [ + "7", + "20" + ], + [ + "23", + "8" + ], + [ + "32", + "2" + ], + [ + "27", + "3" + ], + [ + "3", + "30" + ], + [ + "22", + "3" + ], + [ + "26", + "2" + ], + [ + "44", + "4" + ], + [ + "38", + "7" + ], + [ + "2", + "15" + ], + [ + "2", + "21" + ], + [ + "5", + "40" + ], + [ + "48", + "4" + ], + [ + "45", + "6" + ], + [ + "24", + "7" + ], + [ + "30", + "3" + ], + [ + "34", + "4" + ], + [ + "37", + "8" + ], + [ + "4", + "35" + ], + [ + "32", + "7" + ], + [ + "28", + "5" + ], + [ + "25", + "3" + ], + [ + "26", + "7" + ], + [ + "50", + "4" + ], + [ + "3", + "18" + ], + [ + "4", + "64" + ], + [ + "6", + "72" + ], + [ + "22", + "9" + ], + [ + "27", + "4" + ], + [ + "25", + "0" + ], + [ + "26", + "5" + ], + [ + "4", + "24" + ], + [ + "4", + "96" + ], + [ + "5", + "60" + ], + [ + "36", + "3" + ], + [ + "36", + "4" + ], + [ + "28", + "2" + ], + [ + "25", + "4" + ], + [ + "23", + "5" + ], + [ + "38", + "0" + ], + [ + "38", + "6" + ], + [ + "3", + "32" + ], + [ + "4", + "36" + ], + [ + "8", + "64" + ], + [ + "Ġ115", + "2" + ], + [ + "32", + "6" + ], + [ + "27", + "8" + ], + [ + "40", + "4" + ], + [ + "40", + "5" + ], + [ + "23", + "9" + ], + [ + "44", + "1" + ], + [ + "38", + "3" + ], + [ + "37", + "6" + ], + [ + "5", + "44" + ], + [ + "32", + "5" + ], + [ + "28", + "6" + ], + [ + "28", + "7" + ], + [ + "30", + "2" + ], + [ + "26", + "6" + ], + [ + "33", + "3" + ], + [ + "29", + "8" + ], + [ + "45", + "0" + ], + [ + "80", + "0" + ], + [ + "3", + "13" + ], + [ + "4", + "12" + ], + [ + "4", + "76" + ], + [ + "5", + "80" + ], + [ + "6", + "24" + ], + [ + "32", + "3" + ], + [ + "25", + "1" + ], + [ + "26", + "3" + ], + [ + "23", + "3" + ], + [ + "33", + "8" + ], + [ + "39", + "0" + ], + [ + "39", + "3" + ], + [ + "35", + "6" + ], + [ + "3", + "14" + ], + [ + "4", + "28" + ], + [ + "4", + "40" + ], + [ + "4", + "60" + ], + [ + "4", + "72" + ], + [ + "5", + "16" + ], + [ + "6", + "12" + ], + [ + "6", + "48" + ], + [ + "36", + "1" + ], + [ + "36", + "6" + ], + [ + "27", + "1" + ], + [ + "33", + "7" + ], + [ + "34", + "5" + ], + [ + "39", + "9" + ], + [ + "35", + "1" + ], + [ + "35", + "7" + ], + [ + "49", + "2" + ], + [ + "58", + "8" + ], + [ + "4", + "38" + ], + [ + "5", + "14" + ], + [ + "5", + "64" + ], + [ + "7", + "72" + ], + [ + "Ġ10", + "12" + ], + [ + "Ġ108", + "8" + ], + [ + "Ġ100", + "8" + ], + [ + "Ġ102", + "4" + ], + [ + "Ġ116", + "0" + ], + [ + "Ġ105", + "6" + ], + [ + "Ġ117", + "6" + ], + [ + "Ġ110", + "4" + ], + [ + "Ġ118", + "4" + ], + [ + "Ġ118", + "8" + ], + [ + "Ġ129", + "6" + ], + [ + "Ġ107", + "1" + ], + [ + "32", + "9" + ], + [ + "36", + "2" + ], + [ + "28", + "3" + ], + [ + "30", + "1" + ], + [ + "30", + "7" + ], + [ + "33", + "9" + ], + [ + "29", + "5" + ], + [ + "29", + "9" + ], + [ + "34", + "1" + ], + [ + "34", + "2" + ], + [ + "60", + "0" + ], + [ + "39", + "4" + ], + [ + "35", + "4" + ], + [ + "45", + "9" + ], + [ + "38", + "5" + ], + [ + "57", + "9" + ], + [ + "70", + "4" + ], + [ + "3", + "17" + ], + [ + "4", + "22" + ], + [ + "4", + "30" + ], + [ + "4", + "33" + ], + [ + "5", + "10" + ], + [ + "5", + "24" + ], + [ + "5", + "13" + ], + [ + "5", + "36" + ], + [ + "5", + "84" + ], + [ + "6", + "32" + ], + [ + "6", + "30" + ], + [ + "6", + "56" + ], + [ + "7", + "29" + ], + [ + "7", + "71" + ], + [ + "8", + "16" + ], + [ + "8", + "96" + ], + [ + "9", + "60" + ], + [ + "30", + "5" + ], + [ + "30", + "9" + ], + [ + "27", + "5" + ], + [ + "27", + "7" + ], + [ + "48", + "2" + ], + [ + "48", + "8" + ], + [ + "40", + "2" + ], + [ + "40", + "3" + ], + [ + "26", + "9" + ], + [ + "34", + "3" + ], + [ + "60", + "8" + ], + [ + "35", + "5" + ], + [ + "38", + "9" + ], + [ + "50", + "0" + ], + [ + "57", + "7" + ], + [ + "55", + "8" + ], + [ + "59", + "2" + ] + ] + } +} \ No newline at end of file diff --git a/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/tokenizer_config.json b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..28cd080337f28d0ed8fae1845f668b6644fedd09 --- /dev/null +++ b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/tokenizer_config.json @@ -0,0 +1,143 @@ +{ + "add_prefix_space": false, + "added_tokens_decoder": { + "0": { + "content": "[UNK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "[PAD]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "[BOS]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "[EOS]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "5": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "6": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "", + "", + "", + "", + "", + "" + ], + "bos_token": "[BOS]", + "clean_up_tokenization_spaces": false, + "eos_token": "[EOS]", + "extra_special_tokens": {}, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "[PAD]", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast", + "unk_token": "[UNK]" +} diff --git a/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/trainer_state.json b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..211f6d9a7f6dfa93ecf535d1f0ff23cf9d645182 --- /dev/null +++ b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/trainer_state.json @@ -0,0 +1,17912 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6601194014421213, + "eval_steps": 500, + "global_step": 25542, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00025844467991626395, + "grad_norm": 0.580073356628418, + "learning_rate": 1.5506547208821502e-07, + "loss": 0.1417, + "step": 10 + }, + { + "epoch": 0.0005168893598325279, + "grad_norm": 0.5506889224052429, + "learning_rate": 3.273604410751206e-07, + "loss": 0.141, + "step": 20 + }, + { + "epoch": 0.0007753340397487917, + "grad_norm": 0.5140657424926758, + "learning_rate": 4.996554100620262e-07, + "loss": 0.1406, + "step": 30 + }, + { + "epoch": 0.0010337787196650558, + "grad_norm": 0.378294974565506, + "learning_rate": 6.719503790489318e-07, + "loss": 0.1376, + "step": 40 + }, + { + "epoch": 0.0012922233995813195, + "grad_norm": 0.22087976336479187, + "learning_rate": 8.442453480358374e-07, + "loss": 0.1345, + "step": 50 + }, + { + "epoch": 0.0015506680794975835, + "grad_norm": 0.18212825059890747, + "learning_rate": 1.016540317022743e-06, + "loss": 0.132, + "step": 60 + }, + { + "epoch": 0.0018091127594138474, + "grad_norm": 0.12162523716688156, + "learning_rate": 1.1888352860096486e-06, + "loss": 0.1295, + "step": 70 + }, + { + "epoch": 0.0020675574393301116, + "grad_norm": 0.09799095243215561, + "learning_rate": 1.3611302549965542e-06, + "loss": 0.1279, + "step": 80 + }, + { + "epoch": 0.002326002119246375, + "grad_norm": 0.05276430770754814, + "learning_rate": 1.5334252239834598e-06, + "loss": 0.1272, + "step": 90 + }, + { + "epoch": 0.002584446799162639, + "grad_norm": 0.0423586331307888, + "learning_rate": 1.7057201929703652e-06, + "loss": 0.1267, + "step": 100 + }, + { + "epoch": 0.002842891479078903, + "grad_norm": 0.044790226966142654, + "learning_rate": 1.8780151619572708e-06, + "loss": 0.127, + "step": 110 + }, + { + "epoch": 0.003101336158995167, + "grad_norm": 0.033687494695186615, + "learning_rate": 2.0503101309441764e-06, + "loss": 0.1269, + "step": 120 + }, + { + "epoch": 0.003359780838911431, + "grad_norm": 0.03180452063679695, + "learning_rate": 2.222605099931082e-06, + "loss": 0.1261, + "step": 130 + }, + { + "epoch": 0.003618225518827695, + "grad_norm": 0.03529069945216179, + "learning_rate": 2.3949000689179876e-06, + "loss": 0.1257, + "step": 140 + }, + { + "epoch": 0.003876670198743959, + "grad_norm": 0.04638217017054558, + "learning_rate": 2.567195037904893e-06, + "loss": 0.1259, + "step": 150 + }, + { + "epoch": 0.004135114878660223, + "grad_norm": 0.03103143349289894, + "learning_rate": 2.7394900068917992e-06, + "loss": 0.1264, + "step": 160 + }, + { + "epoch": 0.004393559558576486, + "grad_norm": 0.03597632795572281, + "learning_rate": 2.911784975878705e-06, + "loss": 0.1252, + "step": 170 + }, + { + "epoch": 0.00465200423849275, + "grad_norm": 0.03456826135516167, + "learning_rate": 3.08407994486561e-06, + "loss": 0.1249, + "step": 180 + }, + { + "epoch": 0.004910448918409014, + "grad_norm": 0.0378618985414505, + "learning_rate": 3.2563749138525156e-06, + "loss": 0.126, + "step": 190 + }, + { + "epoch": 0.005168893598325278, + "grad_norm": 0.033420465886592865, + "learning_rate": 3.428669882839421e-06, + "loss": 0.1253, + "step": 200 + }, + { + "epoch": 0.005427338278241542, + "grad_norm": 0.029043620452284813, + "learning_rate": 3.6009648518263264e-06, + "loss": 0.125, + "step": 210 + }, + { + "epoch": 0.005685782958157806, + "grad_norm": 0.035771116614341736, + "learning_rate": 3.7732598208132324e-06, + "loss": 0.1245, + "step": 220 + }, + { + "epoch": 0.00594422763807407, + "grad_norm": 0.03690936788916588, + "learning_rate": 3.945554789800138e-06, + "loss": 0.1247, + "step": 230 + }, + { + "epoch": 0.006202672317990334, + "grad_norm": 0.03747721388936043, + "learning_rate": 4.117849758787044e-06, + "loss": 0.1255, + "step": 240 + }, + { + "epoch": 0.006461116997906598, + "grad_norm": 0.029691796749830246, + "learning_rate": 4.290144727773949e-06, + "loss": 0.1244, + "step": 250 + }, + { + "epoch": 0.006719561677822862, + "grad_norm": 0.032449208199977875, + "learning_rate": 4.462439696760855e-06, + "loss": 0.1241, + "step": 260 + }, + { + "epoch": 0.006978006357739126, + "grad_norm": 0.03326250985264778, + "learning_rate": 4.63473466574776e-06, + "loss": 0.1252, + "step": 270 + }, + { + "epoch": 0.00723645103765539, + "grad_norm": 0.04825210943818092, + "learning_rate": 4.807029634734666e-06, + "loss": 0.1248, + "step": 280 + }, + { + "epoch": 0.007494895717571654, + "grad_norm": 0.033368490636348724, + "learning_rate": 4.979324603721572e-06, + "loss": 0.1244, + "step": 290 + }, + { + "epoch": 0.007753340397487918, + "grad_norm": 0.042467497289180756, + "learning_rate": 5.151619572708477e-06, + "loss": 0.1245, + "step": 300 + }, + { + "epoch": 0.008011785077404182, + "grad_norm": 0.033714018762111664, + "learning_rate": 5.323914541695383e-06, + "loss": 0.1248, + "step": 310 + }, + { + "epoch": 0.008270229757320446, + "grad_norm": 0.045443516224622726, + "learning_rate": 5.496209510682288e-06, + "loss": 0.1243, + "step": 320 + }, + { + "epoch": 0.008528674437236709, + "grad_norm": 0.050872571766376495, + "learning_rate": 5.668504479669194e-06, + "loss": 0.1248, + "step": 330 + }, + { + "epoch": 0.008787119117152973, + "grad_norm": 0.0625002309679985, + "learning_rate": 5.8407994486561e-06, + "loss": 0.1242, + "step": 340 + }, + { + "epoch": 0.009045563797069237, + "grad_norm": 0.04048369824886322, + "learning_rate": 6.013094417643005e-06, + "loss": 0.1247, + "step": 350 + }, + { + "epoch": 0.0093040084769855, + "grad_norm": 0.03662728890776634, + "learning_rate": 6.185389386629911e-06, + "loss": 0.1249, + "step": 360 + }, + { + "epoch": 0.009562453156901764, + "grad_norm": 0.031706925481557846, + "learning_rate": 6.357684355616816e-06, + "loss": 0.1244, + "step": 370 + }, + { + "epoch": 0.009820897836818028, + "grad_norm": 0.041105374693870544, + "learning_rate": 6.529979324603722e-06, + "loss": 0.1242, + "step": 380 + }, + { + "epoch": 0.010079342516734292, + "grad_norm": 0.047242436558008194, + "learning_rate": 6.702274293590627e-06, + "loss": 0.1241, + "step": 390 + }, + { + "epoch": 0.010337787196650556, + "grad_norm": 0.04901286959648132, + "learning_rate": 6.874569262577533e-06, + "loss": 0.1243, + "step": 400 + }, + { + "epoch": 0.01059623187656682, + "grad_norm": 0.06665607541799545, + "learning_rate": 7.046864231564439e-06, + "loss": 0.1247, + "step": 410 + }, + { + "epoch": 0.010854676556483084, + "grad_norm": 0.03250494599342346, + "learning_rate": 7.2191592005513436e-06, + "loss": 0.1236, + "step": 420 + }, + { + "epoch": 0.011113121236399348, + "grad_norm": 0.04212265461683273, + "learning_rate": 7.391454169538249e-06, + "loss": 0.1237, + "step": 430 + }, + { + "epoch": 0.011371565916315612, + "grad_norm": 0.054139163345098495, + "learning_rate": 7.563749138525156e-06, + "loss": 0.1244, + "step": 440 + }, + { + "epoch": 0.011630010596231876, + "grad_norm": 0.02945440262556076, + "learning_rate": 7.736044107512061e-06, + "loss": 0.1242, + "step": 450 + }, + { + "epoch": 0.01188845527614814, + "grad_norm": 0.033181872218847275, + "learning_rate": 7.908339076498966e-06, + "loss": 0.1236, + "step": 460 + }, + { + "epoch": 0.012146899956064404, + "grad_norm": 0.046553533524274826, + "learning_rate": 8.080634045485872e-06, + "loss": 0.1242, + "step": 470 + }, + { + "epoch": 0.012405344635980668, + "grad_norm": 0.045686036348342896, + "learning_rate": 8.252929014472779e-06, + "loss": 0.1239, + "step": 480 + }, + { + "epoch": 0.012663789315896932, + "grad_norm": 0.030697941780090332, + "learning_rate": 8.425223983459684e-06, + "loss": 0.124, + "step": 490 + }, + { + "epoch": 0.012922233995813196, + "grad_norm": 0.03413194417953491, + "learning_rate": 8.597518952446588e-06, + "loss": 0.1244, + "step": 500 + }, + { + "epoch": 0.01318067867572946, + "grad_norm": 0.030660457909107208, + "learning_rate": 8.769813921433495e-06, + "loss": 0.124, + "step": 510 + }, + { + "epoch": 0.013439123355645724, + "grad_norm": 0.047199517488479614, + "learning_rate": 8.9421088904204e-06, + "loss": 0.1245, + "step": 520 + }, + { + "epoch": 0.013697568035561988, + "grad_norm": 0.058523520827293396, + "learning_rate": 9.114403859407306e-06, + "loss": 0.1239, + "step": 530 + }, + { + "epoch": 0.013956012715478252, + "grad_norm": 0.06476254016160965, + "learning_rate": 9.28669882839421e-06, + "loss": 0.1243, + "step": 540 + }, + { + "epoch": 0.014214457395394516, + "grad_norm": 0.041853196918964386, + "learning_rate": 9.458993797381117e-06, + "loss": 0.1238, + "step": 550 + }, + { + "epoch": 0.01447290207531078, + "grad_norm": 0.05013502761721611, + "learning_rate": 9.631288766368022e-06, + "loss": 0.1237, + "step": 560 + }, + { + "epoch": 0.014731346755227043, + "grad_norm": 0.05611172318458557, + "learning_rate": 9.803583735354928e-06, + "loss": 0.124, + "step": 570 + }, + { + "epoch": 0.014989791435143307, + "grad_norm": 0.03264360874891281, + "learning_rate": 9.975878704341833e-06, + "loss": 0.1242, + "step": 580 + }, + { + "epoch": 0.015248236115059571, + "grad_norm": 0.037222299724817276, + "learning_rate": 1.014817367332874e-05, + "loss": 0.1242, + "step": 590 + }, + { + "epoch": 0.015506680794975835, + "grad_norm": 0.03524187579751015, + "learning_rate": 1.0320468642315644e-05, + "loss": 0.1237, + "step": 600 + }, + { + "epoch": 0.0157651254748921, + "grad_norm": 0.039270102977752686, + "learning_rate": 1.049276361130255e-05, + "loss": 0.1243, + "step": 610 + }, + { + "epoch": 0.016023570154808365, + "grad_norm": 0.0661996379494667, + "learning_rate": 1.0665058580289456e-05, + "loss": 0.1234, + "step": 620 + }, + { + "epoch": 0.016282014834724627, + "grad_norm": 0.0738006979227066, + "learning_rate": 1.0837353549276362e-05, + "loss": 0.124, + "step": 630 + }, + { + "epoch": 0.016540459514640893, + "grad_norm": 0.06153467297554016, + "learning_rate": 1.1009648518263267e-05, + "loss": 0.1243, + "step": 640 + }, + { + "epoch": 0.016798904194557155, + "grad_norm": 0.03686348348855972, + "learning_rate": 1.1181943487250173e-05, + "loss": 0.1246, + "step": 650 + }, + { + "epoch": 0.017057348874473417, + "grad_norm": 0.03352544084191322, + "learning_rate": 1.135423845623708e-05, + "loss": 0.1232, + "step": 660 + }, + { + "epoch": 0.017315793554389683, + "grad_norm": 0.039459966123104095, + "learning_rate": 1.1526533425223984e-05, + "loss": 0.1235, + "step": 670 + }, + { + "epoch": 0.017574238234305945, + "grad_norm": 0.061304185539484024, + "learning_rate": 1.169882839421089e-05, + "loss": 0.1232, + "step": 680 + }, + { + "epoch": 0.01783268291422221, + "grad_norm": 0.057572197169065475, + "learning_rate": 1.1871123363197794e-05, + "loss": 0.1232, + "step": 690 + }, + { + "epoch": 0.018091127594138473, + "grad_norm": 0.07665688544511795, + "learning_rate": 1.2043418332184702e-05, + "loss": 0.1239, + "step": 700 + }, + { + "epoch": 0.01834957227405474, + "grad_norm": 0.04998056963086128, + "learning_rate": 1.2215713301171607e-05, + "loss": 0.1235, + "step": 710 + }, + { + "epoch": 0.018608016953971, + "grad_norm": 0.06162230670452118, + "learning_rate": 1.2388008270158512e-05, + "loss": 0.1226, + "step": 720 + }, + { + "epoch": 0.018866461633887267, + "grad_norm": 0.03174498677253723, + "learning_rate": 1.2560303239145418e-05, + "loss": 0.1234, + "step": 730 + }, + { + "epoch": 0.01912490631380353, + "grad_norm": 0.05730857700109482, + "learning_rate": 1.2732598208132323e-05, + "loss": 0.1234, + "step": 740 + }, + { + "epoch": 0.019383350993719795, + "grad_norm": 0.0782034695148468, + "learning_rate": 1.290489317711923e-05, + "loss": 0.1233, + "step": 750 + }, + { + "epoch": 0.019641795673636057, + "grad_norm": 0.06307530403137207, + "learning_rate": 1.3077188146106134e-05, + "loss": 0.1235, + "step": 760 + }, + { + "epoch": 0.019900240353552322, + "grad_norm": 0.10483033955097198, + "learning_rate": 1.3249483115093039e-05, + "loss": 0.1239, + "step": 770 + }, + { + "epoch": 0.020158685033468585, + "grad_norm": 0.04088316485285759, + "learning_rate": 1.3421778084079945e-05, + "loss": 0.1232, + "step": 780 + }, + { + "epoch": 0.02041712971338485, + "grad_norm": 0.08645454794168472, + "learning_rate": 1.359407305306685e-05, + "loss": 0.1235, + "step": 790 + }, + { + "epoch": 0.020675574393301113, + "grad_norm": 0.03517518565058708, + "learning_rate": 1.3766368022053758e-05, + "loss": 0.1236, + "step": 800 + }, + { + "epoch": 0.020934019073217378, + "grad_norm": 0.07828167825937271, + "learning_rate": 1.3938662991040663e-05, + "loss": 0.1236, + "step": 810 + }, + { + "epoch": 0.02119246375313364, + "grad_norm": 0.031180894002318382, + "learning_rate": 1.4110957960027568e-05, + "loss": 0.1239, + "step": 820 + }, + { + "epoch": 0.021450908433049906, + "grad_norm": 0.06358879804611206, + "learning_rate": 1.4283252929014474e-05, + "loss": 0.1228, + "step": 830 + }, + { + "epoch": 0.02170935311296617, + "grad_norm": 0.076687291264534, + "learning_rate": 1.4455547898001379e-05, + "loss": 0.1225, + "step": 840 + }, + { + "epoch": 0.021967797792882434, + "grad_norm": 0.06648306548595428, + "learning_rate": 1.4627842866988284e-05, + "loss": 0.123, + "step": 850 + }, + { + "epoch": 0.022226242472798696, + "grad_norm": 0.06174162030220032, + "learning_rate": 1.4800137835975188e-05, + "loss": 0.1232, + "step": 860 + }, + { + "epoch": 0.022484687152714962, + "grad_norm": 0.035604078322649, + "learning_rate": 1.4972432804962096e-05, + "loss": 0.1234, + "step": 870 + }, + { + "epoch": 0.022743131832631224, + "grad_norm": 0.042023904621601105, + "learning_rate": 1.5144727773949003e-05, + "loss": 0.123, + "step": 880 + }, + { + "epoch": 0.02300157651254749, + "grad_norm": 0.04869980365037918, + "learning_rate": 1.5317022742935908e-05, + "loss": 0.1235, + "step": 890 + }, + { + "epoch": 0.023260021192463752, + "grad_norm": 0.061389122158288956, + "learning_rate": 1.5489317711922814e-05, + "loss": 0.1229, + "step": 900 + }, + { + "epoch": 0.023518465872380018, + "grad_norm": 0.06544310599565506, + "learning_rate": 1.5661612680909717e-05, + "loss": 0.1228, + "step": 910 + }, + { + "epoch": 0.02377691055229628, + "grad_norm": 0.06568736582994461, + "learning_rate": 1.5833907649896624e-05, + "loss": 0.1233, + "step": 920 + }, + { + "epoch": 0.024035355232212546, + "grad_norm": 0.047814104706048965, + "learning_rate": 1.600620261888353e-05, + "loss": 0.1232, + "step": 930 + }, + { + "epoch": 0.024293799912128808, + "grad_norm": 0.05237136781215668, + "learning_rate": 1.6178497587870433e-05, + "loss": 0.1228, + "step": 940 + }, + { + "epoch": 0.024552244592045074, + "grad_norm": 0.03621436282992363, + "learning_rate": 1.6350792556857343e-05, + "loss": 0.1232, + "step": 950 + }, + { + "epoch": 0.024810689271961336, + "grad_norm": 0.04138467088341713, + "learning_rate": 1.6523087525844246e-05, + "loss": 0.1231, + "step": 960 + }, + { + "epoch": 0.0250691339518776, + "grad_norm": 0.05057816952466965, + "learning_rate": 1.6695382494831152e-05, + "loss": 0.1231, + "step": 970 + }, + { + "epoch": 0.025327578631793864, + "grad_norm": 0.04099060222506523, + "learning_rate": 1.686767746381806e-05, + "loss": 0.1229, + "step": 980 + }, + { + "epoch": 0.02558602331171013, + "grad_norm": 0.08345464617013931, + "learning_rate": 1.7039972432804962e-05, + "loss": 0.1229, + "step": 990 + }, + { + "epoch": 0.02584446799162639, + "grad_norm": 0.08455627411603928, + "learning_rate": 1.721226740179187e-05, + "loss": 0.1233, + "step": 1000 + }, + { + "epoch": 0.026102912671542657, + "grad_norm": 0.06264887005090714, + "learning_rate": 1.738456237077877e-05, + "loss": 0.1231, + "step": 1010 + }, + { + "epoch": 0.02636135735145892, + "grad_norm": 0.0624031126499176, + "learning_rate": 1.755685733976568e-05, + "loss": 0.1227, + "step": 1020 + }, + { + "epoch": 0.026619802031375185, + "grad_norm": 0.06571601331233978, + "learning_rate": 1.7729152308752588e-05, + "loss": 0.1231, + "step": 1030 + }, + { + "epoch": 0.026878246711291447, + "grad_norm": 0.08202008157968521, + "learning_rate": 1.790144727773949e-05, + "loss": 0.1222, + "step": 1040 + }, + { + "epoch": 0.027136691391207713, + "grad_norm": 0.04064911603927612, + "learning_rate": 1.8073742246726397e-05, + "loss": 0.1229, + "step": 1050 + }, + { + "epoch": 0.027395136071123975, + "grad_norm": 0.055493999272584915, + "learning_rate": 1.82460372157133e-05, + "loss": 0.1233, + "step": 1060 + }, + { + "epoch": 0.02765358075104024, + "grad_norm": 0.03765128180384636, + "learning_rate": 1.8418332184700207e-05, + "loss": 0.1243, + "step": 1070 + }, + { + "epoch": 0.027912025430956503, + "grad_norm": 0.05344291031360626, + "learning_rate": 1.8590627153687113e-05, + "loss": 0.1231, + "step": 1080 + }, + { + "epoch": 0.02817047011087277, + "grad_norm": 0.058499764651060104, + "learning_rate": 1.8762922122674016e-05, + "loss": 0.1229, + "step": 1090 + }, + { + "epoch": 0.02842891479078903, + "grad_norm": 0.05688465014100075, + "learning_rate": 1.8935217091660926e-05, + "loss": 0.1235, + "step": 1100 + }, + { + "epoch": 0.028687359470705297, + "grad_norm": 0.04814527928829193, + "learning_rate": 1.910751206064783e-05, + "loss": 0.1234, + "step": 1110 + }, + { + "epoch": 0.02894580415062156, + "grad_norm": 0.06947214901447296, + "learning_rate": 1.9279807029634736e-05, + "loss": 0.1227, + "step": 1120 + }, + { + "epoch": 0.029204248830537825, + "grad_norm": 0.0810081884264946, + "learning_rate": 1.9452101998621642e-05, + "loss": 0.1227, + "step": 1130 + }, + { + "epoch": 0.029462693510454087, + "grad_norm": 0.04069865494966507, + "learning_rate": 1.9624396967608545e-05, + "loss": 0.1229, + "step": 1140 + }, + { + "epoch": 0.029721138190370353, + "grad_norm": 0.056520044803619385, + "learning_rate": 1.979669193659545e-05, + "loss": 0.1226, + "step": 1150 + }, + { + "epoch": 0.029979582870286615, + "grad_norm": 0.04641279578208923, + "learning_rate": 1.9968986905582358e-05, + "loss": 0.1225, + "step": 1160 + }, + { + "epoch": 0.03023802755020288, + "grad_norm": 0.04418419674038887, + "learning_rate": 2.014128187456926e-05, + "loss": 0.1222, + "step": 1170 + }, + { + "epoch": 0.030496472230119143, + "grad_norm": 0.09186393022537231, + "learning_rate": 2.031357684355617e-05, + "loss": 0.1231, + "step": 1180 + }, + { + "epoch": 0.03075491691003541, + "grad_norm": 0.08453690260648727, + "learning_rate": 2.0485871812543074e-05, + "loss": 0.1225, + "step": 1190 + }, + { + "epoch": 0.03101336158995167, + "grad_norm": 0.04817875102162361, + "learning_rate": 2.065816678152998e-05, + "loss": 0.1236, + "step": 1200 + }, + { + "epoch": 0.03127180626986793, + "grad_norm": 0.06666923314332962, + "learning_rate": 2.0830461750516887e-05, + "loss": 0.1229, + "step": 1210 + }, + { + "epoch": 0.0315302509497842, + "grad_norm": 0.048177093267440796, + "learning_rate": 2.100275671950379e-05, + "loss": 0.1224, + "step": 1220 + }, + { + "epoch": 0.031788695629700464, + "grad_norm": 0.05482740327715874, + "learning_rate": 2.1175051688490696e-05, + "loss": 0.123, + "step": 1230 + }, + { + "epoch": 0.03204714030961673, + "grad_norm": 0.04234013333916664, + "learning_rate": 2.1347346657477603e-05, + "loss": 0.1221, + "step": 1240 + }, + { + "epoch": 0.03230558498953299, + "grad_norm": 0.09711142629384995, + "learning_rate": 2.151964162646451e-05, + "loss": 0.1231, + "step": 1250 + }, + { + "epoch": 0.032564029669449254, + "grad_norm": 0.04528702795505524, + "learning_rate": 2.1691936595451416e-05, + "loss": 0.1233, + "step": 1260 + }, + { + "epoch": 0.03282247434936552, + "grad_norm": 0.17061662673950195, + "learning_rate": 2.186423156443832e-05, + "loss": 0.1236, + "step": 1270 + }, + { + "epoch": 0.033080919029281786, + "grad_norm": 0.18225379288196564, + "learning_rate": 2.2036526533425225e-05, + "loss": 0.1237, + "step": 1280 + }, + { + "epoch": 0.033339363709198044, + "grad_norm": 0.09786848723888397, + "learning_rate": 2.220882150241213e-05, + "loss": 0.123, + "step": 1290 + }, + { + "epoch": 0.03359780838911431, + "grad_norm": 0.1106666699051857, + "learning_rate": 2.2381116471399035e-05, + "loss": 0.1229, + "step": 1300 + }, + { + "epoch": 0.033856253069030576, + "grad_norm": 0.05375261977314949, + "learning_rate": 2.255341144038594e-05, + "loss": 0.1238, + "step": 1310 + }, + { + "epoch": 0.034114697748946834, + "grad_norm": 0.04952960088849068, + "learning_rate": 2.2725706409372848e-05, + "loss": 0.1227, + "step": 1320 + }, + { + "epoch": 0.0343731424288631, + "grad_norm": 0.068538136780262, + "learning_rate": 2.2898001378359754e-05, + "loss": 0.1222, + "step": 1330 + }, + { + "epoch": 0.034631587108779366, + "grad_norm": 0.14062538743019104, + "learning_rate": 2.307029634734666e-05, + "loss": 0.1226, + "step": 1340 + }, + { + "epoch": 0.03489003178869563, + "grad_norm": 0.10071218013763428, + "learning_rate": 2.3242591316333564e-05, + "loss": 0.1229, + "step": 1350 + }, + { + "epoch": 0.03514847646861189, + "grad_norm": 0.07993683964014053, + "learning_rate": 2.341488628532047e-05, + "loss": 0.123, + "step": 1360 + }, + { + "epoch": 0.035406921148528156, + "grad_norm": 0.03962239250540733, + "learning_rate": 2.3587181254307376e-05, + "loss": 0.1222, + "step": 1370 + }, + { + "epoch": 0.03566536582844442, + "grad_norm": 0.07989293336868286, + "learning_rate": 2.375947622329428e-05, + "loss": 0.123, + "step": 1380 + }, + { + "epoch": 0.03592381050836069, + "grad_norm": 0.07650604099035263, + "learning_rate": 2.3931771192281186e-05, + "loss": 0.1232, + "step": 1390 + }, + { + "epoch": 0.036182255188276946, + "grad_norm": 0.08324531465768814, + "learning_rate": 2.4104066161268092e-05, + "loss": 0.1232, + "step": 1400 + }, + { + "epoch": 0.03644069986819321, + "grad_norm": 0.049224723130464554, + "learning_rate": 2.4276361130255e-05, + "loss": 0.1231, + "step": 1410 + }, + { + "epoch": 0.03669914454810948, + "grad_norm": 0.06569185853004456, + "learning_rate": 2.4448656099241905e-05, + "loss": 0.1228, + "step": 1420 + }, + { + "epoch": 0.03695758922802574, + "grad_norm": 0.0934646800160408, + "learning_rate": 2.462095106822881e-05, + "loss": 0.1215, + "step": 1430 + }, + { + "epoch": 0.037216033907942, + "grad_norm": 0.03685333579778671, + "learning_rate": 2.4793246037215715e-05, + "loss": 0.1236, + "step": 1440 + }, + { + "epoch": 0.03747447858785827, + "grad_norm": 0.03639655560255051, + "learning_rate": 2.496554100620262e-05, + "loss": 0.1225, + "step": 1450 + }, + { + "epoch": 0.03773292326777453, + "grad_norm": 0.10260403156280518, + "learning_rate": 2.5137835975189528e-05, + "loss": 0.1228, + "step": 1460 + }, + { + "epoch": 0.0379913679476908, + "grad_norm": 0.0797412320971489, + "learning_rate": 2.531013094417643e-05, + "loss": 0.1233, + "step": 1470 + }, + { + "epoch": 0.03824981262760706, + "grad_norm": 0.11771167814731598, + "learning_rate": 2.5482425913163337e-05, + "loss": 0.1239, + "step": 1480 + }, + { + "epoch": 0.03850825730752332, + "grad_norm": 0.07466382533311844, + "learning_rate": 2.565472088215024e-05, + "loss": 0.1229, + "step": 1490 + }, + { + "epoch": 0.03876670198743959, + "grad_norm": 0.11618846654891968, + "learning_rate": 2.582701585113715e-05, + "loss": 0.1226, + "step": 1500 + }, + { + "epoch": 0.039025146667355855, + "grad_norm": 0.0967508926987648, + "learning_rate": 2.599931082012405e-05, + "loss": 0.1231, + "step": 1510 + }, + { + "epoch": 0.039283591347272113, + "grad_norm": 0.11404047161340714, + "learning_rate": 2.617160578911096e-05, + "loss": 0.1227, + "step": 1520 + }, + { + "epoch": 0.03954203602718838, + "grad_norm": 0.07313144952058792, + "learning_rate": 2.6343900758097866e-05, + "loss": 0.1236, + "step": 1530 + }, + { + "epoch": 0.039800480707104645, + "grad_norm": 0.08839472383260727, + "learning_rate": 2.651619572708477e-05, + "loss": 0.1232, + "step": 1540 + }, + { + "epoch": 0.04005892538702091, + "grad_norm": 0.09503539651632309, + "learning_rate": 2.668849069607168e-05, + "loss": 0.1229, + "step": 1550 + }, + { + "epoch": 0.04031737006693717, + "grad_norm": 0.14443273842334747, + "learning_rate": 2.686078566505858e-05, + "loss": 0.1226, + "step": 1560 + }, + { + "epoch": 0.040575814746853435, + "grad_norm": 0.0738818496465683, + "learning_rate": 2.703308063404549e-05, + "loss": 0.1223, + "step": 1570 + }, + { + "epoch": 0.0408342594267697, + "grad_norm": 0.11289133876562119, + "learning_rate": 2.720537560303239e-05, + "loss": 0.1232, + "step": 1580 + }, + { + "epoch": 0.041092704106685966, + "grad_norm": 0.08375398814678192, + "learning_rate": 2.7377670572019298e-05, + "loss": 0.1221, + "step": 1590 + }, + { + "epoch": 0.041351148786602225, + "grad_norm": 0.15204329788684845, + "learning_rate": 2.7549965541006208e-05, + "loss": 0.1234, + "step": 1600 + }, + { + "epoch": 0.04160959346651849, + "grad_norm": 0.08777295798063278, + "learning_rate": 2.7722260509993107e-05, + "loss": 0.1229, + "step": 1610 + }, + { + "epoch": 0.041868038146434756, + "grad_norm": 0.04249228537082672, + "learning_rate": 2.7894555478980017e-05, + "loss": 0.1226, + "step": 1620 + }, + { + "epoch": 0.04212648282635102, + "grad_norm": 0.09020353108644485, + "learning_rate": 2.806685044796692e-05, + "loss": 0.1224, + "step": 1630 + }, + { + "epoch": 0.04238492750626728, + "grad_norm": 0.07953955978155136, + "learning_rate": 2.8239145416953827e-05, + "loss": 0.1223, + "step": 1640 + }, + { + "epoch": 0.04264337218618355, + "grad_norm": 0.07199359685182571, + "learning_rate": 2.841144038594073e-05, + "loss": 0.1226, + "step": 1650 + }, + { + "epoch": 0.04290181686609981, + "grad_norm": 0.0655318945646286, + "learning_rate": 2.8583735354927636e-05, + "loss": 0.1226, + "step": 1660 + }, + { + "epoch": 0.04316026154601608, + "grad_norm": 0.10120917111635208, + "learning_rate": 2.8756030323914546e-05, + "loss": 0.123, + "step": 1670 + }, + { + "epoch": 0.04341870622593234, + "grad_norm": 0.08806953579187393, + "learning_rate": 2.892832529290145e-05, + "loss": 0.1231, + "step": 1680 + }, + { + "epoch": 0.0436771509058486, + "grad_norm": 0.0696093738079071, + "learning_rate": 2.9100620261888356e-05, + "loss": 0.1226, + "step": 1690 + }, + { + "epoch": 0.04393559558576487, + "grad_norm": 0.060308948159217834, + "learning_rate": 2.927291523087526e-05, + "loss": 0.1226, + "step": 1700 + }, + { + "epoch": 0.044194040265681134, + "grad_norm": 0.09358316659927368, + "learning_rate": 2.9445210199862165e-05, + "loss": 0.1229, + "step": 1710 + }, + { + "epoch": 0.04445248494559739, + "grad_norm": 0.0817575678229332, + "learning_rate": 2.9617505168849068e-05, + "loss": 0.1233, + "step": 1720 + }, + { + "epoch": 0.04471092962551366, + "grad_norm": 0.08947563916444778, + "learning_rate": 2.9789800137835978e-05, + "loss": 0.1229, + "step": 1730 + }, + { + "epoch": 0.044969374305429924, + "grad_norm": 0.06575481593608856, + "learning_rate": 2.996209510682288e-05, + "loss": 0.123, + "step": 1740 + }, + { + "epoch": 0.04522781898534619, + "grad_norm": 0.05418599024415016, + "learning_rate": 3.0134390075809788e-05, + "loss": 0.123, + "step": 1750 + }, + { + "epoch": 0.04548626366526245, + "grad_norm": 0.1033121645450592, + "learning_rate": 3.0306685044796694e-05, + "loss": 0.1229, + "step": 1760 + }, + { + "epoch": 0.045744708345178714, + "grad_norm": 0.06530603021383286, + "learning_rate": 3.0478980013783597e-05, + "loss": 0.1227, + "step": 1770 + }, + { + "epoch": 0.04600315302509498, + "grad_norm": 0.13506680727005005, + "learning_rate": 3.0651274982770504e-05, + "loss": 0.1222, + "step": 1780 + }, + { + "epoch": 0.046261597705011245, + "grad_norm": 0.06490384042263031, + "learning_rate": 3.0823569951757407e-05, + "loss": 0.1222, + "step": 1790 + }, + { + "epoch": 0.046520042384927504, + "grad_norm": 0.10440021753311157, + "learning_rate": 3.0995864920744316e-05, + "loss": 0.1234, + "step": 1800 + }, + { + "epoch": 0.04677848706484377, + "grad_norm": 0.08765871822834015, + "learning_rate": 3.116815988973122e-05, + "loss": 0.1228, + "step": 1810 + }, + { + "epoch": 0.047036931744760035, + "grad_norm": 0.12401014566421509, + "learning_rate": 3.134045485871813e-05, + "loss": 0.1232, + "step": 1820 + }, + { + "epoch": 0.0472953764246763, + "grad_norm": 0.07814408093690872, + "learning_rate": 3.151274982770503e-05, + "loss": 0.1221, + "step": 1830 + }, + { + "epoch": 0.04755382110459256, + "grad_norm": 0.08047276735305786, + "learning_rate": 3.1685044796691935e-05, + "loss": 0.1229, + "step": 1840 + }, + { + "epoch": 0.047812265784508826, + "grad_norm": 0.07007250934839249, + "learning_rate": 3.1857339765678845e-05, + "loss": 0.1227, + "step": 1850 + }, + { + "epoch": 0.04807071046442509, + "grad_norm": 0.08911903947591782, + "learning_rate": 3.202963473466575e-05, + "loss": 0.1231, + "step": 1860 + }, + { + "epoch": 0.04832915514434136, + "grad_norm": 0.07673286646604538, + "learning_rate": 3.220192970365266e-05, + "loss": 0.123, + "step": 1870 + }, + { + "epoch": 0.048587599824257616, + "grad_norm": 0.07106916606426239, + "learning_rate": 3.237422467263956e-05, + "loss": 0.1226, + "step": 1880 + }, + { + "epoch": 0.04884604450417388, + "grad_norm": 0.11209211498498917, + "learning_rate": 3.2546519641626464e-05, + "loss": 0.1226, + "step": 1890 + }, + { + "epoch": 0.04910448918409015, + "grad_norm": 0.07801423966884613, + "learning_rate": 3.2718814610613374e-05, + "loss": 0.122, + "step": 1900 + }, + { + "epoch": 0.04936293386400641, + "grad_norm": 0.13266247510910034, + "learning_rate": 3.289110957960028e-05, + "loss": 0.1229, + "step": 1910 + }, + { + "epoch": 0.04962137854392267, + "grad_norm": 0.05087581276893616, + "learning_rate": 3.306340454858719e-05, + "loss": 0.1228, + "step": 1920 + }, + { + "epoch": 0.04987982322383894, + "grad_norm": 0.10973642766475677, + "learning_rate": 3.323569951757409e-05, + "loss": 0.1222, + "step": 1930 + }, + { + "epoch": 0.0501382679037552, + "grad_norm": 0.09865803271532059, + "learning_rate": 3.340799448656099e-05, + "loss": 0.1236, + "step": 1940 + }, + { + "epoch": 0.05039671258367147, + "grad_norm": 0.07750454545021057, + "learning_rate": 3.3580289455547896e-05, + "loss": 0.1237, + "step": 1950 + }, + { + "epoch": 0.05065515726358773, + "grad_norm": 0.03502718359231949, + "learning_rate": 3.3752584424534806e-05, + "loss": 0.123, + "step": 1960 + }, + { + "epoch": 0.05091360194350399, + "grad_norm": 0.046701829880476, + "learning_rate": 3.3924879393521716e-05, + "loss": 0.1224, + "step": 1970 + }, + { + "epoch": 0.05117204662342026, + "grad_norm": 0.047475650906562805, + "learning_rate": 3.409717436250862e-05, + "loss": 0.1233, + "step": 1980 + }, + { + "epoch": 0.05143049130333652, + "grad_norm": 0.0709228664636612, + "learning_rate": 3.426946933149552e-05, + "loss": 0.1222, + "step": 1990 + }, + { + "epoch": 0.05168893598325278, + "grad_norm": 0.08590517938137054, + "learning_rate": 3.4441764300482425e-05, + "loss": 0.1226, + "step": 2000 + }, + { + "epoch": 0.05194738066316905, + "grad_norm": 0.08481516689062119, + "learning_rate": 3.4614059269469335e-05, + "loss": 0.1227, + "step": 2010 + }, + { + "epoch": 0.052205825343085314, + "grad_norm": 0.06590903550386429, + "learning_rate": 3.478635423845624e-05, + "loss": 0.1231, + "step": 2020 + }, + { + "epoch": 0.05246427002300157, + "grad_norm": 0.06652143597602844, + "learning_rate": 3.495864920744315e-05, + "loss": 0.123, + "step": 2030 + }, + { + "epoch": 0.05272271470291784, + "grad_norm": 0.046942099928855896, + "learning_rate": 3.5130944176430044e-05, + "loss": 0.1224, + "step": 2040 + }, + { + "epoch": 0.052981159382834105, + "grad_norm": 0.0796431452035904, + "learning_rate": 3.5303239145416954e-05, + "loss": 0.1226, + "step": 2050 + }, + { + "epoch": 0.05323960406275037, + "grad_norm": 0.04292640835046768, + "learning_rate": 3.5475534114403864e-05, + "loss": 0.1219, + "step": 2060 + }, + { + "epoch": 0.05349804874266663, + "grad_norm": 0.04194321855902672, + "learning_rate": 3.564782908339077e-05, + "loss": 0.1223, + "step": 2070 + }, + { + "epoch": 0.053756493422582895, + "grad_norm": 0.0479915477335453, + "learning_rate": 3.582012405237768e-05, + "loss": 0.1236, + "step": 2080 + }, + { + "epoch": 0.05401493810249916, + "grad_norm": 0.05698119476437569, + "learning_rate": 3.599241902136457e-05, + "loss": 0.1227, + "step": 2090 + }, + { + "epoch": 0.054273382782415426, + "grad_norm": 0.05959772691130638, + "learning_rate": 3.616471399035148e-05, + "loss": 0.1228, + "step": 2100 + }, + { + "epoch": 0.054531827462331685, + "grad_norm": 0.04669308662414551, + "learning_rate": 3.6337008959338386e-05, + "loss": 0.1234, + "step": 2110 + }, + { + "epoch": 0.05479027214224795, + "grad_norm": 0.045805882662534714, + "learning_rate": 3.6509303928325296e-05, + "loss": 0.1229, + "step": 2120 + }, + { + "epoch": 0.055048716822164216, + "grad_norm": 0.04557011276483536, + "learning_rate": 3.6681598897312205e-05, + "loss": 0.1218, + "step": 2130 + }, + { + "epoch": 0.05530716150208048, + "grad_norm": 0.12122908979654312, + "learning_rate": 3.68538938662991e-05, + "loss": 0.1227, + "step": 2140 + }, + { + "epoch": 0.05556560618199674, + "grad_norm": 0.0866415873169899, + "learning_rate": 3.702618883528601e-05, + "loss": 0.1234, + "step": 2150 + }, + { + "epoch": 0.055824050861913006, + "grad_norm": 0.0965719223022461, + "learning_rate": 3.7198483804272915e-05, + "loss": 0.1225, + "step": 2160 + }, + { + "epoch": 0.05608249554182927, + "grad_norm": 0.05061042308807373, + "learning_rate": 3.7370778773259825e-05, + "loss": 0.1227, + "step": 2170 + }, + { + "epoch": 0.05634094022174554, + "grad_norm": 0.0704331025481224, + "learning_rate": 3.754307374224673e-05, + "loss": 0.1227, + "step": 2180 + }, + { + "epoch": 0.056599384901661796, + "grad_norm": 0.10036037862300873, + "learning_rate": 3.771536871123364e-05, + "loss": 0.1224, + "step": 2190 + }, + { + "epoch": 0.05685782958157806, + "grad_norm": 0.11323834210634232, + "learning_rate": 3.788766368022054e-05, + "loss": 0.1229, + "step": 2200 + }, + { + "epoch": 0.05711627426149433, + "grad_norm": 0.053230322897434235, + "learning_rate": 3.8059958649207444e-05, + "loss": 0.1228, + "step": 2210 + }, + { + "epoch": 0.057374718941410593, + "grad_norm": 0.08697234094142914, + "learning_rate": 3.823225361819435e-05, + "loss": 0.1222, + "step": 2220 + }, + { + "epoch": 0.05763316362132685, + "grad_norm": 0.03745349869132042, + "learning_rate": 3.8404548587181256e-05, + "loss": 0.1226, + "step": 2230 + }, + { + "epoch": 0.05789160830124312, + "grad_norm": 0.03589538484811783, + "learning_rate": 3.8576843556168166e-05, + "loss": 0.1228, + "step": 2240 + }, + { + "epoch": 0.058150052981159384, + "grad_norm": 0.2848452627658844, + "learning_rate": 3.874913852515506e-05, + "loss": 0.1236, + "step": 2250 + }, + { + "epoch": 0.05840849766107565, + "grad_norm": 0.11436636745929718, + "learning_rate": 3.892143349414197e-05, + "loss": 0.1223, + "step": 2260 + }, + { + "epoch": 0.05866694234099191, + "grad_norm": 0.04620843380689621, + "learning_rate": 3.9093728463128875e-05, + "loss": 0.123, + "step": 2270 + }, + { + "epoch": 0.058925387020908174, + "grad_norm": 0.03212074935436249, + "learning_rate": 3.9266023432115785e-05, + "loss": 0.1222, + "step": 2280 + }, + { + "epoch": 0.05918383170082444, + "grad_norm": 0.043459177017211914, + "learning_rate": 3.9438318401102695e-05, + "loss": 0.1225, + "step": 2290 + }, + { + "epoch": 0.059442276380740705, + "grad_norm": 0.09468097984790802, + "learning_rate": 3.961061337008959e-05, + "loss": 0.1224, + "step": 2300 + }, + { + "epoch": 0.059700721060656964, + "grad_norm": 0.09504854679107666, + "learning_rate": 3.97829083390765e-05, + "loss": 0.123, + "step": 2310 + }, + { + "epoch": 0.05995916574057323, + "grad_norm": 0.07441641390323639, + "learning_rate": 3.9955203308063404e-05, + "loss": 0.1235, + "step": 2320 + }, + { + "epoch": 0.060217610420489495, + "grad_norm": 0.04445243254303932, + "learning_rate": 4.0127498277050314e-05, + "loss": 0.123, + "step": 2330 + }, + { + "epoch": 0.06047605510040576, + "grad_norm": 0.05347610265016556, + "learning_rate": 4.029979324603722e-05, + "loss": 0.1225, + "step": 2340 + }, + { + "epoch": 0.06073449978032202, + "grad_norm": 0.04007500410079956, + "learning_rate": 4.047208821502412e-05, + "loss": 0.1233, + "step": 2350 + }, + { + "epoch": 0.060992944460238285, + "grad_norm": 0.06312885135412216, + "learning_rate": 4.064438318401103e-05, + "loss": 0.1219, + "step": 2360 + }, + { + "epoch": 0.06125138914015455, + "grad_norm": 0.09112309664487839, + "learning_rate": 4.081667815299793e-05, + "loss": 0.123, + "step": 2370 + }, + { + "epoch": 0.06150983382007082, + "grad_norm": 0.04379124939441681, + "learning_rate": 4.098897312198484e-05, + "loss": 0.1224, + "step": 2380 + }, + { + "epoch": 0.061768278499987075, + "grad_norm": 0.047634780406951904, + "learning_rate": 4.1161268090971746e-05, + "loss": 0.1226, + "step": 2390 + }, + { + "epoch": 0.06202672317990334, + "grad_norm": 0.05278099328279495, + "learning_rate": 4.133356305995865e-05, + "loss": 0.1229, + "step": 2400 + }, + { + "epoch": 0.06228516785981961, + "grad_norm": 0.06965649873018265, + "learning_rate": 4.150585802894555e-05, + "loss": 0.1229, + "step": 2410 + }, + { + "epoch": 0.06254361253973587, + "grad_norm": 0.14786341786384583, + "learning_rate": 4.167815299793246e-05, + "loss": 0.1223, + "step": 2420 + }, + { + "epoch": 0.06280205721965214, + "grad_norm": 0.056304682046175, + "learning_rate": 4.185044796691937e-05, + "loss": 0.1217, + "step": 2430 + }, + { + "epoch": 0.0630605018995684, + "grad_norm": 0.09119052439928055, + "learning_rate": 4.2022742935906275e-05, + "loss": 0.1236, + "step": 2440 + }, + { + "epoch": 0.06331894657948466, + "grad_norm": 0.12715300917625427, + "learning_rate": 4.219503790489318e-05, + "loss": 0.1227, + "step": 2450 + }, + { + "epoch": 0.06357739125940093, + "grad_norm": 0.10588313639163971, + "learning_rate": 4.236733287388008e-05, + "loss": 0.1233, + "step": 2460 + }, + { + "epoch": 0.06383583593931719, + "grad_norm": 0.06457073986530304, + "learning_rate": 4.253962784286699e-05, + "loss": 0.1228, + "step": 2470 + }, + { + "epoch": 0.06409428061923346, + "grad_norm": 0.08691989630460739, + "learning_rate": 4.2711922811853894e-05, + "loss": 0.123, + "step": 2480 + }, + { + "epoch": 0.06435272529914972, + "grad_norm": 0.058130763471126556, + "learning_rate": 4.2884217780840804e-05, + "loss": 0.1226, + "step": 2490 + }, + { + "epoch": 0.06461116997906598, + "grad_norm": 0.10724621266126633, + "learning_rate": 4.305651274982771e-05, + "loss": 0.1231, + "step": 2500 + }, + { + "epoch": 0.06486961465898225, + "grad_norm": 0.09582070261240005, + "learning_rate": 4.322880771881461e-05, + "loss": 0.1226, + "step": 2510 + }, + { + "epoch": 0.06512805933889851, + "grad_norm": 0.09975564479827881, + "learning_rate": 4.340110268780152e-05, + "loss": 0.1223, + "step": 2520 + }, + { + "epoch": 0.06538650401881477, + "grad_norm": 0.04424052685499191, + "learning_rate": 4.357339765678842e-05, + "loss": 0.1232, + "step": 2530 + }, + { + "epoch": 0.06564494869873104, + "grad_norm": 0.08506426960229874, + "learning_rate": 4.374569262577533e-05, + "loss": 0.1222, + "step": 2540 + }, + { + "epoch": 0.0659033933786473, + "grad_norm": 0.22666363418102264, + "learning_rate": 4.3917987594762236e-05, + "loss": 0.1222, + "step": 2550 + }, + { + "epoch": 0.06616183805856357, + "grad_norm": 0.05729035288095474, + "learning_rate": 4.409028256374914e-05, + "loss": 0.1226, + "step": 2560 + }, + { + "epoch": 0.06642028273847983, + "grad_norm": 0.05613674223423004, + "learning_rate": 4.426257753273604e-05, + "loss": 0.1228, + "step": 2570 + }, + { + "epoch": 0.06667872741839609, + "grad_norm": 0.03760998323559761, + "learning_rate": 4.443487250172295e-05, + "loss": 0.1219, + "step": 2580 + }, + { + "epoch": 0.06693717209831236, + "grad_norm": 0.12485086917877197, + "learning_rate": 4.460716747070986e-05, + "loss": 0.1228, + "step": 2590 + }, + { + "epoch": 0.06719561677822862, + "grad_norm": 0.10450302064418793, + "learning_rate": 4.4779462439696764e-05, + "loss": 0.1229, + "step": 2600 + }, + { + "epoch": 0.06745406145814488, + "grad_norm": 0.05455094948410988, + "learning_rate": 4.495175740868367e-05, + "loss": 0.122, + "step": 2610 + }, + { + "epoch": 0.06771250613806115, + "grad_norm": 0.05045665428042412, + "learning_rate": 4.512405237767057e-05, + "loss": 0.1234, + "step": 2620 + }, + { + "epoch": 0.06797095081797741, + "grad_norm": 0.046153802424669266, + "learning_rate": 4.529634734665748e-05, + "loss": 0.1224, + "step": 2630 + }, + { + "epoch": 0.06822939549789367, + "grad_norm": 0.10910719633102417, + "learning_rate": 4.5468642315644383e-05, + "loss": 0.1229, + "step": 2640 + }, + { + "epoch": 0.06848784017780994, + "grad_norm": 0.039895158261060715, + "learning_rate": 4.564093728463129e-05, + "loss": 0.1228, + "step": 2650 + }, + { + "epoch": 0.0687462848577262, + "grad_norm": 0.05702727288007736, + "learning_rate": 4.5813232253618196e-05, + "loss": 0.1231, + "step": 2660 + }, + { + "epoch": 0.06900472953764247, + "grad_norm": 0.07900247722864151, + "learning_rate": 4.59855272226051e-05, + "loss": 0.1226, + "step": 2670 + }, + { + "epoch": 0.06926317421755873, + "grad_norm": 0.08567679673433304, + "learning_rate": 4.615782219159201e-05, + "loss": 0.1231, + "step": 2680 + }, + { + "epoch": 0.06952161889747499, + "grad_norm": 0.044867727905511856, + "learning_rate": 4.633011716057891e-05, + "loss": 0.1229, + "step": 2690 + }, + { + "epoch": 0.06978006357739126, + "grad_norm": 0.0893058031797409, + "learning_rate": 4.650241212956582e-05, + "loss": 0.1228, + "step": 2700 + }, + { + "epoch": 0.07003850825730752, + "grad_norm": 0.08285655081272125, + "learning_rate": 4.6674707098552725e-05, + "loss": 0.1224, + "step": 2710 + }, + { + "epoch": 0.07029695293722378, + "grad_norm": 0.06980638206005096, + "learning_rate": 4.684700206753963e-05, + "loss": 0.1227, + "step": 2720 + }, + { + "epoch": 0.07055539761714005, + "grad_norm": 0.06307683140039444, + "learning_rate": 4.701929703652654e-05, + "loss": 0.1228, + "step": 2730 + }, + { + "epoch": 0.07081384229705631, + "grad_norm": 0.045305073261260986, + "learning_rate": 4.719159200551344e-05, + "loss": 0.1231, + "step": 2740 + }, + { + "epoch": 0.07107228697697258, + "grad_norm": 0.07818667590618134, + "learning_rate": 4.736388697450035e-05, + "loss": 0.1228, + "step": 2750 + }, + { + "epoch": 0.07133073165688884, + "grad_norm": 0.09739340841770172, + "learning_rate": 4.7536181943487254e-05, + "loss": 0.1223, + "step": 2760 + }, + { + "epoch": 0.0715891763368051, + "grad_norm": 0.11781520396471024, + "learning_rate": 4.770847691247416e-05, + "loss": 0.1222, + "step": 2770 + }, + { + "epoch": 0.07184762101672137, + "grad_norm": 0.9360899329185486, + "learning_rate": 4.788077188146106e-05, + "loss": 0.223, + "step": 2780 + }, + { + "epoch": 0.07210606569663763, + "grad_norm": 0.13441406190395355, + "learning_rate": 4.805306685044797e-05, + "loss": 0.1519, + "step": 2790 + }, + { + "epoch": 0.07236451037655389, + "grad_norm": 0.05997923016548157, + "learning_rate": 4.822536181943487e-05, + "loss": 0.1319, + "step": 2800 + }, + { + "epoch": 0.07262295505647016, + "grad_norm": 0.05620058998465538, + "learning_rate": 4.839765678842178e-05, + "loss": 0.128, + "step": 2810 + }, + { + "epoch": 0.07288139973638642, + "grad_norm": 0.033109452575445175, + "learning_rate": 4.8569951757408686e-05, + "loss": 0.1251, + "step": 2820 + }, + { + "epoch": 0.0731398444163027, + "grad_norm": 0.08467695862054825, + "learning_rate": 4.874224672639559e-05, + "loss": 0.1249, + "step": 2830 + }, + { + "epoch": 0.07339828909621895, + "grad_norm": 0.046401672065258026, + "learning_rate": 4.89145416953825e-05, + "loss": 0.1242, + "step": 2840 + }, + { + "epoch": 0.07365673377613521, + "grad_norm": 0.07141178846359253, + "learning_rate": 4.90868366643694e-05, + "loss": 0.124, + "step": 2850 + }, + { + "epoch": 0.07391517845605149, + "grad_norm": 0.07576319575309753, + "learning_rate": 4.925913163335631e-05, + "loss": 0.123, + "step": 2860 + }, + { + "epoch": 0.07417362313596775, + "grad_norm": 0.05812068283557892, + "learning_rate": 4.9431426602343215e-05, + "loss": 0.123, + "step": 2870 + }, + { + "epoch": 0.074432067815884, + "grad_norm": 0.13982614874839783, + "learning_rate": 4.960372157133012e-05, + "loss": 0.1235, + "step": 2880 + }, + { + "epoch": 0.07469051249580028, + "grad_norm": 0.034210145473480225, + "learning_rate": 4.977601654031703e-05, + "loss": 0.1238, + "step": 2890 + }, + { + "epoch": 0.07494895717571654, + "grad_norm": 0.09808455407619476, + "learning_rate": 4.994831150930393e-05, + "loss": 0.1237, + "step": 2900 + }, + { + "epoch": 0.07520740185563281, + "grad_norm": 0.03436051309108734, + "learning_rate": 5.0120606478290834e-05, + "loss": 0.1237, + "step": 2910 + }, + { + "epoch": 0.07546584653554907, + "grad_norm": 0.11650115251541138, + "learning_rate": 5.0292901447277744e-05, + "loss": 0.1232, + "step": 2920 + }, + { + "epoch": 0.07572429121546533, + "grad_norm": 0.0716601014137268, + "learning_rate": 5.046519641626465e-05, + "loss": 0.1239, + "step": 2930 + }, + { + "epoch": 0.0759827358953816, + "grad_norm": 0.04548223316669464, + "learning_rate": 5.0637491385251557e-05, + "loss": 0.1226, + "step": 2940 + }, + { + "epoch": 0.07624118057529786, + "grad_norm": 0.061111096292734146, + "learning_rate": 5.080978635423845e-05, + "loss": 0.1227, + "step": 2950 + }, + { + "epoch": 0.07649962525521412, + "grad_norm": 0.06596088409423828, + "learning_rate": 5.098208132322536e-05, + "loss": 0.1228, + "step": 2960 + }, + { + "epoch": 0.07675806993513039, + "grad_norm": 0.10006445646286011, + "learning_rate": 5.115437629221227e-05, + "loss": 0.1222, + "step": 2970 + }, + { + "epoch": 0.07701651461504665, + "grad_norm": 0.08285020291805267, + "learning_rate": 5.1326671261199176e-05, + "loss": 0.1223, + "step": 2980 + }, + { + "epoch": 0.07727495929496292, + "grad_norm": 0.042629268020391464, + "learning_rate": 5.1498966230186085e-05, + "loss": 0.1227, + "step": 2990 + }, + { + "epoch": 0.07753340397487918, + "grad_norm": 0.04260925576090813, + "learning_rate": 5.167126119917298e-05, + "loss": 0.1218, + "step": 3000 + }, + { + "epoch": 0.07779184865479544, + "grad_norm": 0.05720480531454086, + "learning_rate": 5.184355616815989e-05, + "loss": 0.1228, + "step": 3010 + }, + { + "epoch": 0.07805029333471171, + "grad_norm": 0.08730859309434891, + "learning_rate": 5.20158511371468e-05, + "loss": 0.1227, + "step": 3020 + }, + { + "epoch": 0.07830873801462797, + "grad_norm": 0.0851985365152359, + "learning_rate": 5.2188146106133704e-05, + "loss": 0.1224, + "step": 3030 + }, + { + "epoch": 0.07856718269454423, + "grad_norm": 0.14108945429325104, + "learning_rate": 5.2360441075120614e-05, + "loss": 0.123, + "step": 3040 + }, + { + "epoch": 0.0788256273744605, + "grad_norm": 0.0634116381406784, + "learning_rate": 5.253273604410751e-05, + "loss": 0.1226, + "step": 3050 + }, + { + "epoch": 0.07908407205437676, + "grad_norm": 0.08557610958814621, + "learning_rate": 5.270503101309442e-05, + "loss": 0.1228, + "step": 3060 + }, + { + "epoch": 0.07934251673429303, + "grad_norm": 0.12258125841617584, + "learning_rate": 5.287732598208133e-05, + "loss": 0.1228, + "step": 3070 + }, + { + "epoch": 0.07960096141420929, + "grad_norm": 0.0662710964679718, + "learning_rate": 5.304962095106823e-05, + "loss": 0.1218, + "step": 3080 + }, + { + "epoch": 0.07985940609412555, + "grad_norm": 0.05238689109683037, + "learning_rate": 5.322191592005513e-05, + "loss": 0.1234, + "step": 3090 + }, + { + "epoch": 0.08011785077404182, + "grad_norm": 0.0742919072508812, + "learning_rate": 5.339421088904204e-05, + "loss": 0.1225, + "step": 3100 + }, + { + "epoch": 0.08037629545395808, + "grad_norm": 0.0753747895359993, + "learning_rate": 5.356650585802895e-05, + "loss": 0.1226, + "step": 3110 + }, + { + "epoch": 0.08063474013387434, + "grad_norm": 0.034092552959918976, + "learning_rate": 5.373880082701586e-05, + "loss": 0.1228, + "step": 3120 + }, + { + "epoch": 0.08089318481379061, + "grad_norm": 0.07082927972078323, + "learning_rate": 5.391109579600276e-05, + "loss": 0.1227, + "step": 3130 + }, + { + "epoch": 0.08115162949370687, + "grad_norm": 0.034534044563770294, + "learning_rate": 5.408339076498966e-05, + "loss": 0.1224, + "step": 3140 + }, + { + "epoch": 0.08141007417362314, + "grad_norm": 0.08891891688108444, + "learning_rate": 5.425568573397657e-05, + "loss": 0.1222, + "step": 3150 + }, + { + "epoch": 0.0816685188535394, + "grad_norm": 0.0535048246383667, + "learning_rate": 5.442798070296348e-05, + "loss": 0.1231, + "step": 3160 + }, + { + "epoch": 0.08192696353345566, + "grad_norm": 0.04806249588727951, + "learning_rate": 5.460027567195039e-05, + "loss": 0.123, + "step": 3170 + }, + { + "epoch": 0.08218540821337193, + "grad_norm": 0.08760054409503937, + "learning_rate": 5.4772570640937284e-05, + "loss": 0.122, + "step": 3180 + }, + { + "epoch": 0.08244385289328819, + "grad_norm": 0.10274428874254227, + "learning_rate": 5.494486560992419e-05, + "loss": 0.1228, + "step": 3190 + }, + { + "epoch": 0.08270229757320445, + "grad_norm": 0.04508817195892334, + "learning_rate": 5.51171605789111e-05, + "loss": 0.123, + "step": 3200 + }, + { + "epoch": 0.08296074225312072, + "grad_norm": 0.07910098880529404, + "learning_rate": 5.528945554789801e-05, + "loss": 0.122, + "step": 3210 + }, + { + "epoch": 0.08321918693303698, + "grad_norm": 0.08255916088819504, + "learning_rate": 5.546175051688492e-05, + "loss": 0.1223, + "step": 3220 + }, + { + "epoch": 0.08347763161295325, + "grad_norm": 0.08366689085960388, + "learning_rate": 5.563404548587181e-05, + "loss": 0.1228, + "step": 3230 + }, + { + "epoch": 0.08373607629286951, + "grad_norm": 0.09197453409433365, + "learning_rate": 5.5806340454858716e-05, + "loss": 0.1227, + "step": 3240 + }, + { + "epoch": 0.08399452097278577, + "grad_norm": 0.06316634267568588, + "learning_rate": 5.5978635423845626e-05, + "loss": 0.1224, + "step": 3250 + }, + { + "epoch": 0.08425296565270204, + "grad_norm": 0.045066073536872864, + "learning_rate": 5.6150930392832536e-05, + "loss": 0.1213, + "step": 3260 + }, + { + "epoch": 0.0845114103326183, + "grad_norm": 0.0808643102645874, + "learning_rate": 5.6323225361819446e-05, + "loss": 0.1222, + "step": 3270 + }, + { + "epoch": 0.08476985501253456, + "grad_norm": 0.074496328830719, + "learning_rate": 5.649552033080634e-05, + "loss": 0.1228, + "step": 3280 + }, + { + "epoch": 0.08502829969245083, + "grad_norm": 0.07612734287977219, + "learning_rate": 5.6667815299793245e-05, + "loss": 0.1226, + "step": 3290 + }, + { + "epoch": 0.0852867443723671, + "grad_norm": 0.06702574342489243, + "learning_rate": 5.6840110268780155e-05, + "loss": 0.1226, + "step": 3300 + }, + { + "epoch": 0.08554518905228335, + "grad_norm": 0.09477569162845612, + "learning_rate": 5.7012405237767065e-05, + "loss": 0.1219, + "step": 3310 + }, + { + "epoch": 0.08580363373219962, + "grad_norm": 0.07749473303556442, + "learning_rate": 5.718470020675396e-05, + "loss": 0.123, + "step": 3320 + }, + { + "epoch": 0.08606207841211588, + "grad_norm": 0.10147205740213394, + "learning_rate": 5.735699517574087e-05, + "loss": 0.1225, + "step": 3330 + }, + { + "epoch": 0.08632052309203216, + "grad_norm": 0.06562674790620804, + "learning_rate": 5.7529290144727774e-05, + "loss": 0.1222, + "step": 3340 + }, + { + "epoch": 0.08657896777194841, + "grad_norm": 0.049398452043533325, + "learning_rate": 5.7701585113714684e-05, + "loss": 0.1217, + "step": 3350 + }, + { + "epoch": 0.08683741245186467, + "grad_norm": 0.04137035831809044, + "learning_rate": 5.7873880082701594e-05, + "loss": 0.1231, + "step": 3360 + }, + { + "epoch": 0.08709585713178095, + "grad_norm": 0.04426753893494606, + "learning_rate": 5.804617505168849e-05, + "loss": 0.1226, + "step": 3370 + }, + { + "epoch": 0.0873543018116972, + "grad_norm": 0.06112112104892731, + "learning_rate": 5.82184700206754e-05, + "loss": 0.1229, + "step": 3380 + }, + { + "epoch": 0.08761274649161346, + "grad_norm": 0.043427493423223495, + "learning_rate": 5.83907649896623e-05, + "loss": 0.1224, + "step": 3390 + }, + { + "epoch": 0.08787119117152974, + "grad_norm": 0.10385109484195709, + "learning_rate": 5.856305995864921e-05, + "loss": 0.1222, + "step": 3400 + }, + { + "epoch": 0.088129635851446, + "grad_norm": 0.10103520005941391, + "learning_rate": 5.873535492763611e-05, + "loss": 0.1229, + "step": 3410 + }, + { + "epoch": 0.08838808053136227, + "grad_norm": 0.18527595698833466, + "learning_rate": 5.890764989662302e-05, + "loss": 0.1234, + "step": 3420 + }, + { + "epoch": 0.08864652521127853, + "grad_norm": 0.07813168317079544, + "learning_rate": 5.907994486560993e-05, + "loss": 0.1226, + "step": 3430 + }, + { + "epoch": 0.08890496989119478, + "grad_norm": 0.0695757120847702, + "learning_rate": 5.925223983459683e-05, + "loss": 0.1227, + "step": 3440 + }, + { + "epoch": 0.08916341457111106, + "grad_norm": 0.04970187693834305, + "learning_rate": 5.942453480358374e-05, + "loss": 0.1231, + "step": 3450 + }, + { + "epoch": 0.08942185925102732, + "grad_norm": 0.04609990492463112, + "learning_rate": 5.959682977257064e-05, + "loss": 0.1234, + "step": 3460 + }, + { + "epoch": 0.08968030393094358, + "grad_norm": 0.08185649663209915, + "learning_rate": 5.976912474155755e-05, + "loss": 0.1225, + "step": 3470 + }, + { + "epoch": 0.08993874861085985, + "grad_norm": 0.05953223258256912, + "learning_rate": 5.994141971054446e-05, + "loss": 0.1225, + "step": 3480 + }, + { + "epoch": 0.0901971932907761, + "grad_norm": 0.053057145327329636, + "learning_rate": 6.011371467953136e-05, + "loss": 0.1231, + "step": 3490 + }, + { + "epoch": 0.09045563797069238, + "grad_norm": 0.034726157784461975, + "learning_rate": 6.028600964851827e-05, + "loss": 0.1225, + "step": 3500 + }, + { + "epoch": 0.09071408265060864, + "grad_norm": 0.11597289144992828, + "learning_rate": 6.0458304617505167e-05, + "loss": 0.123, + "step": 3510 + }, + { + "epoch": 0.0909725273305249, + "grad_norm": 0.04248828440904617, + "learning_rate": 6.0630599586492076e-05, + "loss": 0.1226, + "step": 3520 + }, + { + "epoch": 0.09123097201044117, + "grad_norm": 0.04990701377391815, + "learning_rate": 6.0802894555478986e-05, + "loss": 0.1232, + "step": 3530 + }, + { + "epoch": 0.09148941669035743, + "grad_norm": 0.03539624065160751, + "learning_rate": 6.097518952446589e-05, + "loss": 0.1221, + "step": 3540 + }, + { + "epoch": 0.09174786137027369, + "grad_norm": 0.05631623789668083, + "learning_rate": 6.114748449345279e-05, + "loss": 0.1219, + "step": 3550 + }, + { + "epoch": 0.09200630605018996, + "grad_norm": 0.17052865028381348, + "learning_rate": 6.13197794624397e-05, + "loss": 0.1231, + "step": 3560 + }, + { + "epoch": 0.09226475073010622, + "grad_norm": 0.035124436020851135, + "learning_rate": 6.14920744314266e-05, + "loss": 0.1216, + "step": 3570 + }, + { + "epoch": 0.09252319541002249, + "grad_norm": 0.082002654671669, + "learning_rate": 6.166436940041351e-05, + "loss": 0.1231, + "step": 3580 + }, + { + "epoch": 0.09278164008993875, + "grad_norm": 0.057984985411167145, + "learning_rate": 6.183666436940042e-05, + "loss": 0.1223, + "step": 3590 + }, + { + "epoch": 0.09304008476985501, + "grad_norm": 0.06301140040159225, + "learning_rate": 6.200895933838731e-05, + "loss": 0.1226, + "step": 3600 + }, + { + "epoch": 0.09329852944977128, + "grad_norm": 0.05630588158965111, + "learning_rate": 6.218125430737422e-05, + "loss": 0.1225, + "step": 3610 + }, + { + "epoch": 0.09355697412968754, + "grad_norm": 0.07191000133752823, + "learning_rate": 6.235354927636113e-05, + "loss": 0.1218, + "step": 3620 + }, + { + "epoch": 0.0938154188096038, + "grad_norm": 0.05299285426735878, + "learning_rate": 6.252584424534804e-05, + "loss": 0.1223, + "step": 3630 + }, + { + "epoch": 0.09407386348952007, + "grad_norm": 0.08216344565153122, + "learning_rate": 6.269813921433495e-05, + "loss": 0.1233, + "step": 3640 + }, + { + "epoch": 0.09433230816943633, + "grad_norm": 0.06184655427932739, + "learning_rate": 6.287043418332185e-05, + "loss": 0.122, + "step": 3650 + }, + { + "epoch": 0.0945907528493526, + "grad_norm": 0.05294346436858177, + "learning_rate": 6.304272915230876e-05, + "loss": 0.1228, + "step": 3660 + }, + { + "epoch": 0.09484919752926886, + "grad_norm": 0.06311880052089691, + "learning_rate": 6.321502412129566e-05, + "loss": 0.122, + "step": 3670 + }, + { + "epoch": 0.09510764220918512, + "grad_norm": 0.04857679456472397, + "learning_rate": 6.338731909028257e-05, + "loss": 0.1225, + "step": 3680 + }, + { + "epoch": 0.09536608688910139, + "grad_norm": 0.12748484313488007, + "learning_rate": 6.355961405926946e-05, + "loss": 0.1232, + "step": 3690 + }, + { + "epoch": 0.09562453156901765, + "grad_norm": 0.044000301510095596, + "learning_rate": 6.373190902825637e-05, + "loss": 0.1225, + "step": 3700 + }, + { + "epoch": 0.09588297624893391, + "grad_norm": 0.043208472430706024, + "learning_rate": 6.390420399724328e-05, + "loss": 0.1222, + "step": 3710 + }, + { + "epoch": 0.09614142092885018, + "grad_norm": 0.10808105766773224, + "learning_rate": 6.407649896623019e-05, + "loss": 0.1219, + "step": 3720 + }, + { + "epoch": 0.09639986560876644, + "grad_norm": 0.039964232593774796, + "learning_rate": 6.42487939352171e-05, + "loss": 0.1224, + "step": 3730 + }, + { + "epoch": 0.09665831028868271, + "grad_norm": 0.03914389759302139, + "learning_rate": 6.4421088904204e-05, + "loss": 0.1226, + "step": 3740 + }, + { + "epoch": 0.09691675496859897, + "grad_norm": 0.04341758042573929, + "learning_rate": 6.459338387319091e-05, + "loss": 0.1223, + "step": 3750 + }, + { + "epoch": 0.09717519964851523, + "grad_norm": 0.0676141232252121, + "learning_rate": 6.476567884217782e-05, + "loss": 0.1228, + "step": 3760 + }, + { + "epoch": 0.0974336443284315, + "grad_norm": 0.03766927495598793, + "learning_rate": 6.493797381116473e-05, + "loss": 0.1222, + "step": 3770 + }, + { + "epoch": 0.09769208900834776, + "grad_norm": 0.1450009047985077, + "learning_rate": 6.511026878015162e-05, + "loss": 0.122, + "step": 3780 + }, + { + "epoch": 0.09795053368826402, + "grad_norm": 0.06709213554859161, + "learning_rate": 6.528256374913852e-05, + "loss": 0.1227, + "step": 3790 + }, + { + "epoch": 0.0982089783681803, + "grad_norm": 0.1288677603006363, + "learning_rate": 6.545485871812543e-05, + "loss": 0.1219, + "step": 3800 + }, + { + "epoch": 0.09846742304809655, + "grad_norm": 0.07477253675460815, + "learning_rate": 6.562715368711234e-05, + "loss": 0.123, + "step": 3810 + }, + { + "epoch": 0.09872586772801283, + "grad_norm": 0.066032774746418, + "learning_rate": 6.579944865609925e-05, + "loss": 0.1227, + "step": 3820 + }, + { + "epoch": 0.09898431240792908, + "grad_norm": 0.05806377902626991, + "learning_rate": 6.597174362508615e-05, + "loss": 0.1224, + "step": 3830 + }, + { + "epoch": 0.09924275708784534, + "grad_norm": 0.04779495671391487, + "learning_rate": 6.614403859407306e-05, + "loss": 0.1221, + "step": 3840 + }, + { + "epoch": 0.09950120176776162, + "grad_norm": 0.052797332406044006, + "learning_rate": 6.631633356305997e-05, + "loss": 0.1235, + "step": 3850 + }, + { + "epoch": 0.09975964644767787, + "grad_norm": 0.11657536029815674, + "learning_rate": 6.648862853204688e-05, + "loss": 0.1226, + "step": 3860 + }, + { + "epoch": 0.10001809112759413, + "grad_norm": 0.04988914355635643, + "learning_rate": 6.666092350103379e-05, + "loss": 0.1221, + "step": 3870 + }, + { + "epoch": 0.1002765358075104, + "grad_norm": 0.05598466843366623, + "learning_rate": 6.683321847002068e-05, + "loss": 0.1223, + "step": 3880 + }, + { + "epoch": 0.10053498048742666, + "grad_norm": 0.033486753702163696, + "learning_rate": 6.700551343900758e-05, + "loss": 0.1218, + "step": 3890 + }, + { + "epoch": 0.10079342516734294, + "grad_norm": 0.1374129205942154, + "learning_rate": 6.717780840799449e-05, + "loss": 0.1228, + "step": 3900 + }, + { + "epoch": 0.1010518698472592, + "grad_norm": 0.059307750314474106, + "learning_rate": 6.73501033769814e-05, + "loss": 0.1226, + "step": 3910 + }, + { + "epoch": 0.10131031452717545, + "grad_norm": 0.04299231991171837, + "learning_rate": 6.75223983459683e-05, + "loss": 0.1217, + "step": 3920 + }, + { + "epoch": 0.10156875920709173, + "grad_norm": 0.09884443134069443, + "learning_rate": 6.76946933149552e-05, + "loss": 0.1211, + "step": 3930 + }, + { + "epoch": 0.10182720388700799, + "grad_norm": 0.038886383175849915, + "learning_rate": 6.786698828394211e-05, + "loss": 0.1227, + "step": 3940 + }, + { + "epoch": 0.10208564856692424, + "grad_norm": 0.09966158121824265, + "learning_rate": 6.803928325292902e-05, + "loss": 0.1227, + "step": 3950 + }, + { + "epoch": 0.10234409324684052, + "grad_norm": 0.08910645544528961, + "learning_rate": 6.821157822191593e-05, + "loss": 0.1226, + "step": 3960 + }, + { + "epoch": 0.10260253792675678, + "grad_norm": 0.07237894088029861, + "learning_rate": 6.838387319090283e-05, + "loss": 0.122, + "step": 3970 + }, + { + "epoch": 0.10286098260667303, + "grad_norm": 0.05976925417780876, + "learning_rate": 6.855616815988974e-05, + "loss": 0.1227, + "step": 3980 + }, + { + "epoch": 0.10311942728658931, + "grad_norm": 0.05027018487453461, + "learning_rate": 6.872846312887664e-05, + "loss": 0.1219, + "step": 3990 + }, + { + "epoch": 0.10337787196650557, + "grad_norm": 0.0975559875369072, + "learning_rate": 6.890075809786355e-05, + "loss": 0.122, + "step": 4000 + }, + { + "epoch": 0.10363631664642184, + "grad_norm": 0.0403752475976944, + "learning_rate": 6.907305306685044e-05, + "loss": 0.1231, + "step": 4010 + }, + { + "epoch": 0.1038947613263381, + "grad_norm": 0.0713694840669632, + "learning_rate": 6.924534803583735e-05, + "loss": 0.1224, + "step": 4020 + }, + { + "epoch": 0.10415320600625436, + "grad_norm": 0.3694309890270233, + "learning_rate": 6.941764300482426e-05, + "loss": 0.1223, + "step": 4030 + }, + { + "epoch": 0.10441165068617063, + "grad_norm": 0.03761943057179451, + "learning_rate": 6.958993797381117e-05, + "loss": 0.1233, + "step": 4040 + }, + { + "epoch": 0.10467009536608689, + "grad_norm": 0.03457685932517052, + "learning_rate": 6.976223294279808e-05, + "loss": 0.1226, + "step": 4050 + }, + { + "epoch": 0.10492854004600315, + "grad_norm": 0.0534125454723835, + "learning_rate": 6.993452791178498e-05, + "loss": 0.1224, + "step": 4060 + }, + { + "epoch": 0.10518698472591942, + "grad_norm": 0.04451997950673103, + "learning_rate": 7.010682288077189e-05, + "loss": 0.1224, + "step": 4070 + }, + { + "epoch": 0.10544542940583568, + "grad_norm": 0.04510563984513283, + "learning_rate": 7.02791178497588e-05, + "loss": 0.1228, + "step": 4080 + }, + { + "epoch": 0.10570387408575195, + "grad_norm": 0.03580479696393013, + "learning_rate": 7.045141281874569e-05, + "loss": 0.1224, + "step": 4090 + }, + { + "epoch": 0.10596231876566821, + "grad_norm": 0.07300784438848495, + "learning_rate": 7.06237077877326e-05, + "loss": 0.1224, + "step": 4100 + }, + { + "epoch": 0.10622076344558447, + "grad_norm": 0.06231321021914482, + "learning_rate": 7.07960027567195e-05, + "loss": 0.1223, + "step": 4110 + }, + { + "epoch": 0.10647920812550074, + "grad_norm": 0.05126164108514786, + "learning_rate": 7.096829772570641e-05, + "loss": 0.1226, + "step": 4120 + }, + { + "epoch": 0.106737652805417, + "grad_norm": 0.09065715968608856, + "learning_rate": 7.114059269469332e-05, + "loss": 0.1229, + "step": 4130 + }, + { + "epoch": 0.10699609748533326, + "grad_norm": 0.08329204469919205, + "learning_rate": 7.131288766368023e-05, + "loss": 0.1223, + "step": 4140 + }, + { + "epoch": 0.10725454216524953, + "grad_norm": 0.06036970764398575, + "learning_rate": 7.148518263266712e-05, + "loss": 0.1218, + "step": 4150 + }, + { + "epoch": 0.10751298684516579, + "grad_norm": 0.1317700892686844, + "learning_rate": 7.165747760165403e-05, + "loss": 0.1224, + "step": 4160 + }, + { + "epoch": 0.10777143152508206, + "grad_norm": 0.0522531159222126, + "learning_rate": 7.182977257064094e-05, + "loss": 0.1232, + "step": 4170 + }, + { + "epoch": 0.10802987620499832, + "grad_norm": 0.05428091064095497, + "learning_rate": 7.200206753962785e-05, + "loss": 0.1225, + "step": 4180 + }, + { + "epoch": 0.10828832088491458, + "grad_norm": 0.063257597386837, + "learning_rate": 7.217436250861475e-05, + "loss": 0.1231, + "step": 4190 + }, + { + "epoch": 0.10854676556483085, + "grad_norm": 0.09137003123760223, + "learning_rate": 7.234665747760165e-05, + "loss": 0.1253, + "step": 4200 + }, + { + "epoch": 0.10880521024474711, + "grad_norm": 0.0453295074403286, + "learning_rate": 7.251895244658856e-05, + "loss": 0.1234, + "step": 4210 + }, + { + "epoch": 0.10906365492466337, + "grad_norm": 0.15534889698028564, + "learning_rate": 7.269124741557547e-05, + "loss": 0.1225, + "step": 4220 + }, + { + "epoch": 0.10932209960457964, + "grad_norm": 0.05509834736585617, + "learning_rate": 7.286354238456238e-05, + "loss": 0.1233, + "step": 4230 + }, + { + "epoch": 0.1095805442844959, + "grad_norm": 0.039166949689388275, + "learning_rate": 7.303583735354927e-05, + "loss": 0.1232, + "step": 4240 + }, + { + "epoch": 0.10983898896441217, + "grad_norm": 0.04186931252479553, + "learning_rate": 7.320813232253618e-05, + "loss": 0.1229, + "step": 4250 + }, + { + "epoch": 0.11009743364432843, + "grad_norm": 0.04699193686246872, + "learning_rate": 7.338042729152309e-05, + "loss": 0.1226, + "step": 4260 + }, + { + "epoch": 0.11035587832424469, + "grad_norm": 0.08616908639669418, + "learning_rate": 7.355272226051e-05, + "loss": 0.122, + "step": 4270 + }, + { + "epoch": 0.11061432300416096, + "grad_norm": 0.04840013012290001, + "learning_rate": 7.372501722949691e-05, + "loss": 0.123, + "step": 4280 + }, + { + "epoch": 0.11087276768407722, + "grad_norm": 0.1387040913105011, + "learning_rate": 7.389731219848381e-05, + "loss": 0.1226, + "step": 4290 + }, + { + "epoch": 0.11113121236399348, + "grad_norm": 0.04259253665804863, + "learning_rate": 7.406960716747072e-05, + "loss": 0.1229, + "step": 4300 + }, + { + "epoch": 0.11138965704390975, + "grad_norm": 0.05232750251889229, + "learning_rate": 7.424190213645761e-05, + "loss": 0.123, + "step": 4310 + }, + { + "epoch": 0.11164810172382601, + "grad_norm": 0.043995991349220276, + "learning_rate": 7.441419710544452e-05, + "loss": 0.1222, + "step": 4320 + }, + { + "epoch": 0.11190654640374229, + "grad_norm": 0.0380793958902359, + "learning_rate": 7.458649207443143e-05, + "loss": 0.1217, + "step": 4330 + }, + { + "epoch": 0.11216499108365854, + "grad_norm": 0.07311289757490158, + "learning_rate": 7.475878704341833e-05, + "loss": 0.1224, + "step": 4340 + }, + { + "epoch": 0.1124234357635748, + "grad_norm": 0.08249492198228836, + "learning_rate": 7.493108201240524e-05, + "loss": 0.1231, + "step": 4350 + }, + { + "epoch": 0.11268188044349108, + "grad_norm": 0.06283778697252274, + "learning_rate": 7.510337698139215e-05, + "loss": 0.1226, + "step": 4360 + }, + { + "epoch": 0.11294032512340733, + "grad_norm": 0.04833317548036575, + "learning_rate": 7.527567195037906e-05, + "loss": 0.1219, + "step": 4370 + }, + { + "epoch": 0.11319876980332359, + "grad_norm": 0.038750141859054565, + "learning_rate": 7.544796691936596e-05, + "loss": 0.1229, + "step": 4380 + }, + { + "epoch": 0.11345721448323987, + "grad_norm": 0.03584839776158333, + "learning_rate": 7.562026188835287e-05, + "loss": 0.1221, + "step": 4390 + }, + { + "epoch": 0.11371565916315612, + "grad_norm": 0.04687226191163063, + "learning_rate": 7.579255685733978e-05, + "loss": 0.1226, + "step": 4400 + }, + { + "epoch": 0.1139741038430724, + "grad_norm": 0.1047288253903389, + "learning_rate": 7.596485182632667e-05, + "loss": 0.1228, + "step": 4410 + }, + { + "epoch": 0.11423254852298866, + "grad_norm": 0.04070582240819931, + "learning_rate": 7.613714679531358e-05, + "loss": 0.1219, + "step": 4420 + }, + { + "epoch": 0.11449099320290491, + "grad_norm": 0.03817753866314888, + "learning_rate": 7.630944176430048e-05, + "loss": 0.1228, + "step": 4430 + }, + { + "epoch": 0.11474943788282119, + "grad_norm": 0.08240567147731781, + "learning_rate": 7.648173673328739e-05, + "loss": 0.1227, + "step": 4440 + }, + { + "epoch": 0.11500788256273745, + "grad_norm": 0.05481071397662163, + "learning_rate": 7.66540317022743e-05, + "loss": 0.1229, + "step": 4450 + }, + { + "epoch": 0.1152663272426537, + "grad_norm": 0.03658709675073624, + "learning_rate": 7.682632667126121e-05, + "loss": 0.1232, + "step": 4460 + }, + { + "epoch": 0.11552477192256998, + "grad_norm": 0.07439705729484558, + "learning_rate": 7.69986216402481e-05, + "loss": 0.1218, + "step": 4470 + }, + { + "epoch": 0.11578321660248624, + "grad_norm": 0.0986822172999382, + "learning_rate": 7.717091660923501e-05, + "loss": 0.1224, + "step": 4480 + }, + { + "epoch": 0.11604166128240251, + "grad_norm": 0.06603804975748062, + "learning_rate": 7.734321157822192e-05, + "loss": 0.1223, + "step": 4490 + }, + { + "epoch": 0.11630010596231877, + "grad_norm": 0.051767513155937195, + "learning_rate": 7.751550654720883e-05, + "loss": 0.1223, + "step": 4500 + }, + { + "epoch": 0.11655855064223503, + "grad_norm": 0.04836812615394592, + "learning_rate": 7.768780151619573e-05, + "loss": 0.1226, + "step": 4510 + }, + { + "epoch": 0.1168169953221513, + "grad_norm": 0.04150184988975525, + "learning_rate": 7.786009648518263e-05, + "loss": 0.1224, + "step": 4520 + }, + { + "epoch": 0.11707544000206756, + "grad_norm": 0.031774718314409256, + "learning_rate": 7.803239145416954e-05, + "loss": 0.1231, + "step": 4530 + }, + { + "epoch": 0.11733388468198382, + "grad_norm": 0.08382511883974075, + "learning_rate": 7.820468642315645e-05, + "loss": 0.1225, + "step": 4540 + }, + { + "epoch": 0.11759232936190009, + "grad_norm": 0.08452083170413971, + "learning_rate": 7.837698139214336e-05, + "loss": 0.1228, + "step": 4550 + }, + { + "epoch": 0.11785077404181635, + "grad_norm": 0.035038746893405914, + "learning_rate": 7.854927636113027e-05, + "loss": 0.1224, + "step": 4560 + }, + { + "epoch": 0.11810921872173262, + "grad_norm": 0.05306951701641083, + "learning_rate": 7.872157133011716e-05, + "loss": 0.1223, + "step": 4570 + }, + { + "epoch": 0.11836766340164888, + "grad_norm": 0.04382127895951271, + "learning_rate": 7.889386629910407e-05, + "loss": 0.1233, + "step": 4580 + }, + { + "epoch": 0.11862610808156514, + "grad_norm": 0.05427292734384537, + "learning_rate": 7.906616126809098e-05, + "loss": 0.1223, + "step": 4590 + }, + { + "epoch": 0.11888455276148141, + "grad_norm": 0.09626989811658859, + "learning_rate": 7.923845623707789e-05, + "loss": 0.1226, + "step": 4600 + }, + { + "epoch": 0.11914299744139767, + "grad_norm": 0.11436836421489716, + "learning_rate": 7.941075120606479e-05, + "loss": 0.1229, + "step": 4610 + }, + { + "epoch": 0.11940144212131393, + "grad_norm": 0.12255112081766129, + "learning_rate": 7.958304617505168e-05, + "loss": 0.1227, + "step": 4620 + }, + { + "epoch": 0.1196598868012302, + "grad_norm": 0.05500219762325287, + "learning_rate": 7.97553411440386e-05, + "loss": 0.1218, + "step": 4630 + }, + { + "epoch": 0.11991833148114646, + "grad_norm": 0.10685759782791138, + "learning_rate": 7.99276361130255e-05, + "loss": 0.1227, + "step": 4640 + }, + { + "epoch": 0.12017677616106272, + "grad_norm": 0.06736599653959274, + "learning_rate": 8.009993108201241e-05, + "loss": 0.1226, + "step": 4650 + }, + { + "epoch": 0.12043522084097899, + "grad_norm": 0.03752527013421059, + "learning_rate": 8.027222605099931e-05, + "loss": 0.1231, + "step": 4660 + }, + { + "epoch": 0.12069366552089525, + "grad_norm": 0.08045470714569092, + "learning_rate": 8.044452101998622e-05, + "loss": 0.1225, + "step": 4670 + }, + { + "epoch": 0.12095211020081152, + "grad_norm": 0.08815325051546097, + "learning_rate": 8.061681598897313e-05, + "loss": 0.1227, + "step": 4680 + }, + { + "epoch": 0.12121055488072778, + "grad_norm": 0.06669121235609055, + "learning_rate": 8.078911095796004e-05, + "loss": 0.1226, + "step": 4690 + }, + { + "epoch": 0.12146899956064404, + "grad_norm": 0.0494580939412117, + "learning_rate": 8.096140592694694e-05, + "loss": 0.1222, + "step": 4700 + }, + { + "epoch": 0.12172744424056031, + "grad_norm": 0.043872687965631485, + "learning_rate": 8.113370089593385e-05, + "loss": 0.1219, + "step": 4710 + }, + { + "epoch": 0.12198588892047657, + "grad_norm": 0.059116628021001816, + "learning_rate": 8.130599586492074e-05, + "loss": 0.1228, + "step": 4720 + }, + { + "epoch": 0.12224433360039283, + "grad_norm": 0.03547026216983795, + "learning_rate": 8.147829083390765e-05, + "loss": 0.1226, + "step": 4730 + }, + { + "epoch": 0.1225027782803091, + "grad_norm": 0.07757695019245148, + "learning_rate": 8.165058580289456e-05, + "loss": 0.1229, + "step": 4740 + }, + { + "epoch": 0.12276122296022536, + "grad_norm": 0.05295100063085556, + "learning_rate": 8.182288077188146e-05, + "loss": 0.1223, + "step": 4750 + }, + { + "epoch": 0.12301966764014163, + "grad_norm": 0.07923055440187454, + "learning_rate": 8.199517574086837e-05, + "loss": 0.1215, + "step": 4760 + }, + { + "epoch": 0.12327811232005789, + "grad_norm": 0.032670192420482635, + "learning_rate": 8.216747070985528e-05, + "loss": 0.1229, + "step": 4770 + }, + { + "epoch": 0.12353655699997415, + "grad_norm": 0.11639686673879623, + "learning_rate": 8.233976567884219e-05, + "loss": 0.1225, + "step": 4780 + }, + { + "epoch": 0.12379500167989042, + "grad_norm": 0.043049536645412445, + "learning_rate": 8.25120606478291e-05, + "loss": 0.1223, + "step": 4790 + }, + { + "epoch": 0.12405344635980668, + "grad_norm": 0.03224676847457886, + "learning_rate": 8.268435561681599e-05, + "loss": 0.1226, + "step": 4800 + }, + { + "epoch": 0.12431189103972294, + "grad_norm": 0.11773455142974854, + "learning_rate": 8.28566505858029e-05, + "loss": 0.1229, + "step": 4810 + }, + { + "epoch": 0.12457033571963921, + "grad_norm": 0.09413377940654755, + "learning_rate": 8.30289455547898e-05, + "loss": 0.1238, + "step": 4820 + }, + { + "epoch": 0.12482878039955547, + "grad_norm": 0.06334367394447327, + "learning_rate": 8.320124052377671e-05, + "loss": 0.1224, + "step": 4830 + }, + { + "epoch": 0.12508722507947173, + "grad_norm": 0.050037506967782974, + "learning_rate": 8.33735354927636e-05, + "loss": 0.1219, + "step": 4840 + }, + { + "epoch": 0.125345669759388, + "grad_norm": 0.032983407378196716, + "learning_rate": 8.354583046175052e-05, + "loss": 0.123, + "step": 4850 + }, + { + "epoch": 0.12560411443930428, + "grad_norm": 0.03815971314907074, + "learning_rate": 8.371812543073743e-05, + "loss": 0.1229, + "step": 4860 + }, + { + "epoch": 0.12586255911922054, + "grad_norm": 0.05136092007160187, + "learning_rate": 8.389042039972433e-05, + "loss": 0.1226, + "step": 4870 + }, + { + "epoch": 0.1261210037991368, + "grad_norm": 0.053991496562957764, + "learning_rate": 8.406271536871124e-05, + "loss": 0.1227, + "step": 4880 + }, + { + "epoch": 0.12637944847905305, + "grad_norm": 0.054335158318281174, + "learning_rate": 8.423501033769814e-05, + "loss": 0.1222, + "step": 4890 + }, + { + "epoch": 0.1266378931589693, + "grad_norm": 0.042548444122076035, + "learning_rate": 8.440730530668505e-05, + "loss": 0.1227, + "step": 4900 + }, + { + "epoch": 0.1268963378388856, + "grad_norm": 0.042858757078647614, + "learning_rate": 8.457960027567196e-05, + "loss": 0.1231, + "step": 4910 + }, + { + "epoch": 0.12715478251880186, + "grad_norm": 0.1008346900343895, + "learning_rate": 8.475189524465886e-05, + "loss": 0.1224, + "step": 4920 + }, + { + "epoch": 0.12741322719871812, + "grad_norm": 0.11128246039152145, + "learning_rate": 8.492419021364577e-05, + "loss": 0.1232, + "step": 4930 + }, + { + "epoch": 0.12767167187863437, + "grad_norm": 0.052652839571237564, + "learning_rate": 8.509648518263266e-05, + "loss": 0.1223, + "step": 4940 + }, + { + "epoch": 0.12793011655855063, + "grad_norm": 0.03955278545618057, + "learning_rate": 8.526878015161957e-05, + "loss": 0.1224, + "step": 4950 + }, + { + "epoch": 0.12818856123846692, + "grad_norm": 0.042201098054647446, + "learning_rate": 8.544107512060648e-05, + "loss": 0.1221, + "step": 4960 + }, + { + "epoch": 0.12844700591838318, + "grad_norm": 0.1115364357829094, + "learning_rate": 8.561337008959339e-05, + "loss": 0.1223, + "step": 4970 + }, + { + "epoch": 0.12870545059829944, + "grad_norm": 0.060178887099027634, + "learning_rate": 8.578566505858029e-05, + "loss": 0.1229, + "step": 4980 + }, + { + "epoch": 0.1289638952782157, + "grad_norm": 0.042597394436597824, + "learning_rate": 8.59579600275672e-05, + "loss": 0.1231, + "step": 4990 + }, + { + "epoch": 0.12922233995813195, + "grad_norm": 0.03702604025602341, + "learning_rate": 8.613025499655411e-05, + "loss": 0.1224, + "step": 5000 + }, + { + "epoch": 0.1294807846380482, + "grad_norm": 0.07880663871765137, + "learning_rate": 8.630254996554102e-05, + "loss": 0.1224, + "step": 5010 + }, + { + "epoch": 0.1297392293179645, + "grad_norm": 0.03913779556751251, + "learning_rate": 8.647484493452791e-05, + "loss": 0.1227, + "step": 5020 + }, + { + "epoch": 0.12999767399788076, + "grad_norm": 0.09998935461044312, + "learning_rate": 8.664713990351482e-05, + "loss": 0.1219, + "step": 5030 + }, + { + "epoch": 0.13025611867779702, + "grad_norm": 0.03601246327161789, + "learning_rate": 8.681943487250172e-05, + "loss": 0.1228, + "step": 5040 + }, + { + "epoch": 0.13051456335771328, + "grad_norm": 0.044729083776474, + "learning_rate": 8.699172984148863e-05, + "loss": 0.1231, + "step": 5050 + }, + { + "epoch": 0.13077300803762953, + "grad_norm": 0.07009709626436234, + "learning_rate": 8.716402481047554e-05, + "loss": 0.1222, + "step": 5060 + }, + { + "epoch": 0.13103145271754582, + "grad_norm": 0.031709179282188416, + "learning_rate": 8.733631977946244e-05, + "loss": 0.1222, + "step": 5070 + }, + { + "epoch": 0.13128989739746208, + "grad_norm": 0.04385843873023987, + "learning_rate": 8.750861474844935e-05, + "loss": 0.123, + "step": 5080 + }, + { + "epoch": 0.13154834207737834, + "grad_norm": 0.042127061635255814, + "learning_rate": 8.768090971743626e-05, + "loss": 0.1218, + "step": 5090 + }, + { + "epoch": 0.1318067867572946, + "grad_norm": 0.07429303228855133, + "learning_rate": 8.785320468642317e-05, + "loss": 0.1224, + "step": 5100 + }, + { + "epoch": 0.13206523143721086, + "grad_norm": 0.08992215245962143, + "learning_rate": 8.802549965541008e-05, + "loss": 0.1225, + "step": 5110 + }, + { + "epoch": 0.13232367611712714, + "grad_norm": 0.040515679866075516, + "learning_rate": 8.819779462439697e-05, + "loss": 0.1231, + "step": 5120 + }, + { + "epoch": 0.1325821207970434, + "grad_norm": 0.07399878650903702, + "learning_rate": 8.837008959338388e-05, + "loss": 0.1218, + "step": 5130 + }, + { + "epoch": 0.13284056547695966, + "grad_norm": 0.12034235894680023, + "learning_rate": 8.854238456237078e-05, + "loss": 0.1228, + "step": 5140 + }, + { + "epoch": 0.13309901015687592, + "grad_norm": 0.05388695001602173, + "learning_rate": 8.871467953135769e-05, + "loss": 0.1227, + "step": 5150 + }, + { + "epoch": 0.13335745483679218, + "grad_norm": 0.053491197526454926, + "learning_rate": 8.88869745003446e-05, + "loss": 0.1221, + "step": 5160 + }, + { + "epoch": 0.13361589951670844, + "grad_norm": 0.04084569588303566, + "learning_rate": 8.90592694693315e-05, + "loss": 0.1228, + "step": 5170 + }, + { + "epoch": 0.13387434419662472, + "grad_norm": 0.0465102382004261, + "learning_rate": 8.92315644383184e-05, + "loss": 0.123, + "step": 5180 + }, + { + "epoch": 0.13413278887654098, + "grad_norm": 0.09565524756908417, + "learning_rate": 8.940385940730531e-05, + "loss": 0.1221, + "step": 5190 + }, + { + "epoch": 0.13439123355645724, + "grad_norm": 0.06772435456514359, + "learning_rate": 8.957615437629222e-05, + "loss": 0.1226, + "step": 5200 + }, + { + "epoch": 0.1346496782363735, + "grad_norm": 0.06020309776067734, + "learning_rate": 8.974844934527912e-05, + "loss": 0.1226, + "step": 5210 + }, + { + "epoch": 0.13490812291628976, + "grad_norm": 0.06735508888959885, + "learning_rate": 8.992074431426603e-05, + "loss": 0.1234, + "step": 5220 + }, + { + "epoch": 0.13516656759620604, + "grad_norm": 0.07625043392181396, + "learning_rate": 9.009303928325294e-05, + "loss": 0.1228, + "step": 5230 + }, + { + "epoch": 0.1354250122761223, + "grad_norm": 0.03906608372926712, + "learning_rate": 9.026533425223984e-05, + "loss": 0.1228, + "step": 5240 + }, + { + "epoch": 0.13568345695603856, + "grad_norm": 0.050199955701828, + "learning_rate": 9.043762922122675e-05, + "loss": 0.1227, + "step": 5250 + }, + { + "epoch": 0.13594190163595482, + "grad_norm": 0.047526754438877106, + "learning_rate": 9.060992419021364e-05, + "loss": 0.1228, + "step": 5260 + }, + { + "epoch": 0.13620034631587108, + "grad_norm": 0.06249832734465599, + "learning_rate": 9.078221915920055e-05, + "loss": 0.1218, + "step": 5270 + }, + { + "epoch": 0.13645879099578734, + "grad_norm": 0.06982988119125366, + "learning_rate": 9.095451412818746e-05, + "loss": 0.1229, + "step": 5280 + }, + { + "epoch": 0.13671723567570362, + "grad_norm": 0.04621538892388344, + "learning_rate": 9.112680909717437e-05, + "loss": 0.123, + "step": 5290 + }, + { + "epoch": 0.13697568035561988, + "grad_norm": 0.04687514528632164, + "learning_rate": 9.129910406616127e-05, + "loss": 0.1224, + "step": 5300 + }, + { + "epoch": 0.13723412503553614, + "grad_norm": 0.1940646767616272, + "learning_rate": 9.147139903514818e-05, + "loss": 0.1224, + "step": 5310 + }, + { + "epoch": 0.1374925697154524, + "grad_norm": 0.0559009313583374, + "learning_rate": 9.164369400413509e-05, + "loss": 0.1233, + "step": 5320 + }, + { + "epoch": 0.13775101439536866, + "grad_norm": 0.068574458360672, + "learning_rate": 9.1815988973122e-05, + "loss": 0.1221, + "step": 5330 + }, + { + "epoch": 0.13800945907528495, + "grad_norm": 0.04342379793524742, + "learning_rate": 9.19882839421089e-05, + "loss": 0.1217, + "step": 5340 + }, + { + "epoch": 0.1382679037552012, + "grad_norm": 0.08951599150896072, + "learning_rate": 9.216057891109579e-05, + "loss": 0.122, + "step": 5350 + }, + { + "epoch": 0.13852634843511746, + "grad_norm": 0.03746691718697548, + "learning_rate": 9.23328738800827e-05, + "loss": 0.1232, + "step": 5360 + }, + { + "epoch": 0.13878479311503372, + "grad_norm": 0.07881396263837814, + "learning_rate": 9.250516884906961e-05, + "loss": 0.122, + "step": 5370 + }, + { + "epoch": 0.13904323779494998, + "grad_norm": 0.030846592038869858, + "learning_rate": 9.267746381805652e-05, + "loss": 0.1225, + "step": 5380 + }, + { + "epoch": 0.13930168247486627, + "grad_norm": 0.07396665215492249, + "learning_rate": 9.284975878704343e-05, + "loss": 0.1228, + "step": 5390 + }, + { + "epoch": 0.13956012715478253, + "grad_norm": 0.05139466002583504, + "learning_rate": 9.302205375603033e-05, + "loss": 0.1229, + "step": 5400 + }, + { + "epoch": 0.13981857183469878, + "grad_norm": 0.056004252284765244, + "learning_rate": 9.319434872501724e-05, + "loss": 0.1223, + "step": 5410 + }, + { + "epoch": 0.14007701651461504, + "grad_norm": 0.04733270779252052, + "learning_rate": 9.336664369400415e-05, + "loss": 0.122, + "step": 5420 + }, + { + "epoch": 0.1403354611945313, + "grad_norm": 0.07832314074039459, + "learning_rate": 9.353893866299106e-05, + "loss": 0.1237, + "step": 5430 + }, + { + "epoch": 0.14059390587444756, + "grad_norm": 0.030930381268262863, + "learning_rate": 9.371123363197795e-05, + "loss": 0.124, + "step": 5440 + }, + { + "epoch": 0.14085235055436385, + "grad_norm": 0.06079782918095589, + "learning_rate": 9.388352860096485e-05, + "loss": 0.1229, + "step": 5450 + }, + { + "epoch": 0.1411107952342801, + "grad_norm": 0.05535131320357323, + "learning_rate": 9.405582356995176e-05, + "loss": 0.1221, + "step": 5460 + }, + { + "epoch": 0.14136923991419637, + "grad_norm": 0.10106604546308517, + "learning_rate": 9.422811853893867e-05, + "loss": 0.1233, + "step": 5470 + }, + { + "epoch": 0.14162768459411262, + "grad_norm": 0.029686246067285538, + "learning_rate": 9.440041350792558e-05, + "loss": 0.1229, + "step": 5480 + }, + { + "epoch": 0.14188612927402888, + "grad_norm": 0.14509092271327972, + "learning_rate": 9.457270847691247e-05, + "loss": 0.1225, + "step": 5490 + }, + { + "epoch": 0.14214457395394517, + "grad_norm": 0.04154964163899422, + "learning_rate": 9.474500344589938e-05, + "loss": 0.123, + "step": 5500 + }, + { + "epoch": 0.14240301863386143, + "grad_norm": 0.12305867671966553, + "learning_rate": 9.49172984148863e-05, + "loss": 0.1229, + "step": 5510 + }, + { + "epoch": 0.1426614633137777, + "grad_norm": 0.044824931770563126, + "learning_rate": 9.50895933838732e-05, + "loss": 0.1226, + "step": 5520 + }, + { + "epoch": 0.14291990799369395, + "grad_norm": 0.053049176931381226, + "learning_rate": 9.52618883528601e-05, + "loss": 0.1227, + "step": 5530 + }, + { + "epoch": 0.1431783526736102, + "grad_norm": 0.06704752892255783, + "learning_rate": 9.543418332184701e-05, + "loss": 0.123, + "step": 5540 + }, + { + "epoch": 0.1434367973535265, + "grad_norm": 0.08480844646692276, + "learning_rate": 9.56064782908339e-05, + "loss": 0.1224, + "step": 5550 + }, + { + "epoch": 0.14369524203344275, + "grad_norm": 0.03661547973752022, + "learning_rate": 9.577877325982082e-05, + "loss": 0.1229, + "step": 5560 + }, + { + "epoch": 0.143953686713359, + "grad_norm": 0.047675423324108124, + "learning_rate": 9.595106822880773e-05, + "loss": 0.1219, + "step": 5570 + }, + { + "epoch": 0.14421213139327527, + "grad_norm": 0.05953799933195114, + "learning_rate": 9.612336319779462e-05, + "loss": 0.123, + "step": 5580 + }, + { + "epoch": 0.14447057607319153, + "grad_norm": 0.07713500410318375, + "learning_rate": 9.629565816678153e-05, + "loss": 0.123, + "step": 5590 + }, + { + "epoch": 0.14472902075310778, + "grad_norm": 0.11610189825296402, + "learning_rate": 9.646795313576844e-05, + "loss": 0.1219, + "step": 5600 + }, + { + "epoch": 0.14498746543302407, + "grad_norm": 0.04153558239340782, + "learning_rate": 9.664024810475535e-05, + "loss": 0.1231, + "step": 5610 + }, + { + "epoch": 0.14524591011294033, + "grad_norm": 0.03742504492402077, + "learning_rate": 9.681254307374226e-05, + "loss": 0.1224, + "step": 5620 + }, + { + "epoch": 0.1455043547928566, + "grad_norm": 0.0383777841925621, + "learning_rate": 9.698483804272916e-05, + "loss": 0.1225, + "step": 5630 + }, + { + "epoch": 0.14576279947277285, + "grad_norm": 0.03921051323413849, + "learning_rate": 9.715713301171607e-05, + "loss": 0.1222, + "step": 5640 + }, + { + "epoch": 0.1460212441526891, + "grad_norm": 0.0732608512043953, + "learning_rate": 9.732942798070296e-05, + "loss": 0.123, + "step": 5650 + }, + { + "epoch": 0.1462796888326054, + "grad_norm": 0.05237583443522453, + "learning_rate": 9.750172294968987e-05, + "loss": 0.1229, + "step": 5660 + }, + { + "epoch": 0.14653813351252165, + "grad_norm": 0.09480612725019455, + "learning_rate": 9.767401791867677e-05, + "loss": 0.1228, + "step": 5670 + }, + { + "epoch": 0.1467965781924379, + "grad_norm": 0.05620969086885452, + "learning_rate": 9.784631288766368e-05, + "loss": 0.1227, + "step": 5680 + }, + { + "epoch": 0.14705502287235417, + "grad_norm": 0.03613579273223877, + "learning_rate": 9.801860785665059e-05, + "loss": 0.1227, + "step": 5690 + }, + { + "epoch": 0.14731346755227043, + "grad_norm": 0.069630928337574, + "learning_rate": 9.81909028256375e-05, + "loss": 0.1224, + "step": 5700 + }, + { + "epoch": 0.1475719122321867, + "grad_norm": 0.04210982844233513, + "learning_rate": 9.836319779462441e-05, + "loss": 0.1228, + "step": 5710 + }, + { + "epoch": 0.14783035691210297, + "grad_norm": 0.11130691319704056, + "learning_rate": 9.85354927636113e-05, + "loss": 0.1229, + "step": 5720 + }, + { + "epoch": 0.14808880159201923, + "grad_norm": 0.05518569424748421, + "learning_rate": 9.870778773259821e-05, + "loss": 0.1227, + "step": 5730 + }, + { + "epoch": 0.1483472462719355, + "grad_norm": 0.039407093077898026, + "learning_rate": 9.888008270158512e-05, + "loss": 0.1222, + "step": 5740 + }, + { + "epoch": 0.14860569095185175, + "grad_norm": 0.0508963018655777, + "learning_rate": 9.905237767057202e-05, + "loss": 0.1222, + "step": 5750 + }, + { + "epoch": 0.148864135631768, + "grad_norm": 0.031615208834409714, + "learning_rate": 9.922467263955893e-05, + "loss": 0.123, + "step": 5760 + }, + { + "epoch": 0.1491225803116843, + "grad_norm": 0.061446405947208405, + "learning_rate": 9.939696760854583e-05, + "loss": 0.1222, + "step": 5770 + }, + { + "epoch": 0.14938102499160055, + "grad_norm": 0.03070332109928131, + "learning_rate": 9.956926257753274e-05, + "loss": 0.123, + "step": 5780 + }, + { + "epoch": 0.1496394696715168, + "grad_norm": 0.06859464198350906, + "learning_rate": 9.974155754651965e-05, + "loss": 0.122, + "step": 5790 + }, + { + "epoch": 0.14989791435143307, + "grad_norm": 0.04705343768000603, + "learning_rate": 9.991385251550656e-05, + "loss": 0.124, + "step": 5800 + }, + { + "epoch": 0.15015635903134933, + "grad_norm": 0.033693280071020126, + "learning_rate": 9.999999600812937e-05, + "loss": 0.1239, + "step": 5810 + }, + { + "epoch": 0.15041480371126562, + "grad_norm": 0.04748392105102539, + "learning_rate": 9.999996407316989e-05, + "loss": 0.1229, + "step": 5820 + }, + { + "epoch": 0.15067324839118187, + "grad_norm": 0.08557397872209549, + "learning_rate": 9.999990020328007e-05, + "loss": 0.1236, + "step": 5830 + }, + { + "epoch": 0.15093169307109813, + "grad_norm": 0.031127164140343666, + "learning_rate": 9.999980439851815e-05, + "loss": 0.1232, + "step": 5840 + }, + { + "epoch": 0.1511901377510144, + "grad_norm": 0.07046674191951752, + "learning_rate": 9.999967665897161e-05, + "loss": 0.1222, + "step": 5850 + }, + { + "epoch": 0.15144858243093065, + "grad_norm": 0.035767097026109695, + "learning_rate": 9.999951698475694e-05, + "loss": 0.1224, + "step": 5860 + }, + { + "epoch": 0.15170702711084694, + "grad_norm": 0.06115670129656792, + "learning_rate": 9.999932537601986e-05, + "loss": 0.1232, + "step": 5870 + }, + { + "epoch": 0.1519654717907632, + "grad_norm": 0.04003420099616051, + "learning_rate": 9.999910183293522e-05, + "loss": 0.1223, + "step": 5880 + }, + { + "epoch": 0.15222391647067945, + "grad_norm": 0.06954909861087799, + "learning_rate": 9.999884635570693e-05, + "loss": 0.1227, + "step": 5890 + }, + { + "epoch": 0.1524823611505957, + "grad_norm": 0.11532850563526154, + "learning_rate": 9.999855894456816e-05, + "loss": 0.1227, + "step": 5900 + }, + { + "epoch": 0.15274080583051197, + "grad_norm": 0.04735326021909714, + "learning_rate": 9.99982395997811e-05, + "loss": 0.1227, + "step": 5910 + }, + { + "epoch": 0.15299925051042823, + "grad_norm": 0.03574826195836067, + "learning_rate": 9.999788832163714e-05, + "loss": 0.1231, + "step": 5920 + }, + { + "epoch": 0.15325769519034452, + "grad_norm": 0.03983622044324875, + "learning_rate": 9.999750511045682e-05, + "loss": 0.1222, + "step": 5930 + }, + { + "epoch": 0.15351613987026078, + "grad_norm": 0.0893009677529335, + "learning_rate": 9.999708996658979e-05, + "loss": 0.1223, + "step": 5940 + }, + { + "epoch": 0.15377458455017703, + "grad_norm": 0.04478004202246666, + "learning_rate": 9.999664289041481e-05, + "loss": 0.1226, + "step": 5950 + }, + { + "epoch": 0.1540330292300933, + "grad_norm": 0.05857371538877487, + "learning_rate": 9.999616388233984e-05, + "loss": 0.1229, + "step": 5960 + }, + { + "epoch": 0.15429147391000955, + "grad_norm": 0.034057389944791794, + "learning_rate": 9.999565294280191e-05, + "loss": 0.1219, + "step": 5970 + }, + { + "epoch": 0.15454991858992584, + "grad_norm": 0.05367177352309227, + "learning_rate": 9.999511007226721e-05, + "loss": 0.1229, + "step": 5980 + }, + { + "epoch": 0.1548083632698421, + "grad_norm": 0.0330672450363636, + "learning_rate": 9.99945352712311e-05, + "loss": 0.1225, + "step": 5990 + }, + { + "epoch": 0.15506680794975836, + "grad_norm": 0.06315747648477554, + "learning_rate": 9.999392854021805e-05, + "loss": 0.1217, + "step": 6000 + }, + { + "epoch": 0.15532525262967461, + "grad_norm": 0.042845889925956726, + "learning_rate": 9.99932898797816e-05, + "loss": 0.1225, + "step": 6010 + }, + { + "epoch": 0.15558369730959087, + "grad_norm": 0.04314880818128586, + "learning_rate": 9.999261929050457e-05, + "loss": 0.1229, + "step": 6020 + }, + { + "epoch": 0.15584214198950713, + "grad_norm": 0.039876632392406464, + "learning_rate": 9.999191677299873e-05, + "loss": 0.1226, + "step": 6030 + }, + { + "epoch": 0.15610058666942342, + "grad_norm": 0.04737882316112518, + "learning_rate": 9.999118232790515e-05, + "loss": 0.123, + "step": 6040 + }, + { + "epoch": 0.15635903134933968, + "grad_norm": 0.029079638421535492, + "learning_rate": 9.999041595589388e-05, + "loss": 0.1223, + "step": 6050 + }, + { + "epoch": 0.15661747602925594, + "grad_norm": 0.08300851285457611, + "learning_rate": 9.998961765766428e-05, + "loss": 0.1232, + "step": 6060 + }, + { + "epoch": 0.1568759207091722, + "grad_norm": 0.03632732480764389, + "learning_rate": 9.998878743394466e-05, + "loss": 0.1235, + "step": 6070 + }, + { + "epoch": 0.15713436538908845, + "grad_norm": 0.06861578673124313, + "learning_rate": 9.998792528549258e-05, + "loss": 0.1219, + "step": 6080 + }, + { + "epoch": 0.15739281006900474, + "grad_norm": 0.0559479258954525, + "learning_rate": 9.998703121309467e-05, + "loss": 0.1229, + "step": 6090 + }, + { + "epoch": 0.157651254748921, + "grad_norm": 0.04722253233194351, + "learning_rate": 9.99861052175667e-05, + "loss": 0.1224, + "step": 6100 + }, + { + "epoch": 0.15790969942883726, + "grad_norm": 0.03625471889972687, + "learning_rate": 9.998514729975361e-05, + "loss": 0.1227, + "step": 6110 + }, + { + "epoch": 0.15816814410875352, + "grad_norm": 0.08052948117256165, + "learning_rate": 9.998415746052938e-05, + "loss": 0.1235, + "step": 6120 + }, + { + "epoch": 0.15842658878866978, + "grad_norm": 0.03705105558037758, + "learning_rate": 9.99831357007972e-05, + "loss": 0.1222, + "step": 6130 + }, + { + "epoch": 0.15868503346858606, + "grad_norm": 0.05433618277311325, + "learning_rate": 9.998208202148934e-05, + "loss": 0.1219, + "step": 6140 + }, + { + "epoch": 0.15894347814850232, + "grad_norm": 0.043679412454366684, + "learning_rate": 9.99809964235672e-05, + "loss": 0.1223, + "step": 6150 + }, + { + "epoch": 0.15920192282841858, + "grad_norm": 0.03310234472155571, + "learning_rate": 9.997987890802134e-05, + "loss": 0.1232, + "step": 6160 + }, + { + "epoch": 0.15946036750833484, + "grad_norm": 0.24289415776729584, + "learning_rate": 9.997872947587137e-05, + "loss": 0.1237, + "step": 6170 + }, + { + "epoch": 0.1597188121882511, + "grad_norm": 0.039240024983882904, + "learning_rate": 9.997754812816609e-05, + "loss": 0.1236, + "step": 6180 + }, + { + "epoch": 0.15997725686816736, + "grad_norm": 0.04712551832199097, + "learning_rate": 9.997633486598339e-05, + "loss": 0.1227, + "step": 6190 + }, + { + "epoch": 0.16023570154808364, + "grad_norm": 0.02947244606912136, + "learning_rate": 9.997508969043028e-05, + "loss": 0.1226, + "step": 6200 + }, + { + "epoch": 0.1604941462279999, + "grad_norm": 0.04263737425208092, + "learning_rate": 9.997381260264289e-05, + "loss": 0.1226, + "step": 6210 + }, + { + "epoch": 0.16075259090791616, + "grad_norm": 0.04998556524515152, + "learning_rate": 9.997250360378649e-05, + "loss": 0.1224, + "step": 6220 + }, + { + "epoch": 0.16101103558783242, + "grad_norm": 0.04966837912797928, + "learning_rate": 9.997116269505542e-05, + "loss": 0.122, + "step": 6230 + }, + { + "epoch": 0.16126948026774868, + "grad_norm": 2.4541170597076416, + "learning_rate": 9.996978987767316e-05, + "loss": 0.1392, + "step": 6240 + }, + { + "epoch": 0.16152792494766496, + "grad_norm": 2.3374593257904053, + "learning_rate": 9.996838515289233e-05, + "loss": 0.273, + "step": 6250 + }, + { + "epoch": 0.16178636962758122, + "grad_norm": 0.7933456301689148, + "learning_rate": 9.996694852199462e-05, + "loss": 0.2465, + "step": 6260 + }, + { + "epoch": 0.16204481430749748, + "grad_norm": 0.1552945226430893, + "learning_rate": 9.996547998629086e-05, + "loss": 0.1878, + "step": 6270 + }, + { + "epoch": 0.16230325898741374, + "grad_norm": 0.06761500984430313, + "learning_rate": 9.9963979547121e-05, + "loss": 0.1431, + "step": 6280 + }, + { + "epoch": 0.16256170366733, + "grad_norm": 0.044655121862888336, + "learning_rate": 9.996244720585406e-05, + "loss": 0.132, + "step": 6290 + }, + { + "epoch": 0.16282014834724629, + "grad_norm": 0.05911582335829735, + "learning_rate": 9.996088296388819e-05, + "loss": 0.1289, + "step": 6300 + }, + { + "epoch": 0.16307859302716254, + "grad_norm": 0.05969958379864693, + "learning_rate": 9.995928682265066e-05, + "loss": 0.1274, + "step": 6310 + }, + { + "epoch": 0.1633370377070788, + "grad_norm": 0.05558212473988533, + "learning_rate": 9.99576587835978e-05, + "loss": 0.1257, + "step": 6320 + }, + { + "epoch": 0.16359548238699506, + "grad_norm": 0.039261166006326675, + "learning_rate": 9.995599884821512e-05, + "loss": 0.1257, + "step": 6330 + }, + { + "epoch": 0.16385392706691132, + "grad_norm": 0.042636334896087646, + "learning_rate": 9.995430701801718e-05, + "loss": 0.1244, + "step": 6340 + }, + { + "epoch": 0.16411237174682758, + "grad_norm": 0.08441650122404099, + "learning_rate": 9.995258329454764e-05, + "loss": 0.1236, + "step": 6350 + }, + { + "epoch": 0.16437081642674387, + "grad_norm": 0.10627511143684387, + "learning_rate": 9.995082767937926e-05, + "loss": 0.1238, + "step": 6360 + }, + { + "epoch": 0.16462926110666012, + "grad_norm": 0.05446818843483925, + "learning_rate": 9.994904017411396e-05, + "loss": 0.1244, + "step": 6370 + }, + { + "epoch": 0.16488770578657638, + "grad_norm": 0.06856249272823334, + "learning_rate": 9.994722078038267e-05, + "loss": 0.1236, + "step": 6380 + }, + { + "epoch": 0.16514615046649264, + "grad_norm": 0.037659402936697006, + "learning_rate": 9.994536949984548e-05, + "loss": 0.1236, + "step": 6390 + }, + { + "epoch": 0.1654045951464089, + "grad_norm": 0.03551631048321724, + "learning_rate": 9.994348633419151e-05, + "loss": 0.1234, + "step": 6400 + }, + { + "epoch": 0.1656630398263252, + "grad_norm": 0.031569816172122955, + "learning_rate": 9.994157128513904e-05, + "loss": 0.1232, + "step": 6410 + }, + { + "epoch": 0.16592148450624145, + "grad_norm": 0.04272282123565674, + "learning_rate": 9.99396243544354e-05, + "loss": 0.1234, + "step": 6420 + }, + { + "epoch": 0.1661799291861577, + "grad_norm": 0.03308616206049919, + "learning_rate": 9.993764554385703e-05, + "loss": 0.124, + "step": 6430 + }, + { + "epoch": 0.16643837386607396, + "grad_norm": 0.052899401634931564, + "learning_rate": 9.993563485520947e-05, + "loss": 0.1238, + "step": 6440 + }, + { + "epoch": 0.16669681854599022, + "grad_norm": 0.035774342715740204, + "learning_rate": 9.99335922903273e-05, + "loss": 0.1234, + "step": 6450 + }, + { + "epoch": 0.1669552632259065, + "grad_norm": 0.050451263785362244, + "learning_rate": 9.993151785107424e-05, + "loss": 0.1235, + "step": 6460 + }, + { + "epoch": 0.16721370790582277, + "grad_norm": 0.08559366315603256, + "learning_rate": 9.992941153934305e-05, + "loss": 0.123, + "step": 6470 + }, + { + "epoch": 0.16747215258573903, + "grad_norm": 0.039069630205631256, + "learning_rate": 9.992727335705556e-05, + "loss": 0.1233, + "step": 6480 + }, + { + "epoch": 0.16773059726565528, + "grad_norm": 0.047745149582624435, + "learning_rate": 9.992510330616275e-05, + "loss": 0.1229, + "step": 6490 + }, + { + "epoch": 0.16798904194557154, + "grad_norm": 0.04739811271429062, + "learning_rate": 9.992290138864462e-05, + "loss": 0.1229, + "step": 6500 + }, + { + "epoch": 0.1682474866254878, + "grad_norm": 0.1085251048207283, + "learning_rate": 9.992066760651027e-05, + "loss": 0.123, + "step": 6510 + }, + { + "epoch": 0.1685059313054041, + "grad_norm": 0.03648116812109947, + "learning_rate": 9.991840196179782e-05, + "loss": 0.1219, + "step": 6520 + }, + { + "epoch": 0.16876437598532035, + "grad_norm": 0.046662673354148865, + "learning_rate": 9.991610445657456e-05, + "loss": 0.1227, + "step": 6530 + }, + { + "epoch": 0.1690228206652366, + "grad_norm": 0.04393250122666359, + "learning_rate": 9.991377509293675e-05, + "loss": 0.1229, + "step": 6540 + }, + { + "epoch": 0.16928126534515286, + "grad_norm": 0.03683726489543915, + "learning_rate": 9.991141387300982e-05, + "loss": 0.1224, + "step": 6550 + }, + { + "epoch": 0.16953971002506912, + "grad_norm": 0.10352063924074173, + "learning_rate": 9.990902079894814e-05, + "loss": 0.1225, + "step": 6560 + }, + { + "epoch": 0.1697981547049854, + "grad_norm": 0.07992726564407349, + "learning_rate": 9.990659587293529e-05, + "loss": 0.1235, + "step": 6570 + }, + { + "epoch": 0.17005659938490167, + "grad_norm": 0.03669346496462822, + "learning_rate": 9.990413909718381e-05, + "loss": 0.123, + "step": 6580 + }, + { + "epoch": 0.17031504406481793, + "grad_norm": 0.05301009118556976, + "learning_rate": 9.990165047393531e-05, + "loss": 0.1225, + "step": 6590 + }, + { + "epoch": 0.1705734887447342, + "grad_norm": 0.04507368430495262, + "learning_rate": 9.989913000546051e-05, + "loss": 0.1224, + "step": 6600 + }, + { + "epoch": 0.17083193342465045, + "grad_norm": 0.03619254752993584, + "learning_rate": 9.989657769405914e-05, + "loss": 0.1227, + "step": 6610 + }, + { + "epoch": 0.1710903781045667, + "grad_norm": 0.049373701214790344, + "learning_rate": 9.989399354206e-05, + "loss": 0.1223, + "step": 6620 + }, + { + "epoch": 0.171348822784483, + "grad_norm": 0.0460193045437336, + "learning_rate": 9.989137755182094e-05, + "loss": 0.1223, + "step": 6630 + }, + { + "epoch": 0.17160726746439925, + "grad_norm": 0.039726439863443375, + "learning_rate": 9.988872972572885e-05, + "loss": 0.1227, + "step": 6640 + }, + { + "epoch": 0.1718657121443155, + "grad_norm": 0.06655838340520859, + "learning_rate": 9.98860500661997e-05, + "loss": 0.1232, + "step": 6650 + }, + { + "epoch": 0.17212415682423177, + "grad_norm": 0.04201202094554901, + "learning_rate": 9.988333857567849e-05, + "loss": 0.1224, + "step": 6660 + }, + { + "epoch": 0.17238260150414803, + "grad_norm": 0.0356159470975399, + "learning_rate": 9.988059525663924e-05, + "loss": 0.1225, + "step": 6670 + }, + { + "epoch": 0.1726410461840643, + "grad_norm": 0.04664057865738869, + "learning_rate": 9.987782011158501e-05, + "loss": 0.1225, + "step": 6680 + }, + { + "epoch": 0.17289949086398057, + "grad_norm": 0.07116857916116714, + "learning_rate": 9.987501314304797e-05, + "loss": 0.1229, + "step": 6690 + }, + { + "epoch": 0.17315793554389683, + "grad_norm": 0.07656478136777878, + "learning_rate": 9.987217435358925e-05, + "loss": 0.1218, + "step": 6700 + }, + { + "epoch": 0.1734163802238131, + "grad_norm": 0.06074786186218262, + "learning_rate": 9.986930374579904e-05, + "loss": 0.1234, + "step": 6710 + }, + { + "epoch": 0.17367482490372935, + "grad_norm": 0.050050485879182816, + "learning_rate": 9.986640132229654e-05, + "loss": 0.123, + "step": 6720 + }, + { + "epoch": 0.17393326958364563, + "grad_norm": 0.039343323558568954, + "learning_rate": 9.986346708573004e-05, + "loss": 0.1228, + "step": 6730 + }, + { + "epoch": 0.1741917142635619, + "grad_norm": 0.03040175884962082, + "learning_rate": 9.98605010387768e-05, + "loss": 0.1224, + "step": 6740 + }, + { + "epoch": 0.17445015894347815, + "grad_norm": 0.0798264667391777, + "learning_rate": 9.98575031841431e-05, + "loss": 0.1223, + "step": 6750 + }, + { + "epoch": 0.1747086036233944, + "grad_norm": 0.032066017389297485, + "learning_rate": 9.985447352456434e-05, + "loss": 0.122, + "step": 6760 + }, + { + "epoch": 0.17496704830331067, + "grad_norm": 0.09105322510004044, + "learning_rate": 9.985141206280477e-05, + "loss": 0.1222, + "step": 6770 + }, + { + "epoch": 0.17522549298322693, + "grad_norm": 0.05528869107365608, + "learning_rate": 9.984831880165781e-05, + "loss": 0.1219, + "step": 6780 + }, + { + "epoch": 0.1754839376631432, + "grad_norm": 0.05631420761346817, + "learning_rate": 9.98451937439458e-05, + "loss": 0.1219, + "step": 6790 + }, + { + "epoch": 0.17574238234305947, + "grad_norm": 0.06409846246242523, + "learning_rate": 9.984203689252019e-05, + "loss": 0.1231, + "step": 6800 + }, + { + "epoch": 0.17600082702297573, + "grad_norm": 0.0698169395327568, + "learning_rate": 9.983884825026128e-05, + "loss": 0.1224, + "step": 6810 + }, + { + "epoch": 0.176259271702892, + "grad_norm": 0.09147924929857254, + "learning_rate": 9.983562782007858e-05, + "loss": 0.1219, + "step": 6820 + }, + { + "epoch": 0.17651771638280825, + "grad_norm": 0.035825274884700775, + "learning_rate": 9.983237560491043e-05, + "loss": 0.1219, + "step": 6830 + }, + { + "epoch": 0.17677616106272454, + "grad_norm": 0.056271541863679886, + "learning_rate": 9.982909160772429e-05, + "loss": 0.1228, + "step": 6840 + }, + { + "epoch": 0.1770346057426408, + "grad_norm": 0.04284139350056648, + "learning_rate": 9.98257758315165e-05, + "loss": 0.1216, + "step": 6850 + }, + { + "epoch": 0.17729305042255705, + "grad_norm": 0.03762742877006531, + "learning_rate": 9.982242827931253e-05, + "loss": 0.1227, + "step": 6860 + }, + { + "epoch": 0.1775514951024733, + "grad_norm": 0.03702646866440773, + "learning_rate": 9.981904895416676e-05, + "loss": 0.1219, + "step": 6870 + }, + { + "epoch": 0.17780993978238957, + "grad_norm": 0.07834722101688385, + "learning_rate": 9.981563785916259e-05, + "loss": 0.1228, + "step": 6880 + }, + { + "epoch": 0.17806838446230586, + "grad_norm": 0.036070816218853, + "learning_rate": 9.981219499741237e-05, + "loss": 0.1228, + "step": 6890 + }, + { + "epoch": 0.17832682914222212, + "grad_norm": 0.08754001557826996, + "learning_rate": 9.980872037205746e-05, + "loss": 0.1223, + "step": 6900 + }, + { + "epoch": 0.17858527382213837, + "grad_norm": 0.04456963390111923, + "learning_rate": 9.980521398626824e-05, + "loss": 0.1218, + "step": 6910 + }, + { + "epoch": 0.17884371850205463, + "grad_norm": 0.04133166745305061, + "learning_rate": 9.980167584324397e-05, + "loss": 0.1218, + "step": 6920 + }, + { + "epoch": 0.1791021631819709, + "grad_norm": 0.08027014881372452, + "learning_rate": 9.979810594621303e-05, + "loss": 0.1223, + "step": 6930 + }, + { + "epoch": 0.17936060786188715, + "grad_norm": 0.042150530964136124, + "learning_rate": 9.979450429843264e-05, + "loss": 0.1224, + "step": 6940 + }, + { + "epoch": 0.17961905254180344, + "grad_norm": 0.03939887136220932, + "learning_rate": 9.979087090318906e-05, + "loss": 0.1226, + "step": 6950 + }, + { + "epoch": 0.1798774972217197, + "grad_norm": 0.03785010054707527, + "learning_rate": 9.978720576379749e-05, + "loss": 0.1224, + "step": 6960 + }, + { + "epoch": 0.18013594190163595, + "grad_norm": 0.04294631630182266, + "learning_rate": 9.97835088836021e-05, + "loss": 0.1225, + "step": 6970 + }, + { + "epoch": 0.1803943865815522, + "grad_norm": 0.053891535848379135, + "learning_rate": 9.977978026597605e-05, + "loss": 0.1223, + "step": 6980 + }, + { + "epoch": 0.18065283126146847, + "grad_norm": 0.05554564669728279, + "learning_rate": 9.977601991432141e-05, + "loss": 0.1226, + "step": 6990 + }, + { + "epoch": 0.18091127594138476, + "grad_norm": 0.04925360530614853, + "learning_rate": 9.977222783206926e-05, + "loss": 0.1224, + "step": 7000 + }, + { + "epoch": 0.18116972062130102, + "grad_norm": 0.045889515429735184, + "learning_rate": 9.976840402267955e-05, + "loss": 0.1222, + "step": 7010 + }, + { + "epoch": 0.18142816530121728, + "grad_norm": 0.05322164297103882, + "learning_rate": 9.976454848964125e-05, + "loss": 0.122, + "step": 7020 + }, + { + "epoch": 0.18168660998113353, + "grad_norm": 0.062010057270526886, + "learning_rate": 9.976066123647225e-05, + "loss": 0.1218, + "step": 7030 + }, + { + "epoch": 0.1819450546610498, + "grad_norm": 0.048816077411174774, + "learning_rate": 9.975674226671943e-05, + "loss": 0.1223, + "step": 7040 + }, + { + "epoch": 0.18220349934096608, + "grad_norm": 0.04729127883911133, + "learning_rate": 9.975279158395851e-05, + "loss": 0.1224, + "step": 7050 + }, + { + "epoch": 0.18246194402088234, + "grad_norm": 0.04638315364718437, + "learning_rate": 9.974880919179424e-05, + "loss": 0.1215, + "step": 7060 + }, + { + "epoch": 0.1827203887007986, + "grad_norm": 0.058827292174100876, + "learning_rate": 9.974479509386022e-05, + "loss": 0.1215, + "step": 7070 + }, + { + "epoch": 0.18297883338071486, + "grad_norm": 0.05009935051202774, + "learning_rate": 9.974074929381907e-05, + "loss": 0.1219, + "step": 7080 + }, + { + "epoch": 0.18323727806063111, + "grad_norm": 0.03476117178797722, + "learning_rate": 9.973667179536225e-05, + "loss": 0.1218, + "step": 7090 + }, + { + "epoch": 0.18349572274054737, + "grad_norm": 0.035353947430849075, + "learning_rate": 9.973256260221022e-05, + "loss": 0.1221, + "step": 7100 + }, + { + "epoch": 0.18375416742046366, + "grad_norm": 0.0336548276245594, + "learning_rate": 9.97284217181123e-05, + "loss": 0.1225, + "step": 7110 + }, + { + "epoch": 0.18401261210037992, + "grad_norm": 0.03782688453793526, + "learning_rate": 9.972424914684674e-05, + "loss": 0.1215, + "step": 7120 + }, + { + "epoch": 0.18427105678029618, + "grad_norm": 0.04295630007982254, + "learning_rate": 9.972004489222073e-05, + "loss": 0.122, + "step": 7130 + }, + { + "epoch": 0.18452950146021244, + "grad_norm": 0.03395915776491165, + "learning_rate": 9.971580895807036e-05, + "loss": 0.1214, + "step": 7140 + }, + { + "epoch": 0.1847879461401287, + "grad_norm": 0.06848350167274475, + "learning_rate": 9.971154134826055e-05, + "loss": 0.1224, + "step": 7150 + }, + { + "epoch": 0.18504639082004498, + "grad_norm": 0.04666189104318619, + "learning_rate": 9.970724206668525e-05, + "loss": 0.1222, + "step": 7160 + }, + { + "epoch": 0.18530483549996124, + "grad_norm": 0.03775160014629364, + "learning_rate": 9.970291111726719e-05, + "loss": 0.1224, + "step": 7170 + }, + { + "epoch": 0.1855632801798775, + "grad_norm": 0.07459957152605057, + "learning_rate": 9.96985485039581e-05, + "loss": 0.1224, + "step": 7180 + }, + { + "epoch": 0.18582172485979376, + "grad_norm": 0.05307992547750473, + "learning_rate": 9.969415423073852e-05, + "loss": 0.1221, + "step": 7190 + }, + { + "epoch": 0.18608016953971002, + "grad_norm": 0.03674167022109032, + "learning_rate": 9.96897283016179e-05, + "loss": 0.1219, + "step": 7200 + }, + { + "epoch": 0.18633861421962628, + "grad_norm": 0.03254202753305435, + "learning_rate": 9.968527072063455e-05, + "loss": 0.1228, + "step": 7210 + }, + { + "epoch": 0.18659705889954256, + "grad_norm": 0.07876992970705032, + "learning_rate": 9.968078149185577e-05, + "loss": 0.1228, + "step": 7220 + }, + { + "epoch": 0.18685550357945882, + "grad_norm": 0.0633842870593071, + "learning_rate": 9.96762606193776e-05, + "loss": 0.1219, + "step": 7230 + }, + { + "epoch": 0.18711394825937508, + "grad_norm": 0.047247692942619324, + "learning_rate": 9.967170810732504e-05, + "loss": 0.1219, + "step": 7240 + }, + { + "epoch": 0.18737239293929134, + "grad_norm": 0.047447435557842255, + "learning_rate": 9.96671239598519e-05, + "loss": 0.1219, + "step": 7250 + }, + { + "epoch": 0.1876308376192076, + "grad_norm": 0.03796818479895592, + "learning_rate": 9.96625081811409e-05, + "loss": 0.1231, + "step": 7260 + }, + { + "epoch": 0.18788928229912388, + "grad_norm": 0.06365779042243958, + "learning_rate": 9.965786077540355e-05, + "loss": 0.1225, + "step": 7270 + }, + { + "epoch": 0.18814772697904014, + "grad_norm": 0.05357684940099716, + "learning_rate": 9.965318174688037e-05, + "loss": 0.1219, + "step": 7280 + }, + { + "epoch": 0.1884061716589564, + "grad_norm": 0.054750505834817886, + "learning_rate": 9.964847109984054e-05, + "loss": 0.1226, + "step": 7290 + }, + { + "epoch": 0.18866461633887266, + "grad_norm": 0.049370910972356796, + "learning_rate": 9.964372883858227e-05, + "loss": 0.1221, + "step": 7300 + }, + { + "epoch": 0.18892306101878892, + "grad_norm": 0.08580011874437332, + "learning_rate": 9.963895496743243e-05, + "loss": 0.122, + "step": 7310 + }, + { + "epoch": 0.1891815056987052, + "grad_norm": 0.046051714569330215, + "learning_rate": 9.963414949074692e-05, + "loss": 0.1225, + "step": 7320 + }, + { + "epoch": 0.18943995037862146, + "grad_norm": 0.05634588375687599, + "learning_rate": 9.962931241291034e-05, + "loss": 0.1217, + "step": 7330 + }, + { + "epoch": 0.18969839505853772, + "grad_norm": 0.03623292222619057, + "learning_rate": 9.962444373833618e-05, + "loss": 0.1215, + "step": 7340 + }, + { + "epoch": 0.18995683973845398, + "grad_norm": 0.054762016981840134, + "learning_rate": 9.961954347146676e-05, + "loss": 0.1212, + "step": 7350 + }, + { + "epoch": 0.19021528441837024, + "grad_norm": 0.04987376555800438, + "learning_rate": 9.961461161677323e-05, + "loss": 0.1227, + "step": 7360 + }, + { + "epoch": 0.1904737290982865, + "grad_norm": 0.03643788769841194, + "learning_rate": 9.960964817875553e-05, + "loss": 0.1223, + "step": 7370 + }, + { + "epoch": 0.19073217377820278, + "grad_norm": 0.057731013745069504, + "learning_rate": 9.960465316194248e-05, + "loss": 0.1218, + "step": 7380 + }, + { + "epoch": 0.19099061845811904, + "grad_norm": 0.03448064252734184, + "learning_rate": 9.959962657089159e-05, + "loss": 0.1224, + "step": 7390 + }, + { + "epoch": 0.1912490631380353, + "grad_norm": 0.054557546973228455, + "learning_rate": 9.959456841018933e-05, + "loss": 0.1228, + "step": 7400 + }, + { + "epoch": 0.19150750781795156, + "grad_norm": 0.0462167002260685, + "learning_rate": 9.958947868445088e-05, + "loss": 0.1219, + "step": 7410 + }, + { + "epoch": 0.19176595249786782, + "grad_norm": 0.04575273022055626, + "learning_rate": 9.958435739832024e-05, + "loss": 0.1223, + "step": 7420 + }, + { + "epoch": 0.1920243971777841, + "grad_norm": 0.07326149195432663, + "learning_rate": 9.957920455647021e-05, + "loss": 0.1214, + "step": 7430 + }, + { + "epoch": 0.19228284185770037, + "grad_norm": 0.04774421826004982, + "learning_rate": 9.95740201636024e-05, + "loss": 0.1228, + "step": 7440 + }, + { + "epoch": 0.19254128653761662, + "grad_norm": 0.05012252926826477, + "learning_rate": 9.956880422444719e-05, + "loss": 0.1207, + "step": 7450 + }, + { + "epoch": 0.19279973121753288, + "grad_norm": 0.03635809198021889, + "learning_rate": 9.956355674376374e-05, + "loss": 0.1223, + "step": 7460 + }, + { + "epoch": 0.19305817589744914, + "grad_norm": 0.03875358775258064, + "learning_rate": 9.955827772634001e-05, + "loss": 0.1219, + "step": 7470 + }, + { + "epoch": 0.19331662057736543, + "grad_norm": 0.0400257408618927, + "learning_rate": 9.95529671769927e-05, + "loss": 0.1218, + "step": 7480 + }, + { + "epoch": 0.1935750652572817, + "grad_norm": 0.046327702701091766, + "learning_rate": 9.954762510056732e-05, + "loss": 0.1232, + "step": 7490 + }, + { + "epoch": 0.19383350993719795, + "grad_norm": 0.04094179719686508, + "learning_rate": 9.954225150193812e-05, + "loss": 0.1221, + "step": 7500 + }, + { + "epoch": 0.1940919546171142, + "grad_norm": 0.0403272919356823, + "learning_rate": 9.95368463860081e-05, + "loss": 0.1223, + "step": 7510 + }, + { + "epoch": 0.19435039929703046, + "grad_norm": 0.08424597233533859, + "learning_rate": 9.953140975770907e-05, + "loss": 0.1225, + "step": 7520 + }, + { + "epoch": 0.19460884397694672, + "grad_norm": 0.04312043637037277, + "learning_rate": 9.952594162200153e-05, + "loss": 0.1216, + "step": 7530 + }, + { + "epoch": 0.194867288656863, + "grad_norm": 0.03296227753162384, + "learning_rate": 9.952044198387477e-05, + "loss": 0.1231, + "step": 7540 + }, + { + "epoch": 0.19512573333677927, + "grad_norm": 0.037525661289691925, + "learning_rate": 9.951491084834683e-05, + "loss": 0.1219, + "step": 7550 + }, + { + "epoch": 0.19538417801669553, + "grad_norm": 0.056468479335308075, + "learning_rate": 9.950934822046445e-05, + "loss": 0.1224, + "step": 7560 + }, + { + "epoch": 0.19564262269661178, + "grad_norm": 0.05679589882493019, + "learning_rate": 9.950375410530313e-05, + "loss": 0.1211, + "step": 7570 + }, + { + "epoch": 0.19590106737652804, + "grad_norm": 0.04780018702149391, + "learning_rate": 9.949812850796707e-05, + "loss": 0.1223, + "step": 7580 + }, + { + "epoch": 0.19615951205644433, + "grad_norm": 0.054278817027807236, + "learning_rate": 9.949247143358926e-05, + "loss": 0.1221, + "step": 7590 + }, + { + "epoch": 0.1964179567363606, + "grad_norm": 0.050607286393642426, + "learning_rate": 9.948678288733135e-05, + "loss": 0.1226, + "step": 7600 + }, + { + "epoch": 0.19667640141627685, + "grad_norm": 0.06641717255115509, + "learning_rate": 9.948106287438372e-05, + "loss": 0.1224, + "step": 7610 + }, + { + "epoch": 0.1969348460961931, + "grad_norm": 0.07509106397628784, + "learning_rate": 9.947531139996548e-05, + "loss": 0.1226, + "step": 7620 + }, + { + "epoch": 0.19719329077610936, + "grad_norm": 0.038541220128536224, + "learning_rate": 9.946952846932445e-05, + "loss": 0.1217, + "step": 7630 + }, + { + "epoch": 0.19745173545602565, + "grad_norm": 0.047377683222293854, + "learning_rate": 9.94637140877371e-05, + "loss": 0.122, + "step": 7640 + }, + { + "epoch": 0.1977101801359419, + "grad_norm": 0.0904233455657959, + "learning_rate": 9.945786826050862e-05, + "loss": 0.1223, + "step": 7650 + }, + { + "epoch": 0.19796862481585817, + "grad_norm": 0.03518153727054596, + "learning_rate": 9.945199099297295e-05, + "loss": 0.1221, + "step": 7660 + }, + { + "epoch": 0.19822706949577443, + "grad_norm": 0.073616623878479, + "learning_rate": 9.944608229049265e-05, + "loss": 0.1218, + "step": 7670 + }, + { + "epoch": 0.19848551417569069, + "grad_norm": 0.08045992255210876, + "learning_rate": 9.944014215845898e-05, + "loss": 0.1222, + "step": 7680 + }, + { + "epoch": 0.19874395885560694, + "grad_norm": 0.05741747468709946, + "learning_rate": 9.943417060229188e-05, + "loss": 0.1217, + "step": 7690 + }, + { + "epoch": 0.19900240353552323, + "grad_norm": 0.10561402142047882, + "learning_rate": 9.942816762743999e-05, + "loss": 0.1224, + "step": 7700 + }, + { + "epoch": 0.1992608482154395, + "grad_norm": 0.048957645893096924, + "learning_rate": 9.942213323938052e-05, + "loss": 0.1227, + "step": 7710 + }, + { + "epoch": 0.19951929289535575, + "grad_norm": 0.06968902796506882, + "learning_rate": 9.941606744361948e-05, + "loss": 0.1225, + "step": 7720 + }, + { + "epoch": 0.199777737575272, + "grad_norm": 0.0291022676974535, + "learning_rate": 9.940997024569146e-05, + "loss": 0.1222, + "step": 7730 + }, + { + "epoch": 0.20003618225518827, + "grad_norm": 0.04826439544558525, + "learning_rate": 9.940384165115968e-05, + "loss": 0.1225, + "step": 7740 + }, + { + "epoch": 0.20029462693510455, + "grad_norm": 0.03650587052106857, + "learning_rate": 9.939768166561605e-05, + "loss": 0.1225, + "step": 7750 + }, + { + "epoch": 0.2005530716150208, + "grad_norm": 0.041354019194841385, + "learning_rate": 9.939149029468111e-05, + "loss": 0.1213, + "step": 7760 + }, + { + "epoch": 0.20081151629493707, + "grad_norm": 0.050018373876810074, + "learning_rate": 9.938526754400405e-05, + "loss": 0.1226, + "step": 7770 + }, + { + "epoch": 0.20106996097485333, + "grad_norm": 0.02911979705095291, + "learning_rate": 9.937901341926267e-05, + "loss": 0.1224, + "step": 7780 + }, + { + "epoch": 0.2013284056547696, + "grad_norm": 0.05774056538939476, + "learning_rate": 9.937272792616343e-05, + "loss": 0.1223, + "step": 7790 + }, + { + "epoch": 0.20158685033468587, + "grad_norm": 0.04959222301840782, + "learning_rate": 9.936641107044133e-05, + "loss": 0.1223, + "step": 7800 + }, + { + "epoch": 0.20184529501460213, + "grad_norm": 0.08697067946195602, + "learning_rate": 9.936006285786011e-05, + "loss": 0.1222, + "step": 7810 + }, + { + "epoch": 0.2021037396945184, + "grad_norm": 0.08765213936567307, + "learning_rate": 9.935368329421199e-05, + "loss": 0.122, + "step": 7820 + }, + { + "epoch": 0.20236218437443465, + "grad_norm": 0.040088582783937454, + "learning_rate": 9.93472723853179e-05, + "loss": 0.123, + "step": 7830 + }, + { + "epoch": 0.2026206290543509, + "grad_norm": 0.046197786927223206, + "learning_rate": 9.934083013702731e-05, + "loss": 0.1227, + "step": 7840 + }, + { + "epoch": 0.20287907373426717, + "grad_norm": 0.06195647642016411, + "learning_rate": 9.933435655521833e-05, + "loss": 0.1218, + "step": 7850 + }, + { + "epoch": 0.20313751841418345, + "grad_norm": 0.04491593316197395, + "learning_rate": 9.93278516457976e-05, + "loss": 0.1215, + "step": 7860 + }, + { + "epoch": 0.2033959630940997, + "grad_norm": 0.04107086732983589, + "learning_rate": 9.932131541470043e-05, + "loss": 0.1222, + "step": 7870 + }, + { + "epoch": 0.20365440777401597, + "grad_norm": 0.0497223362326622, + "learning_rate": 9.931474786789059e-05, + "loss": 0.1215, + "step": 7880 + }, + { + "epoch": 0.20391285245393223, + "grad_norm": 0.034956078976392746, + "learning_rate": 9.930814901136053e-05, + "loss": 0.1222, + "step": 7890 + }, + { + "epoch": 0.2041712971338485, + "grad_norm": 0.06142808869481087, + "learning_rate": 9.930151885113121e-05, + "loss": 0.1218, + "step": 7900 + }, + { + "epoch": 0.20442974181376478, + "grad_norm": 0.041274912655353546, + "learning_rate": 9.929485739325222e-05, + "loss": 0.1219, + "step": 7910 + }, + { + "epoch": 0.20468818649368103, + "grad_norm": 0.07860057055950165, + "learning_rate": 9.928816464380159e-05, + "loss": 0.1227, + "step": 7920 + }, + { + "epoch": 0.2049466311735973, + "grad_norm": 0.028095217421650887, + "learning_rate": 9.928144060888602e-05, + "loss": 0.1217, + "step": 7930 + }, + { + "epoch": 0.20520507585351355, + "grad_norm": 0.03552401438355446, + "learning_rate": 9.927468529464068e-05, + "loss": 0.1216, + "step": 7940 + }, + { + "epoch": 0.2054635205334298, + "grad_norm": 0.03810407221317291, + "learning_rate": 9.926789870722928e-05, + "loss": 0.1223, + "step": 7950 + }, + { + "epoch": 0.20572196521334607, + "grad_norm": 0.031171616166830063, + "learning_rate": 9.926108085284414e-05, + "loss": 0.1226, + "step": 7960 + }, + { + "epoch": 0.20598040989326236, + "grad_norm": 0.0298130065202713, + "learning_rate": 9.925423173770605e-05, + "loss": 0.1224, + "step": 7970 + }, + { + "epoch": 0.20623885457317861, + "grad_norm": 0.046287305653095245, + "learning_rate": 9.924735136806429e-05, + "loss": 0.1221, + "step": 7980 + }, + { + "epoch": 0.20649729925309487, + "grad_norm": 0.08644425123929977, + "learning_rate": 9.924043975019672e-05, + "loss": 0.1225, + "step": 7990 + }, + { + "epoch": 0.20675574393301113, + "grad_norm": 0.033605366945266724, + "learning_rate": 9.923349689040972e-05, + "loss": 0.122, + "step": 8000 + }, + { + "epoch": 0.2070141886129274, + "grad_norm": 0.031145507469773293, + "learning_rate": 9.922652279503812e-05, + "loss": 0.122, + "step": 8010 + }, + { + "epoch": 0.20727263329284368, + "grad_norm": 0.03897827863693237, + "learning_rate": 9.921951747044526e-05, + "loss": 0.1222, + "step": 8020 + }, + { + "epoch": 0.20753107797275994, + "grad_norm": 0.050858255475759506, + "learning_rate": 9.921248092302302e-05, + "loss": 0.1217, + "step": 8030 + }, + { + "epoch": 0.2077895226526762, + "grad_norm": 0.0516897514462471, + "learning_rate": 9.920541315919174e-05, + "loss": 0.1218, + "step": 8040 + }, + { + "epoch": 0.20804796733259245, + "grad_norm": 0.05106513202190399, + "learning_rate": 9.919831418540023e-05, + "loss": 0.1216, + "step": 8050 + }, + { + "epoch": 0.2083064120125087, + "grad_norm": 0.033761270344257355, + "learning_rate": 9.919118400812577e-05, + "loss": 0.1219, + "step": 8060 + }, + { + "epoch": 0.208564856692425, + "grad_norm": 0.04785694181919098, + "learning_rate": 9.918402263387416e-05, + "loss": 0.1215, + "step": 8070 + }, + { + "epoch": 0.20882330137234126, + "grad_norm": 0.06645635515451431, + "learning_rate": 9.917683006917961e-05, + "loss": 0.1225, + "step": 8080 + }, + { + "epoch": 0.20908174605225752, + "grad_norm": 0.06557898223400116, + "learning_rate": 9.916960632060486e-05, + "loss": 0.1221, + "step": 8090 + }, + { + "epoch": 0.20934019073217378, + "grad_norm": 0.03989725932478905, + "learning_rate": 9.916235139474102e-05, + "loss": 0.1223, + "step": 8100 + }, + { + "epoch": 0.20959863541209003, + "grad_norm": 0.0319192111492157, + "learning_rate": 9.915506529820766e-05, + "loss": 0.1225, + "step": 8110 + }, + { + "epoch": 0.2098570800920063, + "grad_norm": 0.053691405802965164, + "learning_rate": 9.914774803765288e-05, + "loss": 0.1218, + "step": 8120 + }, + { + "epoch": 0.21011552477192258, + "grad_norm": 0.031422343105077744, + "learning_rate": 9.914039961975308e-05, + "loss": 0.1219, + "step": 8130 + }, + { + "epoch": 0.21037396945183884, + "grad_norm": 0.028451232239603996, + "learning_rate": 9.913302005121321e-05, + "loss": 0.1217, + "step": 8140 + }, + { + "epoch": 0.2106324141317551, + "grad_norm": 0.03724765405058861, + "learning_rate": 9.912560933876657e-05, + "loss": 0.1222, + "step": 8150 + }, + { + "epoch": 0.21089085881167136, + "grad_norm": 0.03777289763092995, + "learning_rate": 9.911816748917492e-05, + "loss": 0.1215, + "step": 8160 + }, + { + "epoch": 0.21114930349158761, + "grad_norm": 0.051850415766239166, + "learning_rate": 9.911069450922838e-05, + "loss": 0.123, + "step": 8170 + }, + { + "epoch": 0.2114077481715039, + "grad_norm": 0.07588657736778259, + "learning_rate": 9.910319040574552e-05, + "loss": 0.1225, + "step": 8180 + }, + { + "epoch": 0.21166619285142016, + "grad_norm": 0.03968065604567528, + "learning_rate": 9.909565518557326e-05, + "loss": 0.1223, + "step": 8190 + }, + { + "epoch": 0.21192463753133642, + "grad_norm": 0.04995469003915787, + "learning_rate": 9.9088088855587e-05, + "loss": 0.1222, + "step": 8200 + }, + { + "epoch": 0.21218308221125268, + "grad_norm": 0.03428976237773895, + "learning_rate": 9.90804914226904e-05, + "loss": 0.1217, + "step": 8210 + }, + { + "epoch": 0.21244152689116894, + "grad_norm": 0.04541048780083656, + "learning_rate": 9.907286289381563e-05, + "loss": 0.122, + "step": 8220 + }, + { + "epoch": 0.21269997157108522, + "grad_norm": 0.0749245434999466, + "learning_rate": 9.906520327592313e-05, + "loss": 0.1217, + "step": 8230 + }, + { + "epoch": 0.21295841625100148, + "grad_norm": 0.04733029007911682, + "learning_rate": 9.905751257600175e-05, + "loss": 0.1221, + "step": 8240 + }, + { + "epoch": 0.21321686093091774, + "grad_norm": 0.04239179566502571, + "learning_rate": 9.90497908010687e-05, + "loss": 0.1219, + "step": 8250 + }, + { + "epoch": 0.213475305610834, + "grad_norm": 0.06630129367113113, + "learning_rate": 9.904203795816957e-05, + "loss": 0.1229, + "step": 8260 + }, + { + "epoch": 0.21373375029075026, + "grad_norm": 0.032452911138534546, + "learning_rate": 9.90342540543782e-05, + "loss": 0.1218, + "step": 8270 + }, + { + "epoch": 0.21399219497066652, + "grad_norm": 0.035510946065187454, + "learning_rate": 9.902643909679692e-05, + "loss": 0.122, + "step": 8280 + }, + { + "epoch": 0.2142506396505828, + "grad_norm": 0.03665363788604736, + "learning_rate": 9.901859309255623e-05, + "loss": 0.1223, + "step": 8290 + }, + { + "epoch": 0.21450908433049906, + "grad_norm": 0.049987222999334335, + "learning_rate": 9.90107160488151e-05, + "loss": 0.1219, + "step": 8300 + }, + { + "epoch": 0.21476752901041532, + "grad_norm": 0.040580518543720245, + "learning_rate": 9.900280797276076e-05, + "loss": 0.1217, + "step": 8310 + }, + { + "epoch": 0.21502597369033158, + "grad_norm": 0.06626293063163757, + "learning_rate": 9.899486887160872e-05, + "loss": 0.1224, + "step": 8320 + }, + { + "epoch": 0.21528441837024784, + "grad_norm": 0.04808501526713371, + "learning_rate": 9.898689875260285e-05, + "loss": 0.1228, + "step": 8330 + }, + { + "epoch": 0.21554286305016412, + "grad_norm": 0.04514721781015396, + "learning_rate": 9.89788976230153e-05, + "loss": 0.1222, + "step": 8340 + }, + { + "epoch": 0.21580130773008038, + "grad_norm": 0.035548217594623566, + "learning_rate": 9.897086549014656e-05, + "loss": 0.1224, + "step": 8350 + }, + { + "epoch": 0.21605975240999664, + "grad_norm": 0.03878345713019371, + "learning_rate": 9.896280236132531e-05, + "loss": 0.1221, + "step": 8360 + }, + { + "epoch": 0.2163181970899129, + "grad_norm": 0.05042710527777672, + "learning_rate": 9.895470824390862e-05, + "loss": 0.1221, + "step": 8370 + }, + { + "epoch": 0.21657664176982916, + "grad_norm": 0.07641187310218811, + "learning_rate": 9.894658314528173e-05, + "loss": 0.122, + "step": 8380 + }, + { + "epoch": 0.21683508644974545, + "grad_norm": 0.027344530448317528, + "learning_rate": 9.893842707285827e-05, + "loss": 0.122, + "step": 8390 + }, + { + "epoch": 0.2170935311296617, + "grad_norm": 0.040297817438840866, + "learning_rate": 9.893024003408002e-05, + "loss": 0.1226, + "step": 8400 + }, + { + "epoch": 0.21735197580957796, + "grad_norm": 0.051615677773952484, + "learning_rate": 9.892202203641707e-05, + "loss": 0.1212, + "step": 8410 + }, + { + "epoch": 0.21761042048949422, + "grad_norm": 0.02831992693245411, + "learning_rate": 9.891377308736778e-05, + "loss": 0.1224, + "step": 8420 + }, + { + "epoch": 0.21786886516941048, + "grad_norm": 0.0979565754532814, + "learning_rate": 9.890549319445865e-05, + "loss": 0.1222, + "step": 8430 + }, + { + "epoch": 0.21812730984932674, + "grad_norm": 0.08072038739919662, + "learning_rate": 9.889718236524454e-05, + "loss": 0.1226, + "step": 8440 + }, + { + "epoch": 0.21838575452924303, + "grad_norm": 0.028141306713223457, + "learning_rate": 9.888884060730846e-05, + "loss": 0.1214, + "step": 8450 + }, + { + "epoch": 0.21864419920915928, + "grad_norm": 0.05049804225564003, + "learning_rate": 9.888046792826164e-05, + "loss": 0.1226, + "step": 8460 + }, + { + "epoch": 0.21890264388907554, + "grad_norm": 0.029080381616950035, + "learning_rate": 9.887206433574359e-05, + "loss": 0.1221, + "step": 8470 + }, + { + "epoch": 0.2191610885689918, + "grad_norm": 0.03592851012945175, + "learning_rate": 9.886362983742193e-05, + "loss": 0.1215, + "step": 8480 + }, + { + "epoch": 0.21941953324890806, + "grad_norm": 0.03733326494693756, + "learning_rate": 9.885516444099255e-05, + "loss": 0.1221, + "step": 8490 + }, + { + "epoch": 0.21967797792882435, + "grad_norm": 0.05539386719465256, + "learning_rate": 9.88466681541795e-05, + "loss": 0.1223, + "step": 8500 + }, + { + "epoch": 0.2199364226087406, + "grad_norm": 0.037642624229192734, + "learning_rate": 9.883814098473505e-05, + "loss": 0.1222, + "step": 8510 + }, + { + "epoch": 0.22019486728865686, + "grad_norm": 0.05078204348683357, + "learning_rate": 9.88295829404396e-05, + "loss": 0.1228, + "step": 8520 + }, + { + "epoch": 0.22045331196857312, + "grad_norm": 0.049741730093955994, + "learning_rate": 9.882099402910177e-05, + "loss": 0.1218, + "step": 8530 + }, + { + "epoch": 0.22071175664848938, + "grad_norm": 0.05014443397521973, + "learning_rate": 9.88123742585583e-05, + "loss": 0.1221, + "step": 8540 + }, + { + "epoch": 0.22097020132840564, + "grad_norm": 0.030639301985502243, + "learning_rate": 9.88037236366741e-05, + "loss": 0.1225, + "step": 8550 + }, + { + "epoch": 0.22122864600832193, + "grad_norm": 0.07268647104501724, + "learning_rate": 9.879504217134226e-05, + "loss": 0.1223, + "step": 8560 + }, + { + "epoch": 0.2214870906882382, + "grad_norm": 0.03468889743089676, + "learning_rate": 9.878632987048395e-05, + "loss": 0.1224, + "step": 8570 + }, + { + "epoch": 0.22174553536815444, + "grad_norm": 0.06398855149745941, + "learning_rate": 9.877758674204854e-05, + "loss": 0.1222, + "step": 8580 + }, + { + "epoch": 0.2220039800480707, + "grad_norm": 0.03277672827243805, + "learning_rate": 9.87688127940135e-05, + "loss": 0.1223, + "step": 8590 + }, + { + "epoch": 0.22226242472798696, + "grad_norm": 0.060283638536930084, + "learning_rate": 9.876000803438443e-05, + "loss": 0.122, + "step": 8600 + }, + { + "epoch": 0.22252086940790325, + "grad_norm": 0.048453912138938904, + "learning_rate": 9.8751172471195e-05, + "loss": 0.1216, + "step": 8610 + }, + { + "epoch": 0.2227793140878195, + "grad_norm": 0.046268031001091, + "learning_rate": 9.874230611250707e-05, + "loss": 0.122, + "step": 8620 + }, + { + "epoch": 0.22303775876773577, + "grad_norm": 0.03339788690209389, + "learning_rate": 9.873340896641052e-05, + "loss": 0.1223, + "step": 8630 + }, + { + "epoch": 0.22329620344765203, + "grad_norm": 0.03848106041550636, + "learning_rate": 9.872448104102334e-05, + "loss": 0.1222, + "step": 8640 + }, + { + "epoch": 0.22355464812756828, + "grad_norm": 0.06332167983055115, + "learning_rate": 9.871552234449167e-05, + "loss": 0.1224, + "step": 8650 + }, + { + "epoch": 0.22381309280748457, + "grad_norm": 0.03136025741696358, + "learning_rate": 9.87065328849896e-05, + "loss": 0.1222, + "step": 8660 + }, + { + "epoch": 0.22407153748740083, + "grad_norm": 0.04455028101801872, + "learning_rate": 9.869751267071939e-05, + "loss": 0.1223, + "step": 8670 + }, + { + "epoch": 0.2243299821673171, + "grad_norm": 0.02994762733578682, + "learning_rate": 9.868846170991133e-05, + "loss": 0.1223, + "step": 8680 + }, + { + "epoch": 0.22458842684723335, + "grad_norm": 0.03726351633667946, + "learning_rate": 9.867938001082375e-05, + "loss": 0.1221, + "step": 8690 + }, + { + "epoch": 0.2248468715271496, + "grad_norm": 0.056487876921892166, + "learning_rate": 9.867026758174309e-05, + "loss": 0.1226, + "step": 8700 + }, + { + "epoch": 0.22510531620706586, + "grad_norm": 0.03221200034022331, + "learning_rate": 9.866112443098374e-05, + "loss": 0.122, + "step": 8710 + }, + { + "epoch": 0.22536376088698215, + "grad_norm": 0.02844277210533619, + "learning_rate": 9.865195056688818e-05, + "loss": 0.1225, + "step": 8720 + }, + { + "epoch": 0.2256222055668984, + "grad_norm": 0.033517591655254364, + "learning_rate": 9.86427459978269e-05, + "loss": 0.1216, + "step": 8730 + }, + { + "epoch": 0.22588065024681467, + "grad_norm": 0.05982370674610138, + "learning_rate": 9.863351073219837e-05, + "loss": 0.1231, + "step": 8740 + }, + { + "epoch": 0.22613909492673093, + "grad_norm": 0.07647376507520676, + "learning_rate": 9.862424477842916e-05, + "loss": 0.122, + "step": 8750 + }, + { + "epoch": 0.22639753960664719, + "grad_norm": 0.05635329335927963, + "learning_rate": 9.861494814497372e-05, + "loss": 0.1219, + "step": 8760 + }, + { + "epoch": 0.22665598428656347, + "grad_norm": 0.20624591410160065, + "learning_rate": 9.860562084031458e-05, + "loss": 0.1217, + "step": 8770 + }, + { + "epoch": 0.22691442896647973, + "grad_norm": 0.035691998898983, + "learning_rate": 9.859626287296222e-05, + "loss": 0.1214, + "step": 8780 + }, + { + "epoch": 0.227172873646396, + "grad_norm": 0.04513737931847572, + "learning_rate": 9.858687425145512e-05, + "loss": 0.1228, + "step": 8790 + }, + { + "epoch": 0.22743131832631225, + "grad_norm": 0.028885459527373314, + "learning_rate": 9.85774549843597e-05, + "loss": 0.1219, + "step": 8800 + }, + { + "epoch": 0.2276897630062285, + "grad_norm": 0.038439638912677765, + "learning_rate": 9.856800508027037e-05, + "loss": 0.1227, + "step": 8810 + }, + { + "epoch": 0.2279482076861448, + "grad_norm": 0.039657995104789734, + "learning_rate": 9.855852454780951e-05, + "loss": 0.1223, + "step": 8820 + }, + { + "epoch": 0.22820665236606105, + "grad_norm": 0.05446017533540726, + "learning_rate": 9.854901339562739e-05, + "loss": 0.1216, + "step": 8830 + }, + { + "epoch": 0.2284650970459773, + "grad_norm": 0.07781542837619781, + "learning_rate": 9.85394716324022e-05, + "loss": 0.1225, + "step": 8840 + }, + { + "epoch": 0.22872354172589357, + "grad_norm": 0.05162305384874344, + "learning_rate": 9.852989926684022e-05, + "loss": 0.1218, + "step": 8850 + }, + { + "epoch": 0.22898198640580983, + "grad_norm": 0.03983375057578087, + "learning_rate": 9.852029630767543e-05, + "loss": 0.122, + "step": 8860 + }, + { + "epoch": 0.2292404310857261, + "grad_norm": 0.04427746683359146, + "learning_rate": 9.851066276366989e-05, + "loss": 0.1221, + "step": 8870 + }, + { + "epoch": 0.22949887576564237, + "grad_norm": 0.06106181815266609, + "learning_rate": 9.85009986436135e-05, + "loss": 0.1215, + "step": 8880 + }, + { + "epoch": 0.22975732044555863, + "grad_norm": 0.04746083542704582, + "learning_rate": 9.849130395632406e-05, + "loss": 0.1224, + "step": 8890 + }, + { + "epoch": 0.2300157651254749, + "grad_norm": 0.04589366167783737, + "learning_rate": 9.84815787106473e-05, + "loss": 0.1216, + "step": 8900 + }, + { + "epoch": 0.23027420980539115, + "grad_norm": 0.03772230073809624, + "learning_rate": 9.847182291545678e-05, + "loss": 0.1226, + "step": 8910 + }, + { + "epoch": 0.2305326544853074, + "grad_norm": 0.03279818594455719, + "learning_rate": 9.846203657965393e-05, + "loss": 0.1221, + "step": 8920 + }, + { + "epoch": 0.2307910991652237, + "grad_norm": 0.031318992376327515, + "learning_rate": 9.845221971216811e-05, + "loss": 0.1222, + "step": 8930 + }, + { + "epoch": 0.23104954384513995, + "grad_norm": 0.036053042858839035, + "learning_rate": 9.84423723219565e-05, + "loss": 0.1218, + "step": 8940 + }, + { + "epoch": 0.2313079885250562, + "grad_norm": 0.063555508852005, + "learning_rate": 9.84324944180041e-05, + "loss": 0.1224, + "step": 8950 + }, + { + "epoch": 0.23156643320497247, + "grad_norm": 0.04469458386301994, + "learning_rate": 9.842258600932382e-05, + "loss": 0.1226, + "step": 8960 + }, + { + "epoch": 0.23182487788488873, + "grad_norm": 0.055054910480976105, + "learning_rate": 9.841264710495632e-05, + "loss": 0.1226, + "step": 8970 + }, + { + "epoch": 0.23208332256480502, + "grad_norm": 0.051656804978847504, + "learning_rate": 9.840267771397019e-05, + "loss": 0.122, + "step": 8980 + }, + { + "epoch": 0.23234176724472128, + "grad_norm": 0.09327711910009384, + "learning_rate": 9.839267784546172e-05, + "loss": 0.122, + "step": 8990 + }, + { + "epoch": 0.23260021192463753, + "grad_norm": 0.04395702853798866, + "learning_rate": 9.838264750855508e-05, + "loss": 0.1219, + "step": 9000 + }, + { + "epoch": 0.2328586566045538, + "grad_norm": 0.0671134814620018, + "learning_rate": 9.837258671240224e-05, + "loss": 0.1223, + "step": 9010 + }, + { + "epoch": 0.23311710128447005, + "grad_norm": 0.03859454765915871, + "learning_rate": 9.836249546618292e-05, + "loss": 0.1221, + "step": 9020 + }, + { + "epoch": 0.2333755459643863, + "grad_norm": 0.035253580659627914, + "learning_rate": 9.83523737791047e-05, + "loss": 0.1223, + "step": 9030 + }, + { + "epoch": 0.2336339906443026, + "grad_norm": 0.06568124890327454, + "learning_rate": 9.834222166040283e-05, + "loss": 0.1217, + "step": 9040 + }, + { + "epoch": 0.23389243532421886, + "grad_norm": 0.054499249905347824, + "learning_rate": 9.833203911934041e-05, + "loss": 0.1219, + "step": 9050 + }, + { + "epoch": 0.23415088000413511, + "grad_norm": 0.03849209100008011, + "learning_rate": 9.832182616520829e-05, + "loss": 0.1218, + "step": 9060 + }, + { + "epoch": 0.23440932468405137, + "grad_norm": 0.04306337237358093, + "learning_rate": 9.8311582807325e-05, + "loss": 0.1222, + "step": 9070 + }, + { + "epoch": 0.23466776936396763, + "grad_norm": 0.044301122426986694, + "learning_rate": 9.830130905503691e-05, + "loss": 0.1219, + "step": 9080 + }, + { + "epoch": 0.23492621404388392, + "grad_norm": 0.05509497970342636, + "learning_rate": 9.829100491771803e-05, + "loss": 0.1212, + "step": 9090 + }, + { + "epoch": 0.23518465872380018, + "grad_norm": 0.04910295829176903, + "learning_rate": 9.82806704047702e-05, + "loss": 0.1217, + "step": 9100 + }, + { + "epoch": 0.23544310340371644, + "grad_norm": 0.06862427294254303, + "learning_rate": 9.827030552562286e-05, + "loss": 0.1225, + "step": 9110 + }, + { + "epoch": 0.2357015480836327, + "grad_norm": 0.05995047837495804, + "learning_rate": 9.825991028973322e-05, + "loss": 0.122, + "step": 9120 + }, + { + "epoch": 0.23595999276354895, + "grad_norm": 0.04791514202952385, + "learning_rate": 9.824948470658619e-05, + "loss": 0.1229, + "step": 9130 + }, + { + "epoch": 0.23621843744346524, + "grad_norm": 0.03838688135147095, + "learning_rate": 9.823902878569437e-05, + "loss": 0.1229, + "step": 9140 + }, + { + "epoch": 0.2364768821233815, + "grad_norm": 0.03605974465608597, + "learning_rate": 9.822854253659801e-05, + "loss": 0.122, + "step": 9150 + }, + { + "epoch": 0.23673532680329776, + "grad_norm": 0.030459804460406303, + "learning_rate": 9.821802596886505e-05, + "loss": 0.1222, + "step": 9160 + }, + { + "epoch": 0.23699377148321402, + "grad_norm": 0.060631219297647476, + "learning_rate": 9.820747909209109e-05, + "loss": 0.1223, + "step": 9170 + }, + { + "epoch": 0.23725221616313028, + "grad_norm": 0.036420829594135284, + "learning_rate": 9.819690191589943e-05, + "loss": 0.122, + "step": 9180 + }, + { + "epoch": 0.23751066084304653, + "grad_norm": 0.05750449746847153, + "learning_rate": 9.818629444994094e-05, + "loss": 0.1209, + "step": 9190 + }, + { + "epoch": 0.23776910552296282, + "grad_norm": 0.029421767219901085, + "learning_rate": 9.817565670389419e-05, + "loss": 0.122, + "step": 9200 + }, + { + "epoch": 0.23802755020287908, + "grad_norm": 0.044734541326761246, + "learning_rate": 9.816498868746533e-05, + "loss": 0.1223, + "step": 9210 + }, + { + "epoch": 0.23828599488279534, + "grad_norm": 0.05023409426212311, + "learning_rate": 9.815429041038816e-05, + "loss": 0.1224, + "step": 9220 + }, + { + "epoch": 0.2385444395627116, + "grad_norm": 0.05479540675878525, + "learning_rate": 9.814356188242408e-05, + "loss": 0.123, + "step": 9230 + }, + { + "epoch": 0.23880288424262786, + "grad_norm": 0.06324556469917297, + "learning_rate": 9.813280311336211e-05, + "loss": 0.1216, + "step": 9240 + }, + { + "epoch": 0.23906132892254414, + "grad_norm": 0.055781178176403046, + "learning_rate": 9.81220141130188e-05, + "loss": 0.1229, + "step": 9250 + }, + { + "epoch": 0.2393197736024604, + "grad_norm": 0.03384460508823395, + "learning_rate": 9.811119489123837e-05, + "loss": 0.1219, + "step": 9260 + }, + { + "epoch": 0.23957821828237666, + "grad_norm": 0.03441445901989937, + "learning_rate": 9.81003454578926e-05, + "loss": 0.1222, + "step": 9270 + }, + { + "epoch": 0.23983666296229292, + "grad_norm": 0.05035923421382904, + "learning_rate": 9.808946582288075e-05, + "loss": 0.1224, + "step": 9280 + }, + { + "epoch": 0.24009510764220918, + "grad_norm": 0.07732076942920685, + "learning_rate": 9.807855599612974e-05, + "loss": 0.1223, + "step": 9290 + }, + { + "epoch": 0.24035355232212544, + "grad_norm": 0.04535223916172981, + "learning_rate": 9.8067615987594e-05, + "loss": 0.1228, + "step": 9300 + }, + { + "epoch": 0.24061199700204172, + "grad_norm": 0.05055880919098854, + "learning_rate": 9.805664580725545e-05, + "loss": 0.1227, + "step": 9310 + }, + { + "epoch": 0.24087044168195798, + "grad_norm": 0.038474682718515396, + "learning_rate": 9.804564546512362e-05, + "loss": 0.1218, + "step": 9320 + }, + { + "epoch": 0.24112888636187424, + "grad_norm": 0.0761597603559494, + "learning_rate": 9.803461497123553e-05, + "loss": 0.1228, + "step": 9330 + }, + { + "epoch": 0.2413873310417905, + "grad_norm": 0.0632692500948906, + "learning_rate": 9.802355433565569e-05, + "loss": 0.122, + "step": 9340 + }, + { + "epoch": 0.24164577572170676, + "grad_norm": 0.032881688326597214, + "learning_rate": 9.801246356847615e-05, + "loss": 0.122, + "step": 9350 + }, + { + "epoch": 0.24190422040162304, + "grad_norm": 0.03867485374212265, + "learning_rate": 9.80013426798164e-05, + "loss": 0.1215, + "step": 9360 + }, + { + "epoch": 0.2421626650815393, + "grad_norm": 0.027744559571146965, + "learning_rate": 9.799019167982346e-05, + "loss": 0.1221, + "step": 9370 + }, + { + "epoch": 0.24242110976145556, + "grad_norm": 0.054945848882198334, + "learning_rate": 9.797901057867183e-05, + "loss": 0.1219, + "step": 9380 + }, + { + "epoch": 0.24267955444137182, + "grad_norm": 0.03269646689295769, + "learning_rate": 9.796779938656339e-05, + "loss": 0.1218, + "step": 9390 + }, + { + "epoch": 0.24293799912128808, + "grad_norm": 0.09877783805131912, + "learning_rate": 9.795655811372762e-05, + "loss": 0.1213, + "step": 9400 + }, + { + "epoch": 0.24319644380120436, + "grad_norm": 0.03303838521242142, + "learning_rate": 9.794528677042132e-05, + "loss": 0.1217, + "step": 9410 + }, + { + "epoch": 0.24345488848112062, + "grad_norm": 0.0364997461438179, + "learning_rate": 9.793398536692877e-05, + "loss": 0.1219, + "step": 9420 + }, + { + "epoch": 0.24371333316103688, + "grad_norm": 0.052552178502082825, + "learning_rate": 9.792265391356173e-05, + "loss": 0.1222, + "step": 9430 + }, + { + "epoch": 0.24397177784095314, + "grad_norm": 0.03283165767788887, + "learning_rate": 9.791129242065927e-05, + "loss": 0.1214, + "step": 9440 + }, + { + "epoch": 0.2442302225208694, + "grad_norm": 0.03688403218984604, + "learning_rate": 9.789990089858795e-05, + "loss": 0.1218, + "step": 9450 + }, + { + "epoch": 0.24448866720078566, + "grad_norm": 0.029302271082997322, + "learning_rate": 9.788847935774173e-05, + "loss": 0.1223, + "step": 9460 + }, + { + "epoch": 0.24474711188070195, + "grad_norm": 0.029346393421292305, + "learning_rate": 9.787702780854191e-05, + "loss": 0.1222, + "step": 9470 + }, + { + "epoch": 0.2450055565606182, + "grad_norm": 0.11930780857801437, + "learning_rate": 9.78655462614372e-05, + "loss": 0.1228, + "step": 9480 + }, + { + "epoch": 0.24526400124053446, + "grad_norm": 0.025659484788775444, + "learning_rate": 9.785403472690371e-05, + "loss": 0.1213, + "step": 9490 + }, + { + "epoch": 0.24552244592045072, + "grad_norm": 0.03191477060317993, + "learning_rate": 9.784249321544485e-05, + "loss": 0.1218, + "step": 9500 + }, + { + "epoch": 0.24578089060036698, + "grad_norm": 0.08472995460033417, + "learning_rate": 9.783092173759143e-05, + "loss": 0.1223, + "step": 9510 + }, + { + "epoch": 0.24603933528028327, + "grad_norm": 0.02820950746536255, + "learning_rate": 9.781932030390157e-05, + "loss": 0.1222, + "step": 9520 + }, + { + "epoch": 0.24629777996019953, + "grad_norm": 0.05071626603603363, + "learning_rate": 9.780768892496073e-05, + "loss": 0.1224, + "step": 9530 + }, + { + "epoch": 0.24655622464011578, + "grad_norm": 0.03210419788956642, + "learning_rate": 9.779602761138174e-05, + "loss": 0.1227, + "step": 9540 + }, + { + "epoch": 0.24681466932003204, + "grad_norm": 0.036584045737981796, + "learning_rate": 9.778433637380466e-05, + "loss": 0.1216, + "step": 9550 + }, + { + "epoch": 0.2470731139999483, + "grad_norm": 0.03985079750418663, + "learning_rate": 9.777261522289693e-05, + "loss": 0.1224, + "step": 9560 + }, + { + "epoch": 0.2473315586798646, + "grad_norm": 0.04155793786048889, + "learning_rate": 9.776086416935322e-05, + "loss": 0.1222, + "step": 9570 + }, + { + "epoch": 0.24759000335978085, + "grad_norm": 0.04365672171115875, + "learning_rate": 9.774908322389554e-05, + "loss": 0.1222, + "step": 9580 + }, + { + "epoch": 0.2478484480396971, + "grad_norm": 0.06601466983556747, + "learning_rate": 9.773727239727313e-05, + "loss": 0.1212, + "step": 9590 + }, + { + "epoch": 0.24810689271961336, + "grad_norm": 0.03636832907795906, + "learning_rate": 9.772543170026254e-05, + "loss": 0.1215, + "step": 9600 + }, + { + "epoch": 0.24836533739952962, + "grad_norm": 0.04975580796599388, + "learning_rate": 9.771356114366752e-05, + "loss": 0.122, + "step": 9610 + }, + { + "epoch": 0.24862378207944588, + "grad_norm": 0.036439571529626846, + "learning_rate": 9.77016607383191e-05, + "loss": 0.1214, + "step": 9620 + }, + { + "epoch": 0.24888222675936217, + "grad_norm": 0.02810492552816868, + "learning_rate": 9.768973049507553e-05, + "loss": 0.1209, + "step": 9630 + }, + { + "epoch": 0.24914067143927843, + "grad_norm": 0.03966129571199417, + "learning_rate": 9.767777042482231e-05, + "loss": 0.1221, + "step": 9640 + }, + { + "epoch": 0.24939911611919469, + "grad_norm": 0.04330369085073471, + "learning_rate": 9.766578053847215e-05, + "loss": 0.1223, + "step": 9650 + }, + { + "epoch": 0.24965756079911094, + "grad_norm": 0.04128487780690193, + "learning_rate": 9.765376084696492e-05, + "loss": 0.122, + "step": 9660 + }, + { + "epoch": 0.2499160054790272, + "grad_norm": 0.03149688243865967, + "learning_rate": 9.76417113612677e-05, + "loss": 0.1221, + "step": 9670 + }, + { + "epoch": 0.25017445015894346, + "grad_norm": 0.03261106088757515, + "learning_rate": 9.762963209237483e-05, + "loss": 0.1216, + "step": 9680 + }, + { + "epoch": 0.2504328948388597, + "grad_norm": 0.028605492785573006, + "learning_rate": 9.761752305130773e-05, + "loss": 0.122, + "step": 9690 + }, + { + "epoch": 0.250691339518776, + "grad_norm": 0.07347958534955978, + "learning_rate": 9.760538424911505e-05, + "loss": 0.1214, + "step": 9700 + }, + { + "epoch": 0.2509497841986923, + "grad_norm": 0.03559430316090584, + "learning_rate": 9.759321569687253e-05, + "loss": 0.1217, + "step": 9710 + }, + { + "epoch": 0.25120822887860855, + "grad_norm": 0.05108926445245743, + "learning_rate": 9.758101740568311e-05, + "loss": 0.1221, + "step": 9720 + }, + { + "epoch": 0.2514666735585248, + "grad_norm": 0.042759548872709274, + "learning_rate": 9.756878938667685e-05, + "loss": 0.1219, + "step": 9730 + }, + { + "epoch": 0.25172511823844107, + "grad_norm": 0.03588419780135155, + "learning_rate": 9.755653165101091e-05, + "loss": 0.1221, + "step": 9740 + }, + { + "epoch": 0.25198356291835733, + "grad_norm": 0.037554338574409485, + "learning_rate": 9.754424420986963e-05, + "loss": 0.1214, + "step": 9750 + }, + { + "epoch": 0.2522420075982736, + "grad_norm": 0.052110329270362854, + "learning_rate": 9.753192707446437e-05, + "loss": 0.1226, + "step": 9760 + }, + { + "epoch": 0.25250045227818985, + "grad_norm": 0.04198969155550003, + "learning_rate": 9.751958025603362e-05, + "loss": 0.1226, + "step": 9770 + }, + { + "epoch": 0.2527588969581061, + "grad_norm": 0.030077170580625534, + "learning_rate": 9.7507203765843e-05, + "loss": 0.122, + "step": 9780 + }, + { + "epoch": 0.25301734163802236, + "grad_norm": 0.04961973428726196, + "learning_rate": 9.749479761518512e-05, + "loss": 0.1219, + "step": 9790 + }, + { + "epoch": 0.2532757863179386, + "grad_norm": 0.03958809748291969, + "learning_rate": 9.748236181537971e-05, + "loss": 0.1227, + "step": 9800 + }, + { + "epoch": 0.25353423099785494, + "grad_norm": 0.03358643129467964, + "learning_rate": 9.746989637777353e-05, + "loss": 0.1218, + "step": 9810 + }, + { + "epoch": 0.2537926756777712, + "grad_norm": 0.029219435527920723, + "learning_rate": 9.745740131374038e-05, + "loss": 0.1217, + "step": 9820 + }, + { + "epoch": 0.25405112035768745, + "grad_norm": 0.046616241335868835, + "learning_rate": 9.744487663468112e-05, + "loss": 0.1219, + "step": 9830 + }, + { + "epoch": 0.2543095650376037, + "grad_norm": 0.03164884075522423, + "learning_rate": 9.74323223520236e-05, + "loss": 0.1223, + "step": 9840 + }, + { + "epoch": 0.25456800971751997, + "grad_norm": 0.05355691537261009, + "learning_rate": 9.74197384772227e-05, + "loss": 0.1221, + "step": 9850 + }, + { + "epoch": 0.25482645439743623, + "grad_norm": 0.02979048527777195, + "learning_rate": 9.740712502176028e-05, + "loss": 0.1224, + "step": 9860 + }, + { + "epoch": 0.2550848990773525, + "grad_norm": 0.0454348623752594, + "learning_rate": 9.739448199714522e-05, + "loss": 0.1222, + "step": 9870 + }, + { + "epoch": 0.25534334375726875, + "grad_norm": 0.04293311759829521, + "learning_rate": 9.738180941491334e-05, + "loss": 0.1219, + "step": 9880 + }, + { + "epoch": 0.255601788437185, + "grad_norm": 0.044739145785570145, + "learning_rate": 9.73691072866275e-05, + "loss": 0.1214, + "step": 9890 + }, + { + "epoch": 0.25586023311710127, + "grad_norm": 0.023408256471157074, + "learning_rate": 9.73563756238774e-05, + "loss": 0.122, + "step": 9900 + }, + { + "epoch": 0.2561186777970175, + "grad_norm": 0.05259205400943756, + "learning_rate": 9.734361443827984e-05, + "loss": 0.1219, + "step": 9910 + }, + { + "epoch": 0.25637712247693384, + "grad_norm": 0.026263004168868065, + "learning_rate": 9.733082374147842e-05, + "loss": 0.1236, + "step": 9920 + }, + { + "epoch": 0.2566355671568501, + "grad_norm": 0.04978423938155174, + "learning_rate": 9.731800354514373e-05, + "loss": 0.123, + "step": 9930 + }, + { + "epoch": 0.25689401183676636, + "grad_norm": 0.038737472146749496, + "learning_rate": 9.73051538609733e-05, + "loss": 0.1224, + "step": 9940 + }, + { + "epoch": 0.2571524565166826, + "grad_norm": 0.044979847967624664, + "learning_rate": 9.72922747006915e-05, + "loss": 0.122, + "step": 9950 + }, + { + "epoch": 0.2574109011965989, + "grad_norm": 0.09752271324396133, + "learning_rate": 9.727936607604966e-05, + "loss": 0.122, + "step": 9960 + }, + { + "epoch": 0.25766934587651513, + "grad_norm": 0.04083587974309921, + "learning_rate": 9.726642799882594e-05, + "loss": 0.1215, + "step": 9970 + }, + { + "epoch": 0.2579277905564314, + "grad_norm": 0.05542069301009178, + "learning_rate": 9.72534604808254e-05, + "loss": 0.1228, + "step": 9980 + }, + { + "epoch": 0.25818623523634765, + "grad_norm": 0.0566909983754158, + "learning_rate": 9.724046353387999e-05, + "loss": 0.1224, + "step": 9990 + }, + { + "epoch": 0.2584446799162639, + "grad_norm": 0.03165486827492714, + "learning_rate": 9.722743716984845e-05, + "loss": 0.1222, + "step": 10000 + }, + { + "epoch": 0.25870312459618017, + "grad_norm": 0.05801451951265335, + "learning_rate": 9.72143814006164e-05, + "loss": 0.1219, + "step": 10010 + }, + { + "epoch": 0.2589615692760964, + "grad_norm": 0.03029945120215416, + "learning_rate": 9.720129623809629e-05, + "loss": 0.1219, + "step": 10020 + }, + { + "epoch": 0.25922001395601274, + "grad_norm": 0.039547793567180634, + "learning_rate": 9.718818169422739e-05, + "loss": 0.1221, + "step": 10030 + }, + { + "epoch": 0.259478458635929, + "grad_norm": 0.02859160117805004, + "learning_rate": 9.717503778097574e-05, + "loss": 0.1213, + "step": 10040 + }, + { + "epoch": 0.25973690331584526, + "grad_norm": 0.08308100700378418, + "learning_rate": 9.716186451033424e-05, + "loss": 0.1221, + "step": 10050 + }, + { + "epoch": 0.2599953479957615, + "grad_norm": 0.05925880745053291, + "learning_rate": 9.714866189432253e-05, + "loss": 0.1216, + "step": 10060 + }, + { + "epoch": 0.2602537926756778, + "grad_norm": 0.02828199975192547, + "learning_rate": 9.713542994498703e-05, + "loss": 0.1216, + "step": 10070 + }, + { + "epoch": 0.26051223735559403, + "grad_norm": 0.04877236485481262, + "learning_rate": 9.712216867440095e-05, + "loss": 0.1219, + "step": 10080 + }, + { + "epoch": 0.2607706820355103, + "grad_norm": 0.03925655409693718, + "learning_rate": 9.710887809466425e-05, + "loss": 0.1229, + "step": 10090 + }, + { + "epoch": 0.26102912671542655, + "grad_norm": 0.02982957474887371, + "learning_rate": 9.709555821790361e-05, + "loss": 0.122, + "step": 10100 + }, + { + "epoch": 0.2612875713953428, + "grad_norm": 0.03727247938513756, + "learning_rate": 9.708220905627243e-05, + "loss": 0.1214, + "step": 10110 + }, + { + "epoch": 0.26154601607525907, + "grad_norm": 0.04670462757349014, + "learning_rate": 9.706883062195089e-05, + "loss": 0.122, + "step": 10120 + }, + { + "epoch": 0.26180446075517533, + "grad_norm": 0.04104216396808624, + "learning_rate": 9.70554229271458e-05, + "loss": 0.1215, + "step": 10130 + }, + { + "epoch": 0.26206290543509164, + "grad_norm": 0.037916962057352066, + "learning_rate": 9.704198598409073e-05, + "loss": 0.1221, + "step": 10140 + }, + { + "epoch": 0.2623213501150079, + "grad_norm": 0.11144250631332397, + "learning_rate": 9.702851980504595e-05, + "loss": 0.1217, + "step": 10150 + }, + { + "epoch": 0.26257979479492416, + "grad_norm": 0.0292348712682724, + "learning_rate": 9.70150244022983e-05, + "loss": 0.1229, + "step": 10160 + }, + { + "epoch": 0.2628382394748404, + "grad_norm": 0.043081820011138916, + "learning_rate": 9.700149978816142e-05, + "loss": 0.1216, + "step": 10170 + }, + { + "epoch": 0.2630966841547567, + "grad_norm": 0.03769086301326752, + "learning_rate": 9.698794597497552e-05, + "loss": 0.1219, + "step": 10180 + }, + { + "epoch": 0.26335512883467294, + "grad_norm": 0.041134100407361984, + "learning_rate": 9.697436297510747e-05, + "loss": 0.1218, + "step": 10190 + }, + { + "epoch": 0.2636135735145892, + "grad_norm": 0.04823868349194527, + "learning_rate": 9.696075080095077e-05, + "loss": 0.1223, + "step": 10200 + }, + { + "epoch": 0.26387201819450545, + "grad_norm": 0.06679939478635788, + "learning_rate": 9.694710946492556e-05, + "loss": 0.1222, + "step": 10210 + }, + { + "epoch": 0.2641304628744217, + "grad_norm": 0.03446626290678978, + "learning_rate": 9.693343897947853e-05, + "loss": 0.1213, + "step": 10220 + }, + { + "epoch": 0.26438890755433797, + "grad_norm": 0.04713613539934158, + "learning_rate": 9.691973935708306e-05, + "loss": 0.1216, + "step": 10230 + }, + { + "epoch": 0.2646473522342543, + "grad_norm": 0.05146656185388565, + "learning_rate": 9.6906010610239e-05, + "loss": 0.1218, + "step": 10240 + }, + { + "epoch": 0.26490579691417054, + "grad_norm": 0.042079903185367584, + "learning_rate": 9.689225275147289e-05, + "loss": 0.1212, + "step": 10250 + }, + { + "epoch": 0.2651642415940868, + "grad_norm": 0.04334166646003723, + "learning_rate": 9.687846579333777e-05, + "loss": 0.1215, + "step": 10260 + }, + { + "epoch": 0.26542268627400306, + "grad_norm": 0.05105006694793701, + "learning_rate": 9.686464974841321e-05, + "loss": 0.1213, + "step": 10270 + }, + { + "epoch": 0.2656811309539193, + "grad_norm": 0.03609834983944893, + "learning_rate": 9.685080462930539e-05, + "loss": 0.1216, + "step": 10280 + }, + { + "epoch": 0.2659395756338356, + "grad_norm": 0.031515173614025116, + "learning_rate": 9.683693044864694e-05, + "loss": 0.1226, + "step": 10290 + }, + { + "epoch": 0.26619802031375184, + "grad_norm": 0.037369269877672195, + "learning_rate": 9.682302721909706e-05, + "loss": 0.1222, + "step": 10300 + }, + { + "epoch": 0.2664564649936681, + "grad_norm": 0.04690421372652054, + "learning_rate": 9.680909495334146e-05, + "loss": 0.1222, + "step": 10310 + }, + { + "epoch": 0.26671490967358435, + "grad_norm": 0.07065413892269135, + "learning_rate": 9.67951336640923e-05, + "loss": 0.1214, + "step": 10320 + }, + { + "epoch": 0.2669733543535006, + "grad_norm": 0.03427078202366829, + "learning_rate": 9.678114336408823e-05, + "loss": 0.1214, + "step": 10330 + }, + { + "epoch": 0.2672317990334169, + "grad_norm": 0.030120570212602615, + "learning_rate": 9.676712406609441e-05, + "loss": 0.1224, + "step": 10340 + }, + { + "epoch": 0.2674902437133332, + "grad_norm": 0.10920918732881546, + "learning_rate": 9.675307578290243e-05, + "loss": 0.1223, + "step": 10350 + }, + { + "epoch": 0.26774868839324945, + "grad_norm": 0.04685673862695694, + "learning_rate": 9.673899852733033e-05, + "loss": 0.122, + "step": 10360 + }, + { + "epoch": 0.2680071330731657, + "grad_norm": 0.047575581818819046, + "learning_rate": 9.672489231222257e-05, + "loss": 0.1213, + "step": 10370 + }, + { + "epoch": 0.26826557775308196, + "grad_norm": 0.02657945826649666, + "learning_rate": 9.671075715045009e-05, + "loss": 0.1214, + "step": 10380 + }, + { + "epoch": 0.2685240224329982, + "grad_norm": 0.08378177881240845, + "learning_rate": 9.669659305491012e-05, + "loss": 0.1221, + "step": 10390 + }, + { + "epoch": 0.2687824671129145, + "grad_norm": 0.032134804874658585, + "learning_rate": 9.668240003852647e-05, + "loss": 0.1219, + "step": 10400 + }, + { + "epoch": 0.26904091179283074, + "grad_norm": 0.03569498658180237, + "learning_rate": 9.666817811424917e-05, + "loss": 0.1225, + "step": 10410 + }, + { + "epoch": 0.269299356472747, + "grad_norm": 0.036826521158218384, + "learning_rate": 9.665392729505472e-05, + "loss": 0.122, + "step": 10420 + }, + { + "epoch": 0.26955780115266326, + "grad_norm": 0.05678486078977585, + "learning_rate": 9.663964759394595e-05, + "loss": 0.1216, + "step": 10430 + }, + { + "epoch": 0.2698162458325795, + "grad_norm": 0.04821275547146797, + "learning_rate": 9.662533902395205e-05, + "loss": 0.1218, + "step": 10440 + }, + { + "epoch": 0.2700746905124958, + "grad_norm": 0.05988611280918121, + "learning_rate": 9.66110015981286e-05, + "loss": 0.1219, + "step": 10450 + }, + { + "epoch": 0.2703331351924121, + "grad_norm": 0.0310111902654171, + "learning_rate": 9.659663532955736e-05, + "loss": 0.1225, + "step": 10460 + }, + { + "epoch": 0.27059157987232835, + "grad_norm": 0.03386465460062027, + "learning_rate": 9.65822402313466e-05, + "loss": 0.1225, + "step": 10470 + }, + { + "epoch": 0.2708500245522446, + "grad_norm": 0.03002682700753212, + "learning_rate": 9.656781631663079e-05, + "loss": 0.1217, + "step": 10480 + }, + { + "epoch": 0.27110846923216086, + "grad_norm": 0.09015118330717087, + "learning_rate": 9.655336359857068e-05, + "loss": 0.123, + "step": 10490 + }, + { + "epoch": 0.2713669139120771, + "grad_norm": 0.04147396609187126, + "learning_rate": 9.653888209035333e-05, + "loss": 0.1217, + "step": 10500 + }, + { + "epoch": 0.2716253585919934, + "grad_norm": 0.034747414290905, + "learning_rate": 9.652437180519209e-05, + "loss": 0.1224, + "step": 10510 + }, + { + "epoch": 0.27188380327190964, + "grad_norm": 0.02755727805197239, + "learning_rate": 9.650983275632652e-05, + "loss": 0.1224, + "step": 10520 + }, + { + "epoch": 0.2721422479518259, + "grad_norm": 0.043544620275497437, + "learning_rate": 9.649526495702246e-05, + "loss": 0.1225, + "step": 10530 + }, + { + "epoch": 0.27240069263174216, + "grad_norm": 0.044420886784791946, + "learning_rate": 9.648066842057197e-05, + "loss": 0.1218, + "step": 10540 + }, + { + "epoch": 0.2726591373116584, + "grad_norm": 0.04425111413002014, + "learning_rate": 9.646604316029333e-05, + "loss": 0.1224, + "step": 10550 + }, + { + "epoch": 0.2729175819915747, + "grad_norm": 0.04003128036856651, + "learning_rate": 9.645138918953103e-05, + "loss": 0.1229, + "step": 10560 + }, + { + "epoch": 0.273176026671491, + "grad_norm": 0.07105188816785812, + "learning_rate": 9.643670652165575e-05, + "loss": 0.122, + "step": 10570 + }, + { + "epoch": 0.27343447135140725, + "grad_norm": 0.031014278531074524, + "learning_rate": 9.642199517006436e-05, + "loss": 0.1217, + "step": 10580 + }, + { + "epoch": 0.2736929160313235, + "grad_norm": 0.025540539994835854, + "learning_rate": 9.640725514817991e-05, + "loss": 0.1218, + "step": 10590 + }, + { + "epoch": 0.27395136071123977, + "grad_norm": 0.052697375416755676, + "learning_rate": 9.639248646945163e-05, + "loss": 0.1217, + "step": 10600 + }, + { + "epoch": 0.274209805391156, + "grad_norm": 0.03409591317176819, + "learning_rate": 9.637768914735481e-05, + "loss": 0.1222, + "step": 10610 + }, + { + "epoch": 0.2744682500710723, + "grad_norm": 0.05348536744713783, + "learning_rate": 9.636286319539096e-05, + "loss": 0.1216, + "step": 10620 + }, + { + "epoch": 0.27472669475098854, + "grad_norm": 0.04352504387497902, + "learning_rate": 9.63480086270877e-05, + "loss": 0.1218, + "step": 10630 + }, + { + "epoch": 0.2749851394309048, + "grad_norm": 0.06268704682588577, + "learning_rate": 9.633312545599873e-05, + "loss": 0.1217, + "step": 10640 + }, + { + "epoch": 0.27524358411082106, + "grad_norm": 0.05397544056177139, + "learning_rate": 9.631821369570388e-05, + "loss": 0.1222, + "step": 10650 + }, + { + "epoch": 0.2755020287907373, + "grad_norm": 0.04187548905611038, + "learning_rate": 9.630327335980905e-05, + "loss": 0.1213, + "step": 10660 + }, + { + "epoch": 0.27576047347065363, + "grad_norm": 0.03760016709566116, + "learning_rate": 9.628830446194617e-05, + "loss": 0.1224, + "step": 10670 + }, + { + "epoch": 0.2760189181505699, + "grad_norm": 0.07169830054044724, + "learning_rate": 9.627330701577332e-05, + "loss": 0.1223, + "step": 10680 + }, + { + "epoch": 0.27627736283048615, + "grad_norm": 0.04024192690849304, + "learning_rate": 9.625828103497459e-05, + "loss": 0.1226, + "step": 10690 + }, + { + "epoch": 0.2765358075104024, + "grad_norm": 0.03448636084794998, + "learning_rate": 9.624322653326005e-05, + "loss": 0.1225, + "step": 10700 + }, + { + "epoch": 0.27679425219031867, + "grad_norm": 0.046658456325531006, + "learning_rate": 9.622814352436591e-05, + "loss": 0.1219, + "step": 10710 + }, + { + "epoch": 0.2770526968702349, + "grad_norm": 0.03501898795366287, + "learning_rate": 9.621303202205427e-05, + "loss": 0.1218, + "step": 10720 + }, + { + "epoch": 0.2773111415501512, + "grad_norm": 0.030791599303483963, + "learning_rate": 9.619789204011329e-05, + "loss": 0.1218, + "step": 10730 + }, + { + "epoch": 0.27756958623006744, + "grad_norm": 0.03519897535443306, + "learning_rate": 9.618272359235712e-05, + "loss": 0.1222, + "step": 10740 + }, + { + "epoch": 0.2778280309099837, + "grad_norm": 0.031451307237148285, + "learning_rate": 9.616752669262585e-05, + "loss": 0.1218, + "step": 10750 + }, + { + "epoch": 0.27808647558989996, + "grad_norm": 0.034482669085264206, + "learning_rate": 9.615230135478555e-05, + "loss": 0.1218, + "step": 10760 + }, + { + "epoch": 0.2783449202698162, + "grad_norm": 0.028001563623547554, + "learning_rate": 9.613704759272825e-05, + "loss": 0.1213, + "step": 10770 + }, + { + "epoch": 0.27860336494973253, + "grad_norm": 0.053938232362270355, + "learning_rate": 9.612176542037192e-05, + "loss": 0.121, + "step": 10780 + }, + { + "epoch": 0.2788618096296488, + "grad_norm": 0.03522767871618271, + "learning_rate": 9.610645485166038e-05, + "loss": 0.1217, + "step": 10790 + }, + { + "epoch": 0.27912025430956505, + "grad_norm": 0.029587984085083008, + "learning_rate": 9.609111590056348e-05, + "loss": 0.1221, + "step": 10800 + }, + { + "epoch": 0.2793786989894813, + "grad_norm": 0.03145352005958557, + "learning_rate": 9.607574858107686e-05, + "loss": 0.122, + "step": 10810 + }, + { + "epoch": 0.27963714366939757, + "grad_norm": 0.040148936212062836, + "learning_rate": 9.606035290722208e-05, + "loss": 0.1224, + "step": 10820 + }, + { + "epoch": 0.27989558834931383, + "grad_norm": 0.039827894419431686, + "learning_rate": 9.604492889304662e-05, + "loss": 0.1212, + "step": 10830 + }, + { + "epoch": 0.2801540330292301, + "grad_norm": 0.03002399392426014, + "learning_rate": 9.602947655262371e-05, + "loss": 0.1219, + "step": 10840 + }, + { + "epoch": 0.28041247770914635, + "grad_norm": 0.03883709758520126, + "learning_rate": 9.601399590005256e-05, + "loss": 0.1222, + "step": 10850 + }, + { + "epoch": 0.2806709223890626, + "grad_norm": 0.07312025129795074, + "learning_rate": 9.599848694945808e-05, + "loss": 0.1221, + "step": 10860 + }, + { + "epoch": 0.28092936706897886, + "grad_norm": 0.029127132147550583, + "learning_rate": 9.598294971499112e-05, + "loss": 0.1217, + "step": 10870 + }, + { + "epoch": 0.2811878117488951, + "grad_norm": 0.06794220954179764, + "learning_rate": 9.596738421082823e-05, + "loss": 0.1217, + "step": 10880 + }, + { + "epoch": 0.28144625642881144, + "grad_norm": 0.03154011815786362, + "learning_rate": 9.595179045117182e-05, + "loss": 0.1213, + "step": 10890 + }, + { + "epoch": 0.2817047011087277, + "grad_norm": 0.058496829122304916, + "learning_rate": 9.593616845025008e-05, + "loss": 0.1222, + "step": 10900 + }, + { + "epoch": 0.28196314578864395, + "grad_norm": 0.04057440534234047, + "learning_rate": 9.592051822231696e-05, + "loss": 0.1223, + "step": 10910 + }, + { + "epoch": 0.2822215904685602, + "grad_norm": 0.03172251209616661, + "learning_rate": 9.590483978165214e-05, + "loss": 0.1219, + "step": 10920 + }, + { + "epoch": 0.28248003514847647, + "grad_norm": 0.03688961640000343, + "learning_rate": 9.588913314256105e-05, + "loss": 0.1222, + "step": 10930 + }, + { + "epoch": 0.28273847982839273, + "grad_norm": 0.028637727722525597, + "learning_rate": 9.58733983193749e-05, + "loss": 0.1218, + "step": 10940 + }, + { + "epoch": 0.282996924508309, + "grad_norm": 0.02712370455265045, + "learning_rate": 9.585763532645053e-05, + "loss": 0.1217, + "step": 10950 + }, + { + "epoch": 0.28325536918822525, + "grad_norm": 0.029038026928901672, + "learning_rate": 9.584184417817054e-05, + "loss": 0.1219, + "step": 10960 + }, + { + "epoch": 0.2835138138681415, + "grad_norm": 0.03690708428621292, + "learning_rate": 9.582602488894326e-05, + "loss": 0.1222, + "step": 10970 + }, + { + "epoch": 0.28377225854805777, + "grad_norm": 0.02920130454003811, + "learning_rate": 9.581017747320258e-05, + "loss": 0.1224, + "step": 10980 + }, + { + "epoch": 0.2840307032279741, + "grad_norm": 0.02496206760406494, + "learning_rate": 9.579430194540813e-05, + "loss": 0.1221, + "step": 10990 + }, + { + "epoch": 0.28428914790789034, + "grad_norm": 0.026660729199647903, + "learning_rate": 9.57783983200452e-05, + "loss": 0.1217, + "step": 11000 + }, + { + "epoch": 0.2845475925878066, + "grad_norm": 0.03294852003455162, + "learning_rate": 9.576246661162469e-05, + "loss": 0.1219, + "step": 11010 + }, + { + "epoch": 0.28480603726772286, + "grad_norm": 0.03719004988670349, + "learning_rate": 9.574650683468311e-05, + "loss": 0.1221, + "step": 11020 + }, + { + "epoch": 0.2850644819476391, + "grad_norm": 0.03275352343916893, + "learning_rate": 9.573051900378263e-05, + "loss": 0.1215, + "step": 11030 + }, + { + "epoch": 0.2853229266275554, + "grad_norm": 0.043395258486270905, + "learning_rate": 9.571450313351095e-05, + "loss": 0.122, + "step": 11040 + }, + { + "epoch": 0.28558137130747163, + "grad_norm": 0.035229261964559555, + "learning_rate": 9.569845923848142e-05, + "loss": 0.1221, + "step": 11050 + }, + { + "epoch": 0.2858398159873879, + "grad_norm": 0.05891973152756691, + "learning_rate": 9.568238733333292e-05, + "loss": 0.1223, + "step": 11060 + }, + { + "epoch": 0.28609826066730415, + "grad_norm": 0.036541495472192764, + "learning_rate": 9.566628743272992e-05, + "loss": 0.1218, + "step": 11070 + }, + { + "epoch": 0.2863567053472204, + "grad_norm": 0.03188103437423706, + "learning_rate": 9.565015955136236e-05, + "loss": 0.1221, + "step": 11080 + }, + { + "epoch": 0.28661515002713667, + "grad_norm": 0.03983810171484947, + "learning_rate": 9.563400370394581e-05, + "loss": 0.1216, + "step": 11090 + }, + { + "epoch": 0.286873594707053, + "grad_norm": 0.04924715310335159, + "learning_rate": 9.561781990522132e-05, + "loss": 0.1216, + "step": 11100 + }, + { + "epoch": 0.28713203938696924, + "grad_norm": 0.057169798761606216, + "learning_rate": 9.560160816995538e-05, + "loss": 0.1223, + "step": 11110 + }, + { + "epoch": 0.2873904840668855, + "grad_norm": 0.03732907772064209, + "learning_rate": 9.558536851294007e-05, + "loss": 0.1218, + "step": 11120 + }, + { + "epoch": 0.28764892874680176, + "grad_norm": 0.0312440637499094, + "learning_rate": 9.556910094899288e-05, + "loss": 0.1212, + "step": 11130 + }, + { + "epoch": 0.287907373426718, + "grad_norm": 0.03985103219747543, + "learning_rate": 9.555280549295678e-05, + "loss": 0.1216, + "step": 11140 + }, + { + "epoch": 0.2881658181066343, + "grad_norm": 0.02485160529613495, + "learning_rate": 9.553648215970019e-05, + "loss": 0.1217, + "step": 11150 + }, + { + "epoch": 0.28842426278655053, + "grad_norm": 0.03186703100800514, + "learning_rate": 9.552013096411699e-05, + "loss": 0.1225, + "step": 11160 + }, + { + "epoch": 0.2886827074664668, + "grad_norm": 0.03438564017415047, + "learning_rate": 9.550375192112642e-05, + "loss": 0.1222, + "step": 11170 + }, + { + "epoch": 0.28894115214638305, + "grad_norm": 0.055788807570934296, + "learning_rate": 9.548734504567323e-05, + "loss": 0.1219, + "step": 11180 + }, + { + "epoch": 0.2891995968262993, + "grad_norm": 0.038891859352588654, + "learning_rate": 9.547091035272744e-05, + "loss": 0.1226, + "step": 11190 + }, + { + "epoch": 0.28945804150621557, + "grad_norm": 0.032545894384384155, + "learning_rate": 9.545444785728457e-05, + "loss": 0.1225, + "step": 11200 + }, + { + "epoch": 0.2897164861861319, + "grad_norm": 0.0399077907204628, + "learning_rate": 9.543795757436542e-05, + "loss": 0.122, + "step": 11210 + }, + { + "epoch": 0.28997493086604814, + "grad_norm": 0.03598653897643089, + "learning_rate": 9.542143951901621e-05, + "loss": 0.1226, + "step": 11220 + }, + { + "epoch": 0.2902333755459644, + "grad_norm": 0.029344823211431503, + "learning_rate": 9.540489370630843e-05, + "loss": 0.1221, + "step": 11230 + }, + { + "epoch": 0.29049182022588066, + "grad_norm": 0.03245185315608978, + "learning_rate": 9.538832015133897e-05, + "loss": 0.1219, + "step": 11240 + }, + { + "epoch": 0.2907502649057969, + "grad_norm": 0.04201042279601097, + "learning_rate": 9.537171886922994e-05, + "loss": 0.1222, + "step": 11250 + }, + { + "epoch": 0.2910087095857132, + "grad_norm": 0.02830476500093937, + "learning_rate": 9.535508987512886e-05, + "loss": 0.1218, + "step": 11260 + }, + { + "epoch": 0.29126715426562944, + "grad_norm": 0.04598303884267807, + "learning_rate": 9.53384331842085e-05, + "loss": 0.1218, + "step": 11270 + }, + { + "epoch": 0.2915255989455457, + "grad_norm": 0.06198582798242569, + "learning_rate": 9.532174881166683e-05, + "loss": 0.1207, + "step": 11280 + }, + { + "epoch": 0.29178404362546195, + "grad_norm": 0.04971132054924965, + "learning_rate": 9.530503677272713e-05, + "loss": 0.122, + "step": 11290 + }, + { + "epoch": 0.2920424883053782, + "grad_norm": 0.033479951322078705, + "learning_rate": 9.528829708263798e-05, + "loss": 0.1215, + "step": 11300 + }, + { + "epoch": 0.29230093298529447, + "grad_norm": 0.03751823306083679, + "learning_rate": 9.527152975667309e-05, + "loss": 0.1215, + "step": 11310 + }, + { + "epoch": 0.2925593776652108, + "grad_norm": 0.028247853741049767, + "learning_rate": 9.525473481013144e-05, + "loss": 0.1215, + "step": 11320 + }, + { + "epoch": 0.29281782234512704, + "grad_norm": 0.040623102337121964, + "learning_rate": 9.52379122583372e-05, + "loss": 0.1215, + "step": 11330 + }, + { + "epoch": 0.2930762670250433, + "grad_norm": 0.030654165893793106, + "learning_rate": 9.522106211663973e-05, + "loss": 0.1221, + "step": 11340 + }, + { + "epoch": 0.29333471170495956, + "grad_norm": 0.04300300404429436, + "learning_rate": 9.520418440041356e-05, + "loss": 0.1209, + "step": 11350 + }, + { + "epoch": 0.2935931563848758, + "grad_norm": 0.05364068225026131, + "learning_rate": 9.51872791250584e-05, + "loss": 0.1217, + "step": 11360 + }, + { + "epoch": 0.2938516010647921, + "grad_norm": 0.03670886904001236, + "learning_rate": 9.517034630599905e-05, + "loss": 0.1215, + "step": 11370 + }, + { + "epoch": 0.29411004574470834, + "grad_norm": 0.054548002779483795, + "learning_rate": 9.515338595868553e-05, + "loss": 0.1216, + "step": 11380 + }, + { + "epoch": 0.2943684904246246, + "grad_norm": 0.06050057336688042, + "learning_rate": 9.513639809859289e-05, + "loss": 0.1226, + "step": 11390 + }, + { + "epoch": 0.29462693510454085, + "grad_norm": 0.04351698234677315, + "learning_rate": 9.511938274122132e-05, + "loss": 0.122, + "step": 11400 + }, + { + "epoch": 0.2948853797844571, + "grad_norm": 0.05338112264871597, + "learning_rate": 9.510233990209613e-05, + "loss": 0.1229, + "step": 11410 + }, + { + "epoch": 0.2951438244643734, + "grad_norm": 0.09659484773874283, + "learning_rate": 9.508526959676767e-05, + "loss": 0.1217, + "step": 11420 + }, + { + "epoch": 0.2954022691442897, + "grad_norm": 0.03333357721567154, + "learning_rate": 9.506817184081132e-05, + "loss": 0.1221, + "step": 11430 + }, + { + "epoch": 0.29566071382420595, + "grad_norm": 0.025346217676997185, + "learning_rate": 9.505104664982758e-05, + "loss": 0.1215, + "step": 11440 + }, + { + "epoch": 0.2959191585041222, + "grad_norm": 0.033819057047367096, + "learning_rate": 9.503389403944194e-05, + "loss": 0.1218, + "step": 11450 + }, + { + "epoch": 0.29617760318403846, + "grad_norm": 0.027412768453359604, + "learning_rate": 9.501671402530489e-05, + "loss": 0.1218, + "step": 11460 + }, + { + "epoch": 0.2964360478639547, + "grad_norm": 0.042046885937452316, + "learning_rate": 9.499950662309196e-05, + "loss": 0.1228, + "step": 11470 + }, + { + "epoch": 0.296694492543871, + "grad_norm": 0.03448958322405815, + "learning_rate": 9.49822718485037e-05, + "loss": 0.1222, + "step": 11480 + }, + { + "epoch": 0.29695293722378724, + "grad_norm": 0.028676558285951614, + "learning_rate": 9.496500971726552e-05, + "loss": 0.1219, + "step": 11490 + }, + { + "epoch": 0.2972113819037035, + "grad_norm": 0.03608618304133415, + "learning_rate": 9.494772024512793e-05, + "loss": 0.1221, + "step": 11500 + }, + { + "epoch": 0.29746982658361976, + "grad_norm": 0.03581104427576065, + "learning_rate": 9.493040344786626e-05, + "loss": 0.1223, + "step": 11510 + }, + { + "epoch": 0.297728271263536, + "grad_norm": 0.02875709906220436, + "learning_rate": 9.491305934128085e-05, + "loss": 0.1224, + "step": 11520 + }, + { + "epoch": 0.29798671594345233, + "grad_norm": 0.03437475115060806, + "learning_rate": 9.489568794119699e-05, + "loss": 0.1218, + "step": 11530 + }, + { + "epoch": 0.2982451606233686, + "grad_norm": 0.032591212540864944, + "learning_rate": 9.487828926346475e-05, + "loss": 0.1218, + "step": 11540 + }, + { + "epoch": 0.29850360530328485, + "grad_norm": 0.05015110224485397, + "learning_rate": 9.486086332395922e-05, + "loss": 0.1211, + "step": 11550 + }, + { + "epoch": 0.2987620499832011, + "grad_norm": 0.06170334666967392, + "learning_rate": 9.484341013858027e-05, + "loss": 0.1215, + "step": 11560 + }, + { + "epoch": 0.29902049466311736, + "grad_norm": 0.038552962243556976, + "learning_rate": 9.48259297232527e-05, + "loss": 0.1217, + "step": 11570 + }, + { + "epoch": 0.2992789393430336, + "grad_norm": 0.0400848314166069, + "learning_rate": 9.480842209392609e-05, + "loss": 0.1216, + "step": 11580 + }, + { + "epoch": 0.2995373840229499, + "grad_norm": 0.051416147500276566, + "learning_rate": 9.479088726657487e-05, + "loss": 0.1216, + "step": 11590 + }, + { + "epoch": 0.29979582870286614, + "grad_norm": 0.034614019095897675, + "learning_rate": 9.477332525719838e-05, + "loss": 0.1213, + "step": 11600 + }, + { + "epoch": 0.3000542733827824, + "grad_norm": 0.05758015811443329, + "learning_rate": 9.475573608182057e-05, + "loss": 0.1226, + "step": 11610 + }, + { + "epoch": 0.30031271806269866, + "grad_norm": 0.04334770515561104, + "learning_rate": 9.473811975649037e-05, + "loss": 0.1216, + "step": 11620 + }, + { + "epoch": 0.3005711627426149, + "grad_norm": 0.04165912792086601, + "learning_rate": 9.472047629728137e-05, + "loss": 0.1218, + "step": 11630 + }, + { + "epoch": 0.30082960742253123, + "grad_norm": 0.06714773923158646, + "learning_rate": 9.470280572029195e-05, + "loss": 0.1217, + "step": 11640 + }, + { + "epoch": 0.3010880521024475, + "grad_norm": 0.0517171174287796, + "learning_rate": 9.468510804164523e-05, + "loss": 0.1221, + "step": 11650 + }, + { + "epoch": 0.30134649678236375, + "grad_norm": 0.030924350023269653, + "learning_rate": 9.466738327748906e-05, + "loss": 0.1221, + "step": 11660 + }, + { + "epoch": 0.30160494146228, + "grad_norm": 0.04076073691248894, + "learning_rate": 9.464963144399599e-05, + "loss": 0.1219, + "step": 11670 + }, + { + "epoch": 0.30186338614219627, + "grad_norm": 0.04362453147768974, + "learning_rate": 9.463185255736328e-05, + "loss": 0.1218, + "step": 11680 + }, + { + "epoch": 0.3021218308221125, + "grad_norm": 0.032455187290906906, + "learning_rate": 9.461404663381295e-05, + "loss": 0.1217, + "step": 11690 + }, + { + "epoch": 0.3023802755020288, + "grad_norm": 0.047971613705158234, + "learning_rate": 9.459621368959151e-05, + "loss": 0.1227, + "step": 11700 + }, + { + "epoch": 0.30263872018194504, + "grad_norm": 0.026270419359207153, + "learning_rate": 9.457835374097029e-05, + "loss": 0.1216, + "step": 11710 + }, + { + "epoch": 0.3028971648618613, + "grad_norm": 0.03116699494421482, + "learning_rate": 9.456046680424515e-05, + "loss": 0.1228, + "step": 11720 + }, + { + "epoch": 0.30315560954177756, + "grad_norm": 0.049423083662986755, + "learning_rate": 9.454255289573666e-05, + "loss": 0.1214, + "step": 11730 + }, + { + "epoch": 0.3034140542216939, + "grad_norm": 0.03586959466338158, + "learning_rate": 9.452461203179e-05, + "loss": 0.1219, + "step": 11740 + }, + { + "epoch": 0.30367249890161013, + "grad_norm": 0.03690750524401665, + "learning_rate": 9.450664422877482e-05, + "loss": 0.1224, + "step": 11750 + }, + { + "epoch": 0.3039309435815264, + "grad_norm": 0.04143058508634567, + "learning_rate": 9.448864950308549e-05, + "loss": 0.1225, + "step": 11760 + }, + { + "epoch": 0.30418938826144265, + "grad_norm": 0.03551993519067764, + "learning_rate": 9.447062787114091e-05, + "loss": 0.1212, + "step": 11770 + }, + { + "epoch": 0.3044478329413589, + "grad_norm": 0.027150055393576622, + "learning_rate": 9.445257934938448e-05, + "loss": 0.1219, + "step": 11780 + }, + { + "epoch": 0.30470627762127517, + "grad_norm": 0.03489183634519577, + "learning_rate": 9.443450395428417e-05, + "loss": 0.1221, + "step": 11790 + }, + { + "epoch": 0.3049647223011914, + "grad_norm": 0.03224897012114525, + "learning_rate": 9.441640170233247e-05, + "loss": 0.1227, + "step": 11800 + }, + { + "epoch": 0.3052231669811077, + "grad_norm": 0.03808844834566116, + "learning_rate": 9.43982726100464e-05, + "loss": 0.122, + "step": 11810 + }, + { + "epoch": 0.30548161166102394, + "grad_norm": 0.03806736320257187, + "learning_rate": 9.43801166939674e-05, + "loss": 0.1223, + "step": 11820 + }, + { + "epoch": 0.3057400563409402, + "grad_norm": 0.0605003647506237, + "learning_rate": 9.436193397066144e-05, + "loss": 0.1212, + "step": 11830 + }, + { + "epoch": 0.30599850102085646, + "grad_norm": 0.02827383391559124, + "learning_rate": 9.434372445671897e-05, + "loss": 0.1215, + "step": 11840 + }, + { + "epoch": 0.3062569457007728, + "grad_norm": 0.03829476237297058, + "learning_rate": 9.432548816875481e-05, + "loss": 0.1224, + "step": 11850 + }, + { + "epoch": 0.30651539038068903, + "grad_norm": 0.048770755529403687, + "learning_rate": 9.430722512340828e-05, + "loss": 0.1221, + "step": 11860 + }, + { + "epoch": 0.3067738350606053, + "grad_norm": 0.044404536485672, + "learning_rate": 9.428893533734306e-05, + "loss": 0.1218, + "step": 11870 + }, + { + "epoch": 0.30703227974052155, + "grad_norm": 0.025631699711084366, + "learning_rate": 9.427061882724727e-05, + "loss": 0.1216, + "step": 11880 + }, + { + "epoch": 0.3072907244204378, + "grad_norm": 0.04114828258752823, + "learning_rate": 9.425227560983341e-05, + "loss": 0.1212, + "step": 11890 + }, + { + "epoch": 0.30754916910035407, + "grad_norm": 0.052144210785627365, + "learning_rate": 9.423390570183829e-05, + "loss": 0.1219, + "step": 11900 + }, + { + "epoch": 0.30780761378027033, + "grad_norm": 0.02854691445827484, + "learning_rate": 9.421550912002319e-05, + "loss": 0.1211, + "step": 11910 + }, + { + "epoch": 0.3080660584601866, + "grad_norm": 0.029760172590613365, + "learning_rate": 9.41970858811736e-05, + "loss": 0.1223, + "step": 11920 + }, + { + "epoch": 0.30832450314010285, + "grad_norm": 0.032578274607658386, + "learning_rate": 9.417863600209942e-05, + "loss": 0.1214, + "step": 11930 + }, + { + "epoch": 0.3085829478200191, + "grad_norm": 0.02445974200963974, + "learning_rate": 9.416015949963479e-05, + "loss": 0.1215, + "step": 11940 + }, + { + "epoch": 0.30884139249993536, + "grad_norm": 0.04304877668619156, + "learning_rate": 9.41416563906382e-05, + "loss": 0.1216, + "step": 11950 + }, + { + "epoch": 0.3090998371798517, + "grad_norm": 0.028510145843029022, + "learning_rate": 9.412312669199243e-05, + "loss": 0.1217, + "step": 11960 + }, + { + "epoch": 0.30935828185976794, + "grad_norm": 0.03230534493923187, + "learning_rate": 9.410457042060446e-05, + "loss": 0.1221, + "step": 11970 + }, + { + "epoch": 0.3096167265396842, + "grad_norm": 0.08605611324310303, + "learning_rate": 9.408598759340552e-05, + "loss": 0.1218, + "step": 11980 + }, + { + "epoch": 0.30987517121960045, + "grad_norm": 0.03076208010315895, + "learning_rate": 9.40673782273511e-05, + "loss": 0.1218, + "step": 11990 + }, + { + "epoch": 0.3101336158995167, + "grad_norm": 0.052168216556310654, + "learning_rate": 9.404874233942094e-05, + "loss": 0.122, + "step": 12000 + }, + { + "epoch": 0.31039206057943297, + "grad_norm": 0.0480206161737442, + "learning_rate": 9.40300799466189e-05, + "loss": 0.1214, + "step": 12010 + }, + { + "epoch": 0.31065050525934923, + "grad_norm": 0.03104020655155182, + "learning_rate": 9.401139106597305e-05, + "loss": 0.1217, + "step": 12020 + }, + { + "epoch": 0.3109089499392655, + "grad_norm": 0.029468342661857605, + "learning_rate": 9.39926757145357e-05, + "loss": 0.1226, + "step": 12030 + }, + { + "epoch": 0.31116739461918175, + "grad_norm": 0.039604831486940384, + "learning_rate": 9.397393390938318e-05, + "loss": 0.1218, + "step": 12040 + }, + { + "epoch": 0.311425839299098, + "grad_norm": 0.036681536585092545, + "learning_rate": 9.395516566761606e-05, + "loss": 0.1221, + "step": 12050 + }, + { + "epoch": 0.31168428397901426, + "grad_norm": 0.03916449844837189, + "learning_rate": 9.393637100635899e-05, + "loss": 0.1215, + "step": 12060 + }, + { + "epoch": 0.3119427286589306, + "grad_norm": 0.0390469916164875, + "learning_rate": 9.391754994276077e-05, + "loss": 0.1208, + "step": 12070 + }, + { + "epoch": 0.31220117333884684, + "grad_norm": 0.03181254863739014, + "learning_rate": 9.38987024939942e-05, + "loss": 0.1215, + "step": 12080 + }, + { + "epoch": 0.3124596180187631, + "grad_norm": 0.04172994941473007, + "learning_rate": 9.387982867725626e-05, + "loss": 0.1219, + "step": 12090 + }, + { + "epoch": 0.31271806269867936, + "grad_norm": 0.03231843560934067, + "learning_rate": 9.38609285097679e-05, + "loss": 0.122, + "step": 12100 + }, + { + "epoch": 0.3129765073785956, + "grad_norm": 0.0416342094540596, + "learning_rate": 9.38420020087742e-05, + "loss": 0.1217, + "step": 12110 + }, + { + "epoch": 0.3132349520585119, + "grad_norm": 0.03614883869886398, + "learning_rate": 9.382304919154419e-05, + "loss": 0.1214, + "step": 12120 + }, + { + "epoch": 0.31349339673842813, + "grad_norm": 0.03711793199181557, + "learning_rate": 9.380407007537096e-05, + "loss": 0.122, + "step": 12130 + }, + { + "epoch": 0.3137518414183444, + "grad_norm": 0.05027097836136818, + "learning_rate": 9.378506467757154e-05, + "loss": 0.1214, + "step": 12140 + }, + { + "epoch": 0.31401028609826065, + "grad_norm": 0.024195624515414238, + "learning_rate": 9.376603301548699e-05, + "loss": 0.1212, + "step": 12150 + }, + { + "epoch": 0.3142687307781769, + "grad_norm": 0.045561086386442184, + "learning_rate": 9.374697510648236e-05, + "loss": 0.1223, + "step": 12160 + }, + { + "epoch": 0.3145271754580932, + "grad_norm": 0.029482401907444, + "learning_rate": 9.372789096794659e-05, + "loss": 0.1216, + "step": 12170 + }, + { + "epoch": 0.3147856201380095, + "grad_norm": 0.025346694514155388, + "learning_rate": 9.370878061729256e-05, + "loss": 0.1218, + "step": 12180 + }, + { + "epoch": 0.31504406481792574, + "grad_norm": 0.025767819955945015, + "learning_rate": 9.368964407195708e-05, + "loss": 0.1218, + "step": 12190 + }, + { + "epoch": 0.315302509497842, + "grad_norm": 0.03474888578057289, + "learning_rate": 9.367048134940087e-05, + "loss": 0.1217, + "step": 12200 + }, + { + "epoch": 0.31556095417775826, + "grad_norm": 0.024724144488573074, + "learning_rate": 9.365129246710853e-05, + "loss": 0.1219, + "step": 12210 + }, + { + "epoch": 0.3158193988576745, + "grad_norm": 0.05747416242957115, + "learning_rate": 9.36320774425885e-05, + "loss": 0.122, + "step": 12220 + }, + { + "epoch": 0.3160778435375908, + "grad_norm": 0.03269296884536743, + "learning_rate": 9.361283629337315e-05, + "loss": 0.1221, + "step": 12230 + }, + { + "epoch": 0.31633628821750703, + "grad_norm": 0.03313225507736206, + "learning_rate": 9.359356903701857e-05, + "loss": 0.1217, + "step": 12240 + }, + { + "epoch": 0.3165947328974233, + "grad_norm": 0.02902820333838463, + "learning_rate": 9.35742756911048e-05, + "loss": 0.122, + "step": 12250 + }, + { + "epoch": 0.31685317757733955, + "grad_norm": 0.042222216725349426, + "learning_rate": 9.355495627323554e-05, + "loss": 0.122, + "step": 12260 + }, + { + "epoch": 0.3171116222572558, + "grad_norm": 0.04144883528351784, + "learning_rate": 9.353561080103843e-05, + "loss": 0.1217, + "step": 12270 + }, + { + "epoch": 0.3173700669371721, + "grad_norm": 0.026003165170550346, + "learning_rate": 9.351623929216478e-05, + "loss": 0.1213, + "step": 12280 + }, + { + "epoch": 0.3176285116170884, + "grad_norm": 0.03859533742070198, + "learning_rate": 9.349684176428967e-05, + "loss": 0.1224, + "step": 12290 + }, + { + "epoch": 0.31788695629700464, + "grad_norm": 0.04796777293086052, + "learning_rate": 9.347741823511198e-05, + "loss": 0.1224, + "step": 12300 + }, + { + "epoch": 0.3181454009769209, + "grad_norm": 0.04421311989426613, + "learning_rate": 9.345796872235426e-05, + "loss": 0.1221, + "step": 12310 + }, + { + "epoch": 0.31840384565683716, + "grad_norm": 0.0835820734500885, + "learning_rate": 9.343849324376276e-05, + "loss": 0.1209, + "step": 12320 + }, + { + "epoch": 0.3186622903367534, + "grad_norm": 0.03914449363946915, + "learning_rate": 9.341899181710747e-05, + "loss": 0.1213, + "step": 12330 + }, + { + "epoch": 0.3189207350166697, + "grad_norm": 0.03202883526682854, + "learning_rate": 9.339946446018199e-05, + "loss": 0.1218, + "step": 12340 + }, + { + "epoch": 0.31917917969658594, + "grad_norm": 0.03898052126169205, + "learning_rate": 9.337991119080365e-05, + "loss": 0.1214, + "step": 12350 + }, + { + "epoch": 0.3194376243765022, + "grad_norm": 0.02943085879087448, + "learning_rate": 9.33603320268134e-05, + "loss": 0.1215, + "step": 12360 + }, + { + "epoch": 0.31969606905641845, + "grad_norm": 0.0431303009390831, + "learning_rate": 9.334072698607576e-05, + "loss": 0.1225, + "step": 12370 + }, + { + "epoch": 0.3199545137363347, + "grad_norm": 0.04247934743762016, + "learning_rate": 9.332109608647897e-05, + "loss": 0.1217, + "step": 12380 + }, + { + "epoch": 0.320212958416251, + "grad_norm": 0.03772643953561783, + "learning_rate": 9.330143934593476e-05, + "loss": 0.1217, + "step": 12390 + }, + { + "epoch": 0.3204714030961673, + "grad_norm": 0.03108297474682331, + "learning_rate": 9.32817567823785e-05, + "loss": 0.1216, + "step": 12400 + }, + { + "epoch": 0.32072984777608354, + "grad_norm": 0.03130755200982094, + "learning_rate": 9.326204841376909e-05, + "loss": 0.1214, + "step": 12410 + }, + { + "epoch": 0.3209882924559998, + "grad_norm": 0.029347671195864677, + "learning_rate": 9.324231425808898e-05, + "loss": 0.1218, + "step": 12420 + }, + { + "epoch": 0.32124673713591606, + "grad_norm": 0.04438617452979088, + "learning_rate": 9.32225543333442e-05, + "loss": 0.1218, + "step": 12430 + }, + { + "epoch": 0.3215051818158323, + "grad_norm": 0.028577731922268867, + "learning_rate": 9.320276865756419e-05, + "loss": 0.1214, + "step": 12440 + }, + { + "epoch": 0.3217636264957486, + "grad_norm": 0.0271168015897274, + "learning_rate": 9.318295724880197e-05, + "loss": 0.1216, + "step": 12450 + }, + { + "epoch": 0.32202207117566484, + "grad_norm": 0.03334367275238037, + "learning_rate": 9.316312012513404e-05, + "loss": 0.1212, + "step": 12460 + }, + { + "epoch": 0.3222805158555811, + "grad_norm": 0.03410699963569641, + "learning_rate": 9.314325730466031e-05, + "loss": 0.1225, + "step": 12470 + }, + { + "epoch": 0.32253896053549735, + "grad_norm": 0.055193282663822174, + "learning_rate": 9.312336880550418e-05, + "loss": 0.1215, + "step": 12480 + }, + { + "epoch": 0.3227974052154136, + "grad_norm": 0.03262905031442642, + "learning_rate": 9.310345464581245e-05, + "loss": 0.1214, + "step": 12490 + }, + { + "epoch": 0.3230558498953299, + "grad_norm": 0.044232212007045746, + "learning_rate": 9.308351484375535e-05, + "loss": 0.1212, + "step": 12500 + }, + { + "epoch": 0.3233142945752462, + "grad_norm": 0.028944650664925575, + "learning_rate": 9.30635494175265e-05, + "loss": 0.1217, + "step": 12510 + }, + { + "epoch": 0.32357273925516244, + "grad_norm": 0.03899698704481125, + "learning_rate": 9.304355838534291e-05, + "loss": 0.1216, + "step": 12520 + }, + { + "epoch": 0.3238311839350787, + "grad_norm": 0.036555949598550797, + "learning_rate": 9.302354176544497e-05, + "loss": 0.1215, + "step": 12530 + }, + { + "epoch": 0.32408962861499496, + "grad_norm": 0.03711419180035591, + "learning_rate": 9.300349957609636e-05, + "loss": 0.1215, + "step": 12540 + }, + { + "epoch": 0.3243480732949112, + "grad_norm": 0.0644671767950058, + "learning_rate": 9.298343183558414e-05, + "loss": 0.1212, + "step": 12550 + }, + { + "epoch": 0.3246065179748275, + "grad_norm": 0.04341734200716019, + "learning_rate": 9.296333856221864e-05, + "loss": 0.1228, + "step": 12560 + }, + { + "epoch": 0.32486496265474374, + "grad_norm": 0.03166285157203674, + "learning_rate": 9.294321977433357e-05, + "loss": 0.1223, + "step": 12570 + }, + { + "epoch": 0.32512340733466, + "grad_norm": 0.025116942822933197, + "learning_rate": 9.292307549028582e-05, + "loss": 0.1218, + "step": 12580 + }, + { + "epoch": 0.32538185201457626, + "grad_norm": 0.05537975952029228, + "learning_rate": 9.290290572845564e-05, + "loss": 0.1216, + "step": 12590 + }, + { + "epoch": 0.32564029669449257, + "grad_norm": 0.04656730219721794, + "learning_rate": 9.288271050724642e-05, + "loss": 0.1217, + "step": 12600 + }, + { + "epoch": 0.32589874137440883, + "grad_norm": 0.04235406592488289, + "learning_rate": 9.286248984508486e-05, + "loss": 0.1222, + "step": 12610 + }, + { + "epoch": 0.3261571860543251, + "grad_norm": 0.027028178796172142, + "learning_rate": 9.284224376042083e-05, + "loss": 0.1216, + "step": 12620 + }, + { + "epoch": 0.32641563073424135, + "grad_norm": 0.04051770269870758, + "learning_rate": 9.282197227172744e-05, + "loss": 0.1213, + "step": 12630 + }, + { + "epoch": 0.3266740754141576, + "grad_norm": 0.036147598177194595, + "learning_rate": 9.280167539750096e-05, + "loss": 0.1217, + "step": 12640 + }, + { + "epoch": 0.32693252009407386, + "grad_norm": 0.024418434128165245, + "learning_rate": 9.278135315626079e-05, + "loss": 0.1208, + "step": 12650 + }, + { + "epoch": 0.3271909647739901, + "grad_norm": 0.03569045290350914, + "learning_rate": 9.276100556654954e-05, + "loss": 0.1215, + "step": 12660 + }, + { + "epoch": 0.3274494094539064, + "grad_norm": 0.03175372630357742, + "learning_rate": 9.274063264693287e-05, + "loss": 0.1218, + "step": 12670 + }, + { + "epoch": 0.32770785413382264, + "grad_norm": 0.029030537232756615, + "learning_rate": 9.27202344159996e-05, + "loss": 0.1216, + "step": 12680 + }, + { + "epoch": 0.3279662988137389, + "grad_norm": 0.02810235135257244, + "learning_rate": 9.269981089236164e-05, + "loss": 0.1219, + "step": 12690 + }, + { + "epoch": 0.32822474349365516, + "grad_norm": 0.07774066179990768, + "learning_rate": 9.2679362094654e-05, + "loss": 0.1212, + "step": 12700 + }, + { + "epoch": 0.32848318817357147, + "grad_norm": 0.04416198283433914, + "learning_rate": 9.265888804153468e-05, + "loss": 0.1219, + "step": 12710 + }, + { + "epoch": 0.32874163285348773, + "grad_norm": 0.06565920263528824, + "learning_rate": 9.263838875168482e-05, + "loss": 0.1221, + "step": 12720 + }, + { + "epoch": 0.329000077533404, + "grad_norm": 0.028355496004223824, + "learning_rate": 9.261786424380849e-05, + "loss": 0.1213, + "step": 12730 + }, + { + "epoch": 0.32925852221332025, + "grad_norm": 0.029273847118020058, + "learning_rate": 9.259731453663285e-05, + "loss": 0.1223, + "step": 12740 + }, + { + "epoch": 0.3295169668932365, + "grad_norm": 0.03665174916386604, + "learning_rate": 9.2576739648908e-05, + "loss": 0.1224, + "step": 12750 + }, + { + "epoch": 0.32977541157315277, + "grad_norm": 0.04154312238097191, + "learning_rate": 9.255613959940705e-05, + "loss": 0.121, + "step": 12760 + }, + { + "epoch": 0.330033856253069, + "grad_norm": 0.03225771337747574, + "learning_rate": 9.253551440692603e-05, + "loss": 0.122, + "step": 12770 + }, + { + "epoch": 0.3302923009329853, + "grad_norm": 0.035893041640520096, + "learning_rate": 9.251486409028396e-05, + "loss": 0.1216, + "step": 12780 + }, + { + "epoch": 0.33055074561290154, + "grad_norm": 0.06265976279973984, + "learning_rate": 9.249418866832272e-05, + "loss": 0.1221, + "step": 12790 + }, + { + "epoch": 0.3308091902928178, + "grad_norm": 0.03532573953270912, + "learning_rate": 9.247348815990717e-05, + "loss": 0.1219, + "step": 12800 + }, + { + "epoch": 0.33106763497273406, + "grad_norm": 0.03387587144970894, + "learning_rate": 9.245276258392502e-05, + "loss": 0.1218, + "step": 12810 + }, + { + "epoch": 0.3313260796526504, + "grad_norm": 0.03035661391913891, + "learning_rate": 9.243201195928682e-05, + "loss": 0.1221, + "step": 12820 + }, + { + "epoch": 0.33158452433256663, + "grad_norm": 0.032076217234134674, + "learning_rate": 9.241123630492603e-05, + "loss": 0.1217, + "step": 12830 + }, + { + "epoch": 0.3318429690124829, + "grad_norm": 0.04841256141662598, + "learning_rate": 9.239043563979896e-05, + "loss": 0.1218, + "step": 12840 + }, + { + "epoch": 0.33210141369239915, + "grad_norm": 0.03420884162187576, + "learning_rate": 9.236960998288466e-05, + "loss": 0.1221, + "step": 12850 + }, + { + "epoch": 0.3323598583723154, + "grad_norm": 0.033336903899908066, + "learning_rate": 9.234875935318507e-05, + "loss": 0.1214, + "step": 12860 + }, + { + "epoch": 0.33261830305223167, + "grad_norm": 0.031326305121183395, + "learning_rate": 9.232788376972488e-05, + "loss": 0.1211, + "step": 12870 + }, + { + "epoch": 0.3328767477321479, + "grad_norm": 0.02930474281311035, + "learning_rate": 9.230698325155149e-05, + "loss": 0.1217, + "step": 12880 + }, + { + "epoch": 0.3331351924120642, + "grad_norm": 0.02888321503996849, + "learning_rate": 9.228605781773518e-05, + "loss": 0.1221, + "step": 12890 + }, + { + "epoch": 0.33339363709198044, + "grad_norm": 0.027098756283521652, + "learning_rate": 9.226510748736887e-05, + "loss": 0.121, + "step": 12900 + }, + { + "epoch": 0.3336520817718967, + "grad_norm": 0.029863417148590088, + "learning_rate": 9.224413227956818e-05, + "loss": 0.1215, + "step": 12910 + }, + { + "epoch": 0.333910526451813, + "grad_norm": 0.07136815041303635, + "learning_rate": 9.222313221347152e-05, + "loss": 0.1213, + "step": 12920 + }, + { + "epoch": 0.3341689711317293, + "grad_norm": 0.06506688892841339, + "learning_rate": 9.220210730823989e-05, + "loss": 0.1213, + "step": 12930 + }, + { + "epoch": 0.33442741581164553, + "grad_norm": 0.030564984306693077, + "learning_rate": 9.218105758305702e-05, + "loss": 0.1214, + "step": 12940 + }, + { + "epoch": 0.3346858604915618, + "grad_norm": 0.04437290132045746, + "learning_rate": 9.215998305712923e-05, + "loss": 0.1216, + "step": 12950 + }, + { + "epoch": 0.33494430517147805, + "grad_norm": 0.027453498914837837, + "learning_rate": 9.213888374968548e-05, + "loss": 0.1218, + "step": 12960 + }, + { + "epoch": 0.3352027498513943, + "grad_norm": 0.06030607223510742, + "learning_rate": 9.211775967997744e-05, + "loss": 0.1213, + "step": 12970 + }, + { + "epoch": 0.33546119453131057, + "grad_norm": 0.03820481151342392, + "learning_rate": 9.209661086727923e-05, + "loss": 0.1209, + "step": 12980 + }, + { + "epoch": 0.33571963921122683, + "grad_norm": 0.03127723187208176, + "learning_rate": 9.207543733088761e-05, + "loss": 0.1216, + "step": 12990 + }, + { + "epoch": 0.3359780838911431, + "grad_norm": 0.049876440316438675, + "learning_rate": 9.205423909012194e-05, + "loss": 0.1213, + "step": 13000 + }, + { + "epoch": 0.33623652857105935, + "grad_norm": 0.03116350620985031, + "learning_rate": 9.203301616432402e-05, + "loss": 0.1203, + "step": 13010 + }, + { + "epoch": 0.3364949732509756, + "grad_norm": 0.038726434111595154, + "learning_rate": 9.201176857285827e-05, + "loss": 0.1223, + "step": 13020 + }, + { + "epoch": 0.3367534179308919, + "grad_norm": 0.02992267534136772, + "learning_rate": 9.199049633511157e-05, + "loss": 0.1217, + "step": 13030 + }, + { + "epoch": 0.3370118626108082, + "grad_norm": 0.030919082462787628, + "learning_rate": 9.196919947049328e-05, + "loss": 0.1213, + "step": 13040 + }, + { + "epoch": 0.33727030729072444, + "grad_norm": 0.030193444341421127, + "learning_rate": 9.194787799843525e-05, + "loss": 0.1214, + "step": 13050 + }, + { + "epoch": 0.3375287519706407, + "grad_norm": 0.03270939365029335, + "learning_rate": 9.192653193839179e-05, + "loss": 0.1214, + "step": 13060 + }, + { + "epoch": 0.33778719665055695, + "grad_norm": 0.033571552485227585, + "learning_rate": 9.190516130983964e-05, + "loss": 0.122, + "step": 13070 + }, + { + "epoch": 0.3380456413304732, + "grad_norm": 0.03965329751372337, + "learning_rate": 9.188376613227791e-05, + "loss": 0.122, + "step": 13080 + }, + { + "epoch": 0.33830408601038947, + "grad_norm": 0.033574312925338745, + "learning_rate": 9.18623464252282e-05, + "loss": 0.1208, + "step": 13090 + }, + { + "epoch": 0.33856253069030573, + "grad_norm": 0.030309537425637245, + "learning_rate": 9.18409022082344e-05, + "loss": 0.1219, + "step": 13100 + }, + { + "epoch": 0.338820975370222, + "grad_norm": 0.0387999527156353, + "learning_rate": 9.181943350086282e-05, + "loss": 0.1211, + "step": 13110 + }, + { + "epoch": 0.33907942005013825, + "grad_norm": 0.036861252039670944, + "learning_rate": 9.17979403227021e-05, + "loss": 0.1205, + "step": 13120 + }, + { + "epoch": 0.3393378647300545, + "grad_norm": 0.02583259716629982, + "learning_rate": 9.177642269336322e-05, + "loss": 0.121, + "step": 13130 + }, + { + "epoch": 0.3395963094099708, + "grad_norm": 0.034050341695547104, + "learning_rate": 9.175488063247942e-05, + "loss": 0.1219, + "step": 13140 + }, + { + "epoch": 0.3398547540898871, + "grad_norm": 0.032020434737205505, + "learning_rate": 9.173331415970629e-05, + "loss": 0.1221, + "step": 13150 + }, + { + "epoch": 0.34011319876980334, + "grad_norm": 0.03812558948993683, + "learning_rate": 9.171172329472169e-05, + "loss": 0.1219, + "step": 13160 + }, + { + "epoch": 0.3403716434497196, + "grad_norm": 0.03834966570138931, + "learning_rate": 9.169010805722571e-05, + "loss": 0.1212, + "step": 13170 + }, + { + "epoch": 0.34063008812963586, + "grad_norm": 0.03149382025003433, + "learning_rate": 9.166846846694066e-05, + "loss": 0.1223, + "step": 13180 + }, + { + "epoch": 0.3408885328095521, + "grad_norm": 0.03641461208462715, + "learning_rate": 9.164680454361115e-05, + "loss": 0.1214, + "step": 13190 + }, + { + "epoch": 0.3411469774894684, + "grad_norm": 0.03819182887673378, + "learning_rate": 9.162511630700388e-05, + "loss": 0.1218, + "step": 13200 + }, + { + "epoch": 0.34140542216938463, + "grad_norm": 0.040327273309230804, + "learning_rate": 9.160340377690783e-05, + "loss": 0.122, + "step": 13210 + }, + { + "epoch": 0.3416638668493009, + "grad_norm": 0.03260789066553116, + "learning_rate": 9.15816669731341e-05, + "loss": 0.1221, + "step": 13220 + }, + { + "epoch": 0.34192231152921715, + "grad_norm": 0.0474945530295372, + "learning_rate": 9.155990591551595e-05, + "loss": 0.1211, + "step": 13230 + }, + { + "epoch": 0.3421807562091334, + "grad_norm": 0.03384767472743988, + "learning_rate": 9.153812062390879e-05, + "loss": 0.1203, + "step": 13240 + }, + { + "epoch": 0.3424392008890497, + "grad_norm": 0.05048695206642151, + "learning_rate": 9.151631111819009e-05, + "loss": 0.1217, + "step": 13250 + }, + { + "epoch": 0.342697645568966, + "grad_norm": 0.03766482695937157, + "learning_rate": 9.149447741825943e-05, + "loss": 0.122, + "step": 13260 + }, + { + "epoch": 0.34295609024888224, + "grad_norm": 0.04187513142824173, + "learning_rate": 9.147261954403852e-05, + "loss": 0.1218, + "step": 13270 + }, + { + "epoch": 0.3432145349287985, + "grad_norm": 0.03806815668940544, + "learning_rate": 9.145073751547106e-05, + "loss": 0.1218, + "step": 13280 + }, + { + "epoch": 0.34347297960871476, + "grad_norm": 0.03000558167695999, + "learning_rate": 9.142883135252283e-05, + "loss": 0.1213, + "step": 13290 + }, + { + "epoch": 0.343731424288631, + "grad_norm": 0.037972886115312576, + "learning_rate": 9.140690107518159e-05, + "loss": 0.1217, + "step": 13300 + }, + { + "epoch": 0.3439898689685473, + "grad_norm": 0.025591548532247543, + "learning_rate": 9.138494670345717e-05, + "loss": 0.121, + "step": 13310 + }, + { + "epoch": 0.34424831364846353, + "grad_norm": 0.022261573001742363, + "learning_rate": 9.136296825738129e-05, + "loss": 0.1216, + "step": 13320 + }, + { + "epoch": 0.3445067583283798, + "grad_norm": 0.039037156850099564, + "learning_rate": 9.134096575700772e-05, + "loss": 0.1214, + "step": 13330 + }, + { + "epoch": 0.34476520300829605, + "grad_norm": 0.03112884610891342, + "learning_rate": 9.131893922241215e-05, + "loss": 0.1216, + "step": 13340 + }, + { + "epoch": 0.34502364768821236, + "grad_norm": 0.048142462968826294, + "learning_rate": 9.129688867369221e-05, + "loss": 0.1217, + "step": 13350 + }, + { + "epoch": 0.3452820923681286, + "grad_norm": 0.031068528071045876, + "learning_rate": 9.127481413096739e-05, + "loss": 0.1216, + "step": 13360 + }, + { + "epoch": 0.3455405370480449, + "grad_norm": 0.032215237617492676, + "learning_rate": 9.125271561437911e-05, + "loss": 0.1212, + "step": 13370 + }, + { + "epoch": 0.34579898172796114, + "grad_norm": 0.04012627899646759, + "learning_rate": 9.123059314409072e-05, + "loss": 0.1216, + "step": 13380 + }, + { + "epoch": 0.3460574264078774, + "grad_norm": 0.04136539250612259, + "learning_rate": 9.12084467402873e-05, + "loss": 0.1217, + "step": 13390 + }, + { + "epoch": 0.34631587108779366, + "grad_norm": 0.03219679370522499, + "learning_rate": 9.11862764231759e-05, + "loss": 0.122, + "step": 13400 + }, + { + "epoch": 0.3465743157677099, + "grad_norm": 0.12267234176397324, + "learning_rate": 9.116408221298529e-05, + "loss": 0.1225, + "step": 13410 + }, + { + "epoch": 0.3468327604476262, + "grad_norm": 0.445069283246994, + "learning_rate": 9.114186412996608e-05, + "loss": 0.2143, + "step": 13420 + }, + { + "epoch": 0.34709120512754243, + "grad_norm": 0.06402876228094101, + "learning_rate": 9.111962219439067e-05, + "loss": 0.1463, + "step": 13430 + }, + { + "epoch": 0.3473496498074587, + "grad_norm": 0.06800436228513718, + "learning_rate": 9.109735642655321e-05, + "loss": 0.1278, + "step": 13440 + }, + { + "epoch": 0.34760809448737495, + "grad_norm": 0.04666073992848396, + "learning_rate": 9.10750668467696e-05, + "loss": 0.1256, + "step": 13450 + }, + { + "epoch": 0.34786653916729127, + "grad_norm": 0.055893171578645706, + "learning_rate": 9.105275347537748e-05, + "loss": 0.1244, + "step": 13460 + }, + { + "epoch": 0.3481249838472075, + "grad_norm": 0.03187018632888794, + "learning_rate": 9.103041633273616e-05, + "loss": 0.1241, + "step": 13470 + }, + { + "epoch": 0.3483834285271238, + "grad_norm": 0.023847341537475586, + "learning_rate": 9.100805543922668e-05, + "loss": 0.1231, + "step": 13480 + }, + { + "epoch": 0.34864187320704004, + "grad_norm": 0.03425797075033188, + "learning_rate": 9.09856708152517e-05, + "loss": 0.1223, + "step": 13490 + }, + { + "epoch": 0.3489003178869563, + "grad_norm": 0.028313126415014267, + "learning_rate": 9.096326248123563e-05, + "loss": 0.1216, + "step": 13500 + }, + { + "epoch": 0.34915876256687256, + "grad_norm": 0.05023081973195076, + "learning_rate": 9.09408304576244e-05, + "loss": 0.1222, + "step": 13510 + }, + { + "epoch": 0.3494172072467888, + "grad_norm": 0.039673615247011185, + "learning_rate": 9.091837476488563e-05, + "loss": 0.1218, + "step": 13520 + }, + { + "epoch": 0.3496756519267051, + "grad_norm": 0.04772160202264786, + "learning_rate": 9.089589542350851e-05, + "loss": 0.1218, + "step": 13530 + }, + { + "epoch": 0.34993409660662134, + "grad_norm": 0.03976023197174072, + "learning_rate": 9.087339245400379e-05, + "loss": 0.1225, + "step": 13540 + }, + { + "epoch": 0.3501925412865376, + "grad_norm": 0.02901560440659523, + "learning_rate": 9.085086587690381e-05, + "loss": 0.1216, + "step": 13550 + }, + { + "epoch": 0.35045098596645385, + "grad_norm": 0.02778632380068302, + "learning_rate": 9.082831571276247e-05, + "loss": 0.1215, + "step": 13560 + }, + { + "epoch": 0.35070943064637017, + "grad_norm": 0.03755004703998566, + "learning_rate": 9.08057419821551e-05, + "loss": 0.1212, + "step": 13570 + }, + { + "epoch": 0.3509678753262864, + "grad_norm": 0.027834445238113403, + "learning_rate": 9.078314470567863e-05, + "loss": 0.1217, + "step": 13580 + }, + { + "epoch": 0.3512263200062027, + "grad_norm": 0.022795420140028, + "learning_rate": 9.076052390395145e-05, + "loss": 0.1216, + "step": 13590 + }, + { + "epoch": 0.35148476468611894, + "grad_norm": 0.02732360176742077, + "learning_rate": 9.073787959761337e-05, + "loss": 0.1214, + "step": 13600 + }, + { + "epoch": 0.3517432093660352, + "grad_norm": 0.036977618932724, + "learning_rate": 9.07152118073257e-05, + "loss": 0.1215, + "step": 13610 + }, + { + "epoch": 0.35200165404595146, + "grad_norm": 0.03406471759080887, + "learning_rate": 9.069252055377113e-05, + "loss": 0.1208, + "step": 13620 + }, + { + "epoch": 0.3522600987258677, + "grad_norm": 0.02790599875152111, + "learning_rate": 9.066980585765381e-05, + "loss": 0.1215, + "step": 13630 + }, + { + "epoch": 0.352518543405784, + "grad_norm": 0.06513118743896484, + "learning_rate": 9.064706773969924e-05, + "loss": 0.1214, + "step": 13640 + }, + { + "epoch": 0.35277698808570024, + "grad_norm": 0.022249823436141014, + "learning_rate": 9.062430622065432e-05, + "loss": 0.1203, + "step": 13650 + }, + { + "epoch": 0.3530354327656165, + "grad_norm": 0.03870299085974693, + "learning_rate": 9.060152132128727e-05, + "loss": 0.1205, + "step": 13660 + }, + { + "epoch": 0.3532938774455328, + "grad_norm": 0.03262690082192421, + "learning_rate": 9.057871306238767e-05, + "loss": 0.1213, + "step": 13670 + }, + { + "epoch": 0.35355232212544907, + "grad_norm": 0.02956850454211235, + "learning_rate": 9.055588146476642e-05, + "loss": 0.1209, + "step": 13680 + }, + { + "epoch": 0.35381076680536533, + "grad_norm": 0.05366363003849983, + "learning_rate": 9.053302654925564e-05, + "loss": 0.1216, + "step": 13690 + }, + { + "epoch": 0.3540692114852816, + "grad_norm": 0.031151797622442245, + "learning_rate": 9.051014833670883e-05, + "loss": 0.1219, + "step": 13700 + }, + { + "epoch": 0.35432765616519785, + "grad_norm": 0.034888025373220444, + "learning_rate": 9.04872468480007e-05, + "loss": 0.1205, + "step": 13710 + }, + { + "epoch": 0.3545861008451141, + "grad_norm": 0.07956263422966003, + "learning_rate": 9.046432210402717e-05, + "loss": 0.121, + "step": 13720 + }, + { + "epoch": 0.35484454552503036, + "grad_norm": 0.045207954943180084, + "learning_rate": 9.044137412570544e-05, + "loss": 0.121, + "step": 13730 + }, + { + "epoch": 0.3551029902049466, + "grad_norm": 0.034098803997039795, + "learning_rate": 9.041840293397385e-05, + "loss": 0.1209, + "step": 13740 + }, + { + "epoch": 0.3553614348848629, + "grad_norm": 0.026122646406292915, + "learning_rate": 9.039540854979198e-05, + "loss": 0.1216, + "step": 13750 + }, + { + "epoch": 0.35561987956477914, + "grad_norm": 0.031135277822613716, + "learning_rate": 9.037239099414052e-05, + "loss": 0.1213, + "step": 13760 + }, + { + "epoch": 0.3558783242446954, + "grad_norm": 0.03749995678663254, + "learning_rate": 9.034935028802131e-05, + "loss": 0.1213, + "step": 13770 + }, + { + "epoch": 0.3561367689246117, + "grad_norm": 0.03721682354807854, + "learning_rate": 9.032628645245734e-05, + "loss": 0.1215, + "step": 13780 + }, + { + "epoch": 0.35639521360452797, + "grad_norm": 0.03912218287587166, + "learning_rate": 9.030319950849266e-05, + "loss": 0.1211, + "step": 13790 + }, + { + "epoch": 0.35665365828444423, + "grad_norm": 0.05804960057139397, + "learning_rate": 9.028008947719248e-05, + "loss": 0.121, + "step": 13800 + }, + { + "epoch": 0.3569121029643605, + "grad_norm": 0.04613836109638214, + "learning_rate": 9.025695637964303e-05, + "loss": 0.1212, + "step": 13810 + }, + { + "epoch": 0.35717054764427675, + "grad_norm": 0.026790676638484, + "learning_rate": 9.023380023695152e-05, + "loss": 0.1211, + "step": 13820 + }, + { + "epoch": 0.357428992324193, + "grad_norm": 0.031718842685222626, + "learning_rate": 9.021062107024632e-05, + "loss": 0.1214, + "step": 13830 + }, + { + "epoch": 0.35768743700410927, + "grad_norm": 0.04419882968068123, + "learning_rate": 9.018741890067667e-05, + "loss": 0.1214, + "step": 13840 + }, + { + "epoch": 0.3579458816840255, + "grad_norm": 0.03483813256025314, + "learning_rate": 9.016419374941294e-05, + "loss": 0.1207, + "step": 13850 + }, + { + "epoch": 0.3582043263639418, + "grad_norm": 0.043418727815151215, + "learning_rate": 9.014094563764637e-05, + "loss": 0.1215, + "step": 13860 + }, + { + "epoch": 0.35846277104385804, + "grad_norm": 0.025582218542695045, + "learning_rate": 9.011767458658915e-05, + "loss": 0.1212, + "step": 13870 + }, + { + "epoch": 0.3587212157237743, + "grad_norm": 0.03553914651274681, + "learning_rate": 9.009438061747448e-05, + "loss": 0.1204, + "step": 13880 + }, + { + "epoch": 0.3589796604036906, + "grad_norm": 0.03632952645421028, + "learning_rate": 9.007106375155636e-05, + "loss": 0.1209, + "step": 13890 + }, + { + "epoch": 0.3592381050836069, + "grad_norm": 0.03157756105065346, + "learning_rate": 9.004772401010979e-05, + "loss": 0.1212, + "step": 13900 + }, + { + "epoch": 0.35949654976352313, + "grad_norm": 0.03582099825143814, + "learning_rate": 9.002436141443056e-05, + "loss": 0.1216, + "step": 13910 + }, + { + "epoch": 0.3597549944434394, + "grad_norm": 0.043058060109615326, + "learning_rate": 9.000097598583537e-05, + "loss": 0.1209, + "step": 13920 + }, + { + "epoch": 0.36001343912335565, + "grad_norm": 0.04315120354294777, + "learning_rate": 8.997756774566168e-05, + "loss": 0.1206, + "step": 13930 + }, + { + "epoch": 0.3602718838032719, + "grad_norm": 0.04328585043549538, + "learning_rate": 8.995413671526789e-05, + "loss": 0.1212, + "step": 13940 + }, + { + "epoch": 0.36053032848318817, + "grad_norm": 0.03706385940313339, + "learning_rate": 8.993068291603306e-05, + "loss": 0.121, + "step": 13950 + }, + { + "epoch": 0.3607887731631044, + "grad_norm": 0.05707446113228798, + "learning_rate": 8.990720636935707e-05, + "loss": 0.1205, + "step": 13960 + }, + { + "epoch": 0.3610472178430207, + "grad_norm": 0.03478289395570755, + "learning_rate": 8.988370709666064e-05, + "loss": 0.1211, + "step": 13970 + }, + { + "epoch": 0.36130566252293694, + "grad_norm": 0.026002783328294754, + "learning_rate": 8.986018511938508e-05, + "loss": 0.1209, + "step": 13980 + }, + { + "epoch": 0.3615641072028532, + "grad_norm": 0.034315504133701324, + "learning_rate": 8.983664045899252e-05, + "loss": 0.1215, + "step": 13990 + }, + { + "epoch": 0.3618225518827695, + "grad_norm": 0.02574988827109337, + "learning_rate": 8.981307313696574e-05, + "loss": 0.121, + "step": 14000 + }, + { + "epoch": 0.3620809965626858, + "grad_norm": 0.03017004393041134, + "learning_rate": 8.978948317480825e-05, + "loss": 0.1208, + "step": 14010 + }, + { + "epoch": 0.36233944124260203, + "grad_norm": 0.040961455553770065, + "learning_rate": 8.976587059404413e-05, + "loss": 0.121, + "step": 14020 + }, + { + "epoch": 0.3625978859225183, + "grad_norm": 0.05607009679079056, + "learning_rate": 8.974223541621819e-05, + "loss": 0.1208, + "step": 14030 + }, + { + "epoch": 0.36285633060243455, + "grad_norm": 0.040210749953985214, + "learning_rate": 8.97185776628958e-05, + "loss": 0.1222, + "step": 14040 + }, + { + "epoch": 0.3631147752823508, + "grad_norm": 0.037868306040763855, + "learning_rate": 8.969489735566292e-05, + "loss": 0.1209, + "step": 14050 + }, + { + "epoch": 0.36337321996226707, + "grad_norm": 0.04606558382511139, + "learning_rate": 8.967119451612618e-05, + "loss": 0.1212, + "step": 14060 + }, + { + "epoch": 0.3636316646421833, + "grad_norm": 0.03677297383546829, + "learning_rate": 8.964746916591264e-05, + "loss": 0.122, + "step": 14070 + }, + { + "epoch": 0.3638901093220996, + "grad_norm": 0.03527934476733208, + "learning_rate": 8.962372132666999e-05, + "loss": 0.1215, + "step": 14080 + }, + { + "epoch": 0.36414855400201585, + "grad_norm": 0.02658902481198311, + "learning_rate": 8.95999510200664e-05, + "loss": 0.1212, + "step": 14090 + }, + { + "epoch": 0.36440699868193216, + "grad_norm": 0.05158054828643799, + "learning_rate": 8.957615826779058e-05, + "loss": 0.1208, + "step": 14100 + }, + { + "epoch": 0.3646654433618484, + "grad_norm": 0.035846956074237823, + "learning_rate": 8.955234309155166e-05, + "loss": 0.1213, + "step": 14110 + }, + { + "epoch": 0.3649238880417647, + "grad_norm": 0.040304481983184814, + "learning_rate": 8.952850551307925e-05, + "loss": 0.1214, + "step": 14120 + }, + { + "epoch": 0.36518233272168094, + "grad_norm": 0.03278599679470062, + "learning_rate": 8.950464555412345e-05, + "loss": 0.1211, + "step": 14130 + }, + { + "epoch": 0.3654407774015972, + "grad_norm": 0.058741964399814606, + "learning_rate": 8.948076323645475e-05, + "loss": 0.1214, + "step": 14140 + }, + { + "epoch": 0.36569922208151345, + "grad_norm": 0.06026862934231758, + "learning_rate": 8.945685858186402e-05, + "loss": 0.1213, + "step": 14150 + }, + { + "epoch": 0.3659576667614297, + "grad_norm": 0.025828177109360695, + "learning_rate": 8.943293161216249e-05, + "loss": 0.1207, + "step": 14160 + }, + { + "epoch": 0.36621611144134597, + "grad_norm": 0.05259772762656212, + "learning_rate": 8.94089823491818e-05, + "loss": 0.1206, + "step": 14170 + }, + { + "epoch": 0.36647455612126223, + "grad_norm": 0.03007924184203148, + "learning_rate": 8.938501081477395e-05, + "loss": 0.121, + "step": 14180 + }, + { + "epoch": 0.3667330008011785, + "grad_norm": 0.024096205830574036, + "learning_rate": 8.936101703081121e-05, + "loss": 0.1214, + "step": 14190 + }, + { + "epoch": 0.36699144548109475, + "grad_norm": 0.0474141500890255, + "learning_rate": 8.933700101918616e-05, + "loss": 0.1212, + "step": 14200 + }, + { + "epoch": 0.36724989016101106, + "grad_norm": 0.060403063893318176, + "learning_rate": 8.931296280181167e-05, + "loss": 0.1212, + "step": 14210 + }, + { + "epoch": 0.3675083348409273, + "grad_norm": 0.040768448263406754, + "learning_rate": 8.92889024006209e-05, + "loss": 0.1215, + "step": 14220 + }, + { + "epoch": 0.3677667795208436, + "grad_norm": 0.02748105302453041, + "learning_rate": 8.926481983756718e-05, + "loss": 0.1209, + "step": 14230 + }, + { + "epoch": 0.36802522420075984, + "grad_norm": 0.024027403444051743, + "learning_rate": 8.924071513462413e-05, + "loss": 0.1213, + "step": 14240 + }, + { + "epoch": 0.3682836688806761, + "grad_norm": 0.06327123939990997, + "learning_rate": 8.921658831378553e-05, + "loss": 0.1213, + "step": 14250 + }, + { + "epoch": 0.36854211356059235, + "grad_norm": 0.05067422613501549, + "learning_rate": 8.919243939706536e-05, + "loss": 0.1215, + "step": 14260 + }, + { + "epoch": 0.3688005582405086, + "grad_norm": 0.03686918318271637, + "learning_rate": 8.916826840649776e-05, + "loss": 0.1212, + "step": 14270 + }, + { + "epoch": 0.3690590029204249, + "grad_norm": 0.026356516405940056, + "learning_rate": 8.914407536413701e-05, + "loss": 0.1212, + "step": 14280 + }, + { + "epoch": 0.36931744760034113, + "grad_norm": 0.029914580285549164, + "learning_rate": 8.91198602920575e-05, + "loss": 0.1212, + "step": 14290 + }, + { + "epoch": 0.3695758922802574, + "grad_norm": 0.04337996616959572, + "learning_rate": 8.909562321235371e-05, + "loss": 0.1212, + "step": 14300 + }, + { + "epoch": 0.36983433696017365, + "grad_norm": 0.032856617122888565, + "learning_rate": 8.907136414714029e-05, + "loss": 0.1203, + "step": 14310 + }, + { + "epoch": 0.37009278164008996, + "grad_norm": 0.030697623267769814, + "learning_rate": 8.904708311855179e-05, + "loss": 0.1213, + "step": 14320 + }, + { + "epoch": 0.3703512263200062, + "grad_norm": 0.03824090585112572, + "learning_rate": 8.902278014874292e-05, + "loss": 0.1211, + "step": 14330 + }, + { + "epoch": 0.3706096709999225, + "grad_norm": 0.034326907247304916, + "learning_rate": 8.899845525988839e-05, + "loss": 0.1207, + "step": 14340 + }, + { + "epoch": 0.37086811567983874, + "grad_norm": 0.032609954476356506, + "learning_rate": 8.89741084741829e-05, + "loss": 0.1218, + "step": 14350 + }, + { + "epoch": 0.371126560359755, + "grad_norm": 0.049623921513557434, + "learning_rate": 8.894973981384113e-05, + "loss": 0.1206, + "step": 14360 + }, + { + "epoch": 0.37138500503967126, + "grad_norm": 0.0493491068482399, + "learning_rate": 8.892534930109771e-05, + "loss": 0.1215, + "step": 14370 + }, + { + "epoch": 0.3716434497195875, + "grad_norm": 0.028201662003993988, + "learning_rate": 8.890093695820721e-05, + "loss": 0.1209, + "step": 14380 + }, + { + "epoch": 0.3719018943995038, + "grad_norm": 0.04292111471295357, + "learning_rate": 8.887650280744408e-05, + "loss": 0.121, + "step": 14390 + }, + { + "epoch": 0.37216033907942003, + "grad_norm": 0.022932175546884537, + "learning_rate": 8.88520468711028e-05, + "loss": 0.1209, + "step": 14400 + }, + { + "epoch": 0.3724187837593363, + "grad_norm": 0.04373687133193016, + "learning_rate": 8.88275691714976e-05, + "loss": 0.1205, + "step": 14410 + }, + { + "epoch": 0.37267722843925255, + "grad_norm": 0.03201518580317497, + "learning_rate": 8.880306973096258e-05, + "loss": 0.1215, + "step": 14420 + }, + { + "epoch": 0.37293567311916886, + "grad_norm": 0.048158250749111176, + "learning_rate": 8.877854857185173e-05, + "loss": 0.1206, + "step": 14430 + }, + { + "epoch": 0.3731941177990851, + "grad_norm": 0.06999403238296509, + "learning_rate": 8.875400571653883e-05, + "loss": 0.1207, + "step": 14440 + }, + { + "epoch": 0.3734525624790014, + "grad_norm": 0.02886328473687172, + "learning_rate": 8.872944118741743e-05, + "loss": 0.121, + "step": 14450 + }, + { + "epoch": 0.37371100715891764, + "grad_norm": 0.02765655145049095, + "learning_rate": 8.870485500690093e-05, + "loss": 0.121, + "step": 14460 + }, + { + "epoch": 0.3739694518388339, + "grad_norm": 0.03687107190489769, + "learning_rate": 8.86802471974224e-05, + "loss": 0.1208, + "step": 14470 + }, + { + "epoch": 0.37422789651875016, + "grad_norm": 0.03592827916145325, + "learning_rate": 8.865561778143472e-05, + "loss": 0.1208, + "step": 14480 + }, + { + "epoch": 0.3744863411986664, + "grad_norm": 0.02940983511507511, + "learning_rate": 8.86309667814104e-05, + "loss": 0.1203, + "step": 14490 + }, + { + "epoch": 0.3747447858785827, + "grad_norm": 0.029390331357717514, + "learning_rate": 8.860629421984173e-05, + "loss": 0.1211, + "step": 14500 + }, + { + "epoch": 0.37500323055849893, + "grad_norm": 0.025275185704231262, + "learning_rate": 8.858160011924062e-05, + "loss": 0.1213, + "step": 14510 + }, + { + "epoch": 0.3752616752384152, + "grad_norm": 0.06097283959388733, + "learning_rate": 8.855688450213862e-05, + "loss": 0.1207, + "step": 14520 + }, + { + "epoch": 0.3755201199183315, + "grad_norm": 0.03421664237976074, + "learning_rate": 8.853214739108698e-05, + "loss": 0.1205, + "step": 14530 + }, + { + "epoch": 0.37577856459824777, + "grad_norm": 0.03257683292031288, + "learning_rate": 8.850738880865648e-05, + "loss": 0.1206, + "step": 14540 + }, + { + "epoch": 0.376037009278164, + "grad_norm": 0.0410638265311718, + "learning_rate": 8.848260877743757e-05, + "loss": 0.1213, + "step": 14550 + }, + { + "epoch": 0.3762954539580803, + "grad_norm": 0.0493670329451561, + "learning_rate": 8.845780732004023e-05, + "loss": 0.1221, + "step": 14560 + }, + { + "epoch": 0.37655389863799654, + "grad_norm": 0.0282796211540699, + "learning_rate": 8.843298445909398e-05, + "loss": 0.1204, + "step": 14570 + }, + { + "epoch": 0.3768123433179128, + "grad_norm": 0.03191213309764862, + "learning_rate": 8.840814021724789e-05, + "loss": 0.121, + "step": 14580 + }, + { + "epoch": 0.37707078799782906, + "grad_norm": 0.037282463163137436, + "learning_rate": 8.838327461717049e-05, + "loss": 0.1217, + "step": 14590 + }, + { + "epoch": 0.3773292326777453, + "grad_norm": 0.04913278669118881, + "learning_rate": 8.835838768154989e-05, + "loss": 0.1212, + "step": 14600 + }, + { + "epoch": 0.3775876773576616, + "grad_norm": 0.05835198238492012, + "learning_rate": 8.833347943309358e-05, + "loss": 0.121, + "step": 14610 + }, + { + "epoch": 0.37784612203757784, + "grad_norm": 0.03687991946935654, + "learning_rate": 8.830854989452858e-05, + "loss": 0.1201, + "step": 14620 + }, + { + "epoch": 0.3781045667174941, + "grad_norm": 0.04715634509921074, + "learning_rate": 8.828359908860122e-05, + "loss": 0.1208, + "step": 14630 + }, + { + "epoch": 0.3783630113974104, + "grad_norm": 0.031608499586582184, + "learning_rate": 8.825862703807735e-05, + "loss": 0.1209, + "step": 14640 + }, + { + "epoch": 0.37862145607732667, + "grad_norm": 0.03498883545398712, + "learning_rate": 8.823363376574212e-05, + "loss": 0.1216, + "step": 14650 + }, + { + "epoch": 0.3788799007572429, + "grad_norm": 0.024721000343561172, + "learning_rate": 8.820861929440012e-05, + "loss": 0.1208, + "step": 14660 + }, + { + "epoch": 0.3791383454371592, + "grad_norm": 0.029720420017838478, + "learning_rate": 8.818358364687521e-05, + "loss": 0.1216, + "step": 14670 + }, + { + "epoch": 0.37939679011707544, + "grad_norm": 0.039848048239946365, + "learning_rate": 8.815852684601063e-05, + "loss": 0.1207, + "step": 14680 + }, + { + "epoch": 0.3796552347969917, + "grad_norm": 0.046364765614271164, + "learning_rate": 8.813344891466886e-05, + "loss": 0.1213, + "step": 14690 + }, + { + "epoch": 0.37991367947690796, + "grad_norm": 0.025613026693463326, + "learning_rate": 8.810834987573173e-05, + "loss": 0.1211, + "step": 14700 + }, + { + "epoch": 0.3801721241568242, + "grad_norm": 0.040389250963926315, + "learning_rate": 8.808322975210028e-05, + "loss": 0.121, + "step": 14710 + }, + { + "epoch": 0.3804305688367405, + "grad_norm": 0.026247890666127205, + "learning_rate": 8.805808856669482e-05, + "loss": 0.1206, + "step": 14720 + }, + { + "epoch": 0.38068901351665674, + "grad_norm": 0.0678076520562172, + "learning_rate": 8.803292634245481e-05, + "loss": 0.1206, + "step": 14730 + }, + { + "epoch": 0.380947458196573, + "grad_norm": 0.027668453752994537, + "learning_rate": 8.8007743102339e-05, + "loss": 0.121, + "step": 14740 + }, + { + "epoch": 0.3812059028764893, + "grad_norm": 0.03344142064452171, + "learning_rate": 8.798253886932523e-05, + "loss": 0.1213, + "step": 14750 + }, + { + "epoch": 0.38146434755640557, + "grad_norm": 0.03172322362661362, + "learning_rate": 8.79573136664106e-05, + "loss": 0.1209, + "step": 14760 + }, + { + "epoch": 0.38172279223632183, + "grad_norm": 0.05664968490600586, + "learning_rate": 8.793206751661125e-05, + "loss": 0.1207, + "step": 14770 + }, + { + "epoch": 0.3819812369162381, + "grad_norm": 0.037683483213186264, + "learning_rate": 8.790680044296242e-05, + "loss": 0.1208, + "step": 14780 + }, + { + "epoch": 0.38223968159615435, + "grad_norm": 0.040950071066617966, + "learning_rate": 8.788151246851854e-05, + "loss": 0.1211, + "step": 14790 + }, + { + "epoch": 0.3824981262760706, + "grad_norm": 0.05533726513385773, + "learning_rate": 8.785620361635303e-05, + "loss": 0.1208, + "step": 14800 + }, + { + "epoch": 0.38275657095598686, + "grad_norm": 0.03497011214494705, + "learning_rate": 8.783087390955837e-05, + "loss": 0.1213, + "step": 14810 + }, + { + "epoch": 0.3830150156359031, + "grad_norm": 0.047613564878702164, + "learning_rate": 8.78055233712461e-05, + "loss": 0.1201, + "step": 14820 + }, + { + "epoch": 0.3832734603158194, + "grad_norm": 0.04198547080159187, + "learning_rate": 8.778015202454676e-05, + "loss": 0.1211, + "step": 14830 + }, + { + "epoch": 0.38353190499573564, + "grad_norm": 0.027341743931174278, + "learning_rate": 8.775475989260984e-05, + "loss": 0.1209, + "step": 14840 + }, + { + "epoch": 0.38379034967565195, + "grad_norm": 0.029078619554638863, + "learning_rate": 8.772934699860383e-05, + "loss": 0.1213, + "step": 14850 + }, + { + "epoch": 0.3840487943555682, + "grad_norm": 0.058480020612478256, + "learning_rate": 8.770391336571615e-05, + "loss": 0.1206, + "step": 14860 + }, + { + "epoch": 0.38430723903548447, + "grad_norm": 0.03456335514783859, + "learning_rate": 8.767845901715314e-05, + "loss": 0.1211, + "step": 14870 + }, + { + "epoch": 0.38456568371540073, + "grad_norm": 0.040624894201755524, + "learning_rate": 8.765298397614008e-05, + "loss": 0.1213, + "step": 14880 + }, + { + "epoch": 0.384824128395317, + "grad_norm": 0.03171835467219353, + "learning_rate": 8.762748826592108e-05, + "loss": 0.1205, + "step": 14890 + }, + { + "epoch": 0.38508257307523325, + "grad_norm": 0.027414778247475624, + "learning_rate": 8.760197190975911e-05, + "loss": 0.1212, + "step": 14900 + }, + { + "epoch": 0.3853410177551495, + "grad_norm": 0.03840572386980057, + "learning_rate": 8.757643493093601e-05, + "loss": 0.1205, + "step": 14910 + }, + { + "epoch": 0.38559946243506577, + "grad_norm": 0.03013455681502819, + "learning_rate": 8.755087735275242e-05, + "loss": 0.1209, + "step": 14920 + }, + { + "epoch": 0.385857907114982, + "grad_norm": 0.0281399916857481, + "learning_rate": 8.752529919852778e-05, + "loss": 0.1212, + "step": 14930 + }, + { + "epoch": 0.3861163517948983, + "grad_norm": 0.029889576137065887, + "learning_rate": 8.749970049160032e-05, + "loss": 0.1218, + "step": 14940 + }, + { + "epoch": 0.38637479647481454, + "grad_norm": 0.07539156824350357, + "learning_rate": 8.747408125532697e-05, + "loss": 0.1213, + "step": 14950 + }, + { + "epoch": 0.38663324115473086, + "grad_norm": 0.03111117333173752, + "learning_rate": 8.744844151308342e-05, + "loss": 0.1214, + "step": 14960 + }, + { + "epoch": 0.3868916858346471, + "grad_norm": 0.058090344071388245, + "learning_rate": 8.742278128826414e-05, + "loss": 0.1209, + "step": 14970 + }, + { + "epoch": 0.3871501305145634, + "grad_norm": 0.030089467763900757, + "learning_rate": 8.739710060428217e-05, + "loss": 0.1219, + "step": 14980 + }, + { + "epoch": 0.38740857519447963, + "grad_norm": 0.03342143073678017, + "learning_rate": 8.737139948456927e-05, + "loss": 0.1209, + "step": 14990 + }, + { + "epoch": 0.3876670198743959, + "grad_norm": 0.03354685381054878, + "learning_rate": 8.734567795257589e-05, + "loss": 0.1212, + "step": 15000 + }, + { + "epoch": 0.38792546455431215, + "grad_norm": 0.030781874433159828, + "learning_rate": 8.731993603177102e-05, + "loss": 0.1209, + "step": 15010 + }, + { + "epoch": 0.3881839092342284, + "grad_norm": 0.03894934058189392, + "learning_rate": 8.729417374564233e-05, + "loss": 0.1206, + "step": 15020 + }, + { + "epoch": 0.38844235391414467, + "grad_norm": 0.05629362165927887, + "learning_rate": 8.7268391117696e-05, + "loss": 0.1216, + "step": 15030 + }, + { + "epoch": 0.3887007985940609, + "grad_norm": 0.05119958519935608, + "learning_rate": 8.724258817145685e-05, + "loss": 0.1205, + "step": 15040 + }, + { + "epoch": 0.3889592432739772, + "grad_norm": 0.029654869809746742, + "learning_rate": 8.72167649304682e-05, + "loss": 0.121, + "step": 15050 + }, + { + "epoch": 0.38921768795389344, + "grad_norm": 0.028428174555301666, + "learning_rate": 8.719092141829184e-05, + "loss": 0.121, + "step": 15060 + }, + { + "epoch": 0.38947613263380976, + "grad_norm": 0.02726399153470993, + "learning_rate": 8.716505765850813e-05, + "loss": 0.1208, + "step": 15070 + }, + { + "epoch": 0.389734577313726, + "grad_norm": 0.06720874458551407, + "learning_rate": 8.713917367471589e-05, + "loss": 0.1217, + "step": 15080 + }, + { + "epoch": 0.3899930219936423, + "grad_norm": 0.046359896659851074, + "learning_rate": 8.711326949053235e-05, + "loss": 0.1206, + "step": 15090 + }, + { + "epoch": 0.39025146667355853, + "grad_norm": 0.030734023079276085, + "learning_rate": 8.708734512959323e-05, + "loss": 0.1207, + "step": 15100 + }, + { + "epoch": 0.3905099113534748, + "grad_norm": 0.053919997066259384, + "learning_rate": 8.706140061555261e-05, + "loss": 0.1208, + "step": 15110 + }, + { + "epoch": 0.39076835603339105, + "grad_norm": 0.07103274017572403, + "learning_rate": 8.703543597208299e-05, + "loss": 0.1215, + "step": 15120 + }, + { + "epoch": 0.3910268007133073, + "grad_norm": 0.05091377720236778, + "learning_rate": 8.70094512228752e-05, + "loss": 0.1211, + "step": 15130 + }, + { + "epoch": 0.39128524539322357, + "grad_norm": 0.027106527239084244, + "learning_rate": 8.698344639163849e-05, + "loss": 0.1206, + "step": 15140 + }, + { + "epoch": 0.3915436900731398, + "grad_norm": 0.03043081983923912, + "learning_rate": 8.695742150210033e-05, + "loss": 0.1212, + "step": 15150 + }, + { + "epoch": 0.3918021347530561, + "grad_norm": 0.03293091431260109, + "learning_rate": 8.693137657800658e-05, + "loss": 0.1204, + "step": 15160 + }, + { + "epoch": 0.39206057943297234, + "grad_norm": 0.034762684255838394, + "learning_rate": 8.690531164312133e-05, + "loss": 0.1209, + "step": 15170 + }, + { + "epoch": 0.39231902411288866, + "grad_norm": 0.02678888477385044, + "learning_rate": 8.687922672122695e-05, + "loss": 0.1207, + "step": 15180 + }, + { + "epoch": 0.3925774687928049, + "grad_norm": 0.029134325683116913, + "learning_rate": 8.685312183612402e-05, + "loss": 0.1207, + "step": 15190 + }, + { + "epoch": 0.3928359134727212, + "grad_norm": 0.027646644040942192, + "learning_rate": 8.68269970116314e-05, + "loss": 0.1207, + "step": 15200 + }, + { + "epoch": 0.39309435815263744, + "grad_norm": 0.035694509744644165, + "learning_rate": 8.680085227158605e-05, + "loss": 0.1212, + "step": 15210 + }, + { + "epoch": 0.3933528028325537, + "grad_norm": 0.0530606284737587, + "learning_rate": 8.67746876398432e-05, + "loss": 0.121, + "step": 15220 + }, + { + "epoch": 0.39361124751246995, + "grad_norm": 0.02615213580429554, + "learning_rate": 8.674850314027611e-05, + "loss": 0.1217, + "step": 15230 + }, + { + "epoch": 0.3938696921923862, + "grad_norm": 0.040157996118068695, + "learning_rate": 8.672229879677632e-05, + "loss": 0.1203, + "step": 15240 + }, + { + "epoch": 0.39412813687230247, + "grad_norm": 0.03433075174689293, + "learning_rate": 8.669607463325334e-05, + "loss": 0.122, + "step": 15250 + }, + { + "epoch": 0.39438658155221873, + "grad_norm": 0.025249049067497253, + "learning_rate": 8.666983067363484e-05, + "loss": 0.1214, + "step": 15260 + }, + { + "epoch": 0.394645026232135, + "grad_norm": 0.04727722331881523, + "learning_rate": 8.66435669418665e-05, + "loss": 0.1213, + "step": 15270 + }, + { + "epoch": 0.3949034709120513, + "grad_norm": 0.02693631872534752, + "learning_rate": 8.661728346191212e-05, + "loss": 0.1206, + "step": 15280 + }, + { + "epoch": 0.39516191559196756, + "grad_norm": 0.06777693331241608, + "learning_rate": 8.659098025775345e-05, + "loss": 0.1214, + "step": 15290 + }, + { + "epoch": 0.3954203602718838, + "grad_norm": 0.023660030215978622, + "learning_rate": 8.656465735339023e-05, + "loss": 0.1218, + "step": 15300 + }, + { + "epoch": 0.3956788049518001, + "grad_norm": 0.03307170793414116, + "learning_rate": 8.653831477284024e-05, + "loss": 0.1211, + "step": 15310 + }, + { + "epoch": 0.39593724963171634, + "grad_norm": 0.03797569125890732, + "learning_rate": 8.651195254013917e-05, + "loss": 0.1209, + "step": 15320 + }, + { + "epoch": 0.3961956943116326, + "grad_norm": 0.04399798437952995, + "learning_rate": 8.648557067934063e-05, + "loss": 0.1216, + "step": 15330 + }, + { + "epoch": 0.39645413899154885, + "grad_norm": 0.04024475812911987, + "learning_rate": 8.645916921451616e-05, + "loss": 0.1209, + "step": 15340 + }, + { + "epoch": 0.3967125836714651, + "grad_norm": 0.03893791139125824, + "learning_rate": 8.643274816975517e-05, + "loss": 0.1211, + "step": 15350 + }, + { + "epoch": 0.39697102835138137, + "grad_norm": 0.062387071549892426, + "learning_rate": 8.640630756916499e-05, + "loss": 0.1217, + "step": 15360 + }, + { + "epoch": 0.39722947303129763, + "grad_norm": 0.06120022386312485, + "learning_rate": 8.637984743687073e-05, + "loss": 0.1217, + "step": 15370 + }, + { + "epoch": 0.3974879177112139, + "grad_norm": 0.034106627106666565, + "learning_rate": 8.635336779701533e-05, + "loss": 0.1215, + "step": 15380 + }, + { + "epoch": 0.3977463623911302, + "grad_norm": 0.04408707469701767, + "learning_rate": 8.632686867375954e-05, + "loss": 0.122, + "step": 15390 + }, + { + "epoch": 0.39800480707104646, + "grad_norm": 0.04395521059632301, + "learning_rate": 8.630035009128192e-05, + "loss": 0.1213, + "step": 15400 + }, + { + "epoch": 0.3982632517509627, + "grad_norm": 0.03679056465625763, + "learning_rate": 8.627381207377871e-05, + "loss": 0.1219, + "step": 15410 + }, + { + "epoch": 0.398521696430879, + "grad_norm": 0.03256494179368019, + "learning_rate": 8.624725464546399e-05, + "loss": 0.1208, + "step": 15420 + }, + { + "epoch": 0.39878014111079524, + "grad_norm": 0.024108340963721275, + "learning_rate": 8.622067783056942e-05, + "loss": 0.1205, + "step": 15430 + }, + { + "epoch": 0.3990385857907115, + "grad_norm": 0.04647469520568848, + "learning_rate": 8.619408165334445e-05, + "loss": 0.1206, + "step": 15440 + }, + { + "epoch": 0.39929703047062776, + "grad_norm": 0.027589308097958565, + "learning_rate": 8.616746613805618e-05, + "loss": 0.1206, + "step": 15450 + }, + { + "epoch": 0.399555475150544, + "grad_norm": 0.0358038954436779, + "learning_rate": 8.614083130898928e-05, + "loss": 0.1208, + "step": 15460 + }, + { + "epoch": 0.3998139198304603, + "grad_norm": 0.0351271815598011, + "learning_rate": 8.611417719044619e-05, + "loss": 0.1217, + "step": 15470 + }, + { + "epoch": 0.40007236451037653, + "grad_norm": 0.031648483127355576, + "learning_rate": 8.60875038067468e-05, + "loss": 0.1209, + "step": 15480 + }, + { + "epoch": 0.4003308091902928, + "grad_norm": 0.03246968612074852, + "learning_rate": 8.606081118222865e-05, + "loss": 0.1214, + "step": 15490 + }, + { + "epoch": 0.4005892538702091, + "grad_norm": 0.03699713572859764, + "learning_rate": 8.60340993412468e-05, + "loss": 0.1216, + "step": 15500 + }, + { + "epoch": 0.40084769855012536, + "grad_norm": 0.028596485033631325, + "learning_rate": 8.600736830817391e-05, + "loss": 0.1208, + "step": 15510 + }, + { + "epoch": 0.4011061432300416, + "grad_norm": 0.025530612096190453, + "learning_rate": 8.598061810740013e-05, + "loss": 0.1213, + "step": 15520 + }, + { + "epoch": 0.4013645879099579, + "grad_norm": 0.03004838526248932, + "learning_rate": 8.595384876333301e-05, + "loss": 0.1209, + "step": 15530 + }, + { + "epoch": 0.40162303258987414, + "grad_norm": 0.03609282150864601, + "learning_rate": 8.592706030039768e-05, + "loss": 0.1215, + "step": 15540 + }, + { + "epoch": 0.4018814772697904, + "grad_norm": 0.026773590594530106, + "learning_rate": 8.590025274303665e-05, + "loss": 0.1215, + "step": 15550 + }, + { + "epoch": 0.40213992194970666, + "grad_norm": 0.034723978489637375, + "learning_rate": 8.587342611570989e-05, + "loss": 0.1209, + "step": 15560 + }, + { + "epoch": 0.4023983666296229, + "grad_norm": 0.02943042293190956, + "learning_rate": 8.584658044289478e-05, + "loss": 0.121, + "step": 15570 + }, + { + "epoch": 0.4026568113095392, + "grad_norm": 0.03925080597400665, + "learning_rate": 8.581971574908598e-05, + "loss": 0.1207, + "step": 15580 + }, + { + "epoch": 0.40291525598945543, + "grad_norm": 0.03914114460349083, + "learning_rate": 8.579283205879562e-05, + "loss": 0.1213, + "step": 15590 + }, + { + "epoch": 0.40317370066937175, + "grad_norm": 0.02524516172707081, + "learning_rate": 8.576592939655313e-05, + "loss": 0.1208, + "step": 15600 + }, + { + "epoch": 0.403432145349288, + "grad_norm": 0.03648152947425842, + "learning_rate": 8.573900778690522e-05, + "loss": 0.1211, + "step": 15610 + }, + { + "epoch": 0.40369059002920427, + "grad_norm": 0.04120924323797226, + "learning_rate": 8.571206725441591e-05, + "loss": 0.1208, + "step": 15620 + }, + { + "epoch": 0.4039490347091205, + "grad_norm": 0.02932583913207054, + "learning_rate": 8.568510782366648e-05, + "loss": 0.1214, + "step": 15630 + }, + { + "epoch": 0.4042074793890368, + "grad_norm": 0.030477048829197884, + "learning_rate": 8.565812951925545e-05, + "loss": 0.1207, + "step": 15640 + }, + { + "epoch": 0.40446592406895304, + "grad_norm": 0.03660288080573082, + "learning_rate": 8.56311323657986e-05, + "loss": 0.1215, + "step": 15650 + }, + { + "epoch": 0.4047243687488693, + "grad_norm": 0.03589823469519615, + "learning_rate": 8.560411638792885e-05, + "loss": 0.121, + "step": 15660 + }, + { + "epoch": 0.40498281342878556, + "grad_norm": 0.03230449557304382, + "learning_rate": 8.557708161029632e-05, + "loss": 0.1209, + "step": 15670 + }, + { + "epoch": 0.4052412581087018, + "grad_norm": 0.02675209566950798, + "learning_rate": 8.555002805756826e-05, + "loss": 0.1208, + "step": 15680 + }, + { + "epoch": 0.4054997027886181, + "grad_norm": 0.02823133021593094, + "learning_rate": 8.552295575442914e-05, + "loss": 0.1211, + "step": 15690 + }, + { + "epoch": 0.40575814746853434, + "grad_norm": 0.03697390854358673, + "learning_rate": 8.549586472558043e-05, + "loss": 0.1207, + "step": 15700 + }, + { + "epoch": 0.40601659214845065, + "grad_norm": 0.04294130578637123, + "learning_rate": 8.546875499574075e-05, + "loss": 0.1214, + "step": 15710 + }, + { + "epoch": 0.4062750368283669, + "grad_norm": 0.05114894360303879, + "learning_rate": 8.544162658964574e-05, + "loss": 0.1216, + "step": 15720 + }, + { + "epoch": 0.40653348150828317, + "grad_norm": 0.03625350818037987, + "learning_rate": 8.541447953204811e-05, + "loss": 0.121, + "step": 15730 + }, + { + "epoch": 0.4067919261881994, + "grad_norm": 0.028737010434269905, + "learning_rate": 8.53873138477176e-05, + "loss": 0.1212, + "step": 15740 + }, + { + "epoch": 0.4070503708681157, + "grad_norm": 0.04362634941935539, + "learning_rate": 8.536012956144093e-05, + "loss": 0.121, + "step": 15750 + }, + { + "epoch": 0.40730881554803194, + "grad_norm": 0.027447624132037163, + "learning_rate": 8.533292669802177e-05, + "loss": 0.1213, + "step": 15760 + }, + { + "epoch": 0.4075672602279482, + "grad_norm": 0.029734376817941666, + "learning_rate": 8.530570528228076e-05, + "loss": 0.121, + "step": 15770 + }, + { + "epoch": 0.40782570490786446, + "grad_norm": 0.02965705096721649, + "learning_rate": 8.52784653390555e-05, + "loss": 0.1212, + "step": 15780 + }, + { + "epoch": 0.4080841495877807, + "grad_norm": 0.024572785943746567, + "learning_rate": 8.525120689320044e-05, + "loss": 0.1208, + "step": 15790 + }, + { + "epoch": 0.408342594267697, + "grad_norm": 0.04662232846021652, + "learning_rate": 8.522392996958693e-05, + "loss": 0.1212, + "step": 15800 + }, + { + "epoch": 0.40860103894761324, + "grad_norm": 0.02666018344461918, + "learning_rate": 8.519663459310319e-05, + "loss": 0.1208, + "step": 15810 + }, + { + "epoch": 0.40885948362752955, + "grad_norm": 0.04861900210380554, + "learning_rate": 8.516932078865429e-05, + "loss": 0.1213, + "step": 15820 + }, + { + "epoch": 0.4091179283074458, + "grad_norm": 0.023923644796013832, + "learning_rate": 8.514198858116207e-05, + "loss": 0.1201, + "step": 15830 + }, + { + "epoch": 0.40937637298736207, + "grad_norm": 0.02672005072236061, + "learning_rate": 8.511463799556523e-05, + "loss": 0.1208, + "step": 15840 + }, + { + "epoch": 0.40963481766727833, + "grad_norm": 0.04224829748272896, + "learning_rate": 8.508726905681914e-05, + "loss": 0.1213, + "step": 15850 + }, + { + "epoch": 0.4098932623471946, + "grad_norm": 0.03477904573082924, + "learning_rate": 8.505988178989601e-05, + "loss": 0.1203, + "step": 15860 + }, + { + "epoch": 0.41015170702711085, + "grad_norm": 0.04892458766698837, + "learning_rate": 8.503247621978474e-05, + "loss": 0.1214, + "step": 15870 + }, + { + "epoch": 0.4104101517070271, + "grad_norm": 0.05931811034679413, + "learning_rate": 8.50050523714909e-05, + "loss": 0.1205, + "step": 15880 + }, + { + "epoch": 0.41066859638694336, + "grad_norm": 0.026601992547512054, + "learning_rate": 8.497761027003679e-05, + "loss": 0.1214, + "step": 15890 + }, + { + "epoch": 0.4109270410668596, + "grad_norm": 0.03957698494195938, + "learning_rate": 8.495014994046131e-05, + "loss": 0.121, + "step": 15900 + }, + { + "epoch": 0.4111854857467759, + "grad_norm": 0.02567448653280735, + "learning_rate": 8.492267140782005e-05, + "loss": 0.1212, + "step": 15910 + }, + { + "epoch": 0.41144393042669214, + "grad_norm": 0.048902206122875214, + "learning_rate": 8.489517469718515e-05, + "loss": 0.121, + "step": 15920 + }, + { + "epoch": 0.41170237510660845, + "grad_norm": 0.03848840296268463, + "learning_rate": 8.486765983364542e-05, + "loss": 0.1214, + "step": 15930 + }, + { + "epoch": 0.4119608197865247, + "grad_norm": 0.025306904688477516, + "learning_rate": 8.484012684230612e-05, + "loss": 0.1211, + "step": 15940 + }, + { + "epoch": 0.41221926446644097, + "grad_norm": 0.029906559735536575, + "learning_rate": 8.481257574828912e-05, + "loss": 0.1213, + "step": 15950 + }, + { + "epoch": 0.41247770914635723, + "grad_norm": 0.04389210417866707, + "learning_rate": 8.47850065767328e-05, + "loss": 0.1216, + "step": 15960 + }, + { + "epoch": 0.4127361538262735, + "grad_norm": 0.026348350569605827, + "learning_rate": 8.475741935279205e-05, + "loss": 0.1213, + "step": 15970 + }, + { + "epoch": 0.41299459850618975, + "grad_norm": 0.036679770797491074, + "learning_rate": 8.47298141016382e-05, + "loss": 0.1213, + "step": 15980 + }, + { + "epoch": 0.413253043186106, + "grad_norm": 0.03450449928641319, + "learning_rate": 8.470219084845903e-05, + "loss": 0.1208, + "step": 15990 + }, + { + "epoch": 0.41351148786602226, + "grad_norm": 0.030833858996629715, + "learning_rate": 8.467454961845877e-05, + "loss": 0.1208, + "step": 16000 + }, + { + "epoch": 0.4137699325459385, + "grad_norm": 0.02943314239382744, + "learning_rate": 8.464689043685805e-05, + "loss": 0.1205, + "step": 16010 + }, + { + "epoch": 0.4140283772258548, + "grad_norm": 0.02616015076637268, + "learning_rate": 8.461921332889384e-05, + "loss": 0.1204, + "step": 16020 + }, + { + "epoch": 0.4142868219057711, + "grad_norm": 0.02970828488469124, + "learning_rate": 8.459151831981951e-05, + "loss": 0.1202, + "step": 16030 + }, + { + "epoch": 0.41454526658568736, + "grad_norm": 0.042566508054733276, + "learning_rate": 8.456380543490476e-05, + "loss": 0.1209, + "step": 16040 + }, + { + "epoch": 0.4148037112656036, + "grad_norm": 0.02767457440495491, + "learning_rate": 8.453607469943557e-05, + "loss": 0.1215, + "step": 16050 + }, + { + "epoch": 0.4150621559455199, + "grad_norm": 0.04817627742886543, + "learning_rate": 8.45083261387142e-05, + "loss": 0.121, + "step": 16060 + }, + { + "epoch": 0.41532060062543613, + "grad_norm": 0.028463782742619514, + "learning_rate": 8.448055977805926e-05, + "loss": 0.1217, + "step": 16070 + }, + { + "epoch": 0.4155790453053524, + "grad_norm": 0.029290547594428062, + "learning_rate": 8.445277564280551e-05, + "loss": 0.121, + "step": 16080 + }, + { + "epoch": 0.41583748998526865, + "grad_norm": 0.026767823845148087, + "learning_rate": 8.442497375830396e-05, + "loss": 0.121, + "step": 16090 + }, + { + "epoch": 0.4160959346651849, + "grad_norm": 0.026092415675520897, + "learning_rate": 8.439715414992182e-05, + "loss": 0.121, + "step": 16100 + }, + { + "epoch": 0.41635437934510117, + "grad_norm": 0.06636255234479904, + "learning_rate": 8.436931684304248e-05, + "loss": 0.1213, + "step": 16110 + }, + { + "epoch": 0.4166128240250174, + "grad_norm": 0.0241299606859684, + "learning_rate": 8.434146186306542e-05, + "loss": 0.1207, + "step": 16120 + }, + { + "epoch": 0.4168712687049337, + "grad_norm": 0.03866523876786232, + "learning_rate": 8.431358923540633e-05, + "loss": 0.1214, + "step": 16130 + }, + { + "epoch": 0.41712971338485, + "grad_norm": 0.046918630599975586, + "learning_rate": 8.428569898549696e-05, + "loss": 0.1213, + "step": 16140 + }, + { + "epoch": 0.41738815806476626, + "grad_norm": 0.024152349680662155, + "learning_rate": 8.425779113878514e-05, + "loss": 0.1212, + "step": 16150 + }, + { + "epoch": 0.4176466027446825, + "grad_norm": 0.06717929244041443, + "learning_rate": 8.422986572073472e-05, + "loss": 0.1214, + "step": 16160 + }, + { + "epoch": 0.4179050474245988, + "grad_norm": 0.029480861499905586, + "learning_rate": 8.420192275682566e-05, + "loss": 0.1216, + "step": 16170 + }, + { + "epoch": 0.41816349210451503, + "grad_norm": 0.038486603647470474, + "learning_rate": 8.41739622725539e-05, + "loss": 0.121, + "step": 16180 + }, + { + "epoch": 0.4184219367844313, + "grad_norm": 0.03742188960313797, + "learning_rate": 8.41459842934313e-05, + "loss": 0.1207, + "step": 16190 + }, + { + "epoch": 0.41868038146434755, + "grad_norm": 0.02524004504084587, + "learning_rate": 8.41179888449858e-05, + "loss": 0.1211, + "step": 16200 + }, + { + "epoch": 0.4189388261442638, + "grad_norm": 0.049571722745895386, + "learning_rate": 8.408997595276118e-05, + "loss": 0.1213, + "step": 16210 + }, + { + "epoch": 0.41919727082418007, + "grad_norm": 0.03594374284148216, + "learning_rate": 8.40619456423172e-05, + "loss": 0.1207, + "step": 16220 + }, + { + "epoch": 0.4194557155040963, + "grad_norm": 0.037545595318078995, + "learning_rate": 8.403389793922944e-05, + "loss": 0.121, + "step": 16230 + }, + { + "epoch": 0.4197141601840126, + "grad_norm": 0.037401676177978516, + "learning_rate": 8.400583286908946e-05, + "loss": 0.121, + "step": 16240 + }, + { + "epoch": 0.4199726048639289, + "grad_norm": 0.042802032083272934, + "learning_rate": 8.397775045750453e-05, + "loss": 0.121, + "step": 16250 + }, + { + "epoch": 0.42023104954384516, + "grad_norm": 0.0404074527323246, + "learning_rate": 8.394965073009789e-05, + "loss": 0.1211, + "step": 16260 + }, + { + "epoch": 0.4204894942237614, + "grad_norm": 0.03205915912985802, + "learning_rate": 8.392153371250846e-05, + "loss": 0.1212, + "step": 16270 + }, + { + "epoch": 0.4207479389036777, + "grad_norm": 0.038681160658597946, + "learning_rate": 8.389339943039096e-05, + "loss": 0.1209, + "step": 16280 + }, + { + "epoch": 0.42100638358359393, + "grad_norm": 0.03946966677904129, + "learning_rate": 8.386524790941595e-05, + "loss": 0.1209, + "step": 16290 + }, + { + "epoch": 0.4212648282635102, + "grad_norm": 0.038513340055942535, + "learning_rate": 8.383707917526959e-05, + "loss": 0.1209, + "step": 16300 + }, + { + "epoch": 0.42152327294342645, + "grad_norm": 0.02672209031879902, + "learning_rate": 8.380889325365385e-05, + "loss": 0.1207, + "step": 16310 + }, + { + "epoch": 0.4217817176233427, + "grad_norm": 0.04508436843752861, + "learning_rate": 8.378069017028632e-05, + "loss": 0.1205, + "step": 16320 + }, + { + "epoch": 0.42204016230325897, + "grad_norm": 0.03324988856911659, + "learning_rate": 8.375246995090025e-05, + "loss": 0.1214, + "step": 16330 + }, + { + "epoch": 0.42229860698317523, + "grad_norm": 0.04210703819990158, + "learning_rate": 8.37242326212446e-05, + "loss": 0.1205, + "step": 16340 + }, + { + "epoch": 0.42255705166309154, + "grad_norm": 0.0379868820309639, + "learning_rate": 8.369597820708383e-05, + "loss": 0.1218, + "step": 16350 + }, + { + "epoch": 0.4228154963430078, + "grad_norm": 0.03394721448421478, + "learning_rate": 8.366770673419808e-05, + "loss": 0.121, + "step": 16360 + }, + { + "epoch": 0.42307394102292406, + "grad_norm": 0.031023738905787468, + "learning_rate": 8.363941822838301e-05, + "loss": 0.1201, + "step": 16370 + }, + { + "epoch": 0.4233323857028403, + "grad_norm": 0.030742190778255463, + "learning_rate": 8.361111271544983e-05, + "loss": 0.1209, + "step": 16380 + }, + { + "epoch": 0.4235908303827566, + "grad_norm": 0.02840486727654934, + "learning_rate": 8.358279022122526e-05, + "loss": 0.1217, + "step": 16390 + }, + { + "epoch": 0.42384927506267284, + "grad_norm": 0.032014843076467514, + "learning_rate": 8.355445077155155e-05, + "loss": 0.1207, + "step": 16400 + }, + { + "epoch": 0.4241077197425891, + "grad_norm": 0.028312966227531433, + "learning_rate": 8.352609439228636e-05, + "loss": 0.1206, + "step": 16410 + }, + { + "epoch": 0.42436616442250535, + "grad_norm": 0.03689076006412506, + "learning_rate": 8.349772110930285e-05, + "loss": 0.1208, + "step": 16420 + }, + { + "epoch": 0.4246246091024216, + "grad_norm": 0.026454724371433258, + "learning_rate": 8.346933094848957e-05, + "loss": 0.121, + "step": 16430 + }, + { + "epoch": 0.42488305378233787, + "grad_norm": 0.023743337020277977, + "learning_rate": 8.344092393575051e-05, + "loss": 0.121, + "step": 16440 + }, + { + "epoch": 0.42514149846225413, + "grad_norm": 0.03280189260840416, + "learning_rate": 8.341250009700499e-05, + "loss": 0.1208, + "step": 16450 + }, + { + "epoch": 0.42539994314217044, + "grad_norm": 0.03938455134630203, + "learning_rate": 8.338405945818772e-05, + "loss": 0.121, + "step": 16460 + }, + { + "epoch": 0.4256583878220867, + "grad_norm": 0.05186153203248978, + "learning_rate": 8.33556020452487e-05, + "loss": 0.1211, + "step": 16470 + }, + { + "epoch": 0.42591683250200296, + "grad_norm": 0.05064249038696289, + "learning_rate": 8.332712788415324e-05, + "loss": 0.1216, + "step": 16480 + }, + { + "epoch": 0.4261752771819192, + "grad_norm": 0.03894728049635887, + "learning_rate": 8.3298637000882e-05, + "loss": 0.12, + "step": 16490 + }, + { + "epoch": 0.4264337218618355, + "grad_norm": 0.030547821894288063, + "learning_rate": 8.327012942143081e-05, + "loss": 0.1212, + "step": 16500 + }, + { + "epoch": 0.42669216654175174, + "grad_norm": 0.034204114228487015, + "learning_rate": 8.324160517181081e-05, + "loss": 0.1203, + "step": 16510 + }, + { + "epoch": 0.426950611221668, + "grad_norm": 0.0443953238427639, + "learning_rate": 8.321306427804825e-05, + "loss": 0.1202, + "step": 16520 + }, + { + "epoch": 0.42720905590158426, + "grad_norm": 0.03159923851490021, + "learning_rate": 8.318450676618466e-05, + "loss": 0.121, + "step": 16530 + }, + { + "epoch": 0.4274675005815005, + "grad_norm": 0.07847138494253159, + "learning_rate": 8.31559326622767e-05, + "loss": 0.1204, + "step": 16540 + }, + { + "epoch": 0.4277259452614168, + "grad_norm": 0.039625413715839386, + "learning_rate": 8.312734199239616e-05, + "loss": 0.1205, + "step": 16550 + }, + { + "epoch": 0.42798438994133303, + "grad_norm": 0.028664996847510338, + "learning_rate": 8.309873478262995e-05, + "loss": 0.1211, + "step": 16560 + }, + { + "epoch": 0.42824283462124935, + "grad_norm": 0.06638512760400772, + "learning_rate": 8.30701110590801e-05, + "loss": 0.1208, + "step": 16570 + }, + { + "epoch": 0.4285012793011656, + "grad_norm": 0.04530509561300278, + "learning_rate": 8.304147084786366e-05, + "loss": 0.1221, + "step": 16580 + }, + { + "epoch": 0.42875972398108186, + "grad_norm": 0.03943582624197006, + "learning_rate": 8.301281417511276e-05, + "loss": 0.1232, + "step": 16590 + }, + { + "epoch": 0.4290181686609981, + "grad_norm": 0.03222506120800972, + "learning_rate": 8.298414106697453e-05, + "loss": 0.1212, + "step": 16600 + }, + { + "epoch": 0.4292766133409144, + "grad_norm": 0.02473621815443039, + "learning_rate": 8.29554515496111e-05, + "loss": 0.1215, + "step": 16610 + }, + { + "epoch": 0.42953505802083064, + "grad_norm": 0.04001742601394653, + "learning_rate": 8.292674564919958e-05, + "loss": 0.1204, + "step": 16620 + }, + { + "epoch": 0.4297935027007469, + "grad_norm": 0.02742496132850647, + "learning_rate": 8.289802339193202e-05, + "loss": 0.1207, + "step": 16630 + }, + { + "epoch": 0.43005194738066316, + "grad_norm": 0.05026349425315857, + "learning_rate": 8.28692848040154e-05, + "loss": 0.1203, + "step": 16640 + }, + { + "epoch": 0.4303103920605794, + "grad_norm": 0.029600128531455994, + "learning_rate": 8.284052991167162e-05, + "loss": 0.1208, + "step": 16650 + }, + { + "epoch": 0.4305688367404957, + "grad_norm": 0.024870799854397774, + "learning_rate": 8.281175874113737e-05, + "loss": 0.1211, + "step": 16660 + }, + { + "epoch": 0.43082728142041193, + "grad_norm": 0.04882276430726051, + "learning_rate": 8.278297131866434e-05, + "loss": 0.1211, + "step": 16670 + }, + { + "epoch": 0.43108572610032825, + "grad_norm": 0.026854341849684715, + "learning_rate": 8.275416767051894e-05, + "loss": 0.1216, + "step": 16680 + }, + { + "epoch": 0.4313441707802445, + "grad_norm": 0.05000225454568863, + "learning_rate": 8.272534782298236e-05, + "loss": 0.1208, + "step": 16690 + }, + { + "epoch": 0.43160261546016077, + "grad_norm": 0.029729243367910385, + "learning_rate": 8.269651180235067e-05, + "loss": 0.1212, + "step": 16700 + }, + { + "epoch": 0.431861060140077, + "grad_norm": 0.036123208701610565, + "learning_rate": 8.266765963493466e-05, + "loss": 0.1201, + "step": 16710 + }, + { + "epoch": 0.4321195048199933, + "grad_norm": 0.03330158814787865, + "learning_rate": 8.263879134705978e-05, + "loss": 0.1215, + "step": 16720 + }, + { + "epoch": 0.43237794949990954, + "grad_norm": 0.041633736342191696, + "learning_rate": 8.260990696506634e-05, + "loss": 0.1206, + "step": 16730 + }, + { + "epoch": 0.4326363941798258, + "grad_norm": 0.047123052179813385, + "learning_rate": 8.258100651530915e-05, + "loss": 0.1207, + "step": 16740 + }, + { + "epoch": 0.43289483885974206, + "grad_norm": 0.02728656679391861, + "learning_rate": 8.255209002415787e-05, + "loss": 0.1203, + "step": 16750 + }, + { + "epoch": 0.4331532835396583, + "grad_norm": 0.03906015679240227, + "learning_rate": 8.252315751799662e-05, + "loss": 0.1214, + "step": 16760 + }, + { + "epoch": 0.4334117282195746, + "grad_norm": 0.06474848091602325, + "learning_rate": 8.249420902322427e-05, + "loss": 0.1207, + "step": 16770 + }, + { + "epoch": 0.4336701728994909, + "grad_norm": 0.06732568889856339, + "learning_rate": 8.24652445662542e-05, + "loss": 0.1203, + "step": 16780 + }, + { + "epoch": 0.43392861757940715, + "grad_norm": 0.028053415939211845, + "learning_rate": 8.243626417351437e-05, + "loss": 0.1218, + "step": 16790 + }, + { + "epoch": 0.4341870622593234, + "grad_norm": 0.03437976911664009, + "learning_rate": 8.24072678714473e-05, + "loss": 0.1212, + "step": 16800 + }, + { + "epoch": 0.43444550693923967, + "grad_norm": 0.025603612884879112, + "learning_rate": 8.237825568651003e-05, + "loss": 0.1208, + "step": 16810 + }, + { + "epoch": 0.4347039516191559, + "grad_norm": 0.02875765971839428, + "learning_rate": 8.234922764517403e-05, + "loss": 0.1211, + "step": 16820 + }, + { + "epoch": 0.4349623962990722, + "grad_norm": 0.03835492953658104, + "learning_rate": 8.232018377392532e-05, + "loss": 0.1205, + "step": 16830 + }, + { + "epoch": 0.43522084097898844, + "grad_norm": 0.05196511000394821, + "learning_rate": 8.229112409926431e-05, + "loss": 0.1216, + "step": 16840 + }, + { + "epoch": 0.4354792856589047, + "grad_norm": 0.033375345170497894, + "learning_rate": 8.226204864770588e-05, + "loss": 0.121, + "step": 16850 + }, + { + "epoch": 0.43573773033882096, + "grad_norm": 0.025522610172629356, + "learning_rate": 8.223295744577923e-05, + "loss": 0.1218, + "step": 16860 + }, + { + "epoch": 0.4359961750187372, + "grad_norm": 0.0441477969288826, + "learning_rate": 8.220385052002801e-05, + "loss": 0.1208, + "step": 16870 + }, + { + "epoch": 0.4362546196986535, + "grad_norm": 0.02623586542904377, + "learning_rate": 8.217472789701015e-05, + "loss": 0.1211, + "step": 16880 + }, + { + "epoch": 0.4365130643785698, + "grad_norm": 0.04169215261936188, + "learning_rate": 8.214558960329794e-05, + "loss": 0.1216, + "step": 16890 + }, + { + "epoch": 0.43677150905848605, + "grad_norm": 0.03021879307925701, + "learning_rate": 8.211643566547798e-05, + "loss": 0.1211, + "step": 16900 + }, + { + "epoch": 0.4370299537384023, + "grad_norm": 0.0524255633354187, + "learning_rate": 8.208726611015107e-05, + "loss": 0.1213, + "step": 16910 + }, + { + "epoch": 0.43728839841831857, + "grad_norm": 0.03296078369021416, + "learning_rate": 8.20580809639324e-05, + "loss": 0.1213, + "step": 16920 + }, + { + "epoch": 0.43754684309823483, + "grad_norm": 0.04083700850605965, + "learning_rate": 8.202888025345123e-05, + "loss": 0.1203, + "step": 16930 + }, + { + "epoch": 0.4378052877781511, + "grad_norm": 0.06784559041261673, + "learning_rate": 8.199966400535111e-05, + "loss": 0.1212, + "step": 16940 + }, + { + "epoch": 0.43806373245806735, + "grad_norm": 0.025397544726729393, + "learning_rate": 8.197043224628978e-05, + "loss": 0.1212, + "step": 16950 + }, + { + "epoch": 0.4383221771379836, + "grad_norm": 0.03208483010530472, + "learning_rate": 8.194118500293905e-05, + "loss": 0.1208, + "step": 16960 + }, + { + "epoch": 0.43858062181789986, + "grad_norm": 0.033650774508714676, + "learning_rate": 8.191192230198497e-05, + "loss": 0.1218, + "step": 16970 + }, + { + "epoch": 0.4388390664978161, + "grad_norm": 0.06751713901758194, + "learning_rate": 8.188264417012759e-05, + "loss": 0.1206, + "step": 16980 + }, + { + "epoch": 0.4390975111777324, + "grad_norm": 0.02640434540808201, + "learning_rate": 8.185335063408109e-05, + "loss": 0.1212, + "step": 16990 + }, + { + "epoch": 0.4393559558576487, + "grad_norm": 0.02650640346109867, + "learning_rate": 8.182404172057367e-05, + "loss": 0.1217, + "step": 17000 + }, + { + "epoch": 0.43961440053756495, + "grad_norm": 0.038730688393116, + "learning_rate": 8.179471745634765e-05, + "loss": 0.1205, + "step": 17010 + }, + { + "epoch": 0.4398728452174812, + "grad_norm": 0.02494356594979763, + "learning_rate": 8.176537786815928e-05, + "loss": 0.1205, + "step": 17020 + }, + { + "epoch": 0.44013128989739747, + "grad_norm": 0.04323997721076012, + "learning_rate": 8.173602298277878e-05, + "loss": 0.1208, + "step": 17030 + }, + { + "epoch": 0.44038973457731373, + "grad_norm": 0.035764142870903015, + "learning_rate": 8.170665282699038e-05, + "loss": 0.1212, + "step": 17040 + }, + { + "epoch": 0.44064817925723, + "grad_norm": 0.035736002027988434, + "learning_rate": 8.167726742759218e-05, + "loss": 0.1214, + "step": 17050 + }, + { + "epoch": 0.44090662393714625, + "grad_norm": 0.03259728103876114, + "learning_rate": 8.164786681139627e-05, + "loss": 0.1212, + "step": 17060 + }, + { + "epoch": 0.4411650686170625, + "grad_norm": 0.046860963106155396, + "learning_rate": 8.161845100522858e-05, + "loss": 0.121, + "step": 17070 + }, + { + "epoch": 0.44142351329697876, + "grad_norm": 0.03054000437259674, + "learning_rate": 8.158902003592886e-05, + "loss": 0.1213, + "step": 17080 + }, + { + "epoch": 0.441681957976895, + "grad_norm": 0.03182874247431755, + "learning_rate": 8.155957393035079e-05, + "loss": 0.121, + "step": 17090 + }, + { + "epoch": 0.4419404026568113, + "grad_norm": 0.03305334597826004, + "learning_rate": 8.153011271536176e-05, + "loss": 0.1208, + "step": 17100 + }, + { + "epoch": 0.4421988473367276, + "grad_norm": 0.03152094781398773, + "learning_rate": 8.150063641784303e-05, + "loss": 0.1206, + "step": 17110 + }, + { + "epoch": 0.44245729201664385, + "grad_norm": 0.033562611788511276, + "learning_rate": 8.147114506468956e-05, + "loss": 0.1201, + "step": 17120 + }, + { + "epoch": 0.4427157366965601, + "grad_norm": 0.03005565144121647, + "learning_rate": 8.144163868281007e-05, + "loss": 0.121, + "step": 17130 + }, + { + "epoch": 0.4429741813764764, + "grad_norm": 0.028537388890981674, + "learning_rate": 8.141211729912703e-05, + "loss": 0.1214, + "step": 17140 + }, + { + "epoch": 0.44323262605639263, + "grad_norm": 0.027524372562766075, + "learning_rate": 8.138258094057653e-05, + "loss": 0.121, + "step": 17150 + }, + { + "epoch": 0.4434910707363089, + "grad_norm": 0.04848814755678177, + "learning_rate": 8.135302963410836e-05, + "loss": 0.1214, + "step": 17160 + }, + { + "epoch": 0.44374951541622515, + "grad_norm": 0.0412188284099102, + "learning_rate": 8.132346340668596e-05, + "loss": 0.1208, + "step": 17170 + }, + { + "epoch": 0.4440079600961414, + "grad_norm": 0.03252585604786873, + "learning_rate": 8.129388228528634e-05, + "loss": 0.1214, + "step": 17180 + }, + { + "epoch": 0.44426640477605767, + "grad_norm": 0.049049969762563705, + "learning_rate": 8.126428629690017e-05, + "loss": 0.1213, + "step": 17190 + }, + { + "epoch": 0.4445248494559739, + "grad_norm": 0.026512062177062035, + "learning_rate": 8.12346754685316e-05, + "loss": 0.1209, + "step": 17200 + }, + { + "epoch": 0.44478329413589024, + "grad_norm": 0.027985364198684692, + "learning_rate": 8.12050498271984e-05, + "loss": 0.1209, + "step": 17210 + }, + { + "epoch": 0.4450417388158065, + "grad_norm": 0.0424652025103569, + "learning_rate": 8.117540939993182e-05, + "loss": 0.121, + "step": 17220 + }, + { + "epoch": 0.44530018349572276, + "grad_norm": 0.05181806907057762, + "learning_rate": 8.114575421377657e-05, + "loss": 0.1212, + "step": 17230 + }, + { + "epoch": 0.445558628175639, + "grad_norm": 0.027944931760430336, + "learning_rate": 8.111608429579089e-05, + "loss": 0.1208, + "step": 17240 + }, + { + "epoch": 0.4458170728555553, + "grad_norm": 0.056538984179496765, + "learning_rate": 8.108639967304641e-05, + "loss": 0.121, + "step": 17250 + }, + { + "epoch": 0.44607551753547153, + "grad_norm": 0.041051674634218216, + "learning_rate": 8.10567003726282e-05, + "loss": 0.1211, + "step": 17260 + }, + { + "epoch": 0.4463339622153878, + "grad_norm": 0.0246867872774601, + "learning_rate": 8.102698642163473e-05, + "loss": 0.1204, + "step": 17270 + }, + { + "epoch": 0.44659240689530405, + "grad_norm": 0.05035049095749855, + "learning_rate": 8.099725784717784e-05, + "loss": 0.1215, + "step": 17280 + }, + { + "epoch": 0.4468508515752203, + "grad_norm": 0.04515364021062851, + "learning_rate": 8.096751467638263e-05, + "loss": 0.121, + "step": 17290 + }, + { + "epoch": 0.44710929625513657, + "grad_norm": 0.035112813115119934, + "learning_rate": 8.093775693638768e-05, + "loss": 0.1204, + "step": 17300 + }, + { + "epoch": 0.4473677409350528, + "grad_norm": 0.04091711342334747, + "learning_rate": 8.09079846543447e-05, + "loss": 0.1212, + "step": 17310 + }, + { + "epoch": 0.44762618561496914, + "grad_norm": 0.026900311931967735, + "learning_rate": 8.08781978574188e-05, + "loss": 0.1206, + "step": 17320 + }, + { + "epoch": 0.4478846302948854, + "grad_norm": 0.04169172793626785, + "learning_rate": 8.08483965727882e-05, + "loss": 0.1211, + "step": 17330 + }, + { + "epoch": 0.44814307497480166, + "grad_norm": 0.023345522582530975, + "learning_rate": 8.081858082764447e-05, + "loss": 0.1207, + "step": 17340 + }, + { + "epoch": 0.4484015196547179, + "grad_norm": 0.06396216154098511, + "learning_rate": 8.078875064919229e-05, + "loss": 0.1219, + "step": 17350 + }, + { + "epoch": 0.4486599643346342, + "grad_norm": 0.029196137562394142, + "learning_rate": 8.075890606464955e-05, + "loss": 0.1205, + "step": 17360 + }, + { + "epoch": 0.44891840901455043, + "grad_norm": 0.03568878769874573, + "learning_rate": 8.072904710124727e-05, + "loss": 0.1211, + "step": 17370 + }, + { + "epoch": 0.4491768536944667, + "grad_norm": 0.061988092958927155, + "learning_rate": 8.069917378622958e-05, + "loss": 0.121, + "step": 17380 + }, + { + "epoch": 0.44943529837438295, + "grad_norm": 0.027111155912280083, + "learning_rate": 8.066928614685371e-05, + "loss": 0.1211, + "step": 17390 + }, + { + "epoch": 0.4496937430542992, + "grad_norm": 0.025734491646289825, + "learning_rate": 8.063938421039e-05, + "loss": 0.1199, + "step": 17400 + }, + { + "epoch": 0.44995218773421547, + "grad_norm": 0.03376910835504532, + "learning_rate": 8.060946800412173e-05, + "loss": 0.1213, + "step": 17410 + }, + { + "epoch": 0.45021063241413173, + "grad_norm": 0.03703751042485237, + "learning_rate": 8.057953755534533e-05, + "loss": 0.1213, + "step": 17420 + }, + { + "epoch": 0.45046907709404804, + "grad_norm": 0.03592773526906967, + "learning_rate": 8.054959289137016e-05, + "loss": 0.1209, + "step": 17430 + }, + { + "epoch": 0.4507275217739643, + "grad_norm": 0.022831428796052933, + "learning_rate": 8.051963403951853e-05, + "loss": 0.1206, + "step": 17440 + }, + { + "epoch": 0.45098596645388056, + "grad_norm": 0.029885658994317055, + "learning_rate": 8.048966102712575e-05, + "loss": 0.1207, + "step": 17450 + }, + { + "epoch": 0.4512444111337968, + "grad_norm": 0.02844417653977871, + "learning_rate": 8.045967388153999e-05, + "loss": 0.1204, + "step": 17460 + }, + { + "epoch": 0.4515028558137131, + "grad_norm": 0.025953255593776703, + "learning_rate": 8.042967263012235e-05, + "loss": 0.1215, + "step": 17470 + }, + { + "epoch": 0.45176130049362934, + "grad_norm": 0.0450054295361042, + "learning_rate": 8.039965730024684e-05, + "loss": 0.1209, + "step": 17480 + }, + { + "epoch": 0.4520197451735456, + "grad_norm": 0.04800622910261154, + "learning_rate": 8.03696279193002e-05, + "loss": 0.121, + "step": 17490 + }, + { + "epoch": 0.45227818985346185, + "grad_norm": 0.024891328066587448, + "learning_rate": 8.033958451468216e-05, + "loss": 0.1204, + "step": 17500 + }, + { + "epoch": 0.4525366345333781, + "grad_norm": 0.0259137824177742, + "learning_rate": 8.030952711380506e-05, + "loss": 0.1204, + "step": 17510 + }, + { + "epoch": 0.45279507921329437, + "grad_norm": 0.035296883434057236, + "learning_rate": 8.027945574409415e-05, + "loss": 0.1212, + "step": 17520 + }, + { + "epoch": 0.4530535238932107, + "grad_norm": 0.023492569103837013, + "learning_rate": 8.024937043298734e-05, + "loss": 0.121, + "step": 17530 + }, + { + "epoch": 0.45331196857312694, + "grad_norm": 0.028371915221214294, + "learning_rate": 8.021927120793533e-05, + "loss": 0.1219, + "step": 17540 + }, + { + "epoch": 0.4535704132530432, + "grad_norm": 0.042613960802555084, + "learning_rate": 8.018915809640149e-05, + "loss": 0.1213, + "step": 17550 + }, + { + "epoch": 0.45382885793295946, + "grad_norm": 0.02977057173848152, + "learning_rate": 8.015903112586178e-05, + "loss": 0.1205, + "step": 17560 + }, + { + "epoch": 0.4540873026128757, + "grad_norm": 0.03294564411044121, + "learning_rate": 8.012889032380494e-05, + "loss": 0.1212, + "step": 17570 + }, + { + "epoch": 0.454345747292792, + "grad_norm": 0.03282090649008751, + "learning_rate": 8.009873571773228e-05, + "loss": 0.1209, + "step": 17580 + }, + { + "epoch": 0.45460419197270824, + "grad_norm": 0.030965318903326988, + "learning_rate": 8.006856733515765e-05, + "loss": 0.1215, + "step": 17590 + }, + { + "epoch": 0.4548626366526245, + "grad_norm": 0.027042236179113388, + "learning_rate": 8.003838520360752e-05, + "loss": 0.1207, + "step": 17600 + }, + { + "epoch": 0.45512108133254076, + "grad_norm": 0.03583112731575966, + "learning_rate": 8.000818935062091e-05, + "loss": 0.1211, + "step": 17610 + }, + { + "epoch": 0.455379526012457, + "grad_norm": 0.0515785813331604, + "learning_rate": 7.997797980374932e-05, + "loss": 0.1216, + "step": 17620 + }, + { + "epoch": 0.4556379706923733, + "grad_norm": 0.02845834381878376, + "learning_rate": 7.994775659055682e-05, + "loss": 0.1213, + "step": 17630 + }, + { + "epoch": 0.4558964153722896, + "grad_norm": 0.037405528128147125, + "learning_rate": 7.991751973861987e-05, + "loss": 0.121, + "step": 17640 + }, + { + "epoch": 0.45615486005220585, + "grad_norm": 0.041332971304655075, + "learning_rate": 7.988726927552742e-05, + "loss": 0.1212, + "step": 17650 + }, + { + "epoch": 0.4564133047321221, + "grad_norm": 0.0270778089761734, + "learning_rate": 7.98570052288808e-05, + "loss": 0.1217, + "step": 17660 + }, + { + "epoch": 0.45667174941203836, + "grad_norm": 0.025271035730838776, + "learning_rate": 7.982672762629379e-05, + "loss": 0.1213, + "step": 17670 + }, + { + "epoch": 0.4569301940919546, + "grad_norm": 0.0330546572804451, + "learning_rate": 7.979643649539248e-05, + "loss": 0.1211, + "step": 17680 + }, + { + "epoch": 0.4571886387718709, + "grad_norm": 0.033037640154361725, + "learning_rate": 7.976613186381534e-05, + "loss": 0.121, + "step": 17690 + }, + { + "epoch": 0.45744708345178714, + "grad_norm": 0.035831086337566376, + "learning_rate": 7.973581375921318e-05, + "loss": 0.1209, + "step": 17700 + }, + { + "epoch": 0.4577055281317034, + "grad_norm": 0.03328409045934677, + "learning_rate": 7.970548220924902e-05, + "loss": 0.1212, + "step": 17710 + }, + { + "epoch": 0.45796397281161966, + "grad_norm": 0.031463827937841415, + "learning_rate": 7.967513724159824e-05, + "loss": 0.1209, + "step": 17720 + }, + { + "epoch": 0.4582224174915359, + "grad_norm": 0.033663634210824966, + "learning_rate": 7.964477888394841e-05, + "loss": 0.1209, + "step": 17730 + }, + { + "epoch": 0.4584808621714522, + "grad_norm": 0.035786133259534836, + "learning_rate": 7.961440716399932e-05, + "loss": 0.1208, + "step": 17740 + }, + { + "epoch": 0.4587393068513685, + "grad_norm": 0.030950557440519333, + "learning_rate": 7.9584022109463e-05, + "loss": 0.1205, + "step": 17750 + }, + { + "epoch": 0.45899775153128475, + "grad_norm": 0.038400836288928986, + "learning_rate": 7.955362374806356e-05, + "loss": 0.1219, + "step": 17760 + }, + { + "epoch": 0.459256196211201, + "grad_norm": 0.025500638410449028, + "learning_rate": 7.952321210753732e-05, + "loss": 0.1206, + "step": 17770 + }, + { + "epoch": 0.45951464089111727, + "grad_norm": 0.03494824841618538, + "learning_rate": 7.949278721563273e-05, + "loss": 0.1211, + "step": 17780 + }, + { + "epoch": 0.4597730855710335, + "grad_norm": 0.026645977050065994, + "learning_rate": 7.946234910011026e-05, + "loss": 0.1217, + "step": 17790 + }, + { + "epoch": 0.4600315302509498, + "grad_norm": 0.030729806050658226, + "learning_rate": 7.943189778874251e-05, + "loss": 0.122, + "step": 17800 + }, + { + "epoch": 0.46028997493086604, + "grad_norm": 0.04534943029284477, + "learning_rate": 7.940143330931407e-05, + "loss": 0.1208, + "step": 17810 + }, + { + "epoch": 0.4605484196107823, + "grad_norm": 0.02417948842048645, + "learning_rate": 7.937095568962159e-05, + "loss": 0.1203, + "step": 17820 + }, + { + "epoch": 0.46080686429069856, + "grad_norm": 0.04383620247244835, + "learning_rate": 7.934046495747367e-05, + "loss": 0.1208, + "step": 17830 + }, + { + "epoch": 0.4610653089706148, + "grad_norm": 0.033758495002985, + "learning_rate": 7.930996114069089e-05, + "loss": 0.1205, + "step": 17840 + }, + { + "epoch": 0.4613237536505311, + "grad_norm": 0.032417699694633484, + "learning_rate": 7.927944426710581e-05, + "loss": 0.1204, + "step": 17850 + }, + { + "epoch": 0.4615821983304474, + "grad_norm": 0.03325258567929268, + "learning_rate": 7.924891436456284e-05, + "loss": 0.1212, + "step": 17860 + }, + { + "epoch": 0.46184064301036365, + "grad_norm": 0.0318230502307415, + "learning_rate": 7.921837146091829e-05, + "loss": 0.1209, + "step": 17870 + }, + { + "epoch": 0.4620990876902799, + "grad_norm": 0.028837701305747032, + "learning_rate": 7.918781558404035e-05, + "loss": 0.1201, + "step": 17880 + }, + { + "epoch": 0.46235753237019617, + "grad_norm": 0.0288578812032938, + "learning_rate": 7.915724676180907e-05, + "loss": 0.1209, + "step": 17890 + }, + { + "epoch": 0.4626159770501124, + "grad_norm": 0.03512696549296379, + "learning_rate": 7.912666502211625e-05, + "loss": 0.1202, + "step": 17900 + }, + { + "epoch": 0.4628744217300287, + "grad_norm": 0.027083231136202812, + "learning_rate": 7.909607039286554e-05, + "loss": 0.1206, + "step": 17910 + }, + { + "epoch": 0.46313286640994494, + "grad_norm": 0.031255658715963364, + "learning_rate": 7.90654629019723e-05, + "loss": 0.121, + "step": 17920 + }, + { + "epoch": 0.4633913110898612, + "grad_norm": 0.02460126020014286, + "learning_rate": 7.903484257736365e-05, + "loss": 0.1212, + "step": 17930 + }, + { + "epoch": 0.46364975576977746, + "grad_norm": 0.023923933506011963, + "learning_rate": 7.900420944697844e-05, + "loss": 0.1205, + "step": 17940 + }, + { + "epoch": 0.4639082004496937, + "grad_norm": 0.02553286775946617, + "learning_rate": 7.897356353876714e-05, + "loss": 0.1208, + "step": 17950 + }, + { + "epoch": 0.46416664512961003, + "grad_norm": 0.04805140197277069, + "learning_rate": 7.894290488069199e-05, + "loss": 0.1207, + "step": 17960 + }, + { + "epoch": 0.4644250898095263, + "grad_norm": 0.028336919844150543, + "learning_rate": 7.891223350072671e-05, + "loss": 0.1216, + "step": 17970 + }, + { + "epoch": 0.46468353448944255, + "grad_norm": 0.06453067809343338, + "learning_rate": 7.888154942685675e-05, + "loss": 0.1205, + "step": 17980 + }, + { + "epoch": 0.4649419791693588, + "grad_norm": 0.030098699033260345, + "learning_rate": 7.88508526870791e-05, + "loss": 0.1208, + "step": 17990 + }, + { + "epoch": 0.46520042384927507, + "grad_norm": 0.034266021102666855, + "learning_rate": 7.882014330940232e-05, + "loss": 0.1203, + "step": 18000 + }, + { + "epoch": 0.4654588685291913, + "grad_norm": 0.039469532668590546, + "learning_rate": 7.878942132184648e-05, + "loss": 0.1212, + "step": 18010 + }, + { + "epoch": 0.4657173132091076, + "grad_norm": 0.03684231638908386, + "learning_rate": 7.875868675244317e-05, + "loss": 0.1208, + "step": 18020 + }, + { + "epoch": 0.46597575788902384, + "grad_norm": 0.030773069709539413, + "learning_rate": 7.872793962923544e-05, + "loss": 0.1211, + "step": 18030 + }, + { + "epoch": 0.4662342025689401, + "grad_norm": 0.03970390930771828, + "learning_rate": 7.869717998027784e-05, + "loss": 0.1207, + "step": 18040 + }, + { + "epoch": 0.46649264724885636, + "grad_norm": 0.0371558740735054, + "learning_rate": 7.866640783363632e-05, + "loss": 0.1211, + "step": 18050 + }, + { + "epoch": 0.4667510919287726, + "grad_norm": 0.03251232951879501, + "learning_rate": 7.863562321738821e-05, + "loss": 0.1209, + "step": 18060 + }, + { + "epoch": 0.46700953660868894, + "grad_norm": 0.0353286974132061, + "learning_rate": 7.860482615962226e-05, + "loss": 0.1211, + "step": 18070 + }, + { + "epoch": 0.4672679812886052, + "grad_norm": 0.025321772322058678, + "learning_rate": 7.857401668843854e-05, + "loss": 0.1207, + "step": 18080 + }, + { + "epoch": 0.46752642596852145, + "grad_norm": 0.03650693595409393, + "learning_rate": 7.854319483194845e-05, + "loss": 0.1212, + "step": 18090 + }, + { + "epoch": 0.4677848706484377, + "grad_norm": 0.033582791686058044, + "learning_rate": 7.851236061827473e-05, + "loss": 0.1211, + "step": 18100 + }, + { + "epoch": 0.46804331532835397, + "grad_norm": 0.033203985542058945, + "learning_rate": 7.848151407555136e-05, + "loss": 0.1208, + "step": 18110 + }, + { + "epoch": 0.46830176000827023, + "grad_norm": 0.024356326088309288, + "learning_rate": 7.845065523192354e-05, + "loss": 0.1214, + "step": 18120 + }, + { + "epoch": 0.4685602046881865, + "grad_norm": 0.035591162741184235, + "learning_rate": 7.841978411554778e-05, + "loss": 0.121, + "step": 18130 + }, + { + "epoch": 0.46881864936810275, + "grad_norm": 0.05609693005681038, + "learning_rate": 7.838890075459169e-05, + "loss": 0.1205, + "step": 18140 + }, + { + "epoch": 0.469077094048019, + "grad_norm": 0.05516740679740906, + "learning_rate": 7.83580051772341e-05, + "loss": 0.1205, + "step": 18150 + }, + { + "epoch": 0.46933553872793526, + "grad_norm": 0.038642801344394684, + "learning_rate": 7.832709741166504e-05, + "loss": 0.1208, + "step": 18160 + }, + { + "epoch": 0.4695939834078515, + "grad_norm": 0.044019702821969986, + "learning_rate": 7.829617748608553e-05, + "loss": 0.1215, + "step": 18170 + }, + { + "epoch": 0.46985242808776784, + "grad_norm": 0.036193523555994034, + "learning_rate": 7.826524542870782e-05, + "loss": 0.121, + "step": 18180 + }, + { + "epoch": 0.4701108727676841, + "grad_norm": 0.026931576430797577, + "learning_rate": 7.823430126775514e-05, + "loss": 0.1215, + "step": 18190 + }, + { + "epoch": 0.47036931744760035, + "grad_norm": 0.04033075273036957, + "learning_rate": 7.82033450314618e-05, + "loss": 0.1213, + "step": 18200 + }, + { + "epoch": 0.4706277621275166, + "grad_norm": 0.03224469721317291, + "learning_rate": 7.817237674807314e-05, + "loss": 0.1212, + "step": 18210 + }, + { + "epoch": 0.47088620680743287, + "grad_norm": 0.03407762944698334, + "learning_rate": 7.814139644584544e-05, + "loss": 0.121, + "step": 18220 + }, + { + "epoch": 0.47114465148734913, + "grad_norm": 0.04999006167054176, + "learning_rate": 7.8110404153046e-05, + "loss": 0.121, + "step": 18230 + }, + { + "epoch": 0.4714030961672654, + "grad_norm": 0.03140765056014061, + "learning_rate": 7.807939989795306e-05, + "loss": 0.1209, + "step": 18240 + }, + { + "epoch": 0.47166154084718165, + "grad_norm": 0.03008224628865719, + "learning_rate": 7.80483837088557e-05, + "loss": 0.1212, + "step": 18250 + }, + { + "epoch": 0.4719199855270979, + "grad_norm": 0.026429468765854836, + "learning_rate": 7.8017355614054e-05, + "loss": 0.1209, + "step": 18260 + }, + { + "epoch": 0.47217843020701417, + "grad_norm": 0.021291444078087807, + "learning_rate": 7.798631564185884e-05, + "loss": 0.1215, + "step": 18270 + }, + { + "epoch": 0.4724368748869305, + "grad_norm": 0.03507627546787262, + "learning_rate": 7.79552638205919e-05, + "loss": 0.1209, + "step": 18280 + }, + { + "epoch": 0.47269531956684674, + "grad_norm": 0.04378274083137512, + "learning_rate": 7.792420017858578e-05, + "loss": 0.1208, + "step": 18290 + }, + { + "epoch": 0.472953764246763, + "grad_norm": 0.04523232579231262, + "learning_rate": 7.789312474418375e-05, + "loss": 0.1205, + "step": 18300 + }, + { + "epoch": 0.47321220892667926, + "grad_norm": 0.03517361730337143, + "learning_rate": 7.786203754573994e-05, + "loss": 0.1208, + "step": 18310 + }, + { + "epoch": 0.4734706536065955, + "grad_norm": 0.03526557981967926, + "learning_rate": 7.78309386116191e-05, + "loss": 0.121, + "step": 18320 + }, + { + "epoch": 0.4737290982865118, + "grad_norm": 0.028304534032940865, + "learning_rate": 7.77998279701968e-05, + "loss": 0.1204, + "step": 18330 + }, + { + "epoch": 0.47398754296642803, + "grad_norm": 0.04075614735484123, + "learning_rate": 7.776870564985923e-05, + "loss": 0.1212, + "step": 18340 + }, + { + "epoch": 0.4742459876463443, + "grad_norm": 0.026510139927268028, + "learning_rate": 7.773757167900327e-05, + "loss": 0.1202, + "step": 18350 + }, + { + "epoch": 0.47450443232626055, + "grad_norm": 0.05515741929411888, + "learning_rate": 7.77064260860364e-05, + "loss": 0.1212, + "step": 18360 + }, + { + "epoch": 0.4747628770061768, + "grad_norm": 0.024724463000893593, + "learning_rate": 7.76752688993767e-05, + "loss": 0.1206, + "step": 18370 + }, + { + "epoch": 0.47502132168609307, + "grad_norm": 0.05541424825787544, + "learning_rate": 7.764410014745288e-05, + "loss": 0.1204, + "step": 18380 + }, + { + "epoch": 0.4752797663660094, + "grad_norm": 0.04238291084766388, + "learning_rate": 7.761291985870411e-05, + "loss": 0.1211, + "step": 18390 + }, + { + "epoch": 0.47553821104592564, + "grad_norm": 0.03613630682229996, + "learning_rate": 7.758172806158022e-05, + "loss": 0.1207, + "step": 18400 + }, + { + "epoch": 0.4757966557258419, + "grad_norm": 0.02706298418343067, + "learning_rate": 7.755052478454139e-05, + "loss": 0.1205, + "step": 18410 + }, + { + "epoch": 0.47605510040575816, + "grad_norm": 0.03277969732880592, + "learning_rate": 7.75193100560584e-05, + "loss": 0.1209, + "step": 18420 + }, + { + "epoch": 0.4763135450856744, + "grad_norm": 0.03943566977977753, + "learning_rate": 7.748808390461241e-05, + "loss": 0.1203, + "step": 18430 + }, + { + "epoch": 0.4765719897655907, + "grad_norm": 0.03377638757228851, + "learning_rate": 7.745684635869501e-05, + "loss": 0.1207, + "step": 18440 + }, + { + "epoch": 0.47683043444550693, + "grad_norm": 0.043932005763053894, + "learning_rate": 7.742559744680821e-05, + "loss": 0.1215, + "step": 18450 + }, + { + "epoch": 0.4770888791254232, + "grad_norm": 0.05725103244185448, + "learning_rate": 7.739433719746438e-05, + "loss": 0.1206, + "step": 18460 + }, + { + "epoch": 0.47734732380533945, + "grad_norm": 0.0425422303378582, + "learning_rate": 7.736306563918621e-05, + "loss": 0.1203, + "step": 18470 + }, + { + "epoch": 0.4776057684852557, + "grad_norm": 0.04107794538140297, + "learning_rate": 7.733178280050677e-05, + "loss": 0.1207, + "step": 18480 + }, + { + "epoch": 0.47786421316517197, + "grad_norm": 0.04837331175804138, + "learning_rate": 7.730048870996934e-05, + "loss": 0.1206, + "step": 18490 + }, + { + "epoch": 0.4781226578450883, + "grad_norm": 0.031742483377456665, + "learning_rate": 7.72691833961275e-05, + "loss": 0.1209, + "step": 18500 + }, + { + "epoch": 0.47838110252500454, + "grad_norm": 0.0299906674772501, + "learning_rate": 7.723786688754515e-05, + "loss": 0.1206, + "step": 18510 + }, + { + "epoch": 0.4786395472049208, + "grad_norm": 0.05250910669565201, + "learning_rate": 7.720653921279627e-05, + "loss": 0.1207, + "step": 18520 + }, + { + "epoch": 0.47889799188483706, + "grad_norm": 0.04350259155035019, + "learning_rate": 7.717520040046511e-05, + "loss": 0.1213, + "step": 18530 + }, + { + "epoch": 0.4791564365647533, + "grad_norm": 0.03823021054267883, + "learning_rate": 7.71438504791461e-05, + "loss": 0.1211, + "step": 18540 + }, + { + "epoch": 0.4794148812446696, + "grad_norm": 0.03723206743597984, + "learning_rate": 7.71124894774437e-05, + "loss": 0.1204, + "step": 18550 + }, + { + "epoch": 0.47967332592458584, + "grad_norm": 0.02649003267288208, + "learning_rate": 7.708111742397261e-05, + "loss": 0.1213, + "step": 18560 + }, + { + "epoch": 0.4799317706045021, + "grad_norm": 0.03084356151521206, + "learning_rate": 7.704973434735753e-05, + "loss": 0.1212, + "step": 18570 + }, + { + "epoch": 0.48019021528441835, + "grad_norm": 0.03331157565116882, + "learning_rate": 7.701834027623324e-05, + "loss": 0.1204, + "step": 18580 + }, + { + "epoch": 0.4804486599643346, + "grad_norm": 0.028239266946911812, + "learning_rate": 7.698693523924457e-05, + "loss": 0.1211, + "step": 18590 + }, + { + "epoch": 0.48070710464425087, + "grad_norm": 0.03997966647148132, + "learning_rate": 7.695551926504632e-05, + "loss": 0.1208, + "step": 18600 + }, + { + "epoch": 0.4809655493241672, + "grad_norm": 0.03455495461821556, + "learning_rate": 7.692409238230332e-05, + "loss": 0.1204, + "step": 18610 + }, + { + "epoch": 0.48122399400408344, + "grad_norm": 0.029356028884649277, + "learning_rate": 7.68926546196903e-05, + "loss": 0.1208, + "step": 18620 + }, + { + "epoch": 0.4814824386839997, + "grad_norm": 0.026941636577248573, + "learning_rate": 7.686120600589195e-05, + "loss": 0.12, + "step": 18630 + }, + { + "epoch": 0.48174088336391596, + "grad_norm": 0.036398954689502716, + "learning_rate": 7.682974656960283e-05, + "loss": 0.121, + "step": 18640 + }, + { + "epoch": 0.4819993280438322, + "grad_norm": 0.028269926086068153, + "learning_rate": 7.679827633952741e-05, + "loss": 0.1207, + "step": 18650 + }, + { + "epoch": 0.4822577727237485, + "grad_norm": 0.027184421196579933, + "learning_rate": 7.676679534438e-05, + "loss": 0.1211, + "step": 18660 + }, + { + "epoch": 0.48251621740366474, + "grad_norm": 0.049772970378398895, + "learning_rate": 7.67353036128847e-05, + "loss": 0.1207, + "step": 18670 + }, + { + "epoch": 0.482774662083581, + "grad_norm": 0.030553998425602913, + "learning_rate": 7.670380117377548e-05, + "loss": 0.1215, + "step": 18680 + }, + { + "epoch": 0.48303310676349726, + "grad_norm": 0.035214513540267944, + "learning_rate": 7.667228805579599e-05, + "loss": 0.1206, + "step": 18690 + }, + { + "epoch": 0.4832915514434135, + "grad_norm": 0.030374618247151375, + "learning_rate": 7.664076428769967e-05, + "loss": 0.1208, + "step": 18700 + }, + { + "epoch": 0.48354999612332983, + "grad_norm": 0.034145571291446686, + "learning_rate": 7.66092298982497e-05, + "loss": 0.1206, + "step": 18710 + }, + { + "epoch": 0.4838084408032461, + "grad_norm": 0.04430621489882469, + "learning_rate": 7.657768491621891e-05, + "loss": 0.1199, + "step": 18720 + }, + { + "epoch": 0.48406688548316235, + "grad_norm": 0.034370873123407364, + "learning_rate": 7.65461293703898e-05, + "loss": 0.1206, + "step": 18730 + }, + { + "epoch": 0.4843253301630786, + "grad_norm": 0.02490735612809658, + "learning_rate": 7.651456328955453e-05, + "loss": 0.1212, + "step": 18740 + }, + { + "epoch": 0.48458377484299486, + "grad_norm": 0.02994326874613762, + "learning_rate": 7.648298670251486e-05, + "loss": 0.1212, + "step": 18750 + }, + { + "epoch": 0.4848422195229111, + "grad_norm": 0.02952560968697071, + "learning_rate": 7.645139963808214e-05, + "loss": 0.1216, + "step": 18760 + }, + { + "epoch": 0.4851006642028274, + "grad_norm": 0.022260570898652077, + "learning_rate": 7.641980212507727e-05, + "loss": 0.1207, + "step": 18770 + }, + { + "epoch": 0.48535910888274364, + "grad_norm": 0.03574168682098389, + "learning_rate": 7.638819419233067e-05, + "loss": 0.1211, + "step": 18780 + }, + { + "epoch": 0.4856175535626599, + "grad_norm": 0.03445505350828171, + "learning_rate": 7.635657586868233e-05, + "loss": 0.1215, + "step": 18790 + }, + { + "epoch": 0.48587599824257616, + "grad_norm": 0.026242157444357872, + "learning_rate": 7.632494718298165e-05, + "loss": 0.1213, + "step": 18800 + }, + { + "epoch": 0.4861344429224924, + "grad_norm": 0.05051193758845329, + "learning_rate": 7.629330816408753e-05, + "loss": 0.1205, + "step": 18810 + }, + { + "epoch": 0.48639288760240873, + "grad_norm": 0.049741458147764206, + "learning_rate": 7.626165884086826e-05, + "loss": 0.1206, + "step": 18820 + }, + { + "epoch": 0.486651332282325, + "grad_norm": 0.026078389957547188, + "learning_rate": 7.622999924220157e-05, + "loss": 0.1207, + "step": 18830 + }, + { + "epoch": 0.48690977696224125, + "grad_norm": 0.029014501720666885, + "learning_rate": 7.619832939697457e-05, + "loss": 0.1211, + "step": 18840 + }, + { + "epoch": 0.4871682216421575, + "grad_norm": 0.04857383668422699, + "learning_rate": 7.616664933408364e-05, + "loss": 0.1208, + "step": 18850 + }, + { + "epoch": 0.48742666632207376, + "grad_norm": 0.03212606534361839, + "learning_rate": 7.613495908243458e-05, + "loss": 0.1214, + "step": 18860 + }, + { + "epoch": 0.48768511100199, + "grad_norm": 0.0243525430560112, + "learning_rate": 7.610325867094245e-05, + "loss": 0.1199, + "step": 18870 + }, + { + "epoch": 0.4879435556819063, + "grad_norm": 0.030657850205898285, + "learning_rate": 7.607154812853156e-05, + "loss": 0.1205, + "step": 18880 + }, + { + "epoch": 0.48820200036182254, + "grad_norm": 0.05423952266573906, + "learning_rate": 7.60398274841355e-05, + "loss": 0.1215, + "step": 18890 + }, + { + "epoch": 0.4884604450417388, + "grad_norm": 0.027357621118426323, + "learning_rate": 7.600809676669704e-05, + "loss": 0.1209, + "step": 18900 + }, + { + "epoch": 0.48871888972165506, + "grad_norm": 0.03764410316944122, + "learning_rate": 7.59763560051682e-05, + "loss": 0.1204, + "step": 18910 + }, + { + "epoch": 0.4889773344015713, + "grad_norm": 0.04262707009911537, + "learning_rate": 7.594460522851007e-05, + "loss": 0.1208, + "step": 18920 + }, + { + "epoch": 0.48923577908148763, + "grad_norm": 0.039808280766010284, + "learning_rate": 7.591284446569296e-05, + "loss": 0.121, + "step": 18930 + }, + { + "epoch": 0.4894942237614039, + "grad_norm": 0.030855044722557068, + "learning_rate": 7.588107374569625e-05, + "loss": 0.1214, + "step": 18940 + }, + { + "epoch": 0.48975266844132015, + "grad_norm": 0.024253515526652336, + "learning_rate": 7.584929309750843e-05, + "loss": 0.1206, + "step": 18950 + }, + { + "epoch": 0.4900111131212364, + "grad_norm": 0.03804367408156395, + "learning_rate": 7.581750255012706e-05, + "loss": 0.1217, + "step": 18960 + }, + { + "epoch": 0.49026955780115267, + "grad_norm": 0.034431904554367065, + "learning_rate": 7.578570213255867e-05, + "loss": 0.1209, + "step": 18970 + }, + { + "epoch": 0.4905280024810689, + "grad_norm": 0.05128060281276703, + "learning_rate": 7.575389187381886e-05, + "loss": 0.1214, + "step": 18980 + }, + { + "epoch": 0.4907864471609852, + "grad_norm": 0.028463473543524742, + "learning_rate": 7.572207180293218e-05, + "loss": 0.1203, + "step": 18990 + }, + { + "epoch": 0.49104489184090144, + "grad_norm": 0.026231560856103897, + "learning_rate": 7.569024194893212e-05, + "loss": 0.1205, + "step": 19000 + }, + { + "epoch": 0.4913033365208177, + "grad_norm": 0.03263719752430916, + "learning_rate": 7.565840234086116e-05, + "loss": 0.1216, + "step": 19010 + }, + { + "epoch": 0.49156178120073396, + "grad_norm": 0.03389092907309532, + "learning_rate": 7.56265530077706e-05, + "loss": 0.1214, + "step": 19020 + }, + { + "epoch": 0.4918202258806502, + "grad_norm": 0.02904711477458477, + "learning_rate": 7.559469397872064e-05, + "loss": 0.1218, + "step": 19030 + }, + { + "epoch": 0.49207867056056653, + "grad_norm": 0.03267741948366165, + "learning_rate": 7.556282528278035e-05, + "loss": 0.1215, + "step": 19040 + }, + { + "epoch": 0.4923371152404828, + "grad_norm": 0.03180887550115585, + "learning_rate": 7.553094694902763e-05, + "loss": 0.1211, + "step": 19050 + }, + { + "epoch": 0.49259555992039905, + "grad_norm": 0.0331382192671299, + "learning_rate": 7.549905900654911e-05, + "loss": 0.1212, + "step": 19060 + }, + { + "epoch": 0.4928540046003153, + "grad_norm": 0.5759429335594177, + "learning_rate": 7.546716148444023e-05, + "loss": 0.1317, + "step": 19070 + }, + { + "epoch": 0.49311244928023157, + "grad_norm": 0.07023889571428299, + "learning_rate": 7.543525441180519e-05, + "loss": 0.133, + "step": 19080 + }, + { + "epoch": 0.4933708939601478, + "grad_norm": 0.045341651886701584, + "learning_rate": 7.540333781775686e-05, + "loss": 0.1237, + "step": 19090 + }, + { + "epoch": 0.4936293386400641, + "grad_norm": 0.05453513562679291, + "learning_rate": 7.537141173141682e-05, + "loss": 0.1223, + "step": 19100 + }, + { + "epoch": 0.49388778331998034, + "grad_norm": 0.0357089526951313, + "learning_rate": 7.533947618191533e-05, + "loss": 0.1218, + "step": 19110 + }, + { + "epoch": 0.4941462279998966, + "grad_norm": 0.0338035486638546, + "learning_rate": 7.530753119839125e-05, + "loss": 0.1212, + "step": 19120 + }, + { + "epoch": 0.49440467267981286, + "grad_norm": 0.02506757341325283, + "learning_rate": 7.527557680999205e-05, + "loss": 0.1209, + "step": 19130 + }, + { + "epoch": 0.4946631173597292, + "grad_norm": 0.04927479848265648, + "learning_rate": 7.524361304587381e-05, + "loss": 0.1211, + "step": 19140 + }, + { + "epoch": 0.49492156203964544, + "grad_norm": 0.04696602001786232, + "learning_rate": 7.521163993520115e-05, + "loss": 0.121, + "step": 19150 + }, + { + "epoch": 0.4951800067195617, + "grad_norm": 0.02489475905895233, + "learning_rate": 7.517965750714719e-05, + "loss": 0.1211, + "step": 19160 + }, + { + "epoch": 0.49543845139947795, + "grad_norm": 0.038473065942525864, + "learning_rate": 7.514766579089363e-05, + "loss": 0.1211, + "step": 19170 + }, + { + "epoch": 0.4956968960793942, + "grad_norm": 0.030723921954631805, + "learning_rate": 7.511566481563055e-05, + "loss": 0.121, + "step": 19180 + }, + { + "epoch": 0.49595534075931047, + "grad_norm": 0.029933540150523186, + "learning_rate": 7.508365461055652e-05, + "loss": 0.1214, + "step": 19190 + }, + { + "epoch": 0.49621378543922673, + "grad_norm": 0.023409251123666763, + "learning_rate": 7.505163520487855e-05, + "loss": 0.1207, + "step": 19200 + }, + { + "epoch": 0.496472230119143, + "grad_norm": 0.038509152829647064, + "learning_rate": 7.5019606627812e-05, + "loss": 0.1209, + "step": 19210 + }, + { + "epoch": 0.49673067479905925, + "grad_norm": 0.02442345581948757, + "learning_rate": 7.498756890858065e-05, + "loss": 0.1213, + "step": 19220 + }, + { + "epoch": 0.4969891194789755, + "grad_norm": 0.026290714740753174, + "learning_rate": 7.49555220764166e-05, + "loss": 0.1206, + "step": 19230 + }, + { + "epoch": 0.49724756415889176, + "grad_norm": 0.036711644381284714, + "learning_rate": 7.492346616056025e-05, + "loss": 0.1203, + "step": 19240 + }, + { + "epoch": 0.4975060088388081, + "grad_norm": 0.028854476287961006, + "learning_rate": 7.489140119026032e-05, + "loss": 0.1206, + "step": 19250 + }, + { + "epoch": 0.49776445351872434, + "grad_norm": 0.049731142818927765, + "learning_rate": 7.485932719477373e-05, + "loss": 0.1208, + "step": 19260 + }, + { + "epoch": 0.4980228981986406, + "grad_norm": 0.02823898196220398, + "learning_rate": 7.482724420336569e-05, + "loss": 0.1207, + "step": 19270 + }, + { + "epoch": 0.49828134287855685, + "grad_norm": 0.02628445066511631, + "learning_rate": 7.479515224530964e-05, + "loss": 0.1196, + "step": 19280 + }, + { + "epoch": 0.4985397875584731, + "grad_norm": 0.047897517681121826, + "learning_rate": 7.476305134988713e-05, + "loss": 0.1211, + "step": 19290 + }, + { + "epoch": 0.49879823223838937, + "grad_norm": 0.0417899414896965, + "learning_rate": 7.473094154638789e-05, + "loss": 0.1214, + "step": 19300 + }, + { + "epoch": 0.49905667691830563, + "grad_norm": 0.04071054980158806, + "learning_rate": 7.469882286410983e-05, + "loss": 0.1209, + "step": 19310 + }, + { + "epoch": 0.4993151215982219, + "grad_norm": 0.0555400475859642, + "learning_rate": 7.466669533235889e-05, + "loss": 0.1209, + "step": 19320 + }, + { + "epoch": 0.49957356627813815, + "grad_norm": 0.03907303512096405, + "learning_rate": 7.463455898044912e-05, + "loss": 0.1207, + "step": 19330 + }, + { + "epoch": 0.4998320109580544, + "grad_norm": 0.023745369166135788, + "learning_rate": 7.46024138377026e-05, + "loss": 0.1203, + "step": 19340 + }, + { + "epoch": 0.5000904556379707, + "grad_norm": 0.031146835535764694, + "learning_rate": 7.457025993344948e-05, + "loss": 0.1199, + "step": 19350 + }, + { + "epoch": 0.5003489003178869, + "grad_norm": 0.043937116861343384, + "learning_rate": 7.453809729702784e-05, + "loss": 0.1205, + "step": 19360 + }, + { + "epoch": 0.5006073449978032, + "grad_norm": 0.02757302299141884, + "learning_rate": 7.450592595778376e-05, + "loss": 0.12, + "step": 19370 + }, + { + "epoch": 0.5008657896777194, + "grad_norm": 0.04315382242202759, + "learning_rate": 7.447374594507123e-05, + "loss": 0.1203, + "step": 19380 + }, + { + "epoch": 0.5011242343576358, + "grad_norm": 0.02705344185233116, + "learning_rate": 7.444155728825222e-05, + "loss": 0.1202, + "step": 19390 + }, + { + "epoch": 0.501382679037552, + "grad_norm": 0.02443881891667843, + "learning_rate": 7.440936001669653e-05, + "loss": 0.1203, + "step": 19400 + }, + { + "epoch": 0.5016411237174683, + "grad_norm": 0.03004947304725647, + "learning_rate": 7.437715415978182e-05, + "loss": 0.1205, + "step": 19410 + }, + { + "epoch": 0.5018995683973846, + "grad_norm": 0.029870666563510895, + "learning_rate": 7.434493974689363e-05, + "loss": 0.1207, + "step": 19420 + }, + { + "epoch": 0.5021580130773008, + "grad_norm": 0.03068307600915432, + "learning_rate": 7.431271680742524e-05, + "loss": 0.1209, + "step": 19430 + }, + { + "epoch": 0.5024164577572171, + "grad_norm": 0.03199680149555206, + "learning_rate": 7.428048537077779e-05, + "loss": 0.1203, + "step": 19440 + }, + { + "epoch": 0.5026749024371333, + "grad_norm": 0.08586801588535309, + "learning_rate": 7.42482454663601e-05, + "loss": 0.1205, + "step": 19450 + }, + { + "epoch": 0.5029333471170496, + "grad_norm": 0.0442020520567894, + "learning_rate": 7.421599712358876e-05, + "loss": 0.12, + "step": 19460 + }, + { + "epoch": 0.5031917917969658, + "grad_norm": 0.0481729581952095, + "learning_rate": 7.418374037188803e-05, + "loss": 0.1207, + "step": 19470 + }, + { + "epoch": 0.5034502364768821, + "grad_norm": 0.02706928923726082, + "learning_rate": 7.415147524068986e-05, + "loss": 0.1203, + "step": 19480 + }, + { + "epoch": 0.5037086811567983, + "grad_norm": 0.030817445367574692, + "learning_rate": 7.411920175943385e-05, + "loss": 0.1203, + "step": 19490 + }, + { + "epoch": 0.5039671258367147, + "grad_norm": 0.03502395376563072, + "learning_rate": 7.408691995756723e-05, + "loss": 0.1205, + "step": 19500 + }, + { + "epoch": 0.5042255705166309, + "grad_norm": 0.04453817009925842, + "learning_rate": 7.405462986454478e-05, + "loss": 0.1205, + "step": 19510 + }, + { + "epoch": 0.5044840151965472, + "grad_norm": 0.02701381966471672, + "learning_rate": 7.402233150982887e-05, + "loss": 0.1199, + "step": 19520 + }, + { + "epoch": 0.5047424598764635, + "grad_norm": 0.03177789971232414, + "learning_rate": 7.399002492288942e-05, + "loss": 0.1204, + "step": 19530 + }, + { + "epoch": 0.5050009045563797, + "grad_norm": 0.04018397629261017, + "learning_rate": 7.395771013320385e-05, + "loss": 0.1205, + "step": 19540 + }, + { + "epoch": 0.505259349236296, + "grad_norm": 0.03674954175949097, + "learning_rate": 7.392538717025706e-05, + "loss": 0.1206, + "step": 19550 + }, + { + "epoch": 0.5055177939162122, + "grad_norm": 0.03747940808534622, + "learning_rate": 7.38930560635414e-05, + "loss": 0.1208, + "step": 19560 + }, + { + "epoch": 0.5057762385961285, + "grad_norm": 0.027796296402812004, + "learning_rate": 7.386071684255667e-05, + "loss": 0.12, + "step": 19570 + }, + { + "epoch": 0.5060346832760447, + "grad_norm": 0.032420914620161057, + "learning_rate": 7.382836953681005e-05, + "loss": 0.1204, + "step": 19580 + }, + { + "epoch": 0.506293127955961, + "grad_norm": 0.041382379829883575, + "learning_rate": 7.379601417581614e-05, + "loss": 0.1211, + "step": 19590 + }, + { + "epoch": 0.5065515726358772, + "grad_norm": 0.03428683429956436, + "learning_rate": 7.376365078909684e-05, + "loss": 0.12, + "step": 19600 + }, + { + "epoch": 0.5068100173157936, + "grad_norm": 0.05661332607269287, + "learning_rate": 7.373127940618141e-05, + "loss": 0.1196, + "step": 19610 + }, + { + "epoch": 0.5070684619957099, + "grad_norm": 0.0362049862742424, + "learning_rate": 7.369890005660638e-05, + "loss": 0.1205, + "step": 19620 + }, + { + "epoch": 0.5073269066756261, + "grad_norm": 0.04137473925948143, + "learning_rate": 7.366651276991557e-05, + "loss": 0.1207, + "step": 19630 + }, + { + "epoch": 0.5075853513555424, + "grad_norm": 0.03497382625937462, + "learning_rate": 7.363411757566002e-05, + "loss": 0.1208, + "step": 19640 + }, + { + "epoch": 0.5078437960354586, + "grad_norm": 0.023908205330371857, + "learning_rate": 7.360171450339799e-05, + "loss": 0.1212, + "step": 19650 + }, + { + "epoch": 0.5081022407153749, + "grad_norm": 0.02919388934969902, + "learning_rate": 7.356930358269497e-05, + "loss": 0.121, + "step": 19660 + }, + { + "epoch": 0.5083606853952911, + "grad_norm": 0.048870749771595, + "learning_rate": 7.353688484312352e-05, + "loss": 0.1201, + "step": 19670 + }, + { + "epoch": 0.5086191300752074, + "grad_norm": 0.04047771543264389, + "learning_rate": 7.350445831426345e-05, + "loss": 0.1208, + "step": 19680 + }, + { + "epoch": 0.5088775747551236, + "grad_norm": 0.0429043173789978, + "learning_rate": 7.347202402570157e-05, + "loss": 0.12, + "step": 19690 + }, + { + "epoch": 0.5091360194350399, + "grad_norm": 0.032800331711769104, + "learning_rate": 7.343958200703183e-05, + "loss": 0.1213, + "step": 19700 + }, + { + "epoch": 0.5093944641149561, + "grad_norm": 0.029622206464409828, + "learning_rate": 7.340713228785523e-05, + "loss": 0.1201, + "step": 19710 + }, + { + "epoch": 0.5096529087948725, + "grad_norm": 0.03248055279254913, + "learning_rate": 7.33746748977798e-05, + "loss": 0.1201, + "step": 19720 + }, + { + "epoch": 0.5099113534747888, + "grad_norm": 0.034430284053087234, + "learning_rate": 7.334220986642052e-05, + "loss": 0.12, + "step": 19730 + }, + { + "epoch": 0.510169798154705, + "grad_norm": 0.028995446860790253, + "learning_rate": 7.330973722339942e-05, + "loss": 0.1207, + "step": 19740 + }, + { + "epoch": 0.5104282428346213, + "grad_norm": 0.06089401617646217, + "learning_rate": 7.32772569983454e-05, + "loss": 0.1211, + "step": 19750 + }, + { + "epoch": 0.5106866875145375, + "grad_norm": 0.035580962896347046, + "learning_rate": 7.324476922089433e-05, + "loss": 0.1197, + "step": 19760 + }, + { + "epoch": 0.5109451321944538, + "grad_norm": 0.03296618536114693, + "learning_rate": 7.321227392068894e-05, + "loss": 0.1203, + "step": 19770 + }, + { + "epoch": 0.51120357687437, + "grad_norm": 0.03645109757781029, + "learning_rate": 7.317977112737881e-05, + "loss": 0.1203, + "step": 19780 + }, + { + "epoch": 0.5114620215542863, + "grad_norm": 0.02981927990913391, + "learning_rate": 7.314726087062046e-05, + "loss": 0.1201, + "step": 19790 + }, + { + "epoch": 0.5117204662342025, + "grad_norm": 0.048343952745199203, + "learning_rate": 7.311474318007708e-05, + "loss": 0.12, + "step": 19800 + }, + { + "epoch": 0.5119789109141188, + "grad_norm": 0.023931222036480904, + "learning_rate": 7.30822180854187e-05, + "loss": 0.1212, + "step": 19810 + }, + { + "epoch": 0.512237355594035, + "grad_norm": 0.03143184632062912, + "learning_rate": 7.304968561632215e-05, + "loss": 0.1204, + "step": 19820 + }, + { + "epoch": 0.5124958002739514, + "grad_norm": 0.04674369469285011, + "learning_rate": 7.301714580247095e-05, + "loss": 0.1204, + "step": 19830 + }, + { + "epoch": 0.5127542449538677, + "grad_norm": 0.02729790471494198, + "learning_rate": 7.298459867355527e-05, + "loss": 0.1202, + "step": 19840 + }, + { + "epoch": 0.5130126896337839, + "grad_norm": 0.0326131209731102, + "learning_rate": 7.295204425927207e-05, + "loss": 0.1204, + "step": 19850 + }, + { + "epoch": 0.5132711343137002, + "grad_norm": 0.02843979001045227, + "learning_rate": 7.291948258932484e-05, + "loss": 0.1208, + "step": 19860 + }, + { + "epoch": 0.5135295789936164, + "grad_norm": 0.032138701528310776, + "learning_rate": 7.288691369342377e-05, + "loss": 0.1204, + "step": 19870 + }, + { + "epoch": 0.5137880236735327, + "grad_norm": 0.026202313601970673, + "learning_rate": 7.285433760128562e-05, + "loss": 0.1199, + "step": 19880 + }, + { + "epoch": 0.5140464683534489, + "grad_norm": 0.026974685490131378, + "learning_rate": 7.282175434263371e-05, + "loss": 0.12, + "step": 19890 + }, + { + "epoch": 0.5143049130333652, + "grad_norm": 0.025364646688103676, + "learning_rate": 7.27891639471979e-05, + "loss": 0.1202, + "step": 19900 + }, + { + "epoch": 0.5145633577132814, + "grad_norm": 0.05834595486521721, + "learning_rate": 7.275656644471457e-05, + "loss": 0.121, + "step": 19910 + }, + { + "epoch": 0.5148218023931977, + "grad_norm": 0.0503285713493824, + "learning_rate": 7.272396186492656e-05, + "loss": 0.1206, + "step": 19920 + }, + { + "epoch": 0.515080247073114, + "grad_norm": 0.036742448806762695, + "learning_rate": 7.269135023758318e-05, + "loss": 0.1207, + "step": 19930 + }, + { + "epoch": 0.5153386917530303, + "grad_norm": 0.025144286453723907, + "learning_rate": 7.265873159244023e-05, + "loss": 0.1208, + "step": 19940 + }, + { + "epoch": 0.5155971364329466, + "grad_norm": 0.03825952112674713, + "learning_rate": 7.26261059592598e-05, + "loss": 0.1203, + "step": 19950 + }, + { + "epoch": 0.5158555811128628, + "grad_norm": 0.0324537456035614, + "learning_rate": 7.259347336781043e-05, + "loss": 0.1203, + "step": 19960 + }, + { + "epoch": 0.5161140257927791, + "grad_norm": 0.050951037555933, + "learning_rate": 7.256083384786703e-05, + "loss": 0.1209, + "step": 19970 + }, + { + "epoch": 0.5163724704726953, + "grad_norm": 0.021621443331241608, + "learning_rate": 7.252818742921075e-05, + "loss": 0.12, + "step": 19980 + }, + { + "epoch": 0.5166309151526116, + "grad_norm": 0.031997714191675186, + "learning_rate": 7.249553414162911e-05, + "loss": 0.1211, + "step": 19990 + }, + { + "epoch": 0.5168893598325278, + "grad_norm": 0.03336925059556961, + "learning_rate": 7.246287401491585e-05, + "loss": 0.1202, + "step": 20000 + }, + { + "epoch": 0.5171478045124441, + "grad_norm": 0.04123704135417938, + "learning_rate": 7.243020707887096e-05, + "loss": 0.1208, + "step": 20010 + }, + { + "epoch": 0.5174062491923603, + "grad_norm": 0.03009060211479664, + "learning_rate": 7.23975333633007e-05, + "loss": 0.1205, + "step": 20020 + }, + { + "epoch": 0.5176646938722766, + "grad_norm": 0.024940215051174164, + "learning_rate": 7.236485289801742e-05, + "loss": 0.1206, + "step": 20030 + }, + { + "epoch": 0.5179231385521929, + "grad_norm": 0.03150799497961998, + "learning_rate": 7.233216571283968e-05, + "loss": 0.1201, + "step": 20040 + }, + { + "epoch": 0.5181815832321092, + "grad_norm": 0.026570986956357956, + "learning_rate": 7.22994718375922e-05, + "loss": 0.1205, + "step": 20050 + }, + { + "epoch": 0.5184400279120255, + "grad_norm": 0.03827090933918953, + "learning_rate": 7.226677130210572e-05, + "loss": 0.1211, + "step": 20060 + }, + { + "epoch": 0.5186984725919417, + "grad_norm": 0.04351060092449188, + "learning_rate": 7.223406413621716e-05, + "loss": 0.1202, + "step": 20070 + }, + { + "epoch": 0.518956917271858, + "grad_norm": 0.056141022592782974, + "learning_rate": 7.220135036976941e-05, + "loss": 0.1206, + "step": 20080 + }, + { + "epoch": 0.5192153619517742, + "grad_norm": 0.034053903073072433, + "learning_rate": 7.21686300326114e-05, + "loss": 0.1202, + "step": 20090 + }, + { + "epoch": 0.5194738066316905, + "grad_norm": 0.05351325869560242, + "learning_rate": 7.21359031545981e-05, + "loss": 0.1211, + "step": 20100 + }, + { + "epoch": 0.5197322513116067, + "grad_norm": 0.03403015807271004, + "learning_rate": 7.210316976559039e-05, + "loss": 0.1206, + "step": 20110 + }, + { + "epoch": 0.519990695991523, + "grad_norm": 0.023777080699801445, + "learning_rate": 7.207042989545511e-05, + "loss": 0.1211, + "step": 20120 + }, + { + "epoch": 0.5202491406714392, + "grad_norm": 0.03406531736254692, + "learning_rate": 7.203768357406506e-05, + "loss": 0.1205, + "step": 20130 + }, + { + "epoch": 0.5205075853513556, + "grad_norm": 0.025705114006996155, + "learning_rate": 7.200493083129884e-05, + "loss": 0.1204, + "step": 20140 + }, + { + "epoch": 0.5207660300312718, + "grad_norm": 0.03246362507343292, + "learning_rate": 7.197217169704095e-05, + "loss": 0.1207, + "step": 20150 + }, + { + "epoch": 0.5210244747111881, + "grad_norm": 0.048002004623413086, + "learning_rate": 7.193940620118178e-05, + "loss": 0.1201, + "step": 20160 + }, + { + "epoch": 0.5212829193911044, + "grad_norm": 0.020731130614876747, + "learning_rate": 7.19066343736174e-05, + "loss": 0.1214, + "step": 20170 + }, + { + "epoch": 0.5215413640710206, + "grad_norm": 0.035970184952020645, + "learning_rate": 7.187385624424979e-05, + "loss": 0.12, + "step": 20180 + }, + { + "epoch": 0.5217998087509369, + "grad_norm": 0.029571792110800743, + "learning_rate": 7.184107184298656e-05, + "loss": 0.1199, + "step": 20190 + }, + { + "epoch": 0.5220582534308531, + "grad_norm": 0.02627413719892502, + "learning_rate": 7.180828119974115e-05, + "loss": 0.1211, + "step": 20200 + }, + { + "epoch": 0.5223166981107694, + "grad_norm": 0.03428468108177185, + "learning_rate": 7.177548434443263e-05, + "loss": 0.1212, + "step": 20210 + }, + { + "epoch": 0.5225751427906856, + "grad_norm": 0.030139118432998657, + "learning_rate": 7.174268130698571e-05, + "loss": 0.1195, + "step": 20220 + }, + { + "epoch": 0.5228335874706019, + "grad_norm": 0.05158674716949463, + "learning_rate": 7.170987211733084e-05, + "loss": 0.1201, + "step": 20230 + }, + { + "epoch": 0.5230920321505181, + "grad_norm": 0.022252986207604408, + "learning_rate": 7.167705680540402e-05, + "loss": 0.121, + "step": 20240 + }, + { + "epoch": 0.5233504768304345, + "grad_norm": 0.023795749992132187, + "learning_rate": 7.164423540114682e-05, + "loss": 0.1208, + "step": 20250 + }, + { + "epoch": 0.5236089215103507, + "grad_norm": 0.05227832496166229, + "learning_rate": 7.161140793450641e-05, + "loss": 0.1209, + "step": 20260 + }, + { + "epoch": 0.523867366190267, + "grad_norm": 0.03743741288781166, + "learning_rate": 7.157857443543547e-05, + "loss": 0.1206, + "step": 20270 + }, + { + "epoch": 0.5241258108701833, + "grad_norm": 0.026272883638739586, + "learning_rate": 7.154573493389217e-05, + "loss": 0.1208, + "step": 20280 + }, + { + "epoch": 0.5243842555500995, + "grad_norm": 0.02973959594964981, + "learning_rate": 7.15128894598402e-05, + "loss": 0.1211, + "step": 20290 + }, + { + "epoch": 0.5246427002300158, + "grad_norm": 0.04028020426630974, + "learning_rate": 7.148003804324867e-05, + "loss": 0.1206, + "step": 20300 + }, + { + "epoch": 0.524901144909932, + "grad_norm": 0.029723072424530983, + "learning_rate": 7.144718071409211e-05, + "loss": 0.1204, + "step": 20310 + }, + { + "epoch": 0.5251595895898483, + "grad_norm": 0.03563931584358215, + "learning_rate": 7.141431750235047e-05, + "loss": 0.1207, + "step": 20320 + }, + { + "epoch": 0.5254180342697645, + "grad_norm": 0.045195139944553375, + "learning_rate": 7.1381448438009e-05, + "loss": 0.1208, + "step": 20330 + }, + { + "epoch": 0.5256764789496808, + "grad_norm": 0.04531498998403549, + "learning_rate": 7.134857355105838e-05, + "loss": 0.1211, + "step": 20340 + }, + { + "epoch": 0.525934923629597, + "grad_norm": 0.03601543977856636, + "learning_rate": 7.131569287149457e-05, + "loss": 0.1202, + "step": 20350 + }, + { + "epoch": 0.5261933683095134, + "grad_norm": 0.027531474828720093, + "learning_rate": 7.128280642931878e-05, + "loss": 0.1201, + "step": 20360 + }, + { + "epoch": 0.5264518129894296, + "grad_norm": 0.040528636425733566, + "learning_rate": 7.124991425453751e-05, + "loss": 0.1201, + "step": 20370 + }, + { + "epoch": 0.5267102576693459, + "grad_norm": 0.02939690835773945, + "learning_rate": 7.121701637716248e-05, + "loss": 0.1204, + "step": 20380 + }, + { + "epoch": 0.5269687023492622, + "grad_norm": 0.034015461802482605, + "learning_rate": 7.118411282721064e-05, + "loss": 0.1202, + "step": 20390 + }, + { + "epoch": 0.5272271470291784, + "grad_norm": 0.032597530633211136, + "learning_rate": 7.11512036347041e-05, + "loss": 0.1203, + "step": 20400 + }, + { + "epoch": 0.5274855917090947, + "grad_norm": 0.04330045357346535, + "learning_rate": 7.111828882967007e-05, + "loss": 0.1213, + "step": 20410 + }, + { + "epoch": 0.5277440363890109, + "grad_norm": 0.0331517830491066, + "learning_rate": 7.108536844214095e-05, + "loss": 0.12, + "step": 20420 + }, + { + "epoch": 0.5280024810689272, + "grad_norm": 0.033780504018068314, + "learning_rate": 7.105244250215422e-05, + "loss": 0.1207, + "step": 20430 + }, + { + "epoch": 0.5282609257488434, + "grad_norm": 0.06157774478197098, + "learning_rate": 7.101951103975238e-05, + "loss": 0.1204, + "step": 20440 + }, + { + "epoch": 0.5285193704287597, + "grad_norm": 0.024059602990746498, + "learning_rate": 7.098657408498303e-05, + "loss": 0.1195, + "step": 20450 + }, + { + "epoch": 0.5287778151086759, + "grad_norm": 0.02334517240524292, + "learning_rate": 7.095363166789877e-05, + "loss": 0.1205, + "step": 20460 + }, + { + "epoch": 0.5290362597885923, + "grad_norm": 0.044882986694574356, + "learning_rate": 7.092068381855712e-05, + "loss": 0.121, + "step": 20470 + }, + { + "epoch": 0.5292947044685086, + "grad_norm": 0.034278497099876404, + "learning_rate": 7.088773056702063e-05, + "loss": 0.12, + "step": 20480 + }, + { + "epoch": 0.5295531491484248, + "grad_norm": 0.03197415918111801, + "learning_rate": 7.085477194335674e-05, + "loss": 0.12, + "step": 20490 + }, + { + "epoch": 0.5298115938283411, + "grad_norm": 0.026582352817058563, + "learning_rate": 7.082180797763784e-05, + "loss": 0.1209, + "step": 20500 + }, + { + "epoch": 0.5300700385082573, + "grad_norm": 0.026003874838352203, + "learning_rate": 7.078883869994114e-05, + "loss": 0.1205, + "step": 20510 + }, + { + "epoch": 0.5303284831881736, + "grad_norm": 0.03376332297921181, + "learning_rate": 7.075586414034871e-05, + "loss": 0.1207, + "step": 20520 + }, + { + "epoch": 0.5305869278680898, + "grad_norm": 0.031209472566843033, + "learning_rate": 7.072288432894744e-05, + "loss": 0.1205, + "step": 20530 + }, + { + "epoch": 0.5308453725480061, + "grad_norm": 0.05439401790499687, + "learning_rate": 7.068989929582903e-05, + "loss": 0.12, + "step": 20540 + }, + { + "epoch": 0.5311038172279223, + "grad_norm": 0.029011519625782967, + "learning_rate": 7.065690907108993e-05, + "loss": 0.1206, + "step": 20550 + }, + { + "epoch": 0.5313622619078386, + "grad_norm": 0.02941972203552723, + "learning_rate": 7.062391368483132e-05, + "loss": 0.1209, + "step": 20560 + }, + { + "epoch": 0.5316207065877548, + "grad_norm": 0.032681480050086975, + "learning_rate": 7.059091316715912e-05, + "loss": 0.1206, + "step": 20570 + }, + { + "epoch": 0.5318791512676712, + "grad_norm": 0.03831613436341286, + "learning_rate": 7.055790754818387e-05, + "loss": 0.1198, + "step": 20580 + }, + { + "epoch": 0.5321375959475875, + "grad_norm": 0.039940815418958664, + "learning_rate": 7.052489685802086e-05, + "loss": 0.1207, + "step": 20590 + }, + { + "epoch": 0.5323960406275037, + "grad_norm": 0.030511900782585144, + "learning_rate": 7.04918811267899e-05, + "loss": 0.1204, + "step": 20600 + }, + { + "epoch": 0.53265448530742, + "grad_norm": 0.04238329082727432, + "learning_rate": 7.045886038461549e-05, + "loss": 0.1204, + "step": 20610 + }, + { + "epoch": 0.5329129299873362, + "grad_norm": 0.02636590227484703, + "learning_rate": 7.042583466162664e-05, + "loss": 0.1202, + "step": 20620 + }, + { + "epoch": 0.5331713746672525, + "grad_norm": 0.028068533167243004, + "learning_rate": 7.039280398795695e-05, + "loss": 0.1206, + "step": 20630 + }, + { + "epoch": 0.5334298193471687, + "grad_norm": 0.0563853457570076, + "learning_rate": 7.03597683937445e-05, + "loss": 0.1201, + "step": 20640 + }, + { + "epoch": 0.533688264027085, + "grad_norm": 0.05767499655485153, + "learning_rate": 7.032672790913186e-05, + "loss": 0.1202, + "step": 20650 + }, + { + "epoch": 0.5339467087070012, + "grad_norm": 0.038404881954193115, + "learning_rate": 7.029368256426612e-05, + "loss": 0.1205, + "step": 20660 + }, + { + "epoch": 0.5342051533869175, + "grad_norm": 0.049100130796432495, + "learning_rate": 7.026063238929872e-05, + "loss": 0.1209, + "step": 20670 + }, + { + "epoch": 0.5344635980668337, + "grad_norm": 0.04915165901184082, + "learning_rate": 7.022757741438557e-05, + "loss": 0.1215, + "step": 20680 + }, + { + "epoch": 0.5347220427467501, + "grad_norm": 0.04159319028258324, + "learning_rate": 7.019451766968693e-05, + "loss": 0.1201, + "step": 20690 + }, + { + "epoch": 0.5349804874266664, + "grad_norm": 0.03702859207987785, + "learning_rate": 7.016145318536741e-05, + "loss": 0.1203, + "step": 20700 + }, + { + "epoch": 0.5352389321065826, + "grad_norm": 0.05370931327342987, + "learning_rate": 7.0128383991596e-05, + "loss": 0.1203, + "step": 20710 + }, + { + "epoch": 0.5354973767864989, + "grad_norm": 0.02588723786175251, + "learning_rate": 7.009531011854591e-05, + "loss": 0.1206, + "step": 20720 + }, + { + "epoch": 0.5357558214664151, + "grad_norm": 0.055061232298612595, + "learning_rate": 7.006223159639466e-05, + "loss": 0.1203, + "step": 20730 + }, + { + "epoch": 0.5360142661463314, + "grad_norm": 0.02645109035074711, + "learning_rate": 7.002914845532398e-05, + "loss": 0.1204, + "step": 20740 + }, + { + "epoch": 0.5362727108262476, + "grad_norm": 0.041657399386167526, + "learning_rate": 6.999606072551987e-05, + "loss": 0.1208, + "step": 20750 + }, + { + "epoch": 0.5365311555061639, + "grad_norm": 0.02645970694720745, + "learning_rate": 6.996296843717247e-05, + "loss": 0.1198, + "step": 20760 + }, + { + "epoch": 0.5367896001860801, + "grad_norm": 0.024964727461338043, + "learning_rate": 6.99298716204761e-05, + "loss": 0.1197, + "step": 20770 + }, + { + "epoch": 0.5370480448659964, + "grad_norm": 0.04342738911509514, + "learning_rate": 6.989677030562919e-05, + "loss": 0.1205, + "step": 20780 + }, + { + "epoch": 0.5373064895459126, + "grad_norm": 0.02926427125930786, + "learning_rate": 6.986366452283432e-05, + "loss": 0.1201, + "step": 20790 + }, + { + "epoch": 0.537564934225829, + "grad_norm": 0.02591095305979252, + "learning_rate": 6.98305543022981e-05, + "loss": 0.1204, + "step": 20800 + }, + { + "epoch": 0.5378233789057453, + "grad_norm": 0.03658992424607277, + "learning_rate": 6.97974396742312e-05, + "loss": 0.1199, + "step": 20810 + }, + { + "epoch": 0.5380818235856615, + "grad_norm": 0.028331851586699486, + "learning_rate": 6.976432066884833e-05, + "loss": 0.12, + "step": 20820 + }, + { + "epoch": 0.5383402682655778, + "grad_norm": 0.024779271334409714, + "learning_rate": 6.973119731636817e-05, + "loss": 0.1205, + "step": 20830 + }, + { + "epoch": 0.538598712945494, + "grad_norm": 0.03042691946029663, + "learning_rate": 6.96980696470134e-05, + "loss": 0.1204, + "step": 20840 + }, + { + "epoch": 0.5388571576254103, + "grad_norm": 0.05990440770983696, + "learning_rate": 6.966493769101059e-05, + "loss": 0.121, + "step": 20850 + }, + { + "epoch": 0.5391156023053265, + "grad_norm": 0.031835947185754776, + "learning_rate": 6.963180147859025e-05, + "loss": 0.1206, + "step": 20860 + }, + { + "epoch": 0.5393740469852428, + "grad_norm": 0.02821270190179348, + "learning_rate": 6.959866103998682e-05, + "loss": 0.1208, + "step": 20870 + }, + { + "epoch": 0.539632491665159, + "grad_norm": 0.02637062408030033, + "learning_rate": 6.956551640543847e-05, + "loss": 0.1201, + "step": 20880 + }, + { + "epoch": 0.5398909363450753, + "grad_norm": 0.034757040441036224, + "learning_rate": 6.953236760518735e-05, + "loss": 0.1198, + "step": 20890 + }, + { + "epoch": 0.5401493810249915, + "grad_norm": 0.04228854179382324, + "learning_rate": 6.94992146694793e-05, + "loss": 0.1198, + "step": 20900 + }, + { + "epoch": 0.5404078257049079, + "grad_norm": 0.03495631739497185, + "learning_rate": 6.946605762856395e-05, + "loss": 0.1205, + "step": 20910 + }, + { + "epoch": 0.5406662703848242, + "grad_norm": 0.05104334279894829, + "learning_rate": 6.943289651269475e-05, + "loss": 0.12, + "step": 20920 + }, + { + "epoch": 0.5409247150647404, + "grad_norm": 0.025074992328882217, + "learning_rate": 6.939973135212879e-05, + "loss": 0.1211, + "step": 20930 + }, + { + "epoch": 0.5411831597446567, + "grad_norm": 0.05147240683436394, + "learning_rate": 6.936656217712686e-05, + "loss": 0.1202, + "step": 20940 + }, + { + "epoch": 0.5414416044245729, + "grad_norm": 0.03577698767185211, + "learning_rate": 6.933338901795345e-05, + "loss": 0.1206, + "step": 20950 + }, + { + "epoch": 0.5417000491044892, + "grad_norm": 0.032096799463033676, + "learning_rate": 6.930021190487664e-05, + "loss": 0.1209, + "step": 20960 + }, + { + "epoch": 0.5419584937844054, + "grad_norm": 0.03207904472947121, + "learning_rate": 6.926703086816817e-05, + "loss": 0.1207, + "step": 20970 + }, + { + "epoch": 0.5422169384643217, + "grad_norm": 0.02829430066049099, + "learning_rate": 6.923384593810332e-05, + "loss": 0.121, + "step": 20980 + }, + { + "epoch": 0.5424753831442379, + "grad_norm": 0.03477048873901367, + "learning_rate": 6.920065714496091e-05, + "loss": 0.1205, + "step": 20990 + }, + { + "epoch": 0.5427338278241542, + "grad_norm": 0.02922411821782589, + "learning_rate": 6.916746451902334e-05, + "loss": 0.12, + "step": 21000 + }, + { + "epoch": 0.5429922725040705, + "grad_norm": 0.027083877474069595, + "learning_rate": 6.913426809057646e-05, + "loss": 0.1206, + "step": 21010 + }, + { + "epoch": 0.5432507171839868, + "grad_norm": 0.04243084043264389, + "learning_rate": 6.91010678899096e-05, + "loss": 0.1202, + "step": 21020 + }, + { + "epoch": 0.5435091618639031, + "grad_norm": 0.036770161241292953, + "learning_rate": 6.906786394731555e-05, + "loss": 0.1208, + "step": 21030 + }, + { + "epoch": 0.5437676065438193, + "grad_norm": 0.023354075849056244, + "learning_rate": 6.90346562930905e-05, + "loss": 0.1203, + "step": 21040 + }, + { + "epoch": 0.5440260512237356, + "grad_norm": 0.026575906202197075, + "learning_rate": 6.900144495753402e-05, + "loss": 0.1206, + "step": 21050 + }, + { + "epoch": 0.5442844959036518, + "grad_norm": 0.02576720155775547, + "learning_rate": 6.896822997094904e-05, + "loss": 0.1215, + "step": 21060 + }, + { + "epoch": 0.5445429405835681, + "grad_norm": 0.04130380600690842, + "learning_rate": 6.893501136364187e-05, + "loss": 0.1206, + "step": 21070 + }, + { + "epoch": 0.5448013852634843, + "grad_norm": 0.02926800027489662, + "learning_rate": 6.8901789165922e-05, + "loss": 0.1205, + "step": 21080 + }, + { + "epoch": 0.5450598299434006, + "grad_norm": 0.04011395946145058, + "learning_rate": 6.886856340810236e-05, + "loss": 0.1209, + "step": 21090 + }, + { + "epoch": 0.5453182746233168, + "grad_norm": 0.028343509882688522, + "learning_rate": 6.883533412049901e-05, + "loss": 0.1206, + "step": 21100 + }, + { + "epoch": 0.5455767193032331, + "grad_norm": 0.02431676723062992, + "learning_rate": 6.880210133343124e-05, + "loss": 0.1199, + "step": 21110 + }, + { + "epoch": 0.5458351639831494, + "grad_norm": 0.06313106417655945, + "learning_rate": 6.876886507722162e-05, + "loss": 0.12, + "step": 21120 + }, + { + "epoch": 0.5460936086630657, + "grad_norm": 0.0297551266849041, + "learning_rate": 6.873562538219578e-05, + "loss": 0.1204, + "step": 21130 + }, + { + "epoch": 0.546352053342982, + "grad_norm": 0.030030611902475357, + "learning_rate": 6.870238227868253e-05, + "loss": 0.1192, + "step": 21140 + }, + { + "epoch": 0.5466104980228982, + "grad_norm": 0.0359189473092556, + "learning_rate": 6.866913579701381e-05, + "loss": 0.1201, + "step": 21150 + }, + { + "epoch": 0.5468689427028145, + "grad_norm": 0.035076338797807693, + "learning_rate": 6.863588596752464e-05, + "loss": 0.1206, + "step": 21160 + }, + { + "epoch": 0.5471273873827307, + "grad_norm": 0.03401882201433182, + "learning_rate": 6.860263282055307e-05, + "loss": 0.1216, + "step": 21170 + }, + { + "epoch": 0.547385832062647, + "grad_norm": 0.023739008232951164, + "learning_rate": 6.856937638644015e-05, + "loss": 0.12, + "step": 21180 + }, + { + "epoch": 0.5476442767425632, + "grad_norm": 0.033812835812568665, + "learning_rate": 6.853611669553001e-05, + "loss": 0.1206, + "step": 21190 + }, + { + "epoch": 0.5479027214224795, + "grad_norm": 0.029004937037825584, + "learning_rate": 6.850285377816968e-05, + "loss": 0.1203, + "step": 21200 + }, + { + "epoch": 0.5481611661023957, + "grad_norm": 0.025405416265130043, + "learning_rate": 6.846958766470917e-05, + "loss": 0.1204, + "step": 21210 + }, + { + "epoch": 0.548419610782312, + "grad_norm": 0.048874907195568085, + "learning_rate": 6.84363183855014e-05, + "loss": 0.1206, + "step": 21220 + }, + { + "epoch": 0.5486780554622284, + "grad_norm": 0.03822870925068855, + "learning_rate": 6.840304597090219e-05, + "loss": 0.1208, + "step": 21230 + }, + { + "epoch": 0.5489365001421446, + "grad_norm": 0.0229813102632761, + "learning_rate": 6.836977045127017e-05, + "loss": 0.1198, + "step": 21240 + }, + { + "epoch": 0.5491949448220609, + "grad_norm": 0.032341230660676956, + "learning_rate": 6.833649185696689e-05, + "loss": 0.1206, + "step": 21250 + }, + { + "epoch": 0.5494533895019771, + "grad_norm": 0.028868146240711212, + "learning_rate": 6.83032102183566e-05, + "loss": 0.1204, + "step": 21260 + }, + { + "epoch": 0.5497118341818934, + "grad_norm": 0.024773096665740013, + "learning_rate": 6.826992556580642e-05, + "loss": 0.1205, + "step": 21270 + }, + { + "epoch": 0.5499702788618096, + "grad_norm": 0.03922717645764351, + "learning_rate": 6.823663792968616e-05, + "loss": 0.1209, + "step": 21280 + }, + { + "epoch": 0.5502287235417259, + "grad_norm": 0.032556213438510895, + "learning_rate": 6.820334734036839e-05, + "loss": 0.1205, + "step": 21290 + }, + { + "epoch": 0.5504871682216421, + "grad_norm": 0.02909727580845356, + "learning_rate": 6.817005382822837e-05, + "loss": 0.1199, + "step": 21300 + }, + { + "epoch": 0.5507456129015584, + "grad_norm": 0.032313790172338486, + "learning_rate": 6.8136757423644e-05, + "loss": 0.1203, + "step": 21310 + }, + { + "epoch": 0.5510040575814746, + "grad_norm": 0.028175678104162216, + "learning_rate": 6.810345815699583e-05, + "loss": 0.1205, + "step": 21320 + }, + { + "epoch": 0.551262502261391, + "grad_norm": 0.025968126952648163, + "learning_rate": 6.807015605866705e-05, + "loss": 0.1201, + "step": 21330 + }, + { + "epoch": 0.5515209469413073, + "grad_norm": 0.03649004548788071, + "learning_rate": 6.803685115904339e-05, + "loss": 0.1204, + "step": 21340 + }, + { + "epoch": 0.5517793916212235, + "grad_norm": 0.033135540783405304, + "learning_rate": 6.800354348851317e-05, + "loss": 0.1208, + "step": 21350 + }, + { + "epoch": 0.5520378363011398, + "grad_norm": 0.027628453448414803, + "learning_rate": 6.79702330774672e-05, + "loss": 0.121, + "step": 21360 + }, + { + "epoch": 0.552296280981056, + "grad_norm": 0.03418595716357231, + "learning_rate": 6.793691995629885e-05, + "loss": 0.1199, + "step": 21370 + }, + { + "epoch": 0.5525547256609723, + "grad_norm": 0.022068781778216362, + "learning_rate": 6.79036041554039e-05, + "loss": 0.1207, + "step": 21380 + }, + { + "epoch": 0.5528131703408885, + "grad_norm": 0.02629811502993107, + "learning_rate": 6.787028570518066e-05, + "loss": 0.1204, + "step": 21390 + }, + { + "epoch": 0.5530716150208048, + "grad_norm": 0.03632663935422897, + "learning_rate": 6.783696463602974e-05, + "loss": 0.1197, + "step": 21400 + }, + { + "epoch": 0.553330059700721, + "grad_norm": 0.03278012573719025, + "learning_rate": 6.780364097835419e-05, + "loss": 0.1206, + "step": 21410 + }, + { + "epoch": 0.5535885043806373, + "grad_norm": 0.031133053824305534, + "learning_rate": 6.777031476255948e-05, + "loss": 0.12, + "step": 21420 + }, + { + "epoch": 0.5538469490605535, + "grad_norm": 0.029856855049729347, + "learning_rate": 6.773698601905336e-05, + "loss": 0.121, + "step": 21430 + }, + { + "epoch": 0.5541053937404699, + "grad_norm": 0.024248342961072922, + "learning_rate": 6.770365477824585e-05, + "loss": 0.1203, + "step": 21440 + }, + { + "epoch": 0.5543638384203862, + "grad_norm": 0.031011736020445824, + "learning_rate": 6.767032107054935e-05, + "loss": 0.1203, + "step": 21450 + }, + { + "epoch": 0.5546222831003024, + "grad_norm": 0.047894056886434555, + "learning_rate": 6.763698492637841e-05, + "loss": 0.1206, + "step": 21460 + }, + { + "epoch": 0.5548807277802187, + "grad_norm": 0.02912764623761177, + "learning_rate": 6.760364637614985e-05, + "loss": 0.1213, + "step": 21470 + }, + { + "epoch": 0.5551391724601349, + "grad_norm": 0.04453229531645775, + "learning_rate": 6.757030545028271e-05, + "loss": 0.1206, + "step": 21480 + }, + { + "epoch": 0.5553976171400512, + "grad_norm": 0.037943895906209946, + "learning_rate": 6.753696217919813e-05, + "loss": 0.1196, + "step": 21490 + }, + { + "epoch": 0.5556560618199674, + "grad_norm": 0.05504227057099342, + "learning_rate": 6.750361659331946e-05, + "loss": 0.1208, + "step": 21500 + }, + { + "epoch": 0.5559145064998837, + "grad_norm": 0.04778389260172844, + "learning_rate": 6.747026872307212e-05, + "loss": 0.1198, + "step": 21510 + }, + { + "epoch": 0.5561729511797999, + "grad_norm": 0.03463626280426979, + "learning_rate": 6.74369185988836e-05, + "loss": 0.1208, + "step": 21520 + }, + { + "epoch": 0.5564313958597162, + "grad_norm": 0.029547497630119324, + "learning_rate": 6.740356625118351e-05, + "loss": 0.1211, + "step": 21530 + }, + { + "epoch": 0.5566898405396324, + "grad_norm": 0.02629421465098858, + "learning_rate": 6.737021171040341e-05, + "loss": 0.1193, + "step": 21540 + }, + { + "epoch": 0.5569482852195488, + "grad_norm": 0.03618757054209709, + "learning_rate": 6.733685500697694e-05, + "loss": 0.1198, + "step": 21550 + }, + { + "epoch": 0.5572067298994651, + "grad_norm": 0.03143980726599693, + "learning_rate": 6.730349617133965e-05, + "loss": 0.1207, + "step": 21560 + }, + { + "epoch": 0.5574651745793813, + "grad_norm": 0.03436783701181412, + "learning_rate": 6.727013523392906e-05, + "loss": 0.121, + "step": 21570 + }, + { + "epoch": 0.5577236192592976, + "grad_norm": 0.026007071137428284, + "learning_rate": 6.723677222518463e-05, + "loss": 0.1198, + "step": 21580 + }, + { + "epoch": 0.5579820639392138, + "grad_norm": 0.027762286365032196, + "learning_rate": 6.720340717554765e-05, + "loss": 0.1198, + "step": 21590 + }, + { + "epoch": 0.5582405086191301, + "grad_norm": 0.029099421575665474, + "learning_rate": 6.717004011546134e-05, + "loss": 0.1207, + "step": 21600 + }, + { + "epoch": 0.5584989532990463, + "grad_norm": 0.025859162211418152, + "learning_rate": 6.713667107537072e-05, + "loss": 0.1199, + "step": 21610 + }, + { + "epoch": 0.5587573979789626, + "grad_norm": 0.033176716417074203, + "learning_rate": 6.710330008572261e-05, + "loss": 0.1204, + "step": 21620 + }, + { + "epoch": 0.5590158426588788, + "grad_norm": 0.03318626061081886, + "learning_rate": 6.70699271769656e-05, + "loss": 0.1209, + "step": 21630 + }, + { + "epoch": 0.5592742873387951, + "grad_norm": 0.034222912043333054, + "learning_rate": 6.703655237955011e-05, + "loss": 0.1203, + "step": 21640 + }, + { + "epoch": 0.5595327320187113, + "grad_norm": 0.03383937478065491, + "learning_rate": 6.700317572392817e-05, + "loss": 0.12, + "step": 21650 + }, + { + "epoch": 0.5597911766986277, + "grad_norm": 0.027863917872309685, + "learning_rate": 6.696979724055356e-05, + "loss": 0.1203, + "step": 21660 + }, + { + "epoch": 0.560049621378544, + "grad_norm": 0.0400434173643589, + "learning_rate": 6.693641695988178e-05, + "loss": 0.1198, + "step": 21670 + }, + { + "epoch": 0.5603080660584602, + "grad_norm": 0.02755608595907688, + "learning_rate": 6.690303491236985e-05, + "loss": 0.121, + "step": 21680 + }, + { + "epoch": 0.5605665107383765, + "grad_norm": 0.032733868807554245, + "learning_rate": 6.686965112847652e-05, + "loss": 0.1207, + "step": 21690 + }, + { + "epoch": 0.5608249554182927, + "grad_norm": 0.027760596945881844, + "learning_rate": 6.683626563866204e-05, + "loss": 0.1197, + "step": 21700 + }, + { + "epoch": 0.561083400098209, + "grad_norm": 0.036430079489946365, + "learning_rate": 6.680287847338825e-05, + "loss": 0.1203, + "step": 21710 + }, + { + "epoch": 0.5613418447781252, + "grad_norm": 0.0257630106061697, + "learning_rate": 6.676948966311855e-05, + "loss": 0.1212, + "step": 21720 + }, + { + "epoch": 0.5616002894580415, + "grad_norm": 0.056388773024082184, + "learning_rate": 6.673609923831778e-05, + "loss": 0.1207, + "step": 21730 + }, + { + "epoch": 0.5618587341379577, + "grad_norm": 0.031097155064344406, + "learning_rate": 6.670270722945227e-05, + "loss": 0.121, + "step": 21740 + }, + { + "epoch": 0.562117178817874, + "grad_norm": 0.03597048670053482, + "learning_rate": 6.666931366698986e-05, + "loss": 0.1207, + "step": 21750 + }, + { + "epoch": 0.5623756234977902, + "grad_norm": 0.030791345983743668, + "learning_rate": 6.663591858139971e-05, + "loss": 0.1203, + "step": 21760 + }, + { + "epoch": 0.5626340681777066, + "grad_norm": 0.0397588312625885, + "learning_rate": 6.660252200315245e-05, + "loss": 0.1206, + "step": 21770 + }, + { + "epoch": 0.5628925128576229, + "grad_norm": 0.026226703077554703, + "learning_rate": 6.656912396271998e-05, + "loss": 0.1207, + "step": 21780 + }, + { + "epoch": 0.5631509575375391, + "grad_norm": 0.024739356711506844, + "learning_rate": 6.653572449057564e-05, + "loss": 0.1205, + "step": 21790 + }, + { + "epoch": 0.5634094022174554, + "grad_norm": 0.04813724383711815, + "learning_rate": 6.650232361719404e-05, + "loss": 0.1201, + "step": 21800 + }, + { + "epoch": 0.5636678468973716, + "grad_norm": 0.031189315021038055, + "learning_rate": 6.6468921373051e-05, + "loss": 0.1199, + "step": 21810 + }, + { + "epoch": 0.5639262915772879, + "grad_norm": 0.03142096847295761, + "learning_rate": 6.643551778862369e-05, + "loss": 0.1208, + "step": 21820 + }, + { + "epoch": 0.5641847362572041, + "grad_norm": 0.032365500926971436, + "learning_rate": 6.640211289439044e-05, + "loss": 0.1206, + "step": 21830 + }, + { + "epoch": 0.5644431809371204, + "grad_norm": 0.030217768624424934, + "learning_rate": 6.636870672083082e-05, + "loss": 0.1204, + "step": 21840 + }, + { + "epoch": 0.5647016256170366, + "grad_norm": 0.035433802753686905, + "learning_rate": 6.633529929842552e-05, + "loss": 0.1207, + "step": 21850 + }, + { + "epoch": 0.5649600702969529, + "grad_norm": 0.03671165555715561, + "learning_rate": 6.630189065765638e-05, + "loss": 0.1206, + "step": 21860 + }, + { + "epoch": 0.5652185149768691, + "grad_norm": 0.03298381343483925, + "learning_rate": 6.626848082900638e-05, + "loss": 0.1194, + "step": 21870 + }, + { + "epoch": 0.5654769596567855, + "grad_norm": 0.0787547156214714, + "learning_rate": 6.623506984295957e-05, + "loss": 0.1206, + "step": 21880 + }, + { + "epoch": 0.5657354043367018, + "grad_norm": 0.03350275009870529, + "learning_rate": 6.620165773000105e-05, + "loss": 0.1202, + "step": 21890 + }, + { + "epoch": 0.565993849016618, + "grad_norm": 0.0341881588101387, + "learning_rate": 6.616824452061696e-05, + "loss": 0.1204, + "step": 21900 + }, + { + "epoch": 0.5662522936965343, + "grad_norm": 0.02640090137720108, + "learning_rate": 6.61348302452944e-05, + "loss": 0.1199, + "step": 21910 + }, + { + "epoch": 0.5665107383764505, + "grad_norm": 0.031360555440187454, + "learning_rate": 6.610141493452151e-05, + "loss": 0.1203, + "step": 21920 + }, + { + "epoch": 0.5667691830563668, + "grad_norm": 0.023985255509614944, + "learning_rate": 6.606799861878732e-05, + "loss": 0.1193, + "step": 21930 + }, + { + "epoch": 0.567027627736283, + "grad_norm": 0.03572600707411766, + "learning_rate": 6.603458132858181e-05, + "loss": 0.1204, + "step": 21940 + }, + { + "epoch": 0.5672860724161993, + "grad_norm": 0.032007936388254166, + "learning_rate": 6.600116309439581e-05, + "loss": 0.1208, + "step": 21950 + }, + { + "epoch": 0.5675445170961155, + "grad_norm": 0.027053479105234146, + "learning_rate": 6.596774394672107e-05, + "loss": 0.1202, + "step": 21960 + }, + { + "epoch": 0.5678029617760318, + "grad_norm": 0.03404033184051514, + "learning_rate": 6.593432391605012e-05, + "loss": 0.121, + "step": 21970 + }, + { + "epoch": 0.5680614064559482, + "grad_norm": 0.03701552003622055, + "learning_rate": 6.59009030328763e-05, + "loss": 0.1202, + "step": 21980 + }, + { + "epoch": 0.5683198511358644, + "grad_norm": 0.042509566992521286, + "learning_rate": 6.586748132769378e-05, + "loss": 0.1205, + "step": 21990 + }, + { + "epoch": 0.5685782958157807, + "grad_norm": 0.047740355134010315, + "learning_rate": 6.583405883099743e-05, + "loss": 0.1201, + "step": 22000 + }, + { + "epoch": 0.5688367404956969, + "grad_norm": 0.03312734514474869, + "learning_rate": 6.580063557328285e-05, + "loss": 0.1206, + "step": 22010 + }, + { + "epoch": 0.5690951851756132, + "grad_norm": 0.02465563267469406, + "learning_rate": 6.576721158504633e-05, + "loss": 0.1192, + "step": 22020 + }, + { + "epoch": 0.5693536298555294, + "grad_norm": 0.03344980999827385, + "learning_rate": 6.573378689678485e-05, + "loss": 0.1205, + "step": 22030 + }, + { + "epoch": 0.5696120745354457, + "grad_norm": 0.03134537115693092, + "learning_rate": 6.570036153899602e-05, + "loss": 0.1202, + "step": 22040 + }, + { + "epoch": 0.5698705192153619, + "grad_norm": 0.032678715884685516, + "learning_rate": 6.566693554217803e-05, + "loss": 0.1202, + "step": 22050 + }, + { + "epoch": 0.5701289638952782, + "grad_norm": 0.024689694866538048, + "learning_rate": 6.563350893682972e-05, + "loss": 0.12, + "step": 22060 + }, + { + "epoch": 0.5703874085751944, + "grad_norm": 0.0252884142100811, + "learning_rate": 6.56000817534504e-05, + "loss": 0.1204, + "step": 22070 + }, + { + "epoch": 0.5706458532551107, + "grad_norm": 0.03062187135219574, + "learning_rate": 6.556665402254001e-05, + "loss": 0.1199, + "step": 22080 + }, + { + "epoch": 0.5709042979350271, + "grad_norm": 0.03548770397901535, + "learning_rate": 6.553322577459886e-05, + "loss": 0.1208, + "step": 22090 + }, + { + "epoch": 0.5711627426149433, + "grad_norm": 0.03825638070702553, + "learning_rate": 6.549979704012787e-05, + "loss": 0.1203, + "step": 22100 + }, + { + "epoch": 0.5714211872948596, + "grad_norm": 0.04065892845392227, + "learning_rate": 6.546636784962828e-05, + "loss": 0.121, + "step": 22110 + }, + { + "epoch": 0.5716796319747758, + "grad_norm": 0.02941431850194931, + "learning_rate": 6.543293823360185e-05, + "loss": 0.1202, + "step": 22120 + }, + { + "epoch": 0.5719380766546921, + "grad_norm": 0.028102802112698555, + "learning_rate": 6.539950822255065e-05, + "loss": 0.1193, + "step": 22130 + }, + { + "epoch": 0.5721965213346083, + "grad_norm": 0.037234995514154434, + "learning_rate": 6.536607784697717e-05, + "loss": 0.1199, + "step": 22140 + }, + { + "epoch": 0.5724549660145246, + "grad_norm": 0.04100056737661362, + "learning_rate": 6.533264713738418e-05, + "loss": 0.121, + "step": 22150 + }, + { + "epoch": 0.5727134106944408, + "grad_norm": 0.03203975409269333, + "learning_rate": 6.529921612427478e-05, + "loss": 0.1193, + "step": 22160 + }, + { + "epoch": 0.5729718553743571, + "grad_norm": 0.025560351088643074, + "learning_rate": 6.526578483815236e-05, + "loss": 0.1208, + "step": 22170 + }, + { + "epoch": 0.5732303000542733, + "grad_norm": 0.04943282902240753, + "learning_rate": 6.523235330952056e-05, + "loss": 0.1203, + "step": 22180 + }, + { + "epoch": 0.5734887447341896, + "grad_norm": 0.022899329662322998, + "learning_rate": 6.519892156888317e-05, + "loss": 0.1208, + "step": 22190 + }, + { + "epoch": 0.573747189414106, + "grad_norm": 0.027206894010305405, + "learning_rate": 6.51654896467443e-05, + "loss": 0.1208, + "step": 22200 + }, + { + "epoch": 0.5740056340940222, + "grad_norm": 0.04901726916432381, + "learning_rate": 6.51320575736081e-05, + "loss": 0.1205, + "step": 22210 + }, + { + "epoch": 0.5742640787739385, + "grad_norm": 0.029641881585121155, + "learning_rate": 6.509862537997896e-05, + "loss": 0.1204, + "step": 22220 + }, + { + "epoch": 0.5745225234538547, + "grad_norm": 0.03833237290382385, + "learning_rate": 6.506519309636128e-05, + "loss": 0.1205, + "step": 22230 + }, + { + "epoch": 0.574780968133771, + "grad_norm": 0.032571692019701004, + "learning_rate": 6.503176075325963e-05, + "loss": 0.1204, + "step": 22240 + }, + { + "epoch": 0.5750394128136872, + "grad_norm": 0.041477445513010025, + "learning_rate": 6.49983283811786e-05, + "loss": 0.1196, + "step": 22250 + }, + { + "epoch": 0.5752978574936035, + "grad_norm": 0.026014678180217743, + "learning_rate": 6.496489601062282e-05, + "loss": 0.1198, + "step": 22260 + }, + { + "epoch": 0.5755563021735197, + "grad_norm": 0.03590506687760353, + "learning_rate": 6.493146367209686e-05, + "loss": 0.1207, + "step": 22270 + }, + { + "epoch": 0.575814746853436, + "grad_norm": 0.028091153129935265, + "learning_rate": 6.489803139610533e-05, + "loss": 0.1204, + "step": 22280 + }, + { + "epoch": 0.5760731915333522, + "grad_norm": 0.030878515914082527, + "learning_rate": 6.486459921315278e-05, + "loss": 0.1204, + "step": 22290 + }, + { + "epoch": 0.5763316362132685, + "grad_norm": 0.02798227220773697, + "learning_rate": 6.483116715374362e-05, + "loss": 0.1205, + "step": 22300 + }, + { + "epoch": 0.5765900808931849, + "grad_norm": 0.034497812390327454, + "learning_rate": 6.47977352483822e-05, + "loss": 0.1209, + "step": 22310 + }, + { + "epoch": 0.5768485255731011, + "grad_norm": 0.038187094032764435, + "learning_rate": 6.47643035275727e-05, + "loss": 0.1202, + "step": 22320 + }, + { + "epoch": 0.5771069702530174, + "grad_norm": 0.02641744166612625, + "learning_rate": 6.473087202181917e-05, + "loss": 0.1196, + "step": 22330 + }, + { + "epoch": 0.5773654149329336, + "grad_norm": 0.02326001413166523, + "learning_rate": 6.469744076162538e-05, + "loss": 0.1209, + "step": 22340 + }, + { + "epoch": 0.5776238596128499, + "grad_norm": 0.03216647356748581, + "learning_rate": 6.466400977749498e-05, + "loss": 0.1201, + "step": 22350 + }, + { + "epoch": 0.5778823042927661, + "grad_norm": 0.04746892675757408, + "learning_rate": 6.463057909993131e-05, + "loss": 0.1204, + "step": 22360 + }, + { + "epoch": 0.5781407489726824, + "grad_norm": 0.03139851987361908, + "learning_rate": 6.459714875943743e-05, + "loss": 0.1211, + "step": 22370 + }, + { + "epoch": 0.5783991936525986, + "grad_norm": 0.03010055609047413, + "learning_rate": 6.456371878651612e-05, + "loss": 0.1202, + "step": 22380 + }, + { + "epoch": 0.5786576383325149, + "grad_norm": 0.03618589788675308, + "learning_rate": 6.453028921166977e-05, + "loss": 0.121, + "step": 22390 + }, + { + "epoch": 0.5789160830124311, + "grad_norm": 0.029445912688970566, + "learning_rate": 6.449686006540048e-05, + "loss": 0.1202, + "step": 22400 + }, + { + "epoch": 0.5791745276923475, + "grad_norm": 0.022398250177502632, + "learning_rate": 6.446343137820991e-05, + "loss": 0.1201, + "step": 22410 + }, + { + "epoch": 0.5794329723722638, + "grad_norm": 0.030749386176466942, + "learning_rate": 6.443000318059932e-05, + "loss": 0.1196, + "step": 22420 + }, + { + "epoch": 0.57969141705218, + "grad_norm": 0.04112745448946953, + "learning_rate": 6.439657550306953e-05, + "loss": 0.1204, + "step": 22430 + }, + { + "epoch": 0.5799498617320963, + "grad_norm": 0.030715197324752808, + "learning_rate": 6.436314837612082e-05, + "loss": 0.1202, + "step": 22440 + }, + { + "epoch": 0.5802083064120125, + "grad_norm": 0.029872095212340355, + "learning_rate": 6.432972183025308e-05, + "loss": 0.1206, + "step": 22450 + }, + { + "epoch": 0.5804667510919288, + "grad_norm": 0.03129478543996811, + "learning_rate": 6.429629589596558e-05, + "loss": 0.1208, + "step": 22460 + }, + { + "epoch": 0.580725195771845, + "grad_norm": 0.0384504608809948, + "learning_rate": 6.426287060375706e-05, + "loss": 0.1207, + "step": 22470 + }, + { + "epoch": 0.5809836404517613, + "grad_norm": 0.02831476926803589, + "learning_rate": 6.42294459841257e-05, + "loss": 0.1204, + "step": 22480 + }, + { + "epoch": 0.5812420851316775, + "grad_norm": 0.029087301343679428, + "learning_rate": 6.419602206756902e-05, + "loss": 0.1209, + "step": 22490 + }, + { + "epoch": 0.5815005298115938, + "grad_norm": 0.029544398188591003, + "learning_rate": 6.416259888458393e-05, + "loss": 0.1206, + "step": 22500 + }, + { + "epoch": 0.58175897449151, + "grad_norm": 0.02308494783937931, + "learning_rate": 6.412917646566667e-05, + "loss": 0.1205, + "step": 22510 + }, + { + "epoch": 0.5820174191714264, + "grad_norm": 0.05386943370103836, + "learning_rate": 6.409575484131276e-05, + "loss": 0.1199, + "step": 22520 + }, + { + "epoch": 0.5822758638513427, + "grad_norm": 0.04702867940068245, + "learning_rate": 6.406233404201701e-05, + "loss": 0.1198, + "step": 22530 + }, + { + "epoch": 0.5825343085312589, + "grad_norm": 0.03789818286895752, + "learning_rate": 6.402891409827349e-05, + "loss": 0.1203, + "step": 22540 + }, + { + "epoch": 0.5827927532111752, + "grad_norm": 0.0229061059653759, + "learning_rate": 6.399549504057547e-05, + "loss": 0.1209, + "step": 22550 + }, + { + "epoch": 0.5830511978910914, + "grad_norm": 0.031253036111593246, + "learning_rate": 6.396207689941542e-05, + "loss": 0.12, + "step": 22560 + }, + { + "epoch": 0.5833096425710077, + "grad_norm": 0.046233661472797394, + "learning_rate": 6.392865970528496e-05, + "loss": 0.1206, + "step": 22570 + }, + { + "epoch": 0.5835680872509239, + "grad_norm": 0.05269487574696541, + "learning_rate": 6.389524348867486e-05, + "loss": 0.1205, + "step": 22580 + }, + { + "epoch": 0.5838265319308402, + "grad_norm": 0.03163508325815201, + "learning_rate": 6.386182828007501e-05, + "loss": 0.1204, + "step": 22590 + }, + { + "epoch": 0.5840849766107564, + "grad_norm": 0.03168831020593643, + "learning_rate": 6.382841410997437e-05, + "loss": 0.1201, + "step": 22600 + }, + { + "epoch": 0.5843434212906727, + "grad_norm": 0.04054988548159599, + "learning_rate": 6.37950010088609e-05, + "loss": 0.1208, + "step": 22610 + }, + { + "epoch": 0.5846018659705889, + "grad_norm": 0.03007984347641468, + "learning_rate": 6.376158900722171e-05, + "loss": 0.1207, + "step": 22620 + }, + { + "epoch": 0.5848603106505053, + "grad_norm": 0.03329646587371826, + "learning_rate": 6.372817813554277e-05, + "loss": 0.1202, + "step": 22630 + }, + { + "epoch": 0.5851187553304216, + "grad_norm": 0.04576975479722023, + "learning_rate": 6.36947684243091e-05, + "loss": 0.1201, + "step": 22640 + }, + { + "epoch": 0.5853772000103378, + "grad_norm": 0.02779039926826954, + "learning_rate": 6.366135990400464e-05, + "loss": 0.1208, + "step": 22650 + }, + { + "epoch": 0.5856356446902541, + "grad_norm": 0.03444911167025566, + "learning_rate": 6.362795260511223e-05, + "loss": 0.1204, + "step": 22660 + }, + { + "epoch": 0.5858940893701703, + "grad_norm": 0.03208622708916664, + "learning_rate": 6.359454655811362e-05, + "loss": 0.1207, + "step": 22670 + }, + { + "epoch": 0.5861525340500866, + "grad_norm": 0.02692144177854061, + "learning_rate": 6.356114179348941e-05, + "loss": 0.1205, + "step": 22680 + }, + { + "epoch": 0.5864109787300028, + "grad_norm": 0.031214356422424316, + "learning_rate": 6.352773834171901e-05, + "loss": 0.1195, + "step": 22690 + }, + { + "epoch": 0.5866694234099191, + "grad_norm": 0.04738342761993408, + "learning_rate": 6.349433623328069e-05, + "loss": 0.1204, + "step": 22700 + }, + { + "epoch": 0.5869278680898353, + "grad_norm": 0.022578999400138855, + "learning_rate": 6.34609354986514e-05, + "loss": 0.1209, + "step": 22710 + }, + { + "epoch": 0.5871863127697516, + "grad_norm": 0.04120737686753273, + "learning_rate": 6.342753616830691e-05, + "loss": 0.1197, + "step": 22720 + }, + { + "epoch": 0.587444757449668, + "grad_norm": 0.04269356280565262, + "learning_rate": 6.339413827272172e-05, + "loss": 0.1206, + "step": 22730 + }, + { + "epoch": 0.5877032021295842, + "grad_norm": 0.04744463413953781, + "learning_rate": 6.336074184236892e-05, + "loss": 0.1209, + "step": 22740 + }, + { + "epoch": 0.5879616468095005, + "grad_norm": 0.02943752147257328, + "learning_rate": 6.33273469077204e-05, + "loss": 0.1206, + "step": 22750 + }, + { + "epoch": 0.5882200914894167, + "grad_norm": 0.035306014120578766, + "learning_rate": 6.329395349924658e-05, + "loss": 0.1199, + "step": 22760 + }, + { + "epoch": 0.588478536169333, + "grad_norm": 0.03809744119644165, + "learning_rate": 6.326056164741652e-05, + "loss": 0.1209, + "step": 22770 + }, + { + "epoch": 0.5887369808492492, + "grad_norm": 0.040563102811574936, + "learning_rate": 6.322717138269792e-05, + "loss": 0.1204, + "step": 22780 + }, + { + "epoch": 0.5889954255291655, + "grad_norm": 0.03265578672289848, + "learning_rate": 6.319378273555689e-05, + "loss": 0.1201, + "step": 22790 + }, + { + "epoch": 0.5892538702090817, + "grad_norm": 0.03515256196260452, + "learning_rate": 6.316039573645821e-05, + "loss": 0.1196, + "step": 22800 + }, + { + "epoch": 0.589512314888998, + "grad_norm": 0.0388399176299572, + "learning_rate": 6.312701041586509e-05, + "loss": 0.1202, + "step": 22810 + }, + { + "epoch": 0.5897707595689142, + "grad_norm": 0.02906709350645542, + "learning_rate": 6.309362680423921e-05, + "loss": 0.1194, + "step": 22820 + }, + { + "epoch": 0.5900292042488305, + "grad_norm": 0.04094026982784271, + "learning_rate": 6.306024493204068e-05, + "loss": 0.1198, + "step": 22830 + }, + { + "epoch": 0.5902876489287469, + "grad_norm": 0.02836890146136284, + "learning_rate": 6.302686482972806e-05, + "loss": 0.1204, + "step": 22840 + }, + { + "epoch": 0.5905460936086631, + "grad_norm": 0.03595118597149849, + "learning_rate": 6.299348652775828e-05, + "loss": 0.1203, + "step": 22850 + }, + { + "epoch": 0.5908045382885794, + "grad_norm": 0.027492046356201172, + "learning_rate": 6.296011005658657e-05, + "loss": 0.1205, + "step": 22860 + }, + { + "epoch": 0.5910629829684956, + "grad_norm": 0.024970125406980515, + "learning_rate": 6.29267354466666e-05, + "loss": 0.1206, + "step": 22870 + }, + { + "epoch": 0.5913214276484119, + "grad_norm": 0.03439181670546532, + "learning_rate": 6.289336272845027e-05, + "loss": 0.1198, + "step": 22880 + }, + { + "epoch": 0.5915798723283281, + "grad_norm": 0.03530226647853851, + "learning_rate": 6.285999193238774e-05, + "loss": 0.121, + "step": 22890 + }, + { + "epoch": 0.5918383170082444, + "grad_norm": 0.02491113357245922, + "learning_rate": 6.282662308892747e-05, + "loss": 0.1202, + "step": 22900 + }, + { + "epoch": 0.5920967616881606, + "grad_norm": 0.04879268631339073, + "learning_rate": 6.27932562285161e-05, + "loss": 0.1217, + "step": 22910 + }, + { + "epoch": 0.5923552063680769, + "grad_norm": 0.028496816754341125, + "learning_rate": 6.275989138159848e-05, + "loss": 0.1199, + "step": 22920 + }, + { + "epoch": 0.5926136510479931, + "grad_norm": 0.05777880921959877, + "learning_rate": 6.272652857861761e-05, + "loss": 0.1201, + "step": 22930 + }, + { + "epoch": 0.5928720957279094, + "grad_norm": 0.027157416567206383, + "learning_rate": 6.26931678500146e-05, + "loss": 0.1199, + "step": 22940 + }, + { + "epoch": 0.5931305404078258, + "grad_norm": 0.031749606132507324, + "learning_rate": 6.265980922622873e-05, + "loss": 0.1207, + "step": 22950 + }, + { + "epoch": 0.593388985087742, + "grad_norm": 0.029378565028309822, + "learning_rate": 6.262645273769734e-05, + "loss": 0.1209, + "step": 22960 + }, + { + "epoch": 0.5936474297676583, + "grad_norm": 0.02987351082265377, + "learning_rate": 6.259309841485576e-05, + "loss": 0.1205, + "step": 22970 + }, + { + "epoch": 0.5939058744475745, + "grad_norm": 0.03395707160234451, + "learning_rate": 6.255974628813743e-05, + "loss": 0.1203, + "step": 22980 + }, + { + "epoch": 0.5941643191274908, + "grad_norm": 0.03329942375421524, + "learning_rate": 6.252639638797375e-05, + "loss": 0.12, + "step": 22990 + }, + { + "epoch": 0.594422763807407, + "grad_norm": 0.05002008005976677, + "learning_rate": 6.249304874479405e-05, + "loss": 0.1197, + "step": 23000 + }, + { + "epoch": 0.5946812084873233, + "grad_norm": 0.03216249495744705, + "learning_rate": 6.245970338902569e-05, + "loss": 0.1199, + "step": 23010 + }, + { + "epoch": 0.5949396531672395, + "grad_norm": 0.026841625571250916, + "learning_rate": 6.242636035109384e-05, + "loss": 0.1211, + "step": 23020 + }, + { + "epoch": 0.5951980978471558, + "grad_norm": 0.03101552464067936, + "learning_rate": 6.239301966142162e-05, + "loss": 0.1205, + "step": 23030 + }, + { + "epoch": 0.595456542527072, + "grad_norm": 0.03550703823566437, + "learning_rate": 6.235968135043002e-05, + "loss": 0.12, + "step": 23040 + }, + { + "epoch": 0.5957149872069883, + "grad_norm": 0.03734995797276497, + "learning_rate": 6.23263454485378e-05, + "loss": 0.1198, + "step": 23050 + }, + { + "epoch": 0.5959734318869047, + "grad_norm": 0.026315465569496155, + "learning_rate": 6.229301198616157e-05, + "loss": 0.1209, + "step": 23060 + }, + { + "epoch": 0.5962318765668209, + "grad_norm": 0.026756586506962776, + "learning_rate": 6.22596809937157e-05, + "loss": 0.1207, + "step": 23070 + }, + { + "epoch": 0.5964903212467372, + "grad_norm": 0.03903176635503769, + "learning_rate": 6.222635250161229e-05, + "loss": 0.1197, + "step": 23080 + }, + { + "epoch": 0.5967487659266534, + "grad_norm": 0.0344776026904583, + "learning_rate": 6.219302654026119e-05, + "loss": 0.1198, + "step": 23090 + }, + { + "epoch": 0.5970072106065697, + "grad_norm": 0.02440005913376808, + "learning_rate": 6.215970314006992e-05, + "loss": 0.1202, + "step": 23100 + }, + { + "epoch": 0.5972656552864859, + "grad_norm": 0.03433089330792427, + "learning_rate": 6.212638233144365e-05, + "loss": 0.1197, + "step": 23110 + }, + { + "epoch": 0.5975240999664022, + "grad_norm": 0.03403666615486145, + "learning_rate": 6.209306414478523e-05, + "loss": 0.1203, + "step": 23120 + }, + { + "epoch": 0.5977825446463184, + "grad_norm": 0.029751112684607506, + "learning_rate": 6.205974861049506e-05, + "loss": 0.1203, + "step": 23130 + }, + { + "epoch": 0.5980409893262347, + "grad_norm": 0.04149489849805832, + "learning_rate": 6.202643575897118e-05, + "loss": 0.1206, + "step": 23140 + }, + { + "epoch": 0.5982994340061509, + "grad_norm": 0.028638649731874466, + "learning_rate": 6.199312562060914e-05, + "loss": 0.12, + "step": 23150 + }, + { + "epoch": 0.5985578786860672, + "grad_norm": 0.03953849524259567, + "learning_rate": 6.195981822580203e-05, + "loss": 0.1202, + "step": 23160 + }, + { + "epoch": 0.5988163233659836, + "grad_norm": 0.028142329305410385, + "learning_rate": 6.192651360494044e-05, + "loss": 0.1201, + "step": 23170 + }, + { + "epoch": 0.5990747680458998, + "grad_norm": 0.02766558714210987, + "learning_rate": 6.189321178841238e-05, + "loss": 0.1207, + "step": 23180 + }, + { + "epoch": 0.5993332127258161, + "grad_norm": 0.04271257296204567, + "learning_rate": 6.18599128066034e-05, + "loss": 0.1202, + "step": 23190 + }, + { + "epoch": 0.5995916574057323, + "grad_norm": 0.025764020159840584, + "learning_rate": 6.182661668989639e-05, + "loss": 0.1201, + "step": 23200 + }, + { + "epoch": 0.5998501020856486, + "grad_norm": 0.023223519325256348, + "learning_rate": 6.179332346867162e-05, + "loss": 0.121, + "step": 23210 + }, + { + "epoch": 0.6001085467655648, + "grad_norm": 0.04817137494683266, + "learning_rate": 6.176003317330676e-05, + "loss": 0.1206, + "step": 23220 + }, + { + "epoch": 0.6003669914454811, + "grad_norm": 0.028352193534374237, + "learning_rate": 6.17267458341768e-05, + "loss": 0.1202, + "step": 23230 + }, + { + "epoch": 0.6006254361253973, + "grad_norm": 0.031135383993387222, + "learning_rate": 6.1693461481654e-05, + "loss": 0.1202, + "step": 23240 + }, + { + "epoch": 0.6008838808053136, + "grad_norm": 0.03249121457338333, + "learning_rate": 6.166018014610795e-05, + "loss": 0.1212, + "step": 23250 + }, + { + "epoch": 0.6011423254852298, + "grad_norm": 0.031614821404218674, + "learning_rate": 6.162690185790542e-05, + "loss": 0.1198, + "step": 23260 + }, + { + "epoch": 0.6014007701651461, + "grad_norm": 0.0349024161696434, + "learning_rate": 6.159362664741048e-05, + "loss": 0.1201, + "step": 23270 + }, + { + "epoch": 0.6016592148450625, + "grad_norm": 0.023977864533662796, + "learning_rate": 6.156035454498433e-05, + "loss": 0.1205, + "step": 23280 + }, + { + "epoch": 0.6019176595249787, + "grad_norm": 0.02667900174856186, + "learning_rate": 6.15270855809853e-05, + "loss": 0.1193, + "step": 23290 + }, + { + "epoch": 0.602176104204895, + "grad_norm": 0.03409869596362114, + "learning_rate": 6.149381978576897e-05, + "loss": 0.1202, + "step": 23300 + }, + { + "epoch": 0.6024345488848112, + "grad_norm": 0.026758112013339996, + "learning_rate": 6.146055718968798e-05, + "loss": 0.1203, + "step": 23310 + }, + { + "epoch": 0.6026929935647275, + "grad_norm": 0.03157913684844971, + "learning_rate": 6.142729782309197e-05, + "loss": 0.1203, + "step": 23320 + }, + { + "epoch": 0.6029514382446437, + "grad_norm": 0.03713419660925865, + "learning_rate": 6.139404171632777e-05, + "loss": 0.1196, + "step": 23330 + }, + { + "epoch": 0.60320988292456, + "grad_norm": 0.025241626426577568, + "learning_rate": 6.136078889973914e-05, + "loss": 0.1199, + "step": 23340 + }, + { + "epoch": 0.6034683276044762, + "grad_norm": 0.028667651116847992, + "learning_rate": 6.132753940366684e-05, + "loss": 0.1201, + "step": 23350 + }, + { + "epoch": 0.6037267722843925, + "grad_norm": 0.029299983754754066, + "learning_rate": 6.129429325844864e-05, + "loss": 0.1203, + "step": 23360 + }, + { + "epoch": 0.6039852169643087, + "grad_norm": 0.03859277814626694, + "learning_rate": 6.126105049441926e-05, + "loss": 0.1206, + "step": 23370 + }, + { + "epoch": 0.604243661644225, + "grad_norm": 0.025605369359254837, + "learning_rate": 6.12278111419103e-05, + "loss": 0.1202, + "step": 23380 + }, + { + "epoch": 0.6045021063241414, + "grad_norm": 0.02142292447388172, + "learning_rate": 6.119457523125026e-05, + "loss": 0.1196, + "step": 23390 + }, + { + "epoch": 0.6047605510040576, + "grad_norm": 0.025842322036623955, + "learning_rate": 6.116134279276448e-05, + "loss": 0.1203, + "step": 23400 + }, + { + "epoch": 0.6050189956839739, + "grad_norm": 0.03403163701295853, + "learning_rate": 6.112811385677517e-05, + "loss": 0.1207, + "step": 23410 + }, + { + "epoch": 0.6052774403638901, + "grad_norm": 0.02627512253820896, + "learning_rate": 6.109488845360135e-05, + "loss": 0.1201, + "step": 23420 + }, + { + "epoch": 0.6055358850438064, + "grad_norm": 0.03262653946876526, + "learning_rate": 6.106166661355872e-05, + "loss": 0.1199, + "step": 23430 + }, + { + "epoch": 0.6057943297237226, + "grad_norm": 0.026189209893345833, + "learning_rate": 6.1028448366959865e-05, + "loss": 0.1211, + "step": 23440 + }, + { + "epoch": 0.6060527744036389, + "grad_norm": 0.03575453162193298, + "learning_rate": 6.0995233744114025e-05, + "loss": 0.1205, + "step": 23450 + }, + { + "epoch": 0.6063112190835551, + "grad_norm": 0.029949020594358444, + "learning_rate": 6.096202277532711e-05, + "loss": 0.1201, + "step": 23460 + }, + { + "epoch": 0.6065696637634714, + "grad_norm": 0.023818977177143097, + "learning_rate": 6.092881549090174e-05, + "loss": 0.1202, + "step": 23470 + }, + { + "epoch": 0.6068281084433877, + "grad_norm": 0.024105656892061234, + "learning_rate": 6.089561192113713e-05, + "loss": 0.1205, + "step": 23480 + }, + { + "epoch": 0.607086553123304, + "grad_norm": 0.03473284840583801, + "learning_rate": 6.086241209632915e-05, + "loss": 0.1205, + "step": 23490 + }, + { + "epoch": 0.6073449978032203, + "grad_norm": 0.04409159719944, + "learning_rate": 6.082921604677025e-05, + "loss": 0.1197, + "step": 23500 + }, + { + "epoch": 0.6076034424831365, + "grad_norm": 0.04801373556256294, + "learning_rate": 6.079602380274938e-05, + "loss": 0.1206, + "step": 23510 + }, + { + "epoch": 0.6078618871630528, + "grad_norm": 0.037882331758737564, + "learning_rate": 6.0762835394552076e-05, + "loss": 0.1205, + "step": 23520 + }, + { + "epoch": 0.608120331842969, + "grad_norm": 0.025891806930303574, + "learning_rate": 6.0729650852460374e-05, + "loss": 0.12, + "step": 23530 + }, + { + "epoch": 0.6083787765228853, + "grad_norm": 0.0466228686273098, + "learning_rate": 6.069647020675273e-05, + "loss": 0.1205, + "step": 23540 + }, + { + "epoch": 0.6086372212028015, + "grad_norm": 0.031879086047410965, + "learning_rate": 6.066329348770411e-05, + "loss": 0.1205, + "step": 23550 + }, + { + "epoch": 0.6088956658827178, + "grad_norm": 0.03821231424808502, + "learning_rate": 6.0630120725585826e-05, + "loss": 0.1201, + "step": 23560 + }, + { + "epoch": 0.609154110562634, + "grad_norm": 0.035344578325748444, + "learning_rate": 6.059695195066566e-05, + "loss": 0.1206, + "step": 23570 + }, + { + "epoch": 0.6094125552425503, + "grad_norm": 0.03250548243522644, + "learning_rate": 6.05637871932077e-05, + "loss": 0.1204, + "step": 23580 + }, + { + "epoch": 0.6096709999224666, + "grad_norm": 0.02314457856118679, + "learning_rate": 6.053062648347236e-05, + "loss": 0.1202, + "step": 23590 + }, + { + "epoch": 0.6099294446023829, + "grad_norm": 0.035005632787942886, + "learning_rate": 6.0497469851716426e-05, + "loss": 0.12, + "step": 23600 + }, + { + "epoch": 0.6101878892822992, + "grad_norm": 0.03695303946733475, + "learning_rate": 6.0464317328192886e-05, + "loss": 0.1199, + "step": 23610 + }, + { + "epoch": 0.6104463339622154, + "grad_norm": 0.024721214547753334, + "learning_rate": 6.043116894315104e-05, + "loss": 0.1201, + "step": 23620 + }, + { + "epoch": 0.6107047786421317, + "grad_norm": 0.02752186544239521, + "learning_rate": 6.039802472683637e-05, + "loss": 0.12, + "step": 23630 + }, + { + "epoch": 0.6109632233220479, + "grad_norm": 0.03410831093788147, + "learning_rate": 6.036488470949056e-05, + "loss": 0.1208, + "step": 23640 + }, + { + "epoch": 0.6112216680019642, + "grad_norm": 0.026195811107754707, + "learning_rate": 6.033174892135152e-05, + "loss": 0.1201, + "step": 23650 + }, + { + "epoch": 0.6114801126818804, + "grad_norm": 0.024462958797812462, + "learning_rate": 6.02986173926532e-05, + "loss": 0.1199, + "step": 23660 + }, + { + "epoch": 0.6117385573617967, + "grad_norm": 0.02644454687833786, + "learning_rate": 6.026549015362575e-05, + "loss": 0.1208, + "step": 23670 + }, + { + "epoch": 0.6119970020417129, + "grad_norm": 0.03618277609348297, + "learning_rate": 6.0232367234495365e-05, + "loss": 0.1202, + "step": 23680 + }, + { + "epoch": 0.6122554467216292, + "grad_norm": 0.024188894778490067, + "learning_rate": 6.019924866548432e-05, + "loss": 0.1194, + "step": 23690 + }, + { + "epoch": 0.6125138914015456, + "grad_norm": 0.025586819276213646, + "learning_rate": 6.01661344768109e-05, + "loss": 0.1201, + "step": 23700 + }, + { + "epoch": 0.6127723360814618, + "grad_norm": 0.027737993746995926, + "learning_rate": 6.013302469868939e-05, + "loss": 0.1202, + "step": 23710 + }, + { + "epoch": 0.6130307807613781, + "grad_norm": 0.02380572445690632, + "learning_rate": 6.009991936133008e-05, + "loss": 0.1199, + "step": 23720 + }, + { + "epoch": 0.6132892254412943, + "grad_norm": 0.03877473622560501, + "learning_rate": 6.006681849493918e-05, + "loss": 0.1198, + "step": 23730 + }, + { + "epoch": 0.6135476701212106, + "grad_norm": 0.02746492624282837, + "learning_rate": 6.003372212971885e-05, + "loss": 0.1191, + "step": 23740 + }, + { + "epoch": 0.6138061148011268, + "grad_norm": 0.037739694118499756, + "learning_rate": 6.00006302958671e-05, + "loss": 0.119, + "step": 23750 + }, + { + "epoch": 0.6140645594810431, + "grad_norm": 0.027818018570542336, + "learning_rate": 5.9967543023577856e-05, + "loss": 0.12, + "step": 23760 + }, + { + "epoch": 0.6143230041609593, + "grad_norm": 0.028766591101884842, + "learning_rate": 5.993446034304082e-05, + "loss": 0.12, + "step": 23770 + }, + { + "epoch": 0.6145814488408756, + "grad_norm": 0.04416964575648308, + "learning_rate": 5.990138228444158e-05, + "loss": 0.1202, + "step": 23780 + }, + { + "epoch": 0.6148398935207918, + "grad_norm": 0.03307035192847252, + "learning_rate": 5.986830887796143e-05, + "loss": 0.1197, + "step": 23790 + }, + { + "epoch": 0.6150983382007081, + "grad_norm": 0.030773771926760674, + "learning_rate": 5.983524015377747e-05, + "loss": 0.1201, + "step": 23800 + }, + { + "epoch": 0.6153567828806245, + "grad_norm": 0.037904173135757446, + "learning_rate": 5.980217614206254e-05, + "loss": 0.1194, + "step": 23810 + }, + { + "epoch": 0.6156152275605407, + "grad_norm": 0.026969458907842636, + "learning_rate": 5.9769116872985095e-05, + "loss": 0.1208, + "step": 23820 + }, + { + "epoch": 0.615873672240457, + "grad_norm": 0.027603447437286377, + "learning_rate": 5.973606237670939e-05, + "loss": 0.1203, + "step": 23830 + }, + { + "epoch": 0.6161321169203732, + "grad_norm": 0.027950743213295937, + "learning_rate": 5.970301268339522e-05, + "loss": 0.12, + "step": 23840 + }, + { + "epoch": 0.6163905616002895, + "grad_norm": 0.023910438641905785, + "learning_rate": 5.966996782319802e-05, + "loss": 0.12, + "step": 23850 + }, + { + "epoch": 0.6166490062802057, + "grad_norm": 0.06179109588265419, + "learning_rate": 5.9636927826268875e-05, + "loss": 0.1197, + "step": 23860 + }, + { + "epoch": 0.616907450960122, + "grad_norm": 0.038110822439193726, + "learning_rate": 5.9603892722754354e-05, + "loss": 0.12, + "step": 23870 + }, + { + "epoch": 0.6171658956400382, + "grad_norm": 0.028888389468193054, + "learning_rate": 5.9570862542796594e-05, + "loss": 0.1205, + "step": 23880 + }, + { + "epoch": 0.6174243403199545, + "grad_norm": 0.02795802615582943, + "learning_rate": 5.9537837316533264e-05, + "loss": 0.1197, + "step": 23890 + }, + { + "epoch": 0.6176827849998707, + "grad_norm": 0.032210823148489, + "learning_rate": 5.950481707409745e-05, + "loss": 0.12, + "step": 23900 + }, + { + "epoch": 0.617941229679787, + "grad_norm": 0.03120792843401432, + "learning_rate": 5.947180184561777e-05, + "loss": 0.1196, + "step": 23910 + }, + { + "epoch": 0.6181996743597034, + "grad_norm": 0.029853668063879013, + "learning_rate": 5.943879166121822e-05, + "loss": 0.1206, + "step": 23920 + }, + { + "epoch": 0.6184581190396196, + "grad_norm": 0.03117043524980545, + "learning_rate": 5.940578655101816e-05, + "loss": 0.1203, + "step": 23930 + }, + { + "epoch": 0.6187165637195359, + "grad_norm": 0.031239697709679604, + "learning_rate": 5.937278654513243e-05, + "loss": 0.1193, + "step": 23940 + }, + { + "epoch": 0.6189750083994521, + "grad_norm": 0.03697359934449196, + "learning_rate": 5.933979167367107e-05, + "loss": 0.1202, + "step": 23950 + }, + { + "epoch": 0.6192334530793684, + "grad_norm": 0.028589272871613503, + "learning_rate": 5.930680196673957e-05, + "loss": 0.1203, + "step": 23960 + }, + { + "epoch": 0.6194918977592846, + "grad_norm": 0.030280714854598045, + "learning_rate": 5.927381745443862e-05, + "loss": 0.1199, + "step": 23970 + }, + { + "epoch": 0.6197503424392009, + "grad_norm": 0.030847055837512016, + "learning_rate": 5.924083816686419e-05, + "loss": 0.1202, + "step": 23980 + }, + { + "epoch": 0.6200087871191171, + "grad_norm": 0.028881292790174484, + "learning_rate": 5.9207864134107495e-05, + "loss": 0.1203, + "step": 23990 + }, + { + "epoch": 0.6202672317990334, + "grad_norm": 0.053632356226444244, + "learning_rate": 5.917489538625496e-05, + "loss": 0.1203, + "step": 24000 + }, + { + "epoch": 0.6205256764789496, + "grad_norm": 0.03311203047633171, + "learning_rate": 5.9141931953388154e-05, + "loss": 0.1206, + "step": 24010 + }, + { + "epoch": 0.6207841211588659, + "grad_norm": 0.027662605047225952, + "learning_rate": 5.910897386558385e-05, + "loss": 0.1207, + "step": 24020 + }, + { + "epoch": 0.6210425658387823, + "grad_norm": 0.028672438114881516, + "learning_rate": 5.907602115291389e-05, + "loss": 0.1203, + "step": 24030 + }, + { + "epoch": 0.6213010105186985, + "grad_norm": 0.02948087453842163, + "learning_rate": 5.904307384544525e-05, + "loss": 0.1204, + "step": 24040 + }, + { + "epoch": 0.6215594551986148, + "grad_norm": 0.035290371626615524, + "learning_rate": 5.9010131973239966e-05, + "loss": 0.1209, + "step": 24050 + }, + { + "epoch": 0.621817899878531, + "grad_norm": 0.033652812242507935, + "learning_rate": 5.897719556635508e-05, + "loss": 0.1207, + "step": 24060 + }, + { + "epoch": 0.6220763445584473, + "grad_norm": 0.024178508669137955, + "learning_rate": 5.894426465484271e-05, + "loss": 0.1208, + "step": 24070 + }, + { + "epoch": 0.6223347892383635, + "grad_norm": 0.03415082395076752, + "learning_rate": 5.89113392687499e-05, + "loss": 0.1194, + "step": 24080 + }, + { + "epoch": 0.6225932339182798, + "grad_norm": 0.02533694915473461, + "learning_rate": 5.887841943811869e-05, + "loss": 0.1204, + "step": 24090 + }, + { + "epoch": 0.622851678598196, + "grad_norm": 0.029842639341950417, + "learning_rate": 5.884550519298604e-05, + "loss": 0.1203, + "step": 24100 + }, + { + "epoch": 0.6231101232781123, + "grad_norm": 0.0562666691839695, + "learning_rate": 5.88125965633838e-05, + "loss": 0.1199, + "step": 24110 + }, + { + "epoch": 0.6233685679580285, + "grad_norm": 0.0415954515337944, + "learning_rate": 5.8779693579338745e-05, + "loss": 0.1201, + "step": 24120 + }, + { + "epoch": 0.6236270126379448, + "grad_norm": 0.02686784788966179, + "learning_rate": 5.874679627087243e-05, + "loss": 0.121, + "step": 24130 + }, + { + "epoch": 0.6238854573178612, + "grad_norm": 0.032707564532756805, + "learning_rate": 5.871390466800126e-05, + "loss": 0.1208, + "step": 24140 + }, + { + "epoch": 0.6241439019977774, + "grad_norm": 0.03841756656765938, + "learning_rate": 5.8681018800736456e-05, + "loss": 0.121, + "step": 24150 + }, + { + "epoch": 0.6244023466776937, + "grad_norm": 0.030206454917788506, + "learning_rate": 5.864813869908399e-05, + "loss": 0.1202, + "step": 24160 + }, + { + "epoch": 0.6246607913576099, + "grad_norm": 0.036633167415857315, + "learning_rate": 5.8615264393044554e-05, + "loss": 0.1204, + "step": 24170 + }, + { + "epoch": 0.6249192360375262, + "grad_norm": 0.030994726344943047, + "learning_rate": 5.858239591261359e-05, + "loss": 0.1198, + "step": 24180 + }, + { + "epoch": 0.6251776807174424, + "grad_norm": 0.035461775958538055, + "learning_rate": 5.85495332877812e-05, + "loss": 0.1198, + "step": 24190 + }, + { + "epoch": 0.6254361253973587, + "grad_norm": 0.02795405685901642, + "learning_rate": 5.851667654853212e-05, + "loss": 0.1197, + "step": 24200 + }, + { + "epoch": 0.6256945700772749, + "grad_norm": 0.04085111618041992, + "learning_rate": 5.848382572484579e-05, + "loss": 0.1202, + "step": 24210 + }, + { + "epoch": 0.6259530147571912, + "grad_norm": 0.030849743634462357, + "learning_rate": 5.8450980846696165e-05, + "loss": 0.1193, + "step": 24220 + }, + { + "epoch": 0.6262114594371074, + "grad_norm": 0.023829836398363113, + "learning_rate": 5.841814194405184e-05, + "loss": 0.1203, + "step": 24230 + }, + { + "epoch": 0.6264699041170237, + "grad_norm": 0.025277025997638702, + "learning_rate": 5.8385309046875924e-05, + "loss": 0.1209, + "step": 24240 + }, + { + "epoch": 0.6267283487969401, + "grad_norm": 0.03978417068719864, + "learning_rate": 5.8352482185126055e-05, + "loss": 0.1197, + "step": 24250 + }, + { + "epoch": 0.6269867934768563, + "grad_norm": 0.07226187735795975, + "learning_rate": 5.831966138875435e-05, + "loss": 0.1196, + "step": 24260 + }, + { + "epoch": 0.6272452381567726, + "grad_norm": 0.03896019607782364, + "learning_rate": 5.828684668770745e-05, + "loss": 0.1198, + "step": 24270 + }, + { + "epoch": 0.6275036828366888, + "grad_norm": 0.03002898208796978, + "learning_rate": 5.825403811192634e-05, + "loss": 0.1195, + "step": 24280 + }, + { + "epoch": 0.6277621275166051, + "grad_norm": 0.029256947338581085, + "learning_rate": 5.822123569134651e-05, + "loss": 0.1202, + "step": 24290 + }, + { + "epoch": 0.6280205721965213, + "grad_norm": 0.027877312153577805, + "learning_rate": 5.818843945589776e-05, + "loss": 0.1203, + "step": 24300 + }, + { + "epoch": 0.6282790168764376, + "grad_norm": 0.029464812949299812, + "learning_rate": 5.81556494355043e-05, + "loss": 0.1196, + "step": 24310 + }, + { + "epoch": 0.6285374615563538, + "grad_norm": 0.0393315814435482, + "learning_rate": 5.8122865660084634e-05, + "loss": 0.1198, + "step": 24320 + }, + { + "epoch": 0.6287959062362701, + "grad_norm": 0.02924761362373829, + "learning_rate": 5.809008815955158e-05, + "loss": 0.1208, + "step": 24330 + }, + { + "epoch": 0.6290543509161864, + "grad_norm": 0.04077480360865593, + "learning_rate": 5.805731696381225e-05, + "loss": 0.12, + "step": 24340 + }, + { + "epoch": 0.6293127955961026, + "grad_norm": 0.03377600759267807, + "learning_rate": 5.802455210276796e-05, + "loss": 0.12, + "step": 24350 + }, + { + "epoch": 0.629571240276019, + "grad_norm": 0.02665090560913086, + "learning_rate": 5.799179360631427e-05, + "loss": 0.1203, + "step": 24360 + }, + { + "epoch": 0.6298296849559352, + "grad_norm": 0.04721689596772194, + "learning_rate": 5.795904150434096e-05, + "loss": 0.1198, + "step": 24370 + }, + { + "epoch": 0.6300881296358515, + "grad_norm": 0.04136420041322708, + "learning_rate": 5.792629582673193e-05, + "loss": 0.1201, + "step": 24380 + }, + { + "epoch": 0.6303465743157677, + "grad_norm": 0.028800373896956444, + "learning_rate": 5.789355660336524e-05, + "loss": 0.1201, + "step": 24390 + }, + { + "epoch": 0.630605018995684, + "grad_norm": 0.03124481998383999, + "learning_rate": 5.7860823864113065e-05, + "loss": 0.1204, + "step": 24400 + }, + { + "epoch": 0.6308634636756002, + "grad_norm": 0.027955301105976105, + "learning_rate": 5.782809763884165e-05, + "loss": 0.1201, + "step": 24410 + }, + { + "epoch": 0.6311219083555165, + "grad_norm": 0.02553599141538143, + "learning_rate": 5.7795377957411304e-05, + "loss": 0.1201, + "step": 24420 + }, + { + "epoch": 0.6313803530354327, + "grad_norm": 0.03364476561546326, + "learning_rate": 5.776266484967637e-05, + "loss": 0.1201, + "step": 24430 + }, + { + "epoch": 0.631638797715349, + "grad_norm": 0.03482991084456444, + "learning_rate": 5.772995834548518e-05, + "loss": 0.1199, + "step": 24440 + }, + { + "epoch": 0.6318972423952653, + "grad_norm": 0.03227397799491882, + "learning_rate": 5.769725847468005e-05, + "loss": 0.1197, + "step": 24450 + }, + { + "epoch": 0.6321556870751815, + "grad_norm": 0.030005550011992455, + "learning_rate": 5.7664565267097245e-05, + "loss": 0.1196, + "step": 24460 + }, + { + "epoch": 0.6324141317550979, + "grad_norm": 0.02360370382666588, + "learning_rate": 5.763187875256695e-05, + "loss": 0.1208, + "step": 24470 + }, + { + "epoch": 0.6326725764350141, + "grad_norm": 0.03376266360282898, + "learning_rate": 5.759919896091324e-05, + "loss": 0.1197, + "step": 24480 + }, + { + "epoch": 0.6329310211149304, + "grad_norm": 0.032962772995233536, + "learning_rate": 5.756652592195404e-05, + "loss": 0.1194, + "step": 24490 + }, + { + "epoch": 0.6331894657948466, + "grad_norm": 0.040364962071180344, + "learning_rate": 5.753385966550116e-05, + "loss": 0.1199, + "step": 24500 + }, + { + "epoch": 0.6334479104747629, + "grad_norm": 0.03463337570428848, + "learning_rate": 5.7501200221360166e-05, + "loss": 0.1201, + "step": 24510 + }, + { + "epoch": 0.6337063551546791, + "grad_norm": 0.029294949024915695, + "learning_rate": 5.746854761933043e-05, + "loss": 0.1199, + "step": 24520 + }, + { + "epoch": 0.6339647998345954, + "grad_norm": 0.03732588142156601, + "learning_rate": 5.743590188920511e-05, + "loss": 0.1196, + "step": 24530 + }, + { + "epoch": 0.6342232445145116, + "grad_norm": 0.03258556127548218, + "learning_rate": 5.740326306077105e-05, + "loss": 0.1202, + "step": 24540 + }, + { + "epoch": 0.6344816891944279, + "grad_norm": 0.03437505289912224, + "learning_rate": 5.737063116380882e-05, + "loss": 0.1202, + "step": 24550 + }, + { + "epoch": 0.6347401338743442, + "grad_norm": 0.02828713320195675, + "learning_rate": 5.7338006228092666e-05, + "loss": 0.1201, + "step": 24560 + }, + { + "epoch": 0.6349985785542605, + "grad_norm": 0.02348913811147213, + "learning_rate": 5.7305388283390474e-05, + "loss": 0.1195, + "step": 24570 + }, + { + "epoch": 0.6352570232341768, + "grad_norm": 0.04117782413959503, + "learning_rate": 5.7272777359463736e-05, + "loss": 0.1197, + "step": 24580 + }, + { + "epoch": 0.635515467914093, + "grad_norm": 0.030943231657147408, + "learning_rate": 5.7240173486067594e-05, + "loss": 0.1202, + "step": 24590 + }, + { + "epoch": 0.6357739125940093, + "grad_norm": 0.028614941984415054, + "learning_rate": 5.7207576692950694e-05, + "loss": 0.1207, + "step": 24600 + }, + { + "epoch": 0.6360323572739255, + "grad_norm": 0.022621458396315575, + "learning_rate": 5.717498700985524e-05, + "loss": 0.1198, + "step": 24610 + }, + { + "epoch": 0.6362908019538418, + "grad_norm": 0.036870427429676056, + "learning_rate": 5.714240446651696e-05, + "loss": 0.1197, + "step": 24620 + }, + { + "epoch": 0.636549246633758, + "grad_norm": 0.05102727934718132, + "learning_rate": 5.710982909266509e-05, + "loss": 0.12, + "step": 24630 + }, + { + "epoch": 0.6368076913136743, + "grad_norm": 0.032229792326688766, + "learning_rate": 5.707726091802226e-05, + "loss": 0.1201, + "step": 24640 + }, + { + "epoch": 0.6370661359935905, + "grad_norm": 0.03232279792428017, + "learning_rate": 5.70446999723046e-05, + "loss": 0.1199, + "step": 24650 + }, + { + "epoch": 0.6373245806735068, + "grad_norm": 0.031677063554525375, + "learning_rate": 5.701214628522158e-05, + "loss": 0.1205, + "step": 24660 + }, + { + "epoch": 0.6375830253534231, + "grad_norm": 0.024359755218029022, + "learning_rate": 5.697959988647611e-05, + "loss": 0.1205, + "step": 24670 + }, + { + "epoch": 0.6378414700333394, + "grad_norm": 0.04074995964765549, + "learning_rate": 5.694706080576442e-05, + "loss": 0.1202, + "step": 24680 + }, + { + "epoch": 0.6380999147132557, + "grad_norm": 0.02405305579304695, + "learning_rate": 5.6914529072776025e-05, + "loss": 0.1207, + "step": 24690 + }, + { + "epoch": 0.6383583593931719, + "grad_norm": 0.03367634117603302, + "learning_rate": 5.6882004717193814e-05, + "loss": 0.121, + "step": 24700 + }, + { + "epoch": 0.6386168040730882, + "grad_norm": 0.03751122206449509, + "learning_rate": 5.68494877686939e-05, + "loss": 0.1205, + "step": 24710 + }, + { + "epoch": 0.6388752487530044, + "grad_norm": 0.03229736536741257, + "learning_rate": 5.681697825694561e-05, + "loss": 0.1194, + "step": 24720 + }, + { + "epoch": 0.6391336934329207, + "grad_norm": 0.02507159858942032, + "learning_rate": 5.678447621161155e-05, + "loss": 0.1197, + "step": 24730 + }, + { + "epoch": 0.6393921381128369, + "grad_norm": 0.03590483218431473, + "learning_rate": 5.675198166234748e-05, + "loss": 0.1204, + "step": 24740 + }, + { + "epoch": 0.6396505827927532, + "grad_norm": 0.02819031849503517, + "learning_rate": 5.6719494638802286e-05, + "loss": 0.1197, + "step": 24750 + }, + { + "epoch": 0.6399090274726694, + "grad_norm": 0.02711637131869793, + "learning_rate": 5.668701517061805e-05, + "loss": 0.1207, + "step": 24760 + }, + { + "epoch": 0.6401674721525857, + "grad_norm": 0.04199349507689476, + "learning_rate": 5.665454328742994e-05, + "loss": 0.1188, + "step": 24770 + }, + { + "epoch": 0.640425916832502, + "grad_norm": 0.03979264944791794, + "learning_rate": 5.662207901886618e-05, + "loss": 0.1205, + "step": 24780 + }, + { + "epoch": 0.6406843615124183, + "grad_norm": 0.042441003024578094, + "learning_rate": 5.658962239454807e-05, + "loss": 0.1207, + "step": 24790 + }, + { + "epoch": 0.6409428061923346, + "grad_norm": 0.0321795828640461, + "learning_rate": 5.6557173444089915e-05, + "loss": 0.1198, + "step": 24800 + }, + { + "epoch": 0.6412012508722508, + "grad_norm": 0.02678738161921501, + "learning_rate": 5.652473219709904e-05, + "loss": 0.1197, + "step": 24810 + }, + { + "epoch": 0.6414596955521671, + "grad_norm": 0.025737112388014793, + "learning_rate": 5.649229868317574e-05, + "loss": 0.1202, + "step": 24820 + }, + { + "epoch": 0.6417181402320833, + "grad_norm": 0.06117282062768936, + "learning_rate": 5.6459872931913226e-05, + "loss": 0.1197, + "step": 24830 + }, + { + "epoch": 0.6419765849119996, + "grad_norm": 0.03487526625394821, + "learning_rate": 5.6427454972897654e-05, + "loss": 0.1204, + "step": 24840 + }, + { + "epoch": 0.6422350295919158, + "grad_norm": 0.02815990336239338, + "learning_rate": 5.6395044835708074e-05, + "loss": 0.1199, + "step": 24850 + }, + { + "epoch": 0.6424934742718321, + "grad_norm": 0.058127790689468384, + "learning_rate": 5.6362642549916364e-05, + "loss": 0.1192, + "step": 24860 + }, + { + "epoch": 0.6427519189517483, + "grad_norm": 0.03291185572743416, + "learning_rate": 5.6330248145087294e-05, + "loss": 0.1201, + "step": 24870 + }, + { + "epoch": 0.6430103636316646, + "grad_norm": 0.029835432767868042, + "learning_rate": 5.629786165077838e-05, + "loss": 0.1195, + "step": 24880 + }, + { + "epoch": 0.643268808311581, + "grad_norm": 0.023014042526483536, + "learning_rate": 5.626548309653996e-05, + "loss": 0.1203, + "step": 24890 + }, + { + "epoch": 0.6435272529914972, + "grad_norm": 0.027811389416456223, + "learning_rate": 5.623311251191512e-05, + "loss": 0.1199, + "step": 24900 + }, + { + "epoch": 0.6437856976714135, + "grad_norm": 0.03482671454548836, + "learning_rate": 5.620074992643966e-05, + "loss": 0.1197, + "step": 24910 + }, + { + "epoch": 0.6440441423513297, + "grad_norm": 0.03597183898091316, + "learning_rate": 5.616839536964211e-05, + "loss": 0.1202, + "step": 24920 + }, + { + "epoch": 0.644302587031246, + "grad_norm": 0.03530609607696533, + "learning_rate": 5.6136048871043665e-05, + "loss": 0.1198, + "step": 24930 + }, + { + "epoch": 0.6445610317111622, + "grad_norm": 0.036171674728393555, + "learning_rate": 5.610371046015813e-05, + "loss": 0.1201, + "step": 24940 + }, + { + "epoch": 0.6448194763910785, + "grad_norm": 0.02483583241701126, + "learning_rate": 5.6071380166491995e-05, + "loss": 0.1202, + "step": 24950 + }, + { + "epoch": 0.6450779210709947, + "grad_norm": 0.04125643149018288, + "learning_rate": 5.603905801954428e-05, + "loss": 0.1204, + "step": 24960 + }, + { + "epoch": 0.645336365750911, + "grad_norm": 0.04914587736129761, + "learning_rate": 5.6006744048806615e-05, + "loss": 0.1203, + "step": 24970 + }, + { + "epoch": 0.6455948104308272, + "grad_norm": 0.02955356426537037, + "learning_rate": 5.597443828376317e-05, + "loss": 0.1199, + "step": 24980 + }, + { + "epoch": 0.6458532551107435, + "grad_norm": 0.02415124885737896, + "learning_rate": 5.594214075389059e-05, + "loss": 0.1193, + "step": 24990 + }, + { + "epoch": 0.6461116997906599, + "grad_norm": 0.03034690022468567, + "learning_rate": 5.590985148865804e-05, + "loss": 0.1205, + "step": 25000 + }, + { + "epoch": 0.6463701444705761, + "grad_norm": 0.03594639152288437, + "learning_rate": 5.5877570517527136e-05, + "loss": 0.1203, + "step": 25010 + }, + { + "epoch": 0.6466285891504924, + "grad_norm": 0.02639005333185196, + "learning_rate": 5.584529786995192e-05, + "loss": 0.1203, + "step": 25020 + }, + { + "epoch": 0.6468870338304086, + "grad_norm": 0.02775043435394764, + "learning_rate": 5.5813033575378835e-05, + "loss": 0.1198, + "step": 25030 + }, + { + "epoch": 0.6471454785103249, + "grad_norm": 0.028890706598758698, + "learning_rate": 5.5780777663246744e-05, + "loss": 0.1205, + "step": 25040 + }, + { + "epoch": 0.6474039231902411, + "grad_norm": 0.024572158232331276, + "learning_rate": 5.574853016298679e-05, + "loss": 0.1201, + "step": 25050 + }, + { + "epoch": 0.6476623678701574, + "grad_norm": 0.024553561583161354, + "learning_rate": 5.57162911040225e-05, + "loss": 0.1194, + "step": 25060 + }, + { + "epoch": 0.6479208125500736, + "grad_norm": 0.03024650178849697, + "learning_rate": 5.5684060515769686e-05, + "loss": 0.1205, + "step": 25070 + }, + { + "epoch": 0.6481792572299899, + "grad_norm": 0.028168601915240288, + "learning_rate": 5.565183842763639e-05, + "loss": 0.1214, + "step": 25080 + }, + { + "epoch": 0.6484377019099062, + "grad_norm": 0.03320198506116867, + "learning_rate": 5.561962486902299e-05, + "loss": 0.1206, + "step": 25090 + }, + { + "epoch": 0.6486961465898224, + "grad_norm": 0.0378737710416317, + "learning_rate": 5.558741986932196e-05, + "loss": 0.1198, + "step": 25100 + }, + { + "epoch": 0.6489545912697388, + "grad_norm": 0.028430327773094177, + "learning_rate": 5.555522345791806e-05, + "loss": 0.1201, + "step": 25110 + }, + { + "epoch": 0.649213035949655, + "grad_norm": 0.026681307703256607, + "learning_rate": 5.552303566418819e-05, + "loss": 0.1199, + "step": 25120 + }, + { + "epoch": 0.6494714806295713, + "grad_norm": 0.035116855055093765, + "learning_rate": 5.549085651750135e-05, + "loss": 0.1198, + "step": 25130 + }, + { + "epoch": 0.6497299253094875, + "grad_norm": 0.028820693492889404, + "learning_rate": 5.545868604721871e-05, + "loss": 0.12, + "step": 25140 + }, + { + "epoch": 0.6499883699894038, + "grad_norm": 0.025469042360782623, + "learning_rate": 5.542652428269348e-05, + "loss": 0.12, + "step": 25150 + }, + { + "epoch": 0.65024681466932, + "grad_norm": 0.0316486693918705, + "learning_rate": 5.5394371253270915e-05, + "loss": 0.1204, + "step": 25160 + }, + { + "epoch": 0.6505052593492363, + "grad_norm": 0.03132392093539238, + "learning_rate": 5.536222698828837e-05, + "loss": 0.1208, + "step": 25170 + }, + { + "epoch": 0.6507637040291525, + "grad_norm": 0.03062150813639164, + "learning_rate": 5.5330091517075114e-05, + "loss": 0.1205, + "step": 25180 + }, + { + "epoch": 0.6510221487090688, + "grad_norm": 0.03650057688355446, + "learning_rate": 5.5297964868952465e-05, + "loss": 0.1197, + "step": 25190 + }, + { + "epoch": 0.6512805933889851, + "grad_norm": 0.0356188602745533, + "learning_rate": 5.526584707323366e-05, + "loss": 0.1201, + "step": 25200 + }, + { + "epoch": 0.6515390380689013, + "grad_norm": 0.03903546556830406, + "learning_rate": 5.523373815922383e-05, + "loss": 0.1199, + "step": 25210 + }, + { + "epoch": 0.6517974827488177, + "grad_norm": 0.0248777586966753, + "learning_rate": 5.5201638156220056e-05, + "loss": 0.1201, + "step": 25220 + }, + { + "epoch": 0.6520559274287339, + "grad_norm": 0.028163433074951172, + "learning_rate": 5.5169547093511254e-05, + "loss": 0.1204, + "step": 25230 + }, + { + "epoch": 0.6523143721086502, + "grad_norm": 0.02941160276532173, + "learning_rate": 5.5137465000378206e-05, + "loss": 0.1207, + "step": 25240 + }, + { + "epoch": 0.6525728167885664, + "grad_norm": 0.04972623288631439, + "learning_rate": 5.5105391906093497e-05, + "loss": 0.1203, + "step": 25250 + }, + { + "epoch": 0.6528312614684827, + "grad_norm": 0.03381293639540672, + "learning_rate": 5.5073327839921476e-05, + "loss": 0.1206, + "step": 25260 + }, + { + "epoch": 0.6530897061483989, + "grad_norm": 0.035716794431209564, + "learning_rate": 5.50412728311183e-05, + "loss": 0.1203, + "step": 25270 + }, + { + "epoch": 0.6533481508283152, + "grad_norm": 0.03782845288515091, + "learning_rate": 5.5009226908931845e-05, + "loss": 0.12, + "step": 25280 + }, + { + "epoch": 0.6536065955082314, + "grad_norm": 0.03299339488148689, + "learning_rate": 5.4977190102601676e-05, + "loss": 0.1207, + "step": 25290 + }, + { + "epoch": 0.6538650401881477, + "grad_norm": 0.02749737724661827, + "learning_rate": 5.494516244135905e-05, + "loss": 0.1208, + "step": 25300 + }, + { + "epoch": 0.654123484868064, + "grad_norm": 0.028708314523100853, + "learning_rate": 5.4913143954426925e-05, + "loss": 0.1207, + "step": 25310 + }, + { + "epoch": 0.6543819295479802, + "grad_norm": 0.02820289507508278, + "learning_rate": 5.488113467101982e-05, + "loss": 0.1198, + "step": 25320 + }, + { + "epoch": 0.6546403742278966, + "grad_norm": 0.028929775580763817, + "learning_rate": 5.484913462034389e-05, + "loss": 0.1209, + "step": 25330 + }, + { + "epoch": 0.6548988189078128, + "grad_norm": 0.025597140192985535, + "learning_rate": 5.4817143831596854e-05, + "loss": 0.1197, + "step": 25340 + }, + { + "epoch": 0.6551572635877291, + "grad_norm": 0.02871745266020298, + "learning_rate": 5.4785162333967996e-05, + "loss": 0.12, + "step": 25350 + }, + { + "epoch": 0.6554157082676453, + "grad_norm": 0.05172182247042656, + "learning_rate": 5.475319015663813e-05, + "loss": 0.1202, + "step": 25360 + }, + { + "epoch": 0.6556741529475616, + "grad_norm": 0.027819454669952393, + "learning_rate": 5.472122732877953e-05, + "loss": 0.1204, + "step": 25370 + }, + { + "epoch": 0.6559325976274778, + "grad_norm": 0.024913856759667397, + "learning_rate": 5.468927387955597e-05, + "loss": 0.1196, + "step": 25380 + }, + { + "epoch": 0.6561910423073941, + "grad_norm": 0.03589522838592529, + "learning_rate": 5.465732983812265e-05, + "loss": 0.1204, + "step": 25390 + }, + { + "epoch": 0.6564494869873103, + "grad_norm": 0.030139105394482613, + "learning_rate": 5.4625395233626195e-05, + "loss": 0.1191, + "step": 25400 + }, + { + "epoch": 0.6567079316672266, + "grad_norm": 0.03189101442694664, + "learning_rate": 5.4593470095204616e-05, + "loss": 0.1202, + "step": 25410 + }, + { + "epoch": 0.6569663763471429, + "grad_norm": 0.027369987219572067, + "learning_rate": 5.456155445198729e-05, + "loss": 0.12, + "step": 25420 + }, + { + "epoch": 0.6572248210270591, + "grad_norm": 0.03357731178402901, + "learning_rate": 5.452964833309493e-05, + "loss": 0.12, + "step": 25430 + }, + { + "epoch": 0.6574832657069755, + "grad_norm": 0.03357580676674843, + "learning_rate": 5.449775176763955e-05, + "loss": 0.1193, + "step": 25440 + }, + { + "epoch": 0.6577417103868917, + "grad_norm": 0.028555599972605705, + "learning_rate": 5.446586478472446e-05, + "loss": 0.1203, + "step": 25450 + }, + { + "epoch": 0.658000155066808, + "grad_norm": 0.0291401669383049, + "learning_rate": 5.443398741344421e-05, + "loss": 0.1198, + "step": 25460 + }, + { + "epoch": 0.6582585997467242, + "grad_norm": 0.03145145997405052, + "learning_rate": 5.44021196828846e-05, + "loss": 0.1199, + "step": 25470 + }, + { + "epoch": 0.6585170444266405, + "grad_norm": 0.03379204124212265, + "learning_rate": 5.437026162212262e-05, + "loss": 0.1206, + "step": 25480 + }, + { + "epoch": 0.6587754891065567, + "grad_norm": 0.047877222299575806, + "learning_rate": 5.433841326022645e-05, + "loss": 0.1206, + "step": 25490 + }, + { + "epoch": 0.659033933786473, + "grad_norm": 0.03171222656965256, + "learning_rate": 5.4306574626255404e-05, + "loss": 0.1205, + "step": 25500 + }, + { + "epoch": 0.6592923784663892, + "grad_norm": 0.028316201642155647, + "learning_rate": 5.4274745749259935e-05, + "loss": 0.1206, + "step": 25510 + }, + { + "epoch": 0.6595508231463055, + "grad_norm": 0.02742663212120533, + "learning_rate": 5.424292665828158e-05, + "loss": 0.12, + "step": 25520 + }, + { + "epoch": 0.6598092678262218, + "grad_norm": 0.03049127385020256, + "learning_rate": 5.421111738235296e-05, + "loss": 0.1196, + "step": 25530 + }, + { + "epoch": 0.660067712506138, + "grad_norm": 0.02635253593325615, + "learning_rate": 5.417931795049771e-05, + "loss": 0.1197, + "step": 25540 + } + ], + "logging_steps": 10, + "max_steps": 38693, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 387, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8.153831474164924e+18, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +} diff --git a/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/training_args.bin b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..dcc24935fbec007a8bb96df5405430d79cd72660 --- /dev/null +++ b/saves/composition-10B/op_level/id2-10_0.2easy_0.3medium_0.5hard/cpt0.2-uniform_0.8-11-14_plus/checkpoint-25542/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25e3a78494c56d71516c49e12d8a074314e5088114a26358e11ffa123915451a +size 6353