Translation
Transformers
PyTorch
t5
text2text-generation
chemistry
biology
text-generation-inference
Instructions to use AI4PD/REXzyme with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use AI4PD/REXzyme with Transformers:
# Use a pipeline as a high-level helper # Warning: Pipeline type "translation" is no longer supported in transformers v5. # You must load the model directly (see below) or downgrade to v4.x with: # 'pip install "transformers<5.0.0"' from transformers import pipeline pipe = pipeline("translation", model="AI4PD/REXzyme") # Load model directly from transformers import AutoTokenizer, AutoModelForSeq2SeqLM tokenizer = AutoTokenizer.from_pretrained("AI4PD/REXzyme") model = AutoModelForSeq2SeqLM.from_pretrained("AI4PD/REXzyme") - Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 7.473575572084416, | |
| "global_step": 210000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.999221493020686e-05, | |
| "loss": 3.8388, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.99844298604137e-05, | |
| "loss": 2.944, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.997664479062055e-05, | |
| "loss": 2.8362, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.99688597208274e-05, | |
| "loss": 2.782, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.9961074651034254e-05, | |
| "loss": 2.7643, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.9626307922272054e-05, | |
| "loss": 2.7412, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.956402590931739e-05, | |
| "loss": 2.7286, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.9501743896362735e-05, | |
| "loss": 2.7117, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.943946188340807e-05, | |
| "loss": 2.6993, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9377179870453416e-05, | |
| "loss": 2.6795, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 2.5517239570617676, | |
| "eval_runtime": 3812.4069, | |
| "eval_samples_per_second": 838.582, | |
| "eval_steps_per_second": 0.819, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.9314897857498754e-05, | |
| "loss": 2.6627, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.92526158445441e-05, | |
| "loss": 2.652, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.919033383158944e-05, | |
| "loss": 2.6428, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.912805181863478e-05, | |
| "loss": 2.6378, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.9065769805680123e-05, | |
| "loss": 2.6297, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.900348779272546e-05, | |
| "loss": 2.622, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.8941205779770805e-05, | |
| "loss": 2.6128, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.887892376681615e-05, | |
| "loss": 2.6052, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.8816641753861486e-05, | |
| "loss": 2.594, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.875435974090683e-05, | |
| "loss": 2.5815, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.869207772795217e-05, | |
| "loss": 2.5688, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.862979571499751e-05, | |
| "loss": 2.5553, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.8567513702042856e-05, | |
| "loss": 2.5421, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.850523168908819e-05, | |
| "loss": 2.5244, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.844294967613354e-05, | |
| "loss": 2.5127, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.8380667663178875e-05, | |
| "loss": 2.4942, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.831838565022422e-05, | |
| "loss": 2.4787, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.8256103637269556e-05, | |
| "loss": 2.4644, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.81938216243149e-05, | |
| "loss": 2.4422, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.8131539611360244e-05, | |
| "loss": 2.4266, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.806925759840558e-05, | |
| "loss": 2.402, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.8006975585450926e-05, | |
| "loss": 2.388, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.794469357249626e-05, | |
| "loss": 2.3705, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.788241155954161e-05, | |
| "loss": 2.3557, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.782012954658695e-05, | |
| "loss": 2.3361, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.775784753363229e-05, | |
| "loss": 2.3186, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.769556552067763e-05, | |
| "loss": 2.301, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.763328350772297e-05, | |
| "loss": 2.2857, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.7571001494768314e-05, | |
| "loss": 2.2735, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.750871948181366e-05, | |
| "loss": 2.2549, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.7446437468858996e-05, | |
| "loss": 2.245, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.738415545590434e-05, | |
| "loss": 2.2276, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.732187344294968e-05, | |
| "loss": 2.2133, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.725959142999502e-05, | |
| "loss": 2.1995, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.719730941704036e-05, | |
| "loss": 2.1916, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.71350274040857e-05, | |
| "loss": 2.1726, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.707274539113105e-05, | |
| "loss": 2.1621, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.7010463378176384e-05, | |
| "loss": 2.1505, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.694818136522173e-05, | |
| "loss": 2.1406, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.6885899352267066e-05, | |
| "loss": 2.1286, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.682361733931241e-05, | |
| "loss": 2.1165, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.6761335326357754e-05, | |
| "loss": 2.1072, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.669905331340309e-05, | |
| "loss": 2.098, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.6636771300448435e-05, | |
| "loss": 2.0874, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.657448928749377e-05, | |
| "loss": 2.0759, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.651220727453912e-05, | |
| "loss": 2.0631, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.644992526158446e-05, | |
| "loss": 2.0547, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.63876432486298e-05, | |
| "loss": 2.0457, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.632536123567514e-05, | |
| "loss": 2.039, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.626307922272048e-05, | |
| "loss": 2.0286, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_loss": 1.8759167194366455, | |
| "eval_runtime": 3772.4979, | |
| "eval_samples_per_second": 847.453, | |
| "eval_steps_per_second": 0.111, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.6200797209765824e-05, | |
| "loss": 2.02, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.613851519681116e-05, | |
| "loss": 2.0093, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.6076233183856505e-05, | |
| "loss": 2.0037, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.601395117090185e-05, | |
| "loss": 1.9936, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.595166915794719e-05, | |
| "loss": 1.9883, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.588938714499253e-05, | |
| "loss": 1.9801, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.582710513203787e-05, | |
| "loss": 1.9706, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.576482311908321e-05, | |
| "loss": 1.9608, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.5702541106128556e-05, | |
| "loss": 1.9552, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.5640259093173894e-05, | |
| "loss": 1.9484, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.557797708021924e-05, | |
| "loss": 1.9403, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.5515695067264575e-05, | |
| "loss": 1.9353, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.545341305430992e-05, | |
| "loss": 1.9266, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.5391131041355263e-05, | |
| "loss": 1.9183, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.53288490284006e-05, | |
| "loss": 1.9128, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.5266567015445945e-05, | |
| "loss": 1.9033, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.520428500249128e-05, | |
| "loss": 1.9033, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.5142002989536626e-05, | |
| "loss": 1.8912, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.5079720976581964e-05, | |
| "loss": 1.884, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.501743896362731e-05, | |
| "loss": 1.8806, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.495515695067265e-05, | |
| "loss": 1.874, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.489287493771799e-05, | |
| "loss": 1.8705, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.4830592924763333e-05, | |
| "loss": 1.8617, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.476831091180867e-05, | |
| "loss": 1.8551, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.4706028898854015e-05, | |
| "loss": 1.8511, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.464374688589936e-05, | |
| "loss": 1.8471, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.4581464872944696e-05, | |
| "loss": 1.8361, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.451918285999004e-05, | |
| "loss": 1.8358, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.445690084703538e-05, | |
| "loss": 1.8295, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.439461883408072e-05, | |
| "loss": 1.8226, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.4332336821126066e-05, | |
| "loss": 1.8202, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.42700548081714e-05, | |
| "loss": 1.814, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.420777279521675e-05, | |
| "loss": 1.8097, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.4145490782262085e-05, | |
| "loss": 1.8006, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.408320876930743e-05, | |
| "loss": 1.7972, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.4020926756352766e-05, | |
| "loss": 1.7909, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.395864474339811e-05, | |
| "loss": 1.7888, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.3896362730443454e-05, | |
| "loss": 1.7852, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.383408071748879e-05, | |
| "loss": 1.7806, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 4.3771798704534136e-05, | |
| "loss": 1.7741, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.370951669157947e-05, | |
| "loss": 1.7699, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.364723467862482e-05, | |
| "loss": 1.7663, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.358495266567016e-05, | |
| "loss": 1.7591, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.35226706527155e-05, | |
| "loss": 1.7592, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.346038863976084e-05, | |
| "loss": 1.7527, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.339810662680618e-05, | |
| "loss": 1.7481, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.3335824613851524e-05, | |
| "loss": 1.7421, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.327354260089686e-05, | |
| "loss": 1.7388, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.3211260587942206e-05, | |
| "loss": 1.7372, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.314897857498755e-05, | |
| "loss": 1.7322, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.308669656203289e-05, | |
| "loss": 1.7293, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.302441454907823e-05, | |
| "loss": 1.7228, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.296213253612357e-05, | |
| "loss": 1.7208, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.289985052316891e-05, | |
| "loss": 1.7209, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.283756851021426e-05, | |
| "loss": 1.7147, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.2775286497259594e-05, | |
| "loss": 1.709, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.271300448430494e-05, | |
| "loss": 1.7073, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.2650722471350276e-05, | |
| "loss": 1.7032, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 4.258844045839562e-05, | |
| "loss": 1.6995, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.2526158445440964e-05, | |
| "loss": 1.6961, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "eval_loss": 1.5560799837112427, | |
| "eval_runtime": 3764.7391, | |
| "eval_samples_per_second": 849.2, | |
| "eval_steps_per_second": 0.111, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.24638764324863e-05, | |
| "loss": 1.6951, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.2401594419531645e-05, | |
| "loss": 1.6896, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.233931240657698e-05, | |
| "loss": 1.6875, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.227703039362233e-05, | |
| "loss": 1.6824, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.2214748380667664e-05, | |
| "loss": 1.6791, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.215246636771301e-05, | |
| "loss": 1.6768, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.209018435475835e-05, | |
| "loss": 1.6697, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.202790234180369e-05, | |
| "loss": 1.6715, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.1965620328849034e-05, | |
| "loss": 1.664, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 4.190333831589437e-05, | |
| "loss": 1.6636, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.1841056302939715e-05, | |
| "loss": 1.6587, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.177877428998506e-05, | |
| "loss": 1.6541, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.17164922770304e-05, | |
| "loss": 1.6552, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.165421026407574e-05, | |
| "loss": 1.6456, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.159192825112108e-05, | |
| "loss": 1.6481, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.152964623816642e-05, | |
| "loss": 1.6425, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.1467364225211766e-05, | |
| "loss": 1.6406, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.1405082212257104e-05, | |
| "loss": 1.6374, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.134280019930245e-05, | |
| "loss": 1.6321, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.1280518186347785e-05, | |
| "loss": 1.6306, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.121823617339313e-05, | |
| "loss": 1.6305, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.115595416043847e-05, | |
| "loss": 1.6254, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 4.109367214748381e-05, | |
| "loss": 1.6239, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.1031390134529155e-05, | |
| "loss": 1.6219, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.096910812157449e-05, | |
| "loss": 1.6187, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.0906826108619836e-05, | |
| "loss": 1.614, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.0844544095665174e-05, | |
| "loss": 1.6101, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.078226208271052e-05, | |
| "loss": 1.6102, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.071998006975586e-05, | |
| "loss": 1.6081, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.06576980568012e-05, | |
| "loss": 1.6, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.059541604384654e-05, | |
| "loss": 1.602, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.053313403089188e-05, | |
| "loss": 1.5951, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 4.0470852017937225e-05, | |
| "loss": 1.595, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.040857000498256e-05, | |
| "loss": 1.5945, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.0346287992027906e-05, | |
| "loss": 1.5904, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.028400597907325e-05, | |
| "loss": 1.5918, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.022172396611859e-05, | |
| "loss": 1.5856, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.015944195316393e-05, | |
| "loss": 1.5831, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.009715994020927e-05, | |
| "loss": 1.5831, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 4.003487792725461e-05, | |
| "loss": 1.5787, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.997259591429995e-05, | |
| "loss": 1.5836, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.9910313901345295e-05, | |
| "loss": 1.5703, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.984803188839064e-05, | |
| "loss": 1.5706, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.9785749875435976e-05, | |
| "loss": 1.5717, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.972346786248132e-05, | |
| "loss": 1.5688, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.966118584952666e-05, | |
| "loss": 1.5626, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.9598903836572e-05, | |
| "loss": 1.5619, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.953662182361734e-05, | |
| "loss": 1.5606, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.947433981066268e-05, | |
| "loss": 1.5612, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.941205779770803e-05, | |
| "loss": 1.5571, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.9349775784753365e-05, | |
| "loss": 1.5539, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.928749377179871e-05, | |
| "loss": 1.5487, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.9225211758844046e-05, | |
| "loss": 1.5509, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.916292974588939e-05, | |
| "loss": 1.5489, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.9100647732934734e-05, | |
| "loss": 1.5446, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.903836571998007e-05, | |
| "loss": 1.5426, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.8976083707025416e-05, | |
| "loss": 1.5438, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.891380169407075e-05, | |
| "loss": 1.5386, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.88515196811161e-05, | |
| "loss": 1.5423, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.8789237668161435e-05, | |
| "loss": 1.5398, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "eval_loss": 1.4072346687316895, | |
| "eval_runtime": 3765.404, | |
| "eval_samples_per_second": 849.05, | |
| "eval_steps_per_second": 0.111, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.872695565520678e-05, | |
| "loss": 1.5325, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.866467364225212e-05, | |
| "loss": 1.5294, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.860239162929746e-05, | |
| "loss": 1.5304, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.8540109616342804e-05, | |
| "loss": 1.5273, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.847782760338814e-05, | |
| "loss": 1.5241, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.8415545590433486e-05, | |
| "loss": 1.5253, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.835326357747882e-05, | |
| "loss": 1.5244, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.829098156452417e-05, | |
| "loss": 1.5183, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.822869955156951e-05, | |
| "loss": 1.5203, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.816641753861485e-05, | |
| "loss": 1.5192, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.810413552566019e-05, | |
| "loss": 1.5149, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.804185351270553e-05, | |
| "loss": 1.5129, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.7979571499750874e-05, | |
| "loss": 1.5089, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.791728948679621e-05, | |
| "loss": 1.5125, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 3.7855007473841556e-05, | |
| "loss": 1.509, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 3.77927254608869e-05, | |
| "loss": 1.5044, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 3.773044344793224e-05, | |
| "loss": 1.5038, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 3.766816143497758e-05, | |
| "loss": 1.5041, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 3.760587942202292e-05, | |
| "loss": 1.5035, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 3.754359740906826e-05, | |
| "loss": 1.5, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 3.74813153961136e-05, | |
| "loss": 1.4976, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 3.7419033383158944e-05, | |
| "loss": 1.4956, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 3.735675137020429e-05, | |
| "loss": 1.4972, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 3.7294469357249626e-05, | |
| "loss": 1.4933, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 3.723218734429497e-05, | |
| "loss": 1.4929, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 3.716990533134031e-05, | |
| "loss": 1.4885, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 3.710762331838565e-05, | |
| "loss": 1.4897, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 3.704534130543099e-05, | |
| "loss": 1.4888, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 3.698305929247633e-05, | |
| "loss": 1.4871, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 3.692077727952168e-05, | |
| "loss": 1.4816, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 3.6858495266567014e-05, | |
| "loss": 1.4798, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 3.679621325361236e-05, | |
| "loss": 1.4803, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 3.6733931240657695e-05, | |
| "loss": 1.477, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 3.667164922770304e-05, | |
| "loss": 1.4763, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 3.660936721474838e-05, | |
| "loss": 1.4753, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 3.654708520179372e-05, | |
| "loss": 1.4732, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 3.6484803188839065e-05, | |
| "loss": 1.4735, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 3.64225211758844e-05, | |
| "loss": 1.4739, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 3.6360239162929747e-05, | |
| "loss": 1.4713, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 3.6297957149975084e-05, | |
| "loss": 1.4708, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 3.623567513702043e-05, | |
| "loss": 1.4662, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 3.617339312406577e-05, | |
| "loss": 1.4667, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 3.611111111111111e-05, | |
| "loss": 1.4632, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 3.6048829098156454e-05, | |
| "loss": 1.4614, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 3.598654708520179e-05, | |
| "loss": 1.4566, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 3.5924265072247135e-05, | |
| "loss": 1.4599, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 3.586198305929247e-05, | |
| "loss": 1.46, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 3.5799701046337816e-05, | |
| "loss": 1.4546, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 3.573741903338316e-05, | |
| "loss": 1.4555, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 3.56751370204285e-05, | |
| "loss": 1.4561, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 3.561285500747384e-05, | |
| "loss": 1.4547, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 3.555057299451918e-05, | |
| "loss": 1.4519, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 3.5488290981564523e-05, | |
| "loss": 1.4516, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 3.542600896860987e-05, | |
| "loss": 1.4482, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 3.5363726955655205e-05, | |
| "loss": 1.4505, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 3.530144494270055e-05, | |
| "loss": 1.4449, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 3.5239162929745886e-05, | |
| "loss": 1.4458, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 3.517688091679123e-05, | |
| "loss": 1.4447, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 3.5114598903836575e-05, | |
| "loss": 1.4413, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 3.505231689088191e-05, | |
| "loss": 1.4424, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "eval_loss": 1.3201874494552612, | |
| "eval_runtime": 3766.4667, | |
| "eval_samples_per_second": 848.81, | |
| "eval_steps_per_second": 0.111, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 3.4990034877927256e-05, | |
| "loss": 1.4434, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 3.4927752864972593e-05, | |
| "loss": 1.4395, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 3.486547085201794e-05, | |
| "loss": 1.4401, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 3.4803188839063275e-05, | |
| "loss": 1.4324, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 3.474090682610862e-05, | |
| "loss": 1.4342, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 3.467862481315396e-05, | |
| "loss": 1.4349, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 3.46163428001993e-05, | |
| "loss": 1.4306, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 3.4554060787244645e-05, | |
| "loss": 1.4331, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 3.449177877428998e-05, | |
| "loss": 1.4294, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 3.4429496761335326e-05, | |
| "loss": 1.4288, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 3.436721474838067e-05, | |
| "loss": 1.4266, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 3.430493273542601e-05, | |
| "loss": 1.4267, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 3.424265072247135e-05, | |
| "loss": 1.4264, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 3.418036870951669e-05, | |
| "loss": 1.4262, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 3.411808669656203e-05, | |
| "loss": 1.4219, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 3.405580468360738e-05, | |
| "loss": 1.4211, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 3.3993522670652714e-05, | |
| "loss": 1.4216, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 3.393124065769806e-05, | |
| "loss": 1.419, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 3.3868958644743396e-05, | |
| "loss": 1.4209, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 3.380667663178874e-05, | |
| "loss": 1.4182, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 3.374439461883408e-05, | |
| "loss": 1.417, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 3.368211260587942e-05, | |
| "loss": 1.4154, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 3.3619830592924766e-05, | |
| "loss": 1.4106, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 3.35575485799701e-05, | |
| "loss": 1.4133, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 3.349526656701545e-05, | |
| "loss": 1.4118, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 3.3432984554060784e-05, | |
| "loss": 1.4102, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 3.337070254110613e-05, | |
| "loss": 1.4133, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 3.330842052815147e-05, | |
| "loss": 1.4065, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 3.324613851519681e-05, | |
| "loss": 1.41, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 3.3183856502242154e-05, | |
| "loss": 1.4057, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 3.312157448928749e-05, | |
| "loss": 1.4048, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 3.3059292476332835e-05, | |
| "loss": 1.4031, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 3.299701046337818e-05, | |
| "loss": 1.4039, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 3.293472845042352e-05, | |
| "loss": 1.4053, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 3.287244643746886e-05, | |
| "loss": 1.4039, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 3.28101644245142e-05, | |
| "loss": 1.3993, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 3.274788241155954e-05, | |
| "loss": 1.4015, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 3.268560039860488e-05, | |
| "loss": 1.3998, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 3.2623318385650224e-05, | |
| "loss": 1.3974, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 3.256103637269557e-05, | |
| "loss": 1.3942, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 3.2498754359740905e-05, | |
| "loss": 1.3995, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 3.243647234678625e-05, | |
| "loss": 1.3976, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 3.237419033383159e-05, | |
| "loss": 1.3932, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 3.231190832087693e-05, | |
| "loss": 1.3897, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 3.2249626307922275e-05, | |
| "loss": 1.3919, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 3.218734429496761e-05, | |
| "loss": 1.3912, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 3.2125062282012957e-05, | |
| "loss": 1.3895, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 3.2062780269058294e-05, | |
| "loss": 1.3893, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 3.200049825610364e-05, | |
| "loss": 1.3901, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 3.193821624314898e-05, | |
| "loss": 1.3839, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 3.187593423019432e-05, | |
| "loss": 1.3853, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 3.1813652217239664e-05, | |
| "loss": 1.3853, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 3.1751370204285e-05, | |
| "loss": 1.3829, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 3.1689088191330345e-05, | |
| "loss": 1.3833, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 3.162680617837568e-05, | |
| "loss": 1.3822, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 3.1564524165421026e-05, | |
| "loss": 1.3796, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 3.150224215246637e-05, | |
| "loss": 1.3807, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 3.143996013951171e-05, | |
| "loss": 1.3764, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 3.137767812655705e-05, | |
| "loss": 1.3786, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 3.131539611360239e-05, | |
| "loss": 1.3776, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "eval_loss": 1.2612946033477783, | |
| "eval_runtime": 3765.9008, | |
| "eval_samples_per_second": 848.938, | |
| "eval_steps_per_second": 0.111, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 3.1253114100647733e-05, | |
| "loss": 1.3748, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 3.119083208769308e-05, | |
| "loss": 1.3744, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 3.1128550074738415e-05, | |
| "loss": 1.3776, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 3.106626806178376e-05, | |
| "loss": 1.3751, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 3.1003986048829096e-05, | |
| "loss": 1.3728, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 3.094170403587444e-05, | |
| "loss": 1.3721, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 3.087942202291978e-05, | |
| "loss": 1.3759, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 3.081714000996512e-05, | |
| "loss": 1.3731, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 3.0754857997010466e-05, | |
| "loss": 1.3736, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 3.06925759840558e-05, | |
| "loss": 1.3705, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 3.063029397110115e-05, | |
| "loss": 1.3671, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 3.0568011958146485e-05, | |
| "loss": 1.3671, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 3.050572994519183e-05, | |
| "loss": 1.3647, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 3.0443447932237173e-05, | |
| "loss": 1.3654, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 3.038116591928251e-05, | |
| "loss": 1.3648, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 3.0318883906327854e-05, | |
| "loss": 1.3665, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 3.0256601893373192e-05, | |
| "loss": 1.361, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 3.0194319880418536e-05, | |
| "loss": 1.3635, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 3.013203786746388e-05, | |
| "loss": 1.3646, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 3.0069755854509217e-05, | |
| "loss": 1.3592, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 3.000747384155456e-05, | |
| "loss": 1.3619, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 2.99451918285999e-05, | |
| "loss": 1.3653, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 2.9882909815645243e-05, | |
| "loss": 1.3559, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 2.982062780269058e-05, | |
| "loss": 1.3582, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 2.9758345789735924e-05, | |
| "loss": 1.3544, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 2.969606377678127e-05, | |
| "loss": 1.3574, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 2.9633781763826606e-05, | |
| "loss": 1.3538, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 2.957149975087195e-05, | |
| "loss": 1.3537, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 2.9509217737917287e-05, | |
| "loss": 1.3509, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 2.944693572496263e-05, | |
| "loss": 1.3511, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 2.9384653712007976e-05, | |
| "loss": 1.3508, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 2.9322371699053313e-05, | |
| "loss": 1.3503, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 2.9260089686098657e-05, | |
| "loss": 1.3487, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 2.9197807673143994e-05, | |
| "loss": 1.3453, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 2.913552566018934e-05, | |
| "loss": 1.345, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 2.9073243647234683e-05, | |
| "loss": 1.3482, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 2.901096163428002e-05, | |
| "loss": 1.3499, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 2.8948679621325364e-05, | |
| "loss": 1.3463, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 2.88863976083707e-05, | |
| "loss": 1.3464, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 2.8824115595416045e-05, | |
| "loss": 1.3478, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 2.8761833582461383e-05, | |
| "loss": 1.3403, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 2.8699551569506727e-05, | |
| "loss": 1.3387, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 2.863726955655207e-05, | |
| "loss": 1.3411, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 2.857498754359741e-05, | |
| "loss": 1.3408, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 2.8512705530642752e-05, | |
| "loss": 1.3422, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 2.845042351768809e-05, | |
| "loss": 1.3438, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 2.8388141504733434e-05, | |
| "loss": 1.3375, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 2.8325859491778778e-05, | |
| "loss": 1.3398, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 2.8263577478824115e-05, | |
| "loss": 1.3363, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 2.820129546586946e-05, | |
| "loss": 1.3354, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 2.8139013452914797e-05, | |
| "loss": 1.3382, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 2.807673143996014e-05, | |
| "loss": 1.3353, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 2.8014449427005485e-05, | |
| "loss": 1.3369, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 2.7952167414050822e-05, | |
| "loss": 1.3373, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 2.7889885401096166e-05, | |
| "loss": 1.338, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 2.7827603388141504e-05, | |
| "loss": 1.3325, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 2.7765321375186848e-05, | |
| "loss": 1.3343, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 2.7703039362232185e-05, | |
| "loss": 1.336, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 2.764075734927753e-05, | |
| "loss": 1.3327, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 2.7578475336322873e-05, | |
| "loss": 1.3319, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "eval_loss": 1.2188748121261597, | |
| "eval_runtime": 3765.9112, | |
| "eval_samples_per_second": 848.935, | |
| "eval_steps_per_second": 0.111, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 2.751619332336821e-05, | |
| "loss": 1.3288, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 2.7453911310413555e-05, | |
| "loss": 1.3313, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 2.7391629297458892e-05, | |
| "loss": 1.33, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 2.7329347284504236e-05, | |
| "loss": 1.3285, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 2.726706527154958e-05, | |
| "loss": 1.3286, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 2.7204783258594918e-05, | |
| "loss": 1.3282, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 2.7142501245640262e-05, | |
| "loss": 1.3262, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 2.70802192326856e-05, | |
| "loss": 1.3266, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 2.7017937219730943e-05, | |
| "loss": 1.3257, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 2.6955655206776288e-05, | |
| "loss": 1.3256, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 2.6893373193821625e-05, | |
| "loss": 1.3226, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 2.683109118086697e-05, | |
| "loss": 1.3211, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 2.6768809167912306e-05, | |
| "loss": 1.3228, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 2.670652715495765e-05, | |
| "loss": 1.3241, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 2.6644245142002988e-05, | |
| "loss": 1.3201, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 2.6581963129048332e-05, | |
| "loss": 1.3232, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 2.6519681116093676e-05, | |
| "loss": 1.3221, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 2.6457399103139013e-05, | |
| "loss": 1.3229, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 2.6395117090184357e-05, | |
| "loss": 1.3188, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 2.6332835077229695e-05, | |
| "loss": 1.3175, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 2.627055306427504e-05, | |
| "loss": 1.3183, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 2.6208271051320383e-05, | |
| "loss": 1.3192, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 2.614598903836572e-05, | |
| "loss": 1.3184, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 2.6083707025411064e-05, | |
| "loss": 1.3155, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 2.6021425012456402e-05, | |
| "loss": 1.3178, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 2.5959142999501746e-05, | |
| "loss": 1.3187, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 2.589686098654709e-05, | |
| "loss": 1.3131, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 2.5834578973592427e-05, | |
| "loss": 1.3139, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 2.577229696063777e-05, | |
| "loss": 1.3152, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 2.571001494768311e-05, | |
| "loss": 1.3124, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 2.5647732934728453e-05, | |
| "loss": 1.3139, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 2.558545092177379e-05, | |
| "loss": 1.3136, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 2.5523168908819134e-05, | |
| "loss": 1.3107, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 2.546088689586448e-05, | |
| "loss": 1.3136, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 2.5398604882909816e-05, | |
| "loss": 1.3118, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 2.533632286995516e-05, | |
| "loss": 1.308, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 2.5274040857000497e-05, | |
| "loss": 1.3139, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 2.521175884404584e-05, | |
| "loss": 1.3063, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 2.5149476831091185e-05, | |
| "loss": 1.3063, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 2.5087194818136523e-05, | |
| "loss": 1.305, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 2.5024912805181867e-05, | |
| "loss": 1.3059, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 2.4962630792227208e-05, | |
| "loss": 1.3081, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 2.490034877927255e-05, | |
| "loss": 1.3041, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 2.483806676631789e-05, | |
| "loss": 1.3044, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 2.477578475336323e-05, | |
| "loss": 1.3076, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 2.471350274040857e-05, | |
| "loss": 1.3018, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 2.4651220727453915e-05, | |
| "loss": 1.3061, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 2.4588938714499255e-05, | |
| "loss": 1.307, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 2.4526656701544596e-05, | |
| "loss": 1.3049, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 2.4464374688589937e-05, | |
| "loss": 1.3042, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 2.4402092675635278e-05, | |
| "loss": 1.2987, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 2.4339810662680618e-05, | |
| "loss": 1.3039, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 2.4277528649725962e-05, | |
| "loss": 1.3028, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 2.4215246636771303e-05, | |
| "loss": 1.2998, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 2.4152964623816644e-05, | |
| "loss": 1.3006, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 2.4090682610861985e-05, | |
| "loss": 1.3027, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 2.4028400597907325e-05, | |
| "loss": 1.3016, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 2.396611858495267e-05, | |
| "loss": 1.3001, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 2.390383657199801e-05, | |
| "loss": 1.2998, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 2.384155455904335e-05, | |
| "loss": 1.3011, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "eval_loss": 1.189267635345459, | |
| "eval_runtime": 3765.6956, | |
| "eval_samples_per_second": 848.984, | |
| "eval_steps_per_second": 0.111, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 2.377927254608869e-05, | |
| "loss": 1.2986, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 2.3716990533134032e-05, | |
| "loss": 1.2972, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 2.3654708520179373e-05, | |
| "loss": 1.2977, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 2.3592426507224717e-05, | |
| "loss": 1.2945, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 2.3530144494270058e-05, | |
| "loss": 1.2955, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 2.34678624813154e-05, | |
| "loss": 1.2931, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 2.340558046836074e-05, | |
| "loss": 1.2941, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 2.334329845540608e-05, | |
| "loss": 1.2977, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 2.328101644245142e-05, | |
| "loss": 1.2974, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 2.3218734429496765e-05, | |
| "loss": 1.2967, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 2.3156452416542106e-05, | |
| "loss": 1.2919, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 2.3094170403587446e-05, | |
| "loss": 1.2928, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 2.3031888390632787e-05, | |
| "loss": 1.293, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 2.2969606377678128e-05, | |
| "loss": 1.2947, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 2.2907324364723472e-05, | |
| "loss": 1.2926, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 2.2845042351768813e-05, | |
| "loss": 1.2892, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 2.2782760338814153e-05, | |
| "loss": 1.2944, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 2.2720478325859494e-05, | |
| "loss": 1.2907, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 2.2658196312904835e-05, | |
| "loss": 1.2888, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 2.2595914299950176e-05, | |
| "loss": 1.2882, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 2.253363228699552e-05, | |
| "loss": 1.2868, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 2.247135027404086e-05, | |
| "loss": 1.2885, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 2.24090682610862e-05, | |
| "loss": 1.2885, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 2.2346786248131542e-05, | |
| "loss": 1.2855, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 2.2284504235176883e-05, | |
| "loss": 1.2869, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 2.2222222222222223e-05, | |
| "loss": 1.2854, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 2.2159940209267564e-05, | |
| "loss": 1.2869, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 2.2097658196312908e-05, | |
| "loss": 1.2861, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 2.203537618335825e-05, | |
| "loss": 1.2858, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 2.197309417040359e-05, | |
| "loss": 1.2863, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 2.191081215744893e-05, | |
| "loss": 1.2839, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 2.184853014449427e-05, | |
| "loss": 1.2812, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 2.1786248131539612e-05, | |
| "loss": 1.2837, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 2.1723966118584952e-05, | |
| "loss": 1.2828, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 2.1661684105630297e-05, | |
| "loss": 1.281, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.1599402092675637e-05, | |
| "loss": 1.2833, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.1537120079720978e-05, | |
| "loss": 1.2823, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.147483806676632e-05, | |
| "loss": 1.283, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.141255605381166e-05, | |
| "loss": 1.2774, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.1350274040857e-05, | |
| "loss": 1.2794, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.128799202790234e-05, | |
| "loss": 1.2784, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.1225710014947685e-05, | |
| "loss": 1.2797, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.1163428001993026e-05, | |
| "loss": 1.2771, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 2.1101145989038367e-05, | |
| "loss": 1.2746, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 2.1038863976083707e-05, | |
| "loss": 1.2825, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 2.0976581963129048e-05, | |
| "loss": 1.2783, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 2.091429995017439e-05, | |
| "loss": 1.2773, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 2.085201793721973e-05, | |
| "loss": 1.2773, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 2.0789735924265074e-05, | |
| "loss": 1.2795, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 2.0727453911310414e-05, | |
| "loss": 1.2756, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 2.0665171898355755e-05, | |
| "loss": 1.2732, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 2.0602889885401096e-05, | |
| "loss": 1.2799, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 2.0540607872446436e-05, | |
| "loss": 1.2752, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 2.0478325859491777e-05, | |
| "loss": 1.2769, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 2.041604384653712e-05, | |
| "loss": 1.2744, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 2.0353761833582462e-05, | |
| "loss": 1.2759, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 2.0291479820627803e-05, | |
| "loss": 1.2773, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 2.0229197807673143e-05, | |
| "loss": 1.2718, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 2.0166915794718484e-05, | |
| "loss": 1.2753, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 2.0104633781763825e-05, | |
| "loss": 1.2737, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "eval_loss": 1.1658238172531128, | |
| "eval_runtime": 3765.1156, | |
| "eval_samples_per_second": 849.115, | |
| "eval_steps_per_second": 0.111, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 2.004235176880917e-05, | |
| "loss": 1.2731, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 1.998006975585451e-05, | |
| "loss": 1.2779, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 1.991778774289985e-05, | |
| "loss": 1.2682, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 1.985550572994519e-05, | |
| "loss": 1.2672, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 1.9793223716990532e-05, | |
| "loss": 1.2717, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 1.9730941704035873e-05, | |
| "loss": 1.2675, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 1.9668659691081217e-05, | |
| "loss": 1.2708, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 1.9606377678126557e-05, | |
| "loss": 1.2687, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "learning_rate": 1.9544095665171898e-05, | |
| "loss": 1.2709, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "learning_rate": 1.948181365221724e-05, | |
| "loss": 1.2707, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 1.941953163926258e-05, | |
| "loss": 1.2693, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 1.9357249626307924e-05, | |
| "loss": 1.2653, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 1.9294967613353264e-05, | |
| "loss": 1.267, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 1.9232685600398605e-05, | |
| "loss": 1.2651, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 1.9170403587443946e-05, | |
| "loss": 1.2661, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 1.9108121574489287e-05, | |
| "loss": 1.2656, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 1.9045839561534627e-05, | |
| "loss": 1.2665, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 1.898355754857997e-05, | |
| "loss": 1.2666, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 1.8921275535625312e-05, | |
| "loss": 1.267, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 1.8858993522670653e-05, | |
| "loss": 1.2609, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 1.8796711509715994e-05, | |
| "loss": 1.2659, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "learning_rate": 1.8734429496761334e-05, | |
| "loss": 1.2663, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "learning_rate": 1.8672147483806675e-05, | |
| "loss": 1.2657, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 1.860986547085202e-05, | |
| "loss": 1.2639, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 1.854758345789736e-05, | |
| "loss": 1.2662, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 1.84853014449427e-05, | |
| "loss": 1.2608, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 1.842301943198804e-05, | |
| "loss": 1.2623, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 1.8360737419033382e-05, | |
| "loss": 1.2631, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "learning_rate": 1.8298455406078726e-05, | |
| "loss": 1.2613, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 1.8236173393124067e-05, | |
| "loss": 1.2637, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 1.8173891380169408e-05, | |
| "loss": 1.2591, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 1.811160936721475e-05, | |
| "loss": 1.2594, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 1.804932735426009e-05, | |
| "loss": 1.2596, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 1.798704534130543e-05, | |
| "loss": 1.2633, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 1.7924763328350774e-05, | |
| "loss": 1.2586, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 1.7862481315396115e-05, | |
| "loss": 1.2593, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 1.7800199302441455e-05, | |
| "loss": 1.2585, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 1.7737917289486796e-05, | |
| "loss": 1.2578, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 1.7675635276532137e-05, | |
| "loss": 1.2583, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 1.7613353263577478e-05, | |
| "loss": 1.2601, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 1.7551071250622822e-05, | |
| "loss": 1.2603, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "learning_rate": 1.7488789237668162e-05, | |
| "loss": 1.2571, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 1.7426507224713503e-05, | |
| "loss": 1.2565, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 1.7364225211758844e-05, | |
| "loss": 1.2592, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 1.7301943198804185e-05, | |
| "loss": 1.2577, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 1.723966118584953e-05, | |
| "loss": 1.2554, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 1.717737917289487e-05, | |
| "loss": 1.2581, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 1.711509715994021e-05, | |
| "loss": 1.2549, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 1.705281514698555e-05, | |
| "loss": 1.2565, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "learning_rate": 1.699053313403089e-05, | |
| "loss": 1.2552, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 1.6928251121076232e-05, | |
| "loss": 1.2579, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 1.6865969108121576e-05, | |
| "loss": 1.2526, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "learning_rate": 1.6803687095166917e-05, | |
| "loss": 1.2573, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 1.6741405082212258e-05, | |
| "loss": 1.2559, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 1.66791230692576e-05, | |
| "loss": 1.2574, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "learning_rate": 1.661684105630294e-05, | |
| "loss": 1.2537, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "learning_rate": 1.655455904334828e-05, | |
| "loss": 1.2549, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "learning_rate": 1.6492277030393624e-05, | |
| "loss": 1.2552, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 1.6429995017438965e-05, | |
| "loss": 1.2515, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 1.6367713004484306e-05, | |
| "loss": 1.2524, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "eval_loss": 1.1479804515838623, | |
| "eval_runtime": 3765.2288, | |
| "eval_samples_per_second": 849.089, | |
| "eval_steps_per_second": 0.111, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 1.6305430991529646e-05, | |
| "loss": 1.2528, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 1.6243148978574987e-05, | |
| "loss": 1.2518, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 1.618086696562033e-05, | |
| "loss": 1.2482, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "learning_rate": 1.6118584952665672e-05, | |
| "loss": 1.2502, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "learning_rate": 1.6056302939711013e-05, | |
| "loss": 1.2515, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 1.5994020926756353e-05, | |
| "loss": 1.2514, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "learning_rate": 1.5931738913801694e-05, | |
| "loss": 1.251, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "learning_rate": 1.5869456900847035e-05, | |
| "loss": 1.2519, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 1.580717488789238e-05, | |
| "loss": 1.2503, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "learning_rate": 1.574489287493772e-05, | |
| "loss": 1.252, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "learning_rate": 1.568261086198306e-05, | |
| "loss": 1.2499, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 1.56203288490284e-05, | |
| "loss": 1.2458, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 1.5558046836073742e-05, | |
| "loss": 1.2474, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 1.5495764823119083e-05, | |
| "loss": 1.2494, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 1.5433482810164427e-05, | |
| "loss": 1.2502, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 1.5371200797209767e-05, | |
| "loss": 1.2502, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 1.5308918784255108e-05, | |
| "loss": 1.2509, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 1.5246636771300449e-05, | |
| "loss": 1.2491, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 1.518435475834579e-05, | |
| "loss": 1.2499, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "learning_rate": 1.512207274539113e-05, | |
| "loss": 1.244, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "learning_rate": 1.5059790732436474e-05, | |
| "loss": 1.2428, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 1.4997508719481815e-05, | |
| "loss": 1.2492, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "learning_rate": 1.4935226706527156e-05, | |
| "loss": 1.2488, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "learning_rate": 1.4872944693572497e-05, | |
| "loss": 1.2476, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 1.4810662680617837e-05, | |
| "loss": 1.2468, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "learning_rate": 1.4748380667663181e-05, | |
| "loss": 1.2453, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "learning_rate": 1.4686098654708522e-05, | |
| "loss": 1.2441, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 1.4623816641753863e-05, | |
| "loss": 1.2474, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 1.4561534628799204e-05, | |
| "loss": 1.2443, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 1.4499252615844544e-05, | |
| "loss": 1.2447, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 1.4436970602889885e-05, | |
| "loss": 1.2432, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 1.437468858993523e-05, | |
| "loss": 1.2434, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 1.431240657698057e-05, | |
| "loss": 1.2447, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 1.425012456402591e-05, | |
| "loss": 1.2442, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "learning_rate": 1.4187842551071251e-05, | |
| "loss": 1.2462, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 1.4125560538116592e-05, | |
| "loss": 1.245, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 1.4063278525161933e-05, | |
| "loss": 1.2461, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 1.4000996512207277e-05, | |
| "loss": 1.2462, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "learning_rate": 1.3938714499252618e-05, | |
| "loss": 1.2435, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "learning_rate": 1.3876432486297958e-05, | |
| "loss": 1.2461, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 1.3814150473343299e-05, | |
| "loss": 1.2398, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 1.375186846038864e-05, | |
| "loss": 1.2402, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "learning_rate": 1.3689586447433982e-05, | |
| "loss": 1.2427, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 1.3627304434479323e-05, | |
| "loss": 1.2396, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 1.3565022421524665e-05, | |
| "loss": 1.2373, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 1.3502740408570006e-05, | |
| "loss": 1.2422, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 1.3440458395615347e-05, | |
| "loss": 1.2388, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 1.3378176382660688e-05, | |
| "loss": 1.2424, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "learning_rate": 1.331589436970603e-05, | |
| "loss": 1.2402, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "learning_rate": 1.325361235675137e-05, | |
| "loss": 1.2377, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "learning_rate": 1.3191330343796713e-05, | |
| "loss": 1.2408, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "learning_rate": 1.3129048330842054e-05, | |
| "loss": 1.2405, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "learning_rate": 1.3066766317887395e-05, | |
| "loss": 1.2373, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 1.3004484304932735e-05, | |
| "loss": 1.2391, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "learning_rate": 1.2942202291978078e-05, | |
| "loss": 1.2409, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "learning_rate": 1.2879920279023418e-05, | |
| "loss": 1.2403, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "learning_rate": 1.281763826606876e-05, | |
| "loss": 1.2382, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "learning_rate": 1.2755356253114102e-05, | |
| "loss": 1.2396, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "learning_rate": 1.2693074240159442e-05, | |
| "loss": 1.2376, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 1.2630792227204785e-05, | |
| "loss": 1.2388, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "eval_loss": 1.135990023612976, | |
| "eval_runtime": 3765.93, | |
| "eval_samples_per_second": 848.931, | |
| "eval_steps_per_second": 0.111, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 1.2568510214250125e-05, | |
| "loss": 1.2378, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "learning_rate": 1.2506228201295466e-05, | |
| "loss": 1.2371, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "learning_rate": 1.2443946188340807e-05, | |
| "loss": 1.2379, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "learning_rate": 1.2381664175386148e-05, | |
| "loss": 1.2392, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "learning_rate": 1.231938216243149e-05, | |
| "loss": 1.2374, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "learning_rate": 1.225710014947683e-05, | |
| "loss": 1.2385, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "learning_rate": 1.2194818136522173e-05, | |
| "loss": 1.2375, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 1.2132536123567514e-05, | |
| "loss": 1.2386, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 1.2070254110612855e-05, | |
| "loss": 1.2368, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "learning_rate": 1.2007972097658197e-05, | |
| "loss": 1.2335, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "learning_rate": 1.1945690084703538e-05, | |
| "loss": 1.2341, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "learning_rate": 1.1883408071748879e-05, | |
| "loss": 1.2366, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "learning_rate": 1.1821126058794221e-05, | |
| "loss": 1.2334, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "learning_rate": 1.1758844045839562e-05, | |
| "loss": 1.2355, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "learning_rate": 1.1696562032884902e-05, | |
| "loss": 1.2353, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 1.1634280019930245e-05, | |
| "loss": 1.2363, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 1.1571998006975586e-05, | |
| "loss": 1.2348, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "learning_rate": 1.1509715994020926e-05, | |
| "loss": 1.2334, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "learning_rate": 1.1447433981066269e-05, | |
| "loss": 1.2383, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "learning_rate": 1.138515196811161e-05, | |
| "loss": 1.2331, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "learning_rate": 1.132286995515695e-05, | |
| "loss": 1.2358, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "learning_rate": 1.1260587942202293e-05, | |
| "loss": 1.2378, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 1.1198305929247633e-05, | |
| "loss": 1.2354, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "learning_rate": 1.1136023916292976e-05, | |
| "loss": 1.2345, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "learning_rate": 1.1073741903338316e-05, | |
| "loss": 1.2308, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "learning_rate": 1.1011459890383657e-05, | |
| "loss": 1.2326, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "learning_rate": 1.0949177877429e-05, | |
| "loss": 1.2352, | |
| "step": 62700 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "learning_rate": 1.088689586447434e-05, | |
| "loss": 1.2328, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "learning_rate": 1.0824613851519681e-05, | |
| "loss": 1.2305, | |
| "step": 62900 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "learning_rate": 1.0762331838565023e-05, | |
| "loss": 1.2327, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "learning_rate": 1.0700049825610364e-05, | |
| "loss": 1.2314, | |
| "step": 63100 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "learning_rate": 1.0637767812655705e-05, | |
| "loss": 1.2349, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "learning_rate": 1.0575485799701047e-05, | |
| "loss": 1.2324, | |
| "step": 63300 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "learning_rate": 1.0513203786746388e-05, | |
| "loss": 1.2324, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "learning_rate": 1.0450921773791729e-05, | |
| "loss": 1.2311, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "learning_rate": 1.0388639760837071e-05, | |
| "loss": 1.2321, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "learning_rate": 1.0326357747882412e-05, | |
| "loss": 1.2327, | |
| "step": 63700 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "learning_rate": 1.0264075734927753e-05, | |
| "loss": 1.2342, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 1.0201793721973095e-05, | |
| "loss": 1.2334, | |
| "step": 63900 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "learning_rate": 1.0139511709018436e-05, | |
| "loss": 1.2321, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "learning_rate": 1.0077229696063776e-05, | |
| "loss": 1.2332, | |
| "step": 64100 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 1.0014947683109119e-05, | |
| "loss": 1.2331, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 9.95266567015446e-06, | |
| "loss": 1.2375, | |
| "step": 64300 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "learning_rate": 9.890383657199802e-06, | |
| "loss": 1.2312, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "learning_rate": 9.828101644245143e-06, | |
| "loss": 1.228, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "learning_rate": 9.765819631290484e-06, | |
| "loss": 1.229, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "learning_rate": 9.703537618335826e-06, | |
| "loss": 1.2275, | |
| "step": 64700 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 9.641255605381167e-06, | |
| "loss": 1.228, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 9.578973592426507e-06, | |
| "loss": 1.2293, | |
| "step": 64900 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "learning_rate": 9.51669157947185e-06, | |
| "loss": 1.2284, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "learning_rate": 9.45440956651719e-06, | |
| "loss": 1.231, | |
| "step": 65100 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "learning_rate": 9.392127553562531e-06, | |
| "loss": 1.2283, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "learning_rate": 9.329845540607874e-06, | |
| "loss": 1.2283, | |
| "step": 65300 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "learning_rate": 9.267563527653214e-06, | |
| "loss": 1.2274, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 9.205281514698555e-06, | |
| "loss": 1.2284, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "learning_rate": 9.142999501743898e-06, | |
| "loss": 1.2257, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "learning_rate": 9.080717488789238e-06, | |
| "loss": 1.2309, | |
| "step": 65700 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "learning_rate": 9.018435475834579e-06, | |
| "loss": 1.2262, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "learning_rate": 8.956153462879921e-06, | |
| "loss": 1.2296, | |
| "step": 65900 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "learning_rate": 8.893871449925262e-06, | |
| "loss": 1.2269, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "eval_loss": 1.1256771087646484, | |
| "eval_runtime": 3766.7778, | |
| "eval_samples_per_second": 848.74, | |
| "eval_steps_per_second": 0.111, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "learning_rate": 8.831589436970605e-06, | |
| "loss": 1.226, | |
| "step": 66100 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "learning_rate": 8.769307424015945e-06, | |
| "loss": 1.225, | |
| "step": 66200 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 8.707025411061286e-06, | |
| "loss": 1.2287, | |
| "step": 66300 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "learning_rate": 8.644743398106628e-06, | |
| "loss": 1.2269, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "learning_rate": 8.582461385151969e-06, | |
| "loss": 1.2251, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "learning_rate": 8.52017937219731e-06, | |
| "loss": 1.2281, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "learning_rate": 8.457897359242652e-06, | |
| "loss": 1.2278, | |
| "step": 66700 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 8.395615346287993e-06, | |
| "loss": 1.2263, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 1.2243, | |
| "step": 66900 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "learning_rate": 8.271051320378676e-06, | |
| "loss": 1.2246, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "learning_rate": 8.208769307424017e-06, | |
| "loss": 1.228, | |
| "step": 67100 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "learning_rate": 8.146487294469358e-06, | |
| "loss": 1.225, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "learning_rate": 8.0842052815147e-06, | |
| "loss": 1.2274, | |
| "step": 67300 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "learning_rate": 8.02192326856004e-06, | |
| "loss": 1.2275, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "learning_rate": 7.959641255605381e-06, | |
| "loss": 1.2278, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 7.897359242650724e-06, | |
| "loss": 1.2289, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "learning_rate": 7.835077229696065e-06, | |
| "loss": 1.2256, | |
| "step": 67700 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "learning_rate": 7.772795216741405e-06, | |
| "loss": 1.223, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "learning_rate": 7.710513203786748e-06, | |
| "loss": 1.2265, | |
| "step": 67900 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "learning_rate": 7.648231190832088e-06, | |
| "loss": 1.2261, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "learning_rate": 7.58594917787743e-06, | |
| "loss": 1.2246, | |
| "step": 68100 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 7.523667164922771e-06, | |
| "loss": 1.2243, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 7.4613851519681115e-06, | |
| "loss": 1.2227, | |
| "step": 68300 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 7.399103139013454e-06, | |
| "loss": 1.2258, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "learning_rate": 7.336821126058795e-06, | |
| "loss": 1.2239, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "learning_rate": 7.274539113104135e-06, | |
| "loss": 1.2241, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 7.212257100149478e-06, | |
| "loss": 1.2247, | |
| "step": 68700 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 7.1499750871948185e-06, | |
| "loss": 1.2269, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "learning_rate": 7.087693074240159e-06, | |
| "loss": 1.2227, | |
| "step": 68900 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "learning_rate": 7.025411061285502e-06, | |
| "loss": 1.2259, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "learning_rate": 6.963129048330842e-06, | |
| "loss": 1.2231, | |
| "step": 69100 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "learning_rate": 6.900847035376183e-06, | |
| "loss": 1.2225, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "learning_rate": 6.8385650224215255e-06, | |
| "loss": 1.2232, | |
| "step": 69300 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "learning_rate": 6.776283009466866e-06, | |
| "loss": 1.2267, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 6.714000996512207e-06, | |
| "loss": 1.2207, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 6.651718983557549e-06, | |
| "loss": 1.2246, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "learning_rate": 6.58943697060289e-06, | |
| "loss": 1.2199, | |
| "step": 69700 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 6.5271549576482325e-06, | |
| "loss": 1.222, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 6.464872944693573e-06, | |
| "loss": 1.2247, | |
| "step": 69900 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 6.402590931738914e-06, | |
| "loss": 1.2255, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "learning_rate": 6.3403089187842556e-06, | |
| "loss": 1.221, | |
| "step": 70100 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "learning_rate": 6.278026905829597e-06, | |
| "loss": 1.2253, | |
| "step": 70200 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 6.215744892874939e-06, | |
| "loss": 1.2222, | |
| "step": 70300 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 6.1534628799202794e-06, | |
| "loss": 1.2211, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "learning_rate": 6.091180866965621e-06, | |
| "loss": 1.2221, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 6.028898854010962e-06, | |
| "loss": 1.2206, | |
| "step": 70600 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 5.966616841056303e-06, | |
| "loss": 1.2213, | |
| "step": 70700 | |
| }, | |
| { | |
| "epoch": 4.41, | |
| "learning_rate": 5.904334828101644e-06, | |
| "loss": 1.2236, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 5.842052815146986e-06, | |
| "loss": 1.223, | |
| "step": 70900 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 5.779770802192327e-06, | |
| "loss": 1.2184, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 5.717488789237668e-06, | |
| "loss": 1.2201, | |
| "step": 71100 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 5.6552067762830095e-06, | |
| "loss": 1.221, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 5.592924763328351e-06, | |
| "loss": 1.2204, | |
| "step": 71300 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "learning_rate": 5.530642750373692e-06, | |
| "loss": 1.2206, | |
| "step": 71400 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "learning_rate": 5.468360737419033e-06, | |
| "loss": 1.2195, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "learning_rate": 5.406078724464375e-06, | |
| "loss": 1.2231, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "learning_rate": 5.343796711509716e-06, | |
| "loss": 1.2222, | |
| "step": 71700 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "learning_rate": 5.281514698555057e-06, | |
| "loss": 1.2194, | |
| "step": 71800 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "learning_rate": 5.219232685600399e-06, | |
| "loss": 1.2203, | |
| "step": 71900 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "learning_rate": 5.15695067264574e-06, | |
| "loss": 1.2226, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "eval_loss": 1.1193122863769531, | |
| "eval_runtime": 3765.4633, | |
| "eval_samples_per_second": 849.036, | |
| "eval_steps_per_second": 0.111, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 5.094668659691081e-06, | |
| "loss": 1.2224, | |
| "step": 72100 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "learning_rate": 5.032386646736423e-06, | |
| "loss": 1.2179, | |
| "step": 72200 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "learning_rate": 4.970104633781764e-06, | |
| "loss": 1.2197, | |
| "step": 72300 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "learning_rate": 4.907822620827105e-06, | |
| "loss": 1.2199, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "learning_rate": 4.8455406078724465e-06, | |
| "loss": 1.2201, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "learning_rate": 4.783258594917788e-06, | |
| "loss": 1.2208, | |
| "step": 72600 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "learning_rate": 4.720976581963129e-06, | |
| "loss": 1.2179, | |
| "step": 72700 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "learning_rate": 4.65869456900847e-06, | |
| "loss": 1.2177, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "learning_rate": 4.596412556053812e-06, | |
| "loss": 1.22, | |
| "step": 72900 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "learning_rate": 4.5341305430991535e-06, | |
| "loss": 1.2178, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "learning_rate": 4.471848530144494e-06, | |
| "loss": 1.2224, | |
| "step": 73100 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "learning_rate": 4.409566517189836e-06, | |
| "loss": 1.2202, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 4.347284504235177e-06, | |
| "loss": 1.2215, | |
| "step": 73300 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 4.285002491280518e-06, | |
| "loss": 1.2227, | |
| "step": 73400 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 4.22272047832586e-06, | |
| "loss": 1.2212, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 4.160438465371201e-06, | |
| "loss": 1.2197, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "learning_rate": 4.098156452416543e-06, | |
| "loss": 1.2196, | |
| "step": 73700 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "learning_rate": 4.0358744394618836e-06, | |
| "loss": 1.2189, | |
| "step": 73800 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "learning_rate": 3.973592426507225e-06, | |
| "loss": 1.2191, | |
| "step": 73900 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "learning_rate": 3.911310413552567e-06, | |
| "loss": 1.2191, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 3.8490284005979074e-06, | |
| "loss": 1.221, | |
| "step": 74100 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 3.7867463876432486e-06, | |
| "loss": 1.2192, | |
| "step": 74200 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "learning_rate": 3.72446437468859e-06, | |
| "loss": 1.2207, | |
| "step": 74300 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "learning_rate": 3.6621823617339313e-06, | |
| "loss": 1.2151, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "learning_rate": 3.5999003487792725e-06, | |
| "loss": 1.2182, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "learning_rate": 3.537618335824614e-06, | |
| "loss": 1.2195, | |
| "step": 74600 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "learning_rate": 3.4753363228699556e-06, | |
| "loss": 1.2201, | |
| "step": 74700 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "learning_rate": 3.4130543099152963e-06, | |
| "loss": 1.2188, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "learning_rate": 3.350772296960638e-06, | |
| "loss": 1.2234, | |
| "step": 74900 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 3.2884902840059795e-06, | |
| "loss": 1.2169, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 3.22620827105132e-06, | |
| "loss": 1.2174, | |
| "step": 75100 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 3.1639262580966618e-06, | |
| "loss": 1.219, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "learning_rate": 3.1016442451420033e-06, | |
| "loss": 1.2176, | |
| "step": 75300 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 3.0393622321873445e-06, | |
| "loss": 1.219, | |
| "step": 75400 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 2.9770802192326856e-06, | |
| "loss": 1.2197, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "learning_rate": 2.9147982062780272e-06, | |
| "loss": 1.216, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "learning_rate": 2.8525161933233684e-06, | |
| "loss": 1.2199, | |
| "step": 75700 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 2.79023418036871e-06, | |
| "loss": 1.2165, | |
| "step": 75800 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "learning_rate": 2.727952167414051e-06, | |
| "loss": 1.2172, | |
| "step": 75900 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "learning_rate": 2.6656701544593922e-06, | |
| "loss": 1.2168, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 2.603388141504734e-06, | |
| "loss": 1.2162, | |
| "step": 76100 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "learning_rate": 2.541106128550075e-06, | |
| "loss": 1.2179, | |
| "step": 76200 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "learning_rate": 2.478824115595416e-06, | |
| "loss": 1.2227, | |
| "step": 76300 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "learning_rate": 2.4165421026407577e-06, | |
| "loss": 1.2172, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "learning_rate": 2.354260089686099e-06, | |
| "loss": 1.2153, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 2.29197807673144e-06, | |
| "loss": 1.2184, | |
| "step": 76600 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "learning_rate": 2.229696063776781e-06, | |
| "loss": 1.2154, | |
| "step": 76700 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "learning_rate": 2.1674140508221227e-06, | |
| "loss": 1.2177, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "learning_rate": 2.105132037867464e-06, | |
| "loss": 1.2191, | |
| "step": 76900 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 2.042850024912805e-06, | |
| "loss": 1.2147, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 1.9805680119581466e-06, | |
| "loss": 1.2168, | |
| "step": 77100 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "learning_rate": 1.9182859990034877e-06, | |
| "loss": 1.2164, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "learning_rate": 1.8560039860488293e-06, | |
| "loss": 1.2125, | |
| "step": 77300 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 1.7937219730941704e-06, | |
| "loss": 1.2177, | |
| "step": 77400 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "learning_rate": 1.7314399601395116e-06, | |
| "loss": 1.2161, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "learning_rate": 1.6691579471848532e-06, | |
| "loss": 1.2186, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "learning_rate": 1.6068759342301943e-06, | |
| "loss": 1.2223, | |
| "step": 77700 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "learning_rate": 1.5445939212755357e-06, | |
| "loss": 1.2181, | |
| "step": 77800 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "learning_rate": 1.482311908320877e-06, | |
| "loss": 1.2183, | |
| "step": 77900 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 1.4200298953662184e-06, | |
| "loss": 1.215, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "eval_loss": 1.116109848022461, | |
| "eval_runtime": 3771.6482, | |
| "eval_samples_per_second": 847.644, | |
| "eval_steps_per_second": 0.111, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 1.3577478824115595e-06, | |
| "loss": 1.2191, | |
| "step": 78100 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "learning_rate": 1.2954658694569009e-06, | |
| "loss": 1.2197, | |
| "step": 78200 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "learning_rate": 1.233183856502242e-06, | |
| "loss": 1.2189, | |
| "step": 78300 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "learning_rate": 1.1709018435475834e-06, | |
| "loss": 1.2182, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "learning_rate": 1.1086198305929248e-06, | |
| "loss": 1.216, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "learning_rate": 1.0463378176382661e-06, | |
| "loss": 1.2162, | |
| "step": 78600 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "learning_rate": 9.840558046836075e-07, | |
| "loss": 1.2144, | |
| "step": 78700 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "learning_rate": 9.217737917289488e-07, | |
| "loss": 1.2152, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "learning_rate": 8.5949177877429e-07, | |
| "loss": 1.2146, | |
| "step": 78900 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "learning_rate": 7.972097658196313e-07, | |
| "loss": 1.2185, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "learning_rate": 7.349277528649726e-07, | |
| "loss": 1.2179, | |
| "step": 79100 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "learning_rate": 6.72645739910314e-07, | |
| "loss": 1.2167, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "learning_rate": 6.103637269556552e-07, | |
| "loss": 1.2201, | |
| "step": 79300 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "learning_rate": 5.480817140009966e-07, | |
| "loss": 1.2175, | |
| "step": 79400 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "learning_rate": 4.857997010463378e-07, | |
| "loss": 1.2193, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 1.90146985550573e-05, | |
| "loss": 1.2172, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 1.897577229696064e-05, | |
| "loss": 1.2198, | |
| "step": 79700 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "learning_rate": 1.8936846038863976e-05, | |
| "loss": 1.2203, | |
| "step": 79800 | |
| }, | |
| { | |
| "epoch": 4.98, | |
| "learning_rate": 1.8897919780767316e-05, | |
| "loss": 1.2197, | |
| "step": 79900 | |
| }, | |
| { | |
| "epoch": 4.98, | |
| "learning_rate": 1.8858993522670653e-05, | |
| "loss": 1.2189, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 1.8820067264573993e-05, | |
| "loss": 1.2181, | |
| "step": 80100 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 1.878114100647733e-05, | |
| "loss": 1.2196, | |
| "step": 80200 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 1.874221474838067e-05, | |
| "loss": 1.2217, | |
| "step": 80300 | |
| }, | |
| { | |
| "epoch": 5.01, | |
| "learning_rate": 1.8703288490284006e-05, | |
| "loss": 1.2172, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 5.01, | |
| "learning_rate": 1.8664362232187347e-05, | |
| "loss": 1.2162, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "learning_rate": 1.8625435974090683e-05, | |
| "loss": 1.2177, | |
| "step": 80600 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "learning_rate": 1.8586509715994023e-05, | |
| "loss": 1.2178, | |
| "step": 80700 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "learning_rate": 1.854758345789736e-05, | |
| "loss": 1.218, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "learning_rate": 1.85086571998007e-05, | |
| "loss": 1.2185, | |
| "step": 80900 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "learning_rate": 1.8469730941704037e-05, | |
| "loss": 1.2157, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "learning_rate": 1.8430804683607373e-05, | |
| "loss": 1.2126, | |
| "step": 81100 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "learning_rate": 1.8391878425510713e-05, | |
| "loss": 1.2129, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "learning_rate": 1.835295216741405e-05, | |
| "loss": 1.2163, | |
| "step": 81300 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "learning_rate": 1.831402590931739e-05, | |
| "loss": 1.2145, | |
| "step": 81400 | |
| }, | |
| { | |
| "epoch": 5.08, | |
| "learning_rate": 1.8275099651220727e-05, | |
| "loss": 1.2161, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 5.08, | |
| "learning_rate": 1.8236173393124067e-05, | |
| "loss": 1.2158, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 5.09, | |
| "learning_rate": 1.8197247135027404e-05, | |
| "loss": 1.2124, | |
| "step": 81700 | |
| }, | |
| { | |
| "epoch": 5.09, | |
| "learning_rate": 1.8158320876930744e-05, | |
| "loss": 1.2137, | |
| "step": 81800 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "learning_rate": 1.811939461883408e-05, | |
| "loss": 1.2161, | |
| "step": 81900 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "learning_rate": 1.808046836073742e-05, | |
| "loss": 1.2128, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "learning_rate": 1.8041542102640757e-05, | |
| "loss": 1.2185, | |
| "step": 82100 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "learning_rate": 1.8002615844544097e-05, | |
| "loss": 1.2123, | |
| "step": 82200 | |
| }, | |
| { | |
| "epoch": 5.13, | |
| "learning_rate": 1.7963689586447434e-05, | |
| "loss": 1.2143, | |
| "step": 82300 | |
| }, | |
| { | |
| "epoch": 5.13, | |
| "learning_rate": 1.7924763328350774e-05, | |
| "loss": 1.2138, | |
| "step": 82400 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "learning_rate": 1.788583707025411e-05, | |
| "loss": 1.2151, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "learning_rate": 1.784691081215745e-05, | |
| "loss": 1.215, | |
| "step": 82600 | |
| }, | |
| { | |
| "epoch": 5.15, | |
| "learning_rate": 1.7807984554060787e-05, | |
| "loss": 1.2125, | |
| "step": 82700 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "learning_rate": 1.7769058295964127e-05, | |
| "loss": 1.2151, | |
| "step": 82800 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "learning_rate": 1.7730132037867464e-05, | |
| "loss": 1.2114, | |
| "step": 82900 | |
| }, | |
| { | |
| "epoch": 5.17, | |
| "learning_rate": 1.7691205779770804e-05, | |
| "loss": 1.2088, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 5.18, | |
| "learning_rate": 1.765227952167414e-05, | |
| "loss": 1.2114, | |
| "step": 83100 | |
| }, | |
| { | |
| "epoch": 5.18, | |
| "learning_rate": 1.7613353263577478e-05, | |
| "loss": 1.2137, | |
| "step": 83200 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "learning_rate": 1.7574427005480818e-05, | |
| "loss": 1.2117, | |
| "step": 83300 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "learning_rate": 1.7535500747384158e-05, | |
| "loss": 1.2145, | |
| "step": 83400 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "learning_rate": 1.7496574489287494e-05, | |
| "loss": 1.2149, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 5.21, | |
| "learning_rate": 1.745764823119083e-05, | |
| "loss": 1.2105, | |
| "step": 83600 | |
| }, | |
| { | |
| "epoch": 5.21, | |
| "learning_rate": 1.741872197309417e-05, | |
| "loss": 1.2117, | |
| "step": 83700 | |
| }, | |
| { | |
| "epoch": 5.22, | |
| "learning_rate": 1.737979571499751e-05, | |
| "loss": 1.2114, | |
| "step": 83800 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "learning_rate": 1.7340869456900848e-05, | |
| "loss": 1.2121, | |
| "step": 83900 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "learning_rate": 1.7301943198804185e-05, | |
| "loss": 1.2107, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "eval_loss": 1.1098637580871582, | |
| "eval_runtime": 3772.0412, | |
| "eval_samples_per_second": 847.556, | |
| "eval_steps_per_second": 0.111, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "learning_rate": 1.7263016940707525e-05, | |
| "loss": 1.2076, | |
| "step": 84100 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "learning_rate": 1.7224090682610865e-05, | |
| "loss": 1.2118, | |
| "step": 84200 | |
| }, | |
| { | |
| "epoch": 5.25, | |
| "learning_rate": 1.71851644245142e-05, | |
| "loss": 1.2069, | |
| "step": 84300 | |
| }, | |
| { | |
| "epoch": 5.26, | |
| "learning_rate": 1.7146238166417538e-05, | |
| "loss": 1.2099, | |
| "step": 84400 | |
| }, | |
| { | |
| "epoch": 5.26, | |
| "learning_rate": 1.7107311908320878e-05, | |
| "loss": 1.2106, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 5.27, | |
| "learning_rate": 1.7068385650224218e-05, | |
| "loss": 1.207, | |
| "step": 84600 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "learning_rate": 1.702945939212755e-05, | |
| "loss": 1.2082, | |
| "step": 84700 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "learning_rate": 1.699053313403089e-05, | |
| "loss": 1.2087, | |
| "step": 84800 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "learning_rate": 1.6951606875934232e-05, | |
| "loss": 1.2076, | |
| "step": 84900 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "learning_rate": 1.6912680617837572e-05, | |
| "loss": 1.2071, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 5.3, | |
| "learning_rate": 1.6873754359740905e-05, | |
| "loss": 1.2058, | |
| "step": 85100 | |
| }, | |
| { | |
| "epoch": 5.31, | |
| "learning_rate": 1.6834828101644245e-05, | |
| "loss": 1.2086, | |
| "step": 85200 | |
| }, | |
| { | |
| "epoch": 5.31, | |
| "learning_rate": 1.6795901843547585e-05, | |
| "loss": 1.2044, | |
| "step": 85300 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "learning_rate": 1.6756975585450925e-05, | |
| "loss": 1.2075, | |
| "step": 85400 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 1.671804932735426e-05, | |
| "loss": 1.2076, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 1.66791230692576e-05, | |
| "loss": 1.2092, | |
| "step": 85600 | |
| }, | |
| { | |
| "epoch": 5.34, | |
| "learning_rate": 1.664019681116094e-05, | |
| "loss": 1.2069, | |
| "step": 85700 | |
| }, | |
| { | |
| "epoch": 5.34, | |
| "learning_rate": 1.6601270553064275e-05, | |
| "loss": 1.2044, | |
| "step": 85800 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "learning_rate": 1.6562344294967612e-05, | |
| "loss": 1.2063, | |
| "step": 85900 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "learning_rate": 1.6523418036870952e-05, | |
| "loss": 1.2023, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "learning_rate": 1.6484491778774292e-05, | |
| "loss": 1.2016, | |
| "step": 86100 | |
| }, | |
| { | |
| "epoch": 5.37, | |
| "learning_rate": 1.644556552067763e-05, | |
| "loss": 1.2022, | |
| "step": 86200 | |
| }, | |
| { | |
| "epoch": 5.37, | |
| "learning_rate": 1.6406639262580966e-05, | |
| "loss": 1.2043, | |
| "step": 86300 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "learning_rate": 1.6367713004484306e-05, | |
| "loss": 1.2067, | |
| "step": 86400 | |
| }, | |
| { | |
| "epoch": 5.39, | |
| "learning_rate": 1.6328786746387646e-05, | |
| "loss": 1.2051, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 5.39, | |
| "learning_rate": 1.6289860488290982e-05, | |
| "loss": 1.2033, | |
| "step": 86600 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "learning_rate": 1.625093423019432e-05, | |
| "loss": 1.2025, | |
| "step": 86700 | |
| }, | |
| { | |
| "epoch": 5.41, | |
| "learning_rate": 1.621200797209766e-05, | |
| "loss": 1.2046, | |
| "step": 86800 | |
| }, | |
| { | |
| "epoch": 5.41, | |
| "learning_rate": 1.6173081714001e-05, | |
| "loss": 1.2057, | |
| "step": 86900 | |
| }, | |
| { | |
| "epoch": 5.42, | |
| "learning_rate": 1.6134155455904336e-05, | |
| "loss": 1.2048, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 5.42, | |
| "learning_rate": 1.6095229197807673e-05, | |
| "loss": 1.205, | |
| "step": 87100 | |
| }, | |
| { | |
| "epoch": 5.43, | |
| "learning_rate": 1.6056302939711013e-05, | |
| "loss": 1.205, | |
| "step": 87200 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "learning_rate": 1.6017376681614353e-05, | |
| "loss": 1.2071, | |
| "step": 87300 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "learning_rate": 1.597845042351769e-05, | |
| "loss": 1.2007, | |
| "step": 87400 | |
| }, | |
| { | |
| "epoch": 5.45, | |
| "learning_rate": 1.5939524165421026e-05, | |
| "loss": 1.2041, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 5.46, | |
| "learning_rate": 1.5900597907324366e-05, | |
| "loss": 1.2033, | |
| "step": 87600 | |
| }, | |
| { | |
| "epoch": 5.46, | |
| "learning_rate": 1.5861671649227706e-05, | |
| "loss": 1.2014, | |
| "step": 87700 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "learning_rate": 1.5822745391131043e-05, | |
| "loss": 1.2038, | |
| "step": 87800 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "learning_rate": 1.578381913303438e-05, | |
| "loss": 1.2035, | |
| "step": 87900 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "learning_rate": 1.574489287493772e-05, | |
| "loss": 1.2017, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 5.49, | |
| "learning_rate": 1.5705966616841056e-05, | |
| "loss": 1.201, | |
| "step": 88100 | |
| }, | |
| { | |
| "epoch": 5.49, | |
| "learning_rate": 1.5667040358744396e-05, | |
| "loss": 1.1996, | |
| "step": 88200 | |
| }, | |
| { | |
| "epoch": 5.5, | |
| "learning_rate": 1.5628114100647733e-05, | |
| "loss": 1.1999, | |
| "step": 88300 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "learning_rate": 1.5589187842551073e-05, | |
| "loss": 1.2003, | |
| "step": 88400 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "learning_rate": 1.555026158445441e-05, | |
| "loss": 1.2, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "learning_rate": 1.5511335326357747e-05, | |
| "loss": 1.2009, | |
| "step": 88600 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "learning_rate": 1.5472409068261087e-05, | |
| "loss": 1.2003, | |
| "step": 88700 | |
| }, | |
| { | |
| "epoch": 5.53, | |
| "learning_rate": 1.5433482810164427e-05, | |
| "loss": 1.1988, | |
| "step": 88800 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "learning_rate": 1.5394556552067763e-05, | |
| "loss": 1.1993, | |
| "step": 88900 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "learning_rate": 1.53556302939711e-05, | |
| "loss": 1.1983, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 5.55, | |
| "learning_rate": 1.531670403587444e-05, | |
| "loss": 1.2016, | |
| "step": 89100 | |
| }, | |
| { | |
| "epoch": 5.56, | |
| "learning_rate": 1.527777777777778e-05, | |
| "loss": 1.2031, | |
| "step": 89200 | |
| }, | |
| { | |
| "epoch": 5.56, | |
| "learning_rate": 1.5238851519681115e-05, | |
| "loss": 1.201, | |
| "step": 89300 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "learning_rate": 1.5199925261584455e-05, | |
| "loss": 1.1997, | |
| "step": 89400 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "learning_rate": 1.5160999003487794e-05, | |
| "loss": 1.2019, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 5.58, | |
| "learning_rate": 1.512207274539113e-05, | |
| "loss": 1.1986, | |
| "step": 89600 | |
| }, | |
| { | |
| "epoch": 5.59, | |
| "learning_rate": 1.5083146487294469e-05, | |
| "loss": 1.1964, | |
| "step": 89700 | |
| }, | |
| { | |
| "epoch": 5.59, | |
| "learning_rate": 1.5044220229197809e-05, | |
| "loss": 1.1958, | |
| "step": 89800 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "learning_rate": 1.5005293971101147e-05, | |
| "loss": 1.1984, | |
| "step": 89900 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "learning_rate": 1.4966367713004484e-05, | |
| "loss": 1.1975, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "eval_loss": 1.0976917743682861, | |
| "eval_runtime": 3764.7544, | |
| "eval_samples_per_second": 849.196, | |
| "eval_steps_per_second": 0.111, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "learning_rate": 1.4927441454907822e-05, | |
| "loss": 1.1948, | |
| "step": 90100 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "learning_rate": 1.4888515196811162e-05, | |
| "loss": 1.1941, | |
| "step": 90200 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "learning_rate": 1.48495889387145e-05, | |
| "loss": 1.1972, | |
| "step": 90300 | |
| }, | |
| { | |
| "epoch": 5.63, | |
| "learning_rate": 1.4810662680617837e-05, | |
| "loss": 1.1975, | |
| "step": 90400 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "learning_rate": 1.4771736422521176e-05, | |
| "loss": 1.1966, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "learning_rate": 1.4732810164424516e-05, | |
| "loss": 1.1958, | |
| "step": 90600 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "learning_rate": 1.4693883906327854e-05, | |
| "loss": 1.1966, | |
| "step": 90700 | |
| }, | |
| { | |
| "epoch": 5.66, | |
| "learning_rate": 1.465495764823119e-05, | |
| "loss": 1.1971, | |
| "step": 90800 | |
| }, | |
| { | |
| "epoch": 5.66, | |
| "learning_rate": 1.461603139013453e-05, | |
| "loss": 1.1962, | |
| "step": 90900 | |
| }, | |
| { | |
| "epoch": 5.67, | |
| "learning_rate": 1.457710513203787e-05, | |
| "loss": 1.1951, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 5.67, | |
| "learning_rate": 1.4538178873941208e-05, | |
| "loss": 1.1944, | |
| "step": 91100 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "learning_rate": 1.4499252615844544e-05, | |
| "loss": 1.1968, | |
| "step": 91200 | |
| }, | |
| { | |
| "epoch": 5.69, | |
| "learning_rate": 1.4460326357747883e-05, | |
| "loss": 1.1943, | |
| "step": 91300 | |
| }, | |
| { | |
| "epoch": 5.69, | |
| "learning_rate": 1.4421400099651223e-05, | |
| "loss": 1.1968, | |
| "step": 91400 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "learning_rate": 1.4382473841554558e-05, | |
| "loss": 1.1949, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 5.71, | |
| "learning_rate": 1.4343547583457898e-05, | |
| "loss": 1.196, | |
| "step": 91600 | |
| }, | |
| { | |
| "epoch": 5.71, | |
| "learning_rate": 1.4304621325361236e-05, | |
| "loss": 1.1967, | |
| "step": 91700 | |
| }, | |
| { | |
| "epoch": 5.72, | |
| "learning_rate": 1.4265695067264576e-05, | |
| "loss": 1.1923, | |
| "step": 91800 | |
| }, | |
| { | |
| "epoch": 5.72, | |
| "learning_rate": 1.4226768809167911e-05, | |
| "loss": 1.1952, | |
| "step": 91900 | |
| }, | |
| { | |
| "epoch": 5.73, | |
| "learning_rate": 1.4187842551071251e-05, | |
| "loss": 1.1949, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 1.414891629297459e-05, | |
| "loss": 1.1939, | |
| "step": 92100 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 1.410999003487793e-05, | |
| "loss": 1.1905, | |
| "step": 92200 | |
| }, | |
| { | |
| "epoch": 5.75, | |
| "learning_rate": 1.4071063776781265e-05, | |
| "loss": 1.1931, | |
| "step": 92300 | |
| }, | |
| { | |
| "epoch": 5.75, | |
| "learning_rate": 1.4032137518684605e-05, | |
| "loss": 1.192, | |
| "step": 92400 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "learning_rate": 1.3993211260587943e-05, | |
| "loss": 1.1925, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 5.77, | |
| "learning_rate": 1.3954285002491282e-05, | |
| "loss": 1.194, | |
| "step": 92600 | |
| }, | |
| { | |
| "epoch": 5.77, | |
| "learning_rate": 1.3915358744394618e-05, | |
| "loss": 1.19, | |
| "step": 92700 | |
| }, | |
| { | |
| "epoch": 5.78, | |
| "learning_rate": 1.3876432486297958e-05, | |
| "loss": 1.1951, | |
| "step": 92800 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "learning_rate": 1.3837506228201297e-05, | |
| "loss": 1.1893, | |
| "step": 92900 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "learning_rate": 1.3798579970104633e-05, | |
| "loss": 1.1939, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "learning_rate": 1.3759653712007972e-05, | |
| "loss": 1.1904, | |
| "step": 93100 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "learning_rate": 1.3720727453911312e-05, | |
| "loss": 1.1902, | |
| "step": 93200 | |
| }, | |
| { | |
| "epoch": 5.81, | |
| "learning_rate": 1.368180119581465e-05, | |
| "loss": 1.191, | |
| "step": 93300 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "learning_rate": 1.3642874937717987e-05, | |
| "loss": 1.1911, | |
| "step": 93400 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "learning_rate": 1.3603948679621325e-05, | |
| "loss": 1.1933, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 5.83, | |
| "learning_rate": 1.3565022421524665e-05, | |
| "loss": 1.1905, | |
| "step": 93600 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "learning_rate": 1.3526096163428004e-05, | |
| "loss": 1.1917, | |
| "step": 93700 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "learning_rate": 1.348716990533134e-05, | |
| "loss": 1.1926, | |
| "step": 93800 | |
| }, | |
| { | |
| "epoch": 5.85, | |
| "learning_rate": 1.3448243647234679e-05, | |
| "loss": 1.1913, | |
| "step": 93900 | |
| }, | |
| { | |
| "epoch": 5.85, | |
| "learning_rate": 1.3409317389138017e-05, | |
| "loss": 1.1902, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "learning_rate": 1.3370391131041357e-05, | |
| "loss": 1.1858, | |
| "step": 94100 | |
| }, | |
| { | |
| "epoch": 5.87, | |
| "learning_rate": 1.3331464872944694e-05, | |
| "loss": 1.1908, | |
| "step": 94200 | |
| }, | |
| { | |
| "epoch": 5.87, | |
| "learning_rate": 1.3292538614848032e-05, | |
| "loss": 1.1901, | |
| "step": 94300 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "learning_rate": 1.325361235675137e-05, | |
| "loss": 1.1905, | |
| "step": 94400 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "learning_rate": 1.321468609865471e-05, | |
| "loss": 1.19, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "learning_rate": 1.3175759840558047e-05, | |
| "loss": 1.1905, | |
| "step": 94600 | |
| }, | |
| { | |
| "epoch": 5.9, | |
| "learning_rate": 1.3136833582461386e-05, | |
| "loss": 1.1913, | |
| "step": 94700 | |
| }, | |
| { | |
| "epoch": 5.9, | |
| "learning_rate": 1.3097907324364724e-05, | |
| "loss": 1.1904, | |
| "step": 94800 | |
| }, | |
| { | |
| "epoch": 5.91, | |
| "learning_rate": 1.3058981066268061e-05, | |
| "loss": 1.1876, | |
| "step": 94900 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "learning_rate": 1.3020054808171401e-05, | |
| "loss": 1.1909, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "learning_rate": 1.298112855007474e-05, | |
| "loss": 1.1894, | |
| "step": 95100 | |
| }, | |
| { | |
| "epoch": 5.93, | |
| "learning_rate": 1.2942202291978078e-05, | |
| "loss": 1.1899, | |
| "step": 95200 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "learning_rate": 1.2903276033881414e-05, | |
| "loss": 1.1898, | |
| "step": 95300 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "learning_rate": 1.2864349775784754e-05, | |
| "loss": 1.1879, | |
| "step": 95400 | |
| }, | |
| { | |
| "epoch": 5.95, | |
| "learning_rate": 1.2825423517688093e-05, | |
| "loss": 1.1888, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 5.95, | |
| "learning_rate": 1.2786497259591431e-05, | |
| "loss": 1.1926, | |
| "step": 95600 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "learning_rate": 1.2747571001494768e-05, | |
| "loss": 1.1878, | |
| "step": 95700 | |
| }, | |
| { | |
| "epoch": 5.97, | |
| "learning_rate": 1.2708644743398106e-05, | |
| "loss": 1.1874, | |
| "step": 95800 | |
| }, | |
| { | |
| "epoch": 5.97, | |
| "learning_rate": 1.2669718485301446e-05, | |
| "loss": 1.1888, | |
| "step": 95900 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "learning_rate": 1.2630792227204785e-05, | |
| "loss": 1.1865, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "eval_loss": 1.088141918182373, | |
| "eval_runtime": 3765.4987, | |
| "eval_samples_per_second": 849.028, | |
| "eval_steps_per_second": 0.111, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "learning_rate": 1.2591865969108121e-05, | |
| "loss": 1.1876, | |
| "step": 96100 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "learning_rate": 1.255293971101146e-05, | |
| "loss": 1.1875, | |
| "step": 96200 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 1.25140134529148e-05, | |
| "loss": 1.1865, | |
| "step": 96300 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 1.2475087194818137e-05, | |
| "loss": 1.1897, | |
| "step": 96400 | |
| }, | |
| { | |
| "epoch": 6.01, | |
| "learning_rate": 1.2436160936721475e-05, | |
| "loss": 1.1864, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "learning_rate": 1.2397234678624813e-05, | |
| "loss": 1.1863, | |
| "step": 96600 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "learning_rate": 1.2358308420528152e-05, | |
| "loss": 1.1852, | |
| "step": 96700 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "learning_rate": 1.231938216243149e-05, | |
| "loss": 1.1857, | |
| "step": 96800 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "learning_rate": 1.2280455904334828e-05, | |
| "loss": 1.1899, | |
| "step": 96900 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "learning_rate": 1.2241529646238167e-05, | |
| "loss": 1.1851, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 6.05, | |
| "learning_rate": 1.2202603388141505e-05, | |
| "loss": 1.1846, | |
| "step": 97100 | |
| }, | |
| { | |
| "epoch": 6.05, | |
| "learning_rate": 1.2163677130044844e-05, | |
| "loss": 1.1887, | |
| "step": 97200 | |
| }, | |
| { | |
| "epoch": 6.06, | |
| "learning_rate": 1.2124750871948182e-05, | |
| "loss": 1.1853, | |
| "step": 97300 | |
| }, | |
| { | |
| "epoch": 6.07, | |
| "learning_rate": 1.208582461385152e-05, | |
| "loss": 1.1863, | |
| "step": 97400 | |
| }, | |
| { | |
| "epoch": 6.07, | |
| "learning_rate": 1.2046898355754859e-05, | |
| "loss": 1.1871, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "learning_rate": 1.2007972097658197e-05, | |
| "loss": 1.1867, | |
| "step": 97600 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "learning_rate": 1.1969045839561535e-05, | |
| "loss": 1.1865, | |
| "step": 97700 | |
| }, | |
| { | |
| "epoch": 6.09, | |
| "learning_rate": 1.1930119581464874e-05, | |
| "loss": 1.1821, | |
| "step": 97800 | |
| }, | |
| { | |
| "epoch": 6.1, | |
| "learning_rate": 1.1891193323368212e-05, | |
| "loss": 1.1866, | |
| "step": 97900 | |
| }, | |
| { | |
| "epoch": 6.1, | |
| "learning_rate": 1.1852267065271549e-05, | |
| "loss": 1.1868, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 6.11, | |
| "learning_rate": 1.1813340807174889e-05, | |
| "loss": 1.1832, | |
| "step": 98100 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "learning_rate": 1.1774414549078226e-05, | |
| "loss": 1.1829, | |
| "step": 98200 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "learning_rate": 1.1735488290981566e-05, | |
| "loss": 1.1857, | |
| "step": 98300 | |
| }, | |
| { | |
| "epoch": 6.13, | |
| "learning_rate": 1.1696562032884902e-05, | |
| "loss": 1.1836, | |
| "step": 98400 | |
| }, | |
| { | |
| "epoch": 6.13, | |
| "learning_rate": 1.1657635774788242e-05, | |
| "loss": 1.1825, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 6.14, | |
| "learning_rate": 1.1618709516691579e-05, | |
| "loss": 1.1847, | |
| "step": 98600 | |
| }, | |
| { | |
| "epoch": 6.15, | |
| "learning_rate": 1.157978325859492e-05, | |
| "loss": 1.1832, | |
| "step": 98700 | |
| }, | |
| { | |
| "epoch": 6.15, | |
| "learning_rate": 1.1540857000498256e-05, | |
| "loss": 1.1861, | |
| "step": 98800 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "learning_rate": 1.1501930742401596e-05, | |
| "loss": 1.1837, | |
| "step": 98900 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "learning_rate": 1.1463004484304933e-05, | |
| "loss": 1.1821, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "learning_rate": 1.1424078226208273e-05, | |
| "loss": 1.1804, | |
| "step": 99100 | |
| }, | |
| { | |
| "epoch": 6.18, | |
| "learning_rate": 1.138515196811161e-05, | |
| "loss": 1.1828, | |
| "step": 99200 | |
| }, | |
| { | |
| "epoch": 6.18, | |
| "learning_rate": 1.134622571001495e-05, | |
| "loss": 1.1824, | |
| "step": 99300 | |
| }, | |
| { | |
| "epoch": 6.19, | |
| "learning_rate": 1.1307299451918286e-05, | |
| "loss": 1.1831, | |
| "step": 99400 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "learning_rate": 1.1268373193821626e-05, | |
| "loss": 1.1832, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "learning_rate": 1.1229446935724963e-05, | |
| "loss": 1.1833, | |
| "step": 99600 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "learning_rate": 1.1190520677628301e-05, | |
| "loss": 1.1842, | |
| "step": 99700 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "learning_rate": 1.115159441953164e-05, | |
| "loss": 1.1842, | |
| "step": 99800 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "learning_rate": 1.1112668161434978e-05, | |
| "loss": 1.1823, | |
| "step": 99900 | |
| }, | |
| { | |
| "epoch": 6.23, | |
| "learning_rate": 1.1073741903338316e-05, | |
| "loss": 1.1813, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 6.23, | |
| "learning_rate": 1.1034815645241655e-05, | |
| "loss": 1.1827, | |
| "step": 100100 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "learning_rate": 1.0995889387144993e-05, | |
| "loss": 1.181, | |
| "step": 100200 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "learning_rate": 1.0956963129048332e-05, | |
| "loss": 1.1834, | |
| "step": 100300 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "learning_rate": 1.091803687095167e-05, | |
| "loss": 1.183, | |
| "step": 100400 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "learning_rate": 1.0879110612855008e-05, | |
| "loss": 1.1766, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "learning_rate": 1.0840184354758347e-05, | |
| "loss": 1.1772, | |
| "step": 100600 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "learning_rate": 1.0801258096661685e-05, | |
| "loss": 1.1813, | |
| "step": 100700 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "learning_rate": 1.0762331838565023e-05, | |
| "loss": 1.1808, | |
| "step": 100800 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "learning_rate": 1.0723405580468362e-05, | |
| "loss": 1.1793, | |
| "step": 100900 | |
| }, | |
| { | |
| "epoch": 6.29, | |
| "learning_rate": 1.06844793223717e-05, | |
| "loss": 1.1791, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 6.3, | |
| "learning_rate": 1.0645553064275039e-05, | |
| "loss": 1.1807, | |
| "step": 101100 | |
| }, | |
| { | |
| "epoch": 6.3, | |
| "learning_rate": 1.0606626806178375e-05, | |
| "loss": 1.1769, | |
| "step": 101200 | |
| }, | |
| { | |
| "epoch": 6.31, | |
| "learning_rate": 1.0567700548081715e-05, | |
| "loss": 1.1802, | |
| "step": 101300 | |
| }, | |
| { | |
| "epoch": 6.32, | |
| "learning_rate": 1.0528774289985052e-05, | |
| "loss": 1.1784, | |
| "step": 101400 | |
| }, | |
| { | |
| "epoch": 6.32, | |
| "learning_rate": 1.0489848031888392e-05, | |
| "loss": 1.1782, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "learning_rate": 1.0450921773791729e-05, | |
| "loss": 1.1773, | |
| "step": 101600 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "learning_rate": 1.0411995515695069e-05, | |
| "loss": 1.1783, | |
| "step": 101700 | |
| }, | |
| { | |
| "epoch": 6.34, | |
| "learning_rate": 1.0373069257598406e-05, | |
| "loss": 1.179, | |
| "step": 101800 | |
| }, | |
| { | |
| "epoch": 6.35, | |
| "learning_rate": 1.0334142999501744e-05, | |
| "loss": 1.1765, | |
| "step": 101900 | |
| }, | |
| { | |
| "epoch": 6.35, | |
| "learning_rate": 1.0295216741405082e-05, | |
| "loss": 1.1764, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 6.35, | |
| "eval_loss": 1.0806279182434082, | |
| "eval_runtime": 3770.9356, | |
| "eval_samples_per_second": 847.804, | |
| "eval_steps_per_second": 0.111, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "learning_rate": 1.025629048330842e-05, | |
| "loss": 1.177, | |
| "step": 102100 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "learning_rate": 1.0217364225211759e-05, | |
| "loss": 1.1794, | |
| "step": 102200 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "learning_rate": 1.0178437967115097e-05, | |
| "loss": 1.1783, | |
| "step": 102300 | |
| }, | |
| { | |
| "epoch": 6.38, | |
| "learning_rate": 1.0139511709018436e-05, | |
| "loss": 1.1807, | |
| "step": 102400 | |
| }, | |
| { | |
| "epoch": 6.38, | |
| "learning_rate": 1.0100585450921774e-05, | |
| "loss": 1.1807, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 6.39, | |
| "learning_rate": 1.0061659192825113e-05, | |
| "loss": 1.1769, | |
| "step": 102600 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "learning_rate": 1.0022732934728451e-05, | |
| "loss": 1.1777, | |
| "step": 102700 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "learning_rate": 9.98380667663179e-06, | |
| "loss": 1.1766, | |
| "step": 102800 | |
| }, | |
| { | |
| "epoch": 6.41, | |
| "learning_rate": 9.944880418535128e-06, | |
| "loss": 1.1775, | |
| "step": 102900 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "learning_rate": 9.905954160438466e-06, | |
| "loss": 1.1743, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "learning_rate": 9.867027902341804e-06, | |
| "loss": 1.1771, | |
| "step": 103100 | |
| }, | |
| { | |
| "epoch": 6.43, | |
| "learning_rate": 9.828101644245143e-06, | |
| "loss": 1.1765, | |
| "step": 103200 | |
| }, | |
| { | |
| "epoch": 6.43, | |
| "learning_rate": 9.78917538614848e-06, | |
| "loss": 1.1774, | |
| "step": 103300 | |
| }, | |
| { | |
| "epoch": 6.44, | |
| "learning_rate": 9.75024912805182e-06, | |
| "loss": 1.177, | |
| "step": 103400 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "learning_rate": 9.711322869955156e-06, | |
| "loss": 1.1752, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "learning_rate": 9.672396611858496e-06, | |
| "loss": 1.1776, | |
| "step": 103600 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "learning_rate": 9.633470353761833e-06, | |
| "loss": 1.1792, | |
| "step": 103700 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "learning_rate": 9.594544095665173e-06, | |
| "loss": 1.1748, | |
| "step": 103800 | |
| }, | |
| { | |
| "epoch": 6.47, | |
| "learning_rate": 9.55561783756851e-06, | |
| "loss": 1.1761, | |
| "step": 103900 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "learning_rate": 9.51669157947185e-06, | |
| "loss": 1.1766, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "learning_rate": 9.477765321375186e-06, | |
| "loss": 1.1788, | |
| "step": 104100 | |
| }, | |
| { | |
| "epoch": 6.49, | |
| "learning_rate": 9.438839063278527e-06, | |
| "loss": 1.1755, | |
| "step": 104200 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "learning_rate": 9.399912805181863e-06, | |
| "loss": 1.1764, | |
| "step": 104300 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "learning_rate": 9.360986547085203e-06, | |
| "loss": 1.1726, | |
| "step": 104400 | |
| }, | |
| { | |
| "epoch": 6.51, | |
| "learning_rate": 9.32206028898854e-06, | |
| "loss": 1.1769, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 6.51, | |
| "learning_rate": 9.283134030891878e-06, | |
| "loss": 1.1775, | |
| "step": 104600 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "learning_rate": 9.244207772795217e-06, | |
| "loss": 1.1732, | |
| "step": 104700 | |
| }, | |
| { | |
| "epoch": 6.53, | |
| "learning_rate": 9.205281514698555e-06, | |
| "loss": 1.175, | |
| "step": 104800 | |
| }, | |
| { | |
| "epoch": 6.53, | |
| "learning_rate": 9.166355256601893e-06, | |
| "loss": 1.1744, | |
| "step": 104900 | |
| }, | |
| { | |
| "epoch": 6.54, | |
| "learning_rate": 9.127428998505232e-06, | |
| "loss": 1.1795, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 6.55, | |
| "learning_rate": 9.08850274040857e-06, | |
| "loss": 1.1779, | |
| "step": 105100 | |
| }, | |
| { | |
| "epoch": 6.55, | |
| "learning_rate": 9.049576482311909e-06, | |
| "loss": 1.1736, | |
| "step": 105200 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "learning_rate": 9.010650224215247e-06, | |
| "loss": 1.1744, | |
| "step": 105300 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "learning_rate": 8.971723966118585e-06, | |
| "loss": 1.1723, | |
| "step": 105400 | |
| }, | |
| { | |
| "epoch": 6.57, | |
| "learning_rate": 8.932797708021924e-06, | |
| "loss": 1.1779, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 6.58, | |
| "learning_rate": 8.893871449925262e-06, | |
| "loss": 1.1728, | |
| "step": 105600 | |
| }, | |
| { | |
| "epoch": 6.58, | |
| "learning_rate": 8.8549451918286e-06, | |
| "loss": 1.1724, | |
| "step": 105700 | |
| }, | |
| { | |
| "epoch": 6.59, | |
| "learning_rate": 8.816018933731939e-06, | |
| "loss": 1.175, | |
| "step": 105800 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "learning_rate": 8.777092675635277e-06, | |
| "loss": 1.1758, | |
| "step": 105900 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "learning_rate": 8.738166417538616e-06, | |
| "loss": 1.1737, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 6.61, | |
| "learning_rate": 8.699240159441954e-06, | |
| "loss": 1.174, | |
| "step": 106100 | |
| }, | |
| { | |
| "epoch": 6.61, | |
| "learning_rate": 8.660313901345292e-06, | |
| "loss": 1.1754, | |
| "step": 106200 | |
| }, | |
| { | |
| "epoch": 6.62, | |
| "learning_rate": 8.621387643248629e-06, | |
| "loss": 1.171, | |
| "step": 106300 | |
| }, | |
| { | |
| "epoch": 6.63, | |
| "learning_rate": 8.582461385151969e-06, | |
| "loss": 1.1725, | |
| "step": 106400 | |
| }, | |
| { | |
| "epoch": 6.63, | |
| "learning_rate": 8.543535127055306e-06, | |
| "loss": 1.1707, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "learning_rate": 8.504608868958646e-06, | |
| "loss": 1.1731, | |
| "step": 106600 | |
| }, | |
| { | |
| "epoch": 6.65, | |
| "learning_rate": 8.465682610861983e-06, | |
| "loss": 1.1725, | |
| "step": 106700 | |
| }, | |
| { | |
| "epoch": 6.65, | |
| "learning_rate": 8.426756352765323e-06, | |
| "loss": 1.1718, | |
| "step": 106800 | |
| }, | |
| { | |
| "epoch": 6.66, | |
| "learning_rate": 8.38783009466866e-06, | |
| "loss": 1.1735, | |
| "step": 106900 | |
| }, | |
| { | |
| "epoch": 6.66, | |
| "learning_rate": 8.348903836572e-06, | |
| "loss": 1.1749, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "learning_rate": 8.309977578475336e-06, | |
| "loss": 1.1734, | |
| "step": 107100 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "learning_rate": 8.271051320378676e-06, | |
| "loss": 1.1726, | |
| "step": 107200 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "learning_rate": 8.232125062282013e-06, | |
| "loss": 1.1735, | |
| "step": 107300 | |
| }, | |
| { | |
| "epoch": 6.69, | |
| "learning_rate": 8.193198804185353e-06, | |
| "loss": 1.1727, | |
| "step": 107400 | |
| }, | |
| { | |
| "epoch": 6.7, | |
| "learning_rate": 8.15427254608869e-06, | |
| "loss": 1.1721, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 6.7, | |
| "learning_rate": 8.11534628799203e-06, | |
| "loss": 1.172, | |
| "step": 107600 | |
| }, | |
| { | |
| "epoch": 6.71, | |
| "learning_rate": 8.076420029895366e-06, | |
| "loss": 1.1748, | |
| "step": 107700 | |
| }, | |
| { | |
| "epoch": 6.71, | |
| "learning_rate": 8.037493771798706e-06, | |
| "loss": 1.1746, | |
| "step": 107800 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "learning_rate": 7.998567513702043e-06, | |
| "loss": 1.1713, | |
| "step": 107900 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "learning_rate": 7.959641255605381e-06, | |
| "loss": 1.1751, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "eval_loss": 1.0753653049468994, | |
| "eval_runtime": 3765.5362, | |
| "eval_samples_per_second": 849.02, | |
| "eval_steps_per_second": 0.111, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "learning_rate": 7.92071499750872e-06, | |
| "loss": 1.1738, | |
| "step": 108100 | |
| }, | |
| { | |
| "epoch": 6.74, | |
| "learning_rate": 7.881788739412058e-06, | |
| "loss": 1.1715, | |
| "step": 108200 | |
| }, | |
| { | |
| "epoch": 6.75, | |
| "learning_rate": 7.842862481315397e-06, | |
| "loss": 1.1737, | |
| "step": 108300 | |
| }, | |
| { | |
| "epoch": 6.75, | |
| "learning_rate": 7.803936223218735e-06, | |
| "loss": 1.1703, | |
| "step": 108400 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "learning_rate": 7.765009965122073e-06, | |
| "loss": 1.1739, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "learning_rate": 7.726083707025412e-06, | |
| "loss": 1.167, | |
| "step": 108600 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "learning_rate": 7.68715744892875e-06, | |
| "loss": 1.1735, | |
| "step": 108700 | |
| }, | |
| { | |
| "epoch": 6.78, | |
| "learning_rate": 7.648231190832088e-06, | |
| "loss": 1.169, | |
| "step": 108800 | |
| }, | |
| { | |
| "epoch": 6.78, | |
| "learning_rate": 7.609304932735427e-06, | |
| "loss": 1.1709, | |
| "step": 108900 | |
| }, | |
| { | |
| "epoch": 6.79, | |
| "learning_rate": 7.570378674638764e-06, | |
| "loss": 1.1723, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 6.79, | |
| "learning_rate": 7.531452416542104e-06, | |
| "loss": 1.1733, | |
| "step": 109100 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "learning_rate": 7.492526158445441e-06, | |
| "loss": 1.1717, | |
| "step": 109200 | |
| }, | |
| { | |
| "epoch": 6.81, | |
| "learning_rate": 7.45359990034878e-06, | |
| "loss": 1.1724, | |
| "step": 109300 | |
| }, | |
| { | |
| "epoch": 6.81, | |
| "learning_rate": 7.414673642252118e-06, | |
| "loss": 1.1715, | |
| "step": 109400 | |
| }, | |
| { | |
| "epoch": 6.82, | |
| "learning_rate": 7.3757473841554554e-06, | |
| "loss": 1.1696, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 6.83, | |
| "learning_rate": 7.336821126058795e-06, | |
| "loss": 1.1719, | |
| "step": 109600 | |
| }, | |
| { | |
| "epoch": 6.83, | |
| "learning_rate": 7.297894867962132e-06, | |
| "loss": 1.1712, | |
| "step": 109700 | |
| }, | |
| { | |
| "epoch": 6.84, | |
| "learning_rate": 7.258968609865471e-06, | |
| "loss": 1.1728, | |
| "step": 109800 | |
| }, | |
| { | |
| "epoch": 6.84, | |
| "learning_rate": 7.220042351768809e-06, | |
| "loss": 1.1714, | |
| "step": 109900 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "learning_rate": 7.181116093672148e-06, | |
| "loss": 1.1714, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "learning_rate": 7.142189835575486e-06, | |
| "loss": 1.1723, | |
| "step": 110100 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "learning_rate": 7.103263577478825e-06, | |
| "loss": 1.1681, | |
| "step": 110200 | |
| }, | |
| { | |
| "epoch": 6.87, | |
| "learning_rate": 7.0643373193821624e-06, | |
| "loss": 1.1716, | |
| "step": 110300 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "learning_rate": 7.025411061285502e-06, | |
| "loss": 1.169, | |
| "step": 110400 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "learning_rate": 6.986484803188839e-06, | |
| "loss": 1.168, | |
| "step": 110500 | |
| }, | |
| { | |
| "epoch": 6.89, | |
| "learning_rate": 6.947558545092178e-06, | |
| "loss": 1.1717, | |
| "step": 110600 | |
| }, | |
| { | |
| "epoch": 6.89, | |
| "learning_rate": 6.908632286995516e-06, | |
| "loss": 1.172, | |
| "step": 110700 | |
| }, | |
| { | |
| "epoch": 6.9, | |
| "learning_rate": 6.869706028898855e-06, | |
| "loss": 1.1704, | |
| "step": 110800 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "learning_rate": 6.830779770802193e-06, | |
| "loss": 1.1689, | |
| "step": 110900 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "learning_rate": 6.791853512705532e-06, | |
| "loss": 1.1655, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 6.92, | |
| "learning_rate": 6.7529272546088695e-06, | |
| "loss": 1.1639, | |
| "step": 111100 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "learning_rate": 6.714000996512207e-06, | |
| "loss": 1.1693, | |
| "step": 111200 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "learning_rate": 6.675074738415546e-06, | |
| "loss": 1.1669, | |
| "step": 111300 | |
| }, | |
| { | |
| "epoch": 6.94, | |
| "learning_rate": 6.636148480318884e-06, | |
| "loss": 1.1692, | |
| "step": 111400 | |
| }, | |
| { | |
| "epoch": 6.94, | |
| "learning_rate": 6.597222222222223e-06, | |
| "loss": 1.1657, | |
| "step": 111500 | |
| }, | |
| { | |
| "epoch": 6.95, | |
| "learning_rate": 6.5582959641255605e-06, | |
| "loss": 1.1698, | |
| "step": 111600 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "learning_rate": 6.5193697060289e-06, | |
| "loss": 1.1721, | |
| "step": 111700 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "learning_rate": 6.480443447932237e-06, | |
| "loss": 1.1694, | |
| "step": 111800 | |
| }, | |
| { | |
| "epoch": 6.97, | |
| "learning_rate": 6.4415171898355765e-06, | |
| "loss": 1.1709, | |
| "step": 111900 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "learning_rate": 6.402590931738914e-06, | |
| "loss": 1.1702, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "learning_rate": 6.363664673642253e-06, | |
| "loss": 1.1655, | |
| "step": 112100 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "learning_rate": 6.324738415545591e-06, | |
| "loss": 1.1695, | |
| "step": 112200 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "learning_rate": 6.285812157448929e-06, | |
| "loss": 1.1689, | |
| "step": 112300 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 6.2468858993522675e-06, | |
| "loss": 1.1781, | |
| "step": 112400 | |
| }, | |
| { | |
| "epoch": 7.01, | |
| "learning_rate": 6.207959641255606e-06, | |
| "loss": 1.1675, | |
| "step": 112500 | |
| }, | |
| { | |
| "epoch": 7.01, | |
| "learning_rate": 6.169033383158944e-06, | |
| "loss": 1.1665, | |
| "step": 112600 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "learning_rate": 6.130107125062283e-06, | |
| "loss": 1.1679, | |
| "step": 112700 | |
| }, | |
| { | |
| "epoch": 7.03, | |
| "learning_rate": 6.091180866965621e-06, | |
| "loss": 1.17, | |
| "step": 112800 | |
| }, | |
| { | |
| "epoch": 7.03, | |
| "learning_rate": 6.052254608868959e-06, | |
| "loss": 1.1653, | |
| "step": 112900 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "learning_rate": 6.013328350772297e-06, | |
| "loss": 1.1712, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "learning_rate": 5.974402092675635e-06, | |
| "loss": 1.1653, | |
| "step": 113100 | |
| }, | |
| { | |
| "epoch": 7.05, | |
| "learning_rate": 5.935475834578974e-06, | |
| "loss": 1.1676, | |
| "step": 113200 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "learning_rate": 5.896549576482312e-06, | |
| "loss": 1.1679, | |
| "step": 113300 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "learning_rate": 5.8576233183856504e-06, | |
| "loss": 1.1671, | |
| "step": 113400 | |
| }, | |
| { | |
| "epoch": 7.07, | |
| "learning_rate": 5.818697060288989e-06, | |
| "loss": 1.1654, | |
| "step": 113500 | |
| }, | |
| { | |
| "epoch": 7.08, | |
| "learning_rate": 5.779770802192327e-06, | |
| "loss": 1.1718, | |
| "step": 113600 | |
| }, | |
| { | |
| "epoch": 7.08, | |
| "learning_rate": 5.7408445440956656e-06, | |
| "loss": 1.1703, | |
| "step": 113700 | |
| }, | |
| { | |
| "epoch": 7.09, | |
| "learning_rate": 5.701918285999003e-06, | |
| "loss": 1.165, | |
| "step": 113800 | |
| }, | |
| { | |
| "epoch": 7.09, | |
| "learning_rate": 5.6629920279023415e-06, | |
| "loss": 1.167, | |
| "step": 113900 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "learning_rate": 5.62406576980568e-06, | |
| "loss": 1.1679, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "eval_loss": 1.070519208908081, | |
| "eval_runtime": 3765.1139, | |
| "eval_samples_per_second": 849.115, | |
| "eval_steps_per_second": 0.111, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "learning_rate": 3.308065530208667e-05, | |
| "loss": 2.3823, | |
| "step": 114100 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "learning_rate": 3.306582677912618e-05, | |
| "loss": 2.1578, | |
| "step": 114200 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "learning_rate": 3.30509982561657e-05, | |
| "loss": 2.14, | |
| "step": 114300 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "learning_rate": 3.303616973320522e-05, | |
| "loss": 2.1186, | |
| "step": 114400 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "learning_rate": 3.302134121024473e-05, | |
| "loss": 2.1023, | |
| "step": 114500 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 3.3006512687284244e-05, | |
| "loss": 2.0899, | |
| "step": 114600 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 3.299168416432376e-05, | |
| "loss": 2.0709, | |
| "step": 114700 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "learning_rate": 3.2976855641363274e-05, | |
| "loss": 2.0573, | |
| "step": 114800 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "learning_rate": 3.296202711840279e-05, | |
| "loss": 2.0477, | |
| "step": 114900 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "learning_rate": 3.2947198595442305e-05, | |
| "loss": 2.0338, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "learning_rate": 3.2932370072481824e-05, | |
| "loss": 2.0172, | |
| "step": 115100 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "learning_rate": 3.2917541549521335e-05, | |
| "loss": 2.0062, | |
| "step": 115200 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "learning_rate": 3.290271302656085e-05, | |
| "loss": 1.9953, | |
| "step": 115300 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "learning_rate": 3.2887884503600366e-05, | |
| "loss": 1.9829, | |
| "step": 115400 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "learning_rate": 3.2873055980639885e-05, | |
| "loss": 1.9714, | |
| "step": 115500 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "learning_rate": 3.2858227457679397e-05, | |
| "loss": 1.9585, | |
| "step": 115600 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "learning_rate": 3.284339893471891e-05, | |
| "loss": 1.9509, | |
| "step": 115700 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "learning_rate": 3.282857041175843e-05, | |
| "loss": 1.9385, | |
| "step": 115800 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "learning_rate": 3.2813741888797946e-05, | |
| "loss": 1.932, | |
| "step": 115900 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 3.279891336583746e-05, | |
| "loss": 1.9274, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 3.278408484287697e-05, | |
| "loss": 1.9128, | |
| "step": 116100 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "learning_rate": 3.276925631991649e-05, | |
| "loss": 1.9078, | |
| "step": 116200 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "learning_rate": 3.2754427796956e-05, | |
| "loss": 1.8988, | |
| "step": 116300 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "learning_rate": 3.273959927399552e-05, | |
| "loss": 1.8916, | |
| "step": 116400 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "learning_rate": 3.272477075103503e-05, | |
| "loss": 1.8821, | |
| "step": 116500 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "learning_rate": 3.270994222807455e-05, | |
| "loss": 1.878, | |
| "step": 116600 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "learning_rate": 3.269511370511406e-05, | |
| "loss": 1.8717, | |
| "step": 116700 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 3.268028518215357e-05, | |
| "loss": 1.8593, | |
| "step": 116800 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 3.266545665919309e-05, | |
| "loss": 1.8599, | |
| "step": 116900 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 3.265062813623261e-05, | |
| "loss": 1.8474, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "learning_rate": 3.263579961327212e-05, | |
| "loss": 1.8443, | |
| "step": 117100 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "learning_rate": 3.2620971090311634e-05, | |
| "loss": 1.8391, | |
| "step": 117200 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "learning_rate": 3.260614256735115e-05, | |
| "loss": 1.8372, | |
| "step": 117300 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "learning_rate": 3.259131404439067e-05, | |
| "loss": 1.8278, | |
| "step": 117400 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "learning_rate": 3.2576485521430184e-05, | |
| "loss": 1.8248, | |
| "step": 117500 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "learning_rate": 3.2561656998469695e-05, | |
| "loss": 1.8199, | |
| "step": 117600 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "learning_rate": 3.2546828475509214e-05, | |
| "loss": 1.814, | |
| "step": 117700 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "learning_rate": 3.253199995254873e-05, | |
| "loss": 1.813, | |
| "step": 117800 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "learning_rate": 3.2517171429588245e-05, | |
| "loss": 1.8052, | |
| "step": 117900 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "learning_rate": 3.2502342906627757e-05, | |
| "loss": 1.8019, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "learning_rate": 3.2487514383667275e-05, | |
| "loss": 1.7977, | |
| "step": 118100 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 3.247268586070679e-05, | |
| "loss": 1.7908, | |
| "step": 118200 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 3.2457857337746306e-05, | |
| "loss": 1.7882, | |
| "step": 118300 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 3.244302881478582e-05, | |
| "loss": 1.7808, | |
| "step": 118400 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "learning_rate": 3.2428200291825336e-05, | |
| "loss": 1.7811, | |
| "step": 118500 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "learning_rate": 3.241337176886485e-05, | |
| "loss": 1.7708, | |
| "step": 118600 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "learning_rate": 3.239854324590436e-05, | |
| "loss": 1.7708, | |
| "step": 118700 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "learning_rate": 3.238371472294388e-05, | |
| "loss": 1.7681, | |
| "step": 118800 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "learning_rate": 3.23688861999834e-05, | |
| "loss": 1.7638, | |
| "step": 118900 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "learning_rate": 3.235405767702291e-05, | |
| "loss": 1.763, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "eval_loss": 1.660704255104065, | |
| "eval_runtime": 5267.7174, | |
| "eval_samples_per_second": 606.907, | |
| "eval_steps_per_second": 0.395, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "learning_rate": 3.233922915406242e-05, | |
| "loss": 1.7514, | |
| "step": 119100 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "learning_rate": 3.232440063110193e-05, | |
| "loss": 1.7539, | |
| "step": 119200 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 3.230957210814146e-05, | |
| "loss": 1.748, | |
| "step": 119300 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 3.229474358518097e-05, | |
| "loss": 1.7446, | |
| "step": 119400 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 3.227991506222048e-05, | |
| "loss": 1.7415, | |
| "step": 119500 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 3.226508653926e-05, | |
| "loss": 1.7379, | |
| "step": 119600 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 3.225025801629951e-05, | |
| "loss": 1.7366, | |
| "step": 119700 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 3.223542949333903e-05, | |
| "loss": 1.7303, | |
| "step": 119800 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "learning_rate": 3.2220600970378543e-05, | |
| "loss": 1.7306, | |
| "step": 119900 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "learning_rate": 3.220577244741806e-05, | |
| "loss": 1.7261, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "learning_rate": 3.2190943924457574e-05, | |
| "loss": 1.7264, | |
| "step": 120100 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 3.2176115401497086e-05, | |
| "loss": 1.7197, | |
| "step": 120200 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 3.2161286878536605e-05, | |
| "loss": 1.7138, | |
| "step": 120300 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 3.214645835557612e-05, | |
| "loss": 1.7129, | |
| "step": 120400 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "learning_rate": 3.2131629832615635e-05, | |
| "loss": 1.712, | |
| "step": 120500 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "learning_rate": 3.211680130965515e-05, | |
| "loss": 1.708, | |
| "step": 120600 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "learning_rate": 3.210197278669466e-05, | |
| "loss": 1.7059, | |
| "step": 120700 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "learning_rate": 3.2087144263734184e-05, | |
| "loss": 1.7045, | |
| "step": 120800 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "learning_rate": 3.2072315740773696e-05, | |
| "loss": 1.6968, | |
| "step": 120900 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "learning_rate": 3.205748721781321e-05, | |
| "loss": 1.6983, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "learning_rate": 3.204265869485272e-05, | |
| "loss": 1.6883, | |
| "step": 121100 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "learning_rate": 3.2027830171892246e-05, | |
| "loss": 1.6899, | |
| "step": 121200 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "learning_rate": 3.201300164893176e-05, | |
| "loss": 1.6976, | |
| "step": 121300 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "learning_rate": 3.199817312597127e-05, | |
| "loss": 1.6896, | |
| "step": 121400 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "learning_rate": 3.198334460301078e-05, | |
| "loss": 1.6818, | |
| "step": 121500 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 3.19685160800503e-05, | |
| "loss": 1.6793, | |
| "step": 121600 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 3.195368755708982e-05, | |
| "loss": 1.6818, | |
| "step": 121700 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 3.193885903412933e-05, | |
| "loss": 1.6772, | |
| "step": 121800 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "learning_rate": 3.192403051116884e-05, | |
| "loss": 1.6714, | |
| "step": 121900 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "learning_rate": 3.190920198820836e-05, | |
| "loss": 1.6694, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 3.189437346524787e-05, | |
| "loss": 1.6712, | |
| "step": 122100 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 3.187954494228739e-05, | |
| "loss": 1.6667, | |
| "step": 122200 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 3.1864716419326903e-05, | |
| "loss": 1.6695, | |
| "step": 122300 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 3.184988789636642e-05, | |
| "loss": 1.6665, | |
| "step": 122400 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 3.1835059373405934e-05, | |
| "loss": 1.6647, | |
| "step": 122500 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 3.1820230850445446e-05, | |
| "loss": 1.6574, | |
| "step": 122600 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "learning_rate": 3.1805402327484965e-05, | |
| "loss": 1.6542, | |
| "step": 122700 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "learning_rate": 3.179057380452448e-05, | |
| "loss": 1.6567, | |
| "step": 122800 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "learning_rate": 3.1775745281563995e-05, | |
| "loss": 1.6537, | |
| "step": 122900 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 3.176091675860351e-05, | |
| "loss": 1.6494, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 3.1746088235643026e-05, | |
| "loss": 1.6515, | |
| "step": 123100 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 3.1731259712682544e-05, | |
| "loss": 1.646, | |
| "step": 123200 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "learning_rate": 3.1716431189722056e-05, | |
| "loss": 1.6456, | |
| "step": 123300 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "learning_rate": 3.170160266676157e-05, | |
| "loss": 1.6433, | |
| "step": 123400 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 3.168677414380109e-05, | |
| "loss": 1.6382, | |
| "step": 123500 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 3.16719456208406e-05, | |
| "loss": 1.6406, | |
| "step": 123600 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 3.165711709788012e-05, | |
| "loss": 1.6316, | |
| "step": 123700 | |
| }, | |
| { | |
| "epoch": 4.41, | |
| "learning_rate": 3.164228857491963e-05, | |
| "loss": 1.637, | |
| "step": 123800 | |
| }, | |
| { | |
| "epoch": 4.41, | |
| "learning_rate": 3.162746005195915e-05, | |
| "loss": 1.6322, | |
| "step": 123900 | |
| }, | |
| { | |
| "epoch": 4.41, | |
| "learning_rate": 3.161263152899866e-05, | |
| "loss": 1.6321, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 3.159780300603817e-05, | |
| "loss": 1.6289, | |
| "step": 124100 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 3.158297448307769e-05, | |
| "loss": 1.6241, | |
| "step": 124200 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 3.156814596011721e-05, | |
| "loss": 1.6247, | |
| "step": 124300 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 3.155331743715672e-05, | |
| "loss": 1.6229, | |
| "step": 124400 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 3.153848891419623e-05, | |
| "loss": 1.6192, | |
| "step": 124500 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 3.152366039123575e-05, | |
| "loss": 1.619, | |
| "step": 124600 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 3.150883186827527e-05, | |
| "loss": 1.6203, | |
| "step": 124700 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 3.149400334531478e-05, | |
| "loss": 1.6139, | |
| "step": 124800 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 3.1479174822354294e-05, | |
| "loss": 1.6154, | |
| "step": 124900 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "learning_rate": 3.146434629939381e-05, | |
| "loss": 1.6159, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "learning_rate": 3.144951777643333e-05, | |
| "loss": 1.6124, | |
| "step": 125100 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "learning_rate": 3.143468925347284e-05, | |
| "loss": 1.6126, | |
| "step": 125200 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "learning_rate": 3.1419860730512355e-05, | |
| "loss": 1.6095, | |
| "step": 125300 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "learning_rate": 3.1405032207551874e-05, | |
| "loss": 1.6051, | |
| "step": 125400 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "learning_rate": 3.1390203684591386e-05, | |
| "loss": 1.6035, | |
| "step": 125500 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "learning_rate": 3.1375375161630904e-05, | |
| "loss": 1.6025, | |
| "step": 125600 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "learning_rate": 3.1360546638670416e-05, | |
| "loss": 1.6039, | |
| "step": 125700 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "learning_rate": 3.1345718115709935e-05, | |
| "loss": 1.6036, | |
| "step": 125800 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "learning_rate": 3.133088959274945e-05, | |
| "loss": 1.5949, | |
| "step": 125900 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "learning_rate": 3.131606106978896e-05, | |
| "loss": 1.5965, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "eval_loss": 1.4935563802719116, | |
| "eval_runtime": 5265.7828, | |
| "eval_samples_per_second": 607.13, | |
| "eval_steps_per_second": 0.395, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 3.130123254682848e-05, | |
| "loss": 1.5982, | |
| "step": 126100 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 3.1286404023867996e-05, | |
| "loss": 1.5976, | |
| "step": 126200 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 3.127157550090751e-05, | |
| "loss": 1.5872, | |
| "step": 126300 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "learning_rate": 3.125674697794702e-05, | |
| "loss": 1.5846, | |
| "step": 126400 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "learning_rate": 3.124191845498654e-05, | |
| "loss": 1.5899, | |
| "step": 126500 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "learning_rate": 3.122708993202606e-05, | |
| "loss": 1.5856, | |
| "step": 126600 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "learning_rate": 3.121226140906557e-05, | |
| "loss": 1.5869, | |
| "step": 126700 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "learning_rate": 3.119743288610508e-05, | |
| "loss": 1.586, | |
| "step": 126800 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "learning_rate": 3.11826043631446e-05, | |
| "loss": 1.5838, | |
| "step": 126900 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "learning_rate": 3.116777584018411e-05, | |
| "loss": 1.5824, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "learning_rate": 3.115294731722363e-05, | |
| "loss": 1.5809, | |
| "step": 127100 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "learning_rate": 3.113811879426314e-05, | |
| "loss": 1.5782, | |
| "step": 127200 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "learning_rate": 3.112329027130266e-05, | |
| "loss": 1.5721, | |
| "step": 127300 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "learning_rate": 3.110846174834217e-05, | |
| "loss": 1.5722, | |
| "step": 127400 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "learning_rate": 3.1093633225381685e-05, | |
| "loss": 1.5741, | |
| "step": 127500 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "learning_rate": 3.10788047024212e-05, | |
| "loss": 1.5802, | |
| "step": 127600 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "learning_rate": 3.106397617946072e-05, | |
| "loss": 1.5717, | |
| "step": 127700 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "learning_rate": 3.1049147656500234e-05, | |
| "loss": 1.5707, | |
| "step": 127800 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "learning_rate": 3.1034319133539746e-05, | |
| "loss": 1.5683, | |
| "step": 127900 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "learning_rate": 3.101949061057926e-05, | |
| "loss": 1.5671, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "learning_rate": 3.100466208761878e-05, | |
| "loss": 1.5686, | |
| "step": 128100 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "learning_rate": 3.0989833564658295e-05, | |
| "loss": 1.5651, | |
| "step": 128200 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 3.097500504169781e-05, | |
| "loss": 1.5657, | |
| "step": 128300 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 3.096017651873732e-05, | |
| "loss": 1.5648, | |
| "step": 128400 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 3.094534799577684e-05, | |
| "loss": 1.564, | |
| "step": 128500 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 3.0930519472816356e-05, | |
| "loss": 1.5625, | |
| "step": 128600 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 3.091569094985587e-05, | |
| "loss": 1.5579, | |
| "step": 128700 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 3.090086242689538e-05, | |
| "loss": 1.5563, | |
| "step": 128800 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "learning_rate": 3.08860339039349e-05, | |
| "loss": 1.5579, | |
| "step": 128900 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "learning_rate": 3.087120538097441e-05, | |
| "loss": 1.5584, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "learning_rate": 3.085637685801393e-05, | |
| "loss": 1.5535, | |
| "step": 129100 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "learning_rate": 3.084154833505344e-05, | |
| "loss": 1.5497, | |
| "step": 129200 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "learning_rate": 3.082671981209296e-05, | |
| "loss": 1.5538, | |
| "step": 129300 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "learning_rate": 3.081189128913247e-05, | |
| "loss": 1.5526, | |
| "step": 129400 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "learning_rate": 3.079706276617199e-05, | |
| "loss": 1.5479, | |
| "step": 129500 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "learning_rate": 3.07822342432115e-05, | |
| "loss": 1.5457, | |
| "step": 129600 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 3.076740572025102e-05, | |
| "loss": 1.548, | |
| "step": 129700 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 3.075257719729053e-05, | |
| "loss": 1.5494, | |
| "step": 129800 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 3.0737748674330045e-05, | |
| "loss": 1.5455, | |
| "step": 129900 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "learning_rate": 3.072292015136956e-05, | |
| "loss": 1.5455, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "learning_rate": 3.070809162840908e-05, | |
| "loss": 1.5384, | |
| "step": 130100 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "learning_rate": 3.0693263105448594e-05, | |
| "loss": 1.5358, | |
| "step": 130200 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "learning_rate": 3.0678434582488106e-05, | |
| "loss": 1.5392, | |
| "step": 130300 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "learning_rate": 3.0663606059527624e-05, | |
| "loss": 1.5343, | |
| "step": 130400 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "learning_rate": 3.064877753656714e-05, | |
| "loss": 1.5395, | |
| "step": 130500 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "learning_rate": 3.0633949013606655e-05, | |
| "loss": 1.5365, | |
| "step": 130600 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "learning_rate": 3.061912049064617e-05, | |
| "loss": 1.5328, | |
| "step": 130700 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "learning_rate": 3.0604291967685685e-05, | |
| "loss": 1.535, | |
| "step": 130800 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "learning_rate": 3.05894634447252e-05, | |
| "loss": 1.5325, | |
| "step": 130900 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "learning_rate": 3.0574634921764716e-05, | |
| "loss": 1.5394, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 3.055980639880423e-05, | |
| "loss": 1.5318, | |
| "step": 131100 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 3.0544977875843747e-05, | |
| "loss": 1.5266, | |
| "step": 131200 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 3.053014935288326e-05, | |
| "loss": 1.5321, | |
| "step": 131300 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 3.0515320829922774e-05, | |
| "loss": 1.5228, | |
| "step": 131400 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 3.0500492306962292e-05, | |
| "loss": 1.5286, | |
| "step": 131500 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 3.0485663784001804e-05, | |
| "loss": 1.5237, | |
| "step": 131600 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "learning_rate": 3.047083526104132e-05, | |
| "loss": 1.5213, | |
| "step": 131700 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "learning_rate": 3.0456006738080835e-05, | |
| "loss": 1.5261, | |
| "step": 131800 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "learning_rate": 3.0441178215120347e-05, | |
| "loss": 1.5268, | |
| "step": 131900 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 3.0426349692159865e-05, | |
| "loss": 1.523, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 3.041152116919938e-05, | |
| "loss": 1.5209, | |
| "step": 132100 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 3.0396692646238896e-05, | |
| "loss": 1.5209, | |
| "step": 132200 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "learning_rate": 3.0381864123278408e-05, | |
| "loss": 1.5145, | |
| "step": 132300 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "learning_rate": 3.0367035600317923e-05, | |
| "loss": 1.5202, | |
| "step": 132400 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 3.0352207077357442e-05, | |
| "loss": 1.5181, | |
| "step": 132500 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 3.0337378554396957e-05, | |
| "loss": 1.5158, | |
| "step": 132600 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 3.032255003143647e-05, | |
| "loss": 1.512, | |
| "step": 132700 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "learning_rate": 3.0307721508475984e-05, | |
| "loss": 1.5124, | |
| "step": 132800 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "learning_rate": 3.0292892985515496e-05, | |
| "loss": 1.5116, | |
| "step": 132900 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "learning_rate": 3.0278064462555018e-05, | |
| "loss": 1.5115, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "eval_loss": 1.406484842300415, | |
| "eval_runtime": 5267.2899, | |
| "eval_samples_per_second": 606.956, | |
| "eval_steps_per_second": 0.395, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 3.026323593959453e-05, | |
| "loss": 1.5082, | |
| "step": 133100 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 3.0248407416634045e-05, | |
| "loss": 1.5074, | |
| "step": 133200 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 3.0233578893673557e-05, | |
| "loss": 1.5071, | |
| "step": 133300 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "learning_rate": 3.021875037071308e-05, | |
| "loss": 1.5138, | |
| "step": 133400 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "learning_rate": 3.020392184775259e-05, | |
| "loss": 1.5083, | |
| "step": 133500 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "learning_rate": 3.0189093324792107e-05, | |
| "loss": 1.5055, | |
| "step": 133600 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "learning_rate": 3.017426480183162e-05, | |
| "loss": 1.5071, | |
| "step": 133700 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "learning_rate": 3.0159436278871134e-05, | |
| "loss": 1.504, | |
| "step": 133800 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 3.0144607755910652e-05, | |
| "loss": 1.5002, | |
| "step": 133900 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 3.0129779232950168e-05, | |
| "loss": 1.5044, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 3.011495070998968e-05, | |
| "loss": 1.5019, | |
| "step": 134100 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "learning_rate": 3.0100122187029195e-05, | |
| "loss": 1.4975, | |
| "step": 134200 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "learning_rate": 3.008529366406871e-05, | |
| "loss": 1.5001, | |
| "step": 134300 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "learning_rate": 3.007046514110823e-05, | |
| "loss": 1.4989, | |
| "step": 134400 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "learning_rate": 3.005563661814774e-05, | |
| "loss": 1.4973, | |
| "step": 134500 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "learning_rate": 3.0040808095187256e-05, | |
| "loss": 1.5, | |
| "step": 134600 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "learning_rate": 3.002597957222677e-05, | |
| "loss": 1.4921, | |
| "step": 134700 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 3.0011151049266283e-05, | |
| "loss": 1.4924, | |
| "step": 134800 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 2.9996322526305802e-05, | |
| "loss": 1.4897, | |
| "step": 134900 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 2.9981494003345317e-05, | |
| "loss": 1.4939, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "learning_rate": 2.9966665480384832e-05, | |
| "loss": 1.4914, | |
| "step": 135100 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "learning_rate": 2.9951836957424344e-05, | |
| "loss": 1.4865, | |
| "step": 135200 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 2.993700843446386e-05, | |
| "loss": 1.4868, | |
| "step": 135300 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 2.9922179911503378e-05, | |
| "loss": 1.4868, | |
| "step": 135400 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 2.9907351388542893e-05, | |
| "loss": 1.49, | |
| "step": 135500 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "learning_rate": 2.9892522865582405e-05, | |
| "loss": 1.4842, | |
| "step": 135600 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "learning_rate": 2.987769434262192e-05, | |
| "loss": 1.4873, | |
| "step": 135700 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "learning_rate": 2.9862865819661433e-05, | |
| "loss": 1.4853, | |
| "step": 135800 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "learning_rate": 2.9848037296700955e-05, | |
| "loss": 1.4922, | |
| "step": 135900 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "learning_rate": 2.9833208773740467e-05, | |
| "loss": 1.4846, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "learning_rate": 2.9818380250779982e-05, | |
| "loss": 1.4864, | |
| "step": 136100 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "learning_rate": 2.9803551727819494e-05, | |
| "loss": 1.4808, | |
| "step": 136200 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "learning_rate": 2.978872320485901e-05, | |
| "loss": 1.4811, | |
| "step": 136300 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "learning_rate": 2.9773894681898528e-05, | |
| "loss": 1.4791, | |
| "step": 136400 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 2.9759066158938043e-05, | |
| "loss": 1.4797, | |
| "step": 136500 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 2.9744237635977558e-05, | |
| "loss": 1.4807, | |
| "step": 136600 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 2.972940911301707e-05, | |
| "loss": 1.479, | |
| "step": 136700 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "learning_rate": 2.9714580590056585e-05, | |
| "loss": 1.4794, | |
| "step": 136800 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "learning_rate": 2.9699752067096104e-05, | |
| "loss": 1.4774, | |
| "step": 136900 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "learning_rate": 2.968492354413562e-05, | |
| "loss": 1.4758, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "learning_rate": 2.967009502117513e-05, | |
| "loss": 1.4745, | |
| "step": 137100 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "learning_rate": 2.9655266498214646e-05, | |
| "loss": 1.476, | |
| "step": 137200 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "learning_rate": 2.9640437975254165e-05, | |
| "loss": 1.4696, | |
| "step": 137300 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "learning_rate": 2.962560945229368e-05, | |
| "loss": 1.47, | |
| "step": 137400 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "learning_rate": 2.9610780929333192e-05, | |
| "loss": 1.4709, | |
| "step": 137500 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "learning_rate": 2.9595952406372708e-05, | |
| "loss": 1.4681, | |
| "step": 137600 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "learning_rate": 2.958112388341222e-05, | |
| "loss": 1.4672, | |
| "step": 137700 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "learning_rate": 2.956629536045174e-05, | |
| "loss": 1.4698, | |
| "step": 137800 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "learning_rate": 2.9551466837491253e-05, | |
| "loss": 1.4667, | |
| "step": 137900 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "learning_rate": 2.953663831453077e-05, | |
| "loss": 1.4699, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "learning_rate": 2.952180979157028e-05, | |
| "loss": 1.4624, | |
| "step": 138100 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "learning_rate": 2.9506981268609796e-05, | |
| "loss": 1.4707, | |
| "step": 138200 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "learning_rate": 2.9492152745649315e-05, | |
| "loss": 1.4635, | |
| "step": 138300 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "learning_rate": 2.947732422268883e-05, | |
| "loss": 1.467, | |
| "step": 138400 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "learning_rate": 2.9462495699728342e-05, | |
| "loss": 1.4611, | |
| "step": 138500 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "learning_rate": 2.9447667176767857e-05, | |
| "loss": 1.4624, | |
| "step": 138600 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "learning_rate": 2.9432838653807372e-05, | |
| "loss": 1.465, | |
| "step": 138700 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "learning_rate": 2.941801013084689e-05, | |
| "loss": 1.4667, | |
| "step": 138800 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "learning_rate": 2.9403181607886403e-05, | |
| "loss": 1.4608, | |
| "step": 138900 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "learning_rate": 2.9388353084925918e-05, | |
| "loss": 1.46, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "learning_rate": 2.9373524561965433e-05, | |
| "loss": 1.4618, | |
| "step": 139100 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "learning_rate": 2.9358696039004945e-05, | |
| "loss": 1.4591, | |
| "step": 139200 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 2.9343867516044464e-05, | |
| "loss": 1.4572, | |
| "step": 139300 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 2.932903899308398e-05, | |
| "loss": 1.4575, | |
| "step": 139400 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 2.9314210470123495e-05, | |
| "loss": 1.4535, | |
| "step": 139500 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "learning_rate": 2.9299381947163006e-05, | |
| "loss": 1.4577, | |
| "step": 139600 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "learning_rate": 2.9284553424202522e-05, | |
| "loss": 1.4532, | |
| "step": 139700 | |
| }, | |
| { | |
| "epoch": 4.98, | |
| "learning_rate": 2.926972490124204e-05, | |
| "loss": 1.456, | |
| "step": 139800 | |
| }, | |
| { | |
| "epoch": 4.98, | |
| "learning_rate": 2.9254896378281556e-05, | |
| "loss": 1.4543, | |
| "step": 139900 | |
| }, | |
| { | |
| "epoch": 4.98, | |
| "learning_rate": 2.9240067855321068e-05, | |
| "loss": 1.4536, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 4.98, | |
| "eval_loss": 1.347744345664978, | |
| "eval_runtime": 5266.9309, | |
| "eval_samples_per_second": 606.998, | |
| "eval_steps_per_second": 0.395, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 2.9225239332360583e-05, | |
| "loss": 1.4541, | |
| "step": 140100 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 2.9210410809400095e-05, | |
| "loss": 1.4511, | |
| "step": 140200 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 2.9195582286439617e-05, | |
| "loss": 1.4538, | |
| "step": 140300 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 2.918075376347913e-05, | |
| "loss": 1.4509, | |
| "step": 140400 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 2.9165925240518644e-05, | |
| "loss": 1.446, | |
| "step": 140500 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 2.9151096717558156e-05, | |
| "loss": 1.447, | |
| "step": 140600 | |
| }, | |
| { | |
| "epoch": 5.01, | |
| "learning_rate": 2.913626819459767e-05, | |
| "loss": 1.4463, | |
| "step": 140700 | |
| }, | |
| { | |
| "epoch": 5.01, | |
| "learning_rate": 2.912143967163719e-05, | |
| "loss": 1.4456, | |
| "step": 140800 | |
| }, | |
| { | |
| "epoch": 5.01, | |
| "learning_rate": 2.9106611148676705e-05, | |
| "loss": 1.4439, | |
| "step": 140900 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "learning_rate": 2.9091782625716217e-05, | |
| "loss": 1.4504, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "learning_rate": 2.9076954102755732e-05, | |
| "loss": 1.4403, | |
| "step": 141100 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "learning_rate": 2.906212557979525e-05, | |
| "loss": 1.4411, | |
| "step": 141200 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "learning_rate": 2.9047297056834766e-05, | |
| "loss": 1.4432, | |
| "step": 141300 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "learning_rate": 2.9032468533874278e-05, | |
| "loss": 1.4474, | |
| "step": 141400 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "learning_rate": 2.9017640010913793e-05, | |
| "loss": 1.4458, | |
| "step": 141500 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "learning_rate": 2.900281148795331e-05, | |
| "loss": 1.4414, | |
| "step": 141600 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "learning_rate": 2.8987982964992827e-05, | |
| "loss": 1.4397, | |
| "step": 141700 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "learning_rate": 2.897315444203234e-05, | |
| "loss": 1.4379, | |
| "step": 141800 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "learning_rate": 2.8958325919071855e-05, | |
| "loss": 1.4366, | |
| "step": 141900 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "learning_rate": 2.894349739611137e-05, | |
| "loss": 1.4409, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "learning_rate": 2.8928668873150882e-05, | |
| "loss": 1.4328, | |
| "step": 142100 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "learning_rate": 2.89138403501904e-05, | |
| "loss": 1.4375, | |
| "step": 142200 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "learning_rate": 2.8899011827229916e-05, | |
| "loss": 1.4345, | |
| "step": 142300 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "learning_rate": 2.888418330426943e-05, | |
| "loss": 1.4348, | |
| "step": 142400 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "learning_rate": 2.8869354781308943e-05, | |
| "loss": 1.4358, | |
| "step": 142500 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "learning_rate": 2.8854526258348458e-05, | |
| "loss": 1.4339, | |
| "step": 142600 | |
| }, | |
| { | |
| "epoch": 5.08, | |
| "learning_rate": 2.8839697735387977e-05, | |
| "loss": 1.4373, | |
| "step": 142700 | |
| }, | |
| { | |
| "epoch": 5.08, | |
| "learning_rate": 2.8824869212427492e-05, | |
| "loss": 1.4298, | |
| "step": 142800 | |
| }, | |
| { | |
| "epoch": 5.09, | |
| "learning_rate": 2.8810040689467004e-05, | |
| "loss": 1.4299, | |
| "step": 142900 | |
| }, | |
| { | |
| "epoch": 5.09, | |
| "learning_rate": 2.879521216650652e-05, | |
| "loss": 1.4268, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 5.09, | |
| "learning_rate": 2.878038364354603e-05, | |
| "loss": 1.4307, | |
| "step": 143100 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "learning_rate": 2.8765555120585553e-05, | |
| "loss": 1.432, | |
| "step": 143200 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "learning_rate": 2.8750726597625065e-05, | |
| "loss": 1.4299, | |
| "step": 143300 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "learning_rate": 2.873589807466458e-05, | |
| "loss": 1.4255, | |
| "step": 143400 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "learning_rate": 2.8721069551704096e-05, | |
| "loss": 1.4291, | |
| "step": 143500 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "learning_rate": 2.8706241028743608e-05, | |
| "loss": 1.4341, | |
| "step": 143600 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "learning_rate": 2.8691412505783126e-05, | |
| "loss": 1.4292, | |
| "step": 143700 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "learning_rate": 2.867658398282264e-05, | |
| "loss": 1.4275, | |
| "step": 143800 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "learning_rate": 2.8661755459862157e-05, | |
| "loss": 1.4284, | |
| "step": 143900 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "learning_rate": 2.864692693690167e-05, | |
| "loss": 1.4261, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 5.13, | |
| "learning_rate": 2.8632098413941184e-05, | |
| "loss": 1.4226, | |
| "step": 144100 | |
| }, | |
| { | |
| "epoch": 5.13, | |
| "learning_rate": 2.8617269890980703e-05, | |
| "loss": 1.4253, | |
| "step": 144200 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "learning_rate": 2.8602441368020218e-05, | |
| "loss": 1.4203, | |
| "step": 144300 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "learning_rate": 2.858761284505973e-05, | |
| "loss": 1.4256, | |
| "step": 144400 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "learning_rate": 2.8572784322099245e-05, | |
| "loss": 1.421, | |
| "step": 144500 | |
| }, | |
| { | |
| "epoch": 5.15, | |
| "learning_rate": 2.8557955799138757e-05, | |
| "loss": 1.4241, | |
| "step": 144600 | |
| }, | |
| { | |
| "epoch": 5.15, | |
| "learning_rate": 2.854312727617828e-05, | |
| "loss": 1.4224, | |
| "step": 144700 | |
| }, | |
| { | |
| "epoch": 5.15, | |
| "learning_rate": 2.852829875321779e-05, | |
| "loss": 1.4218, | |
| "step": 144800 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "learning_rate": 2.8513470230257306e-05, | |
| "loss": 1.421, | |
| "step": 144900 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "learning_rate": 2.8498641707296818e-05, | |
| "loss": 1.4184, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "learning_rate": 2.848381318433634e-05, | |
| "loss": 1.4194, | |
| "step": 145100 | |
| }, | |
| { | |
| "epoch": 5.17, | |
| "learning_rate": 2.8468984661375852e-05, | |
| "loss": 1.4187, | |
| "step": 145200 | |
| }, | |
| { | |
| "epoch": 5.17, | |
| "learning_rate": 2.8454156138415367e-05, | |
| "loss": 1.4169, | |
| "step": 145300 | |
| }, | |
| { | |
| "epoch": 5.17, | |
| "learning_rate": 2.843932761545488e-05, | |
| "loss": 1.4151, | |
| "step": 145400 | |
| }, | |
| { | |
| "epoch": 5.18, | |
| "learning_rate": 2.8424499092494395e-05, | |
| "loss": 1.4164, | |
| "step": 145500 | |
| }, | |
| { | |
| "epoch": 5.18, | |
| "learning_rate": 2.8409670569533913e-05, | |
| "loss": 1.4192, | |
| "step": 145600 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "learning_rate": 2.839484204657343e-05, | |
| "loss": 1.4171, | |
| "step": 145700 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "learning_rate": 2.838001352361294e-05, | |
| "loss": 1.4178, | |
| "step": 145800 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "learning_rate": 2.8365185000652456e-05, | |
| "loss": 1.4169, | |
| "step": 145900 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "learning_rate": 2.835035647769197e-05, | |
| "loss": 1.4152, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "learning_rate": 2.833552795473149e-05, | |
| "loss": 1.4179, | |
| "step": 146100 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "learning_rate": 2.8320699431771e-05, | |
| "loss": 1.4128, | |
| "step": 146200 | |
| }, | |
| { | |
| "epoch": 5.21, | |
| "learning_rate": 2.8305870908810517e-05, | |
| "loss": 1.4114, | |
| "step": 146300 | |
| }, | |
| { | |
| "epoch": 5.21, | |
| "learning_rate": 2.8291042385850032e-05, | |
| "loss": 1.4137, | |
| "step": 146400 | |
| }, | |
| { | |
| "epoch": 5.21, | |
| "learning_rate": 2.8276213862889544e-05, | |
| "loss": 1.4109, | |
| "step": 146500 | |
| }, | |
| { | |
| "epoch": 5.22, | |
| "learning_rate": 2.8261385339929063e-05, | |
| "loss": 1.4152, | |
| "step": 146600 | |
| }, | |
| { | |
| "epoch": 5.22, | |
| "learning_rate": 2.8246556816968578e-05, | |
| "loss": 1.4132, | |
| "step": 146700 | |
| }, | |
| { | |
| "epoch": 5.22, | |
| "learning_rate": 2.8231728294008093e-05, | |
| "loss": 1.4139, | |
| "step": 146800 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "learning_rate": 2.8216899771047605e-05, | |
| "loss": 1.4097, | |
| "step": 146900 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "learning_rate": 2.820207124808712e-05, | |
| "loss": 1.4096, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "eval_loss": 1.3063678741455078, | |
| "eval_runtime": 5266.6946, | |
| "eval_samples_per_second": 607.025, | |
| "eval_steps_per_second": 0.395, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "learning_rate": 2.818724272512664e-05, | |
| "loss": 1.4057, | |
| "step": 147100 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "learning_rate": 2.8172414202166154e-05, | |
| "loss": 1.4041, | |
| "step": 147200 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "learning_rate": 2.8157585679205666e-05, | |
| "loss": 1.4117, | |
| "step": 147300 | |
| }, | |
| { | |
| "epoch": 5.25, | |
| "learning_rate": 2.814275715624518e-05, | |
| "loss": 1.4044, | |
| "step": 147400 | |
| }, | |
| { | |
| "epoch": 5.25, | |
| "learning_rate": 2.8127928633284693e-05, | |
| "loss": 1.4054, | |
| "step": 147500 | |
| }, | |
| { | |
| "epoch": 5.25, | |
| "learning_rate": 2.8113100110324215e-05, | |
| "loss": 1.4058, | |
| "step": 147600 | |
| }, | |
| { | |
| "epoch": 5.26, | |
| "learning_rate": 2.8098271587363727e-05, | |
| "loss": 1.4034, | |
| "step": 147700 | |
| }, | |
| { | |
| "epoch": 5.26, | |
| "learning_rate": 2.8083443064403243e-05, | |
| "loss": 1.4057, | |
| "step": 147800 | |
| }, | |
| { | |
| "epoch": 5.26, | |
| "learning_rate": 2.8068614541442754e-05, | |
| "loss": 1.4041, | |
| "step": 147900 | |
| }, | |
| { | |
| "epoch": 5.27, | |
| "learning_rate": 2.805378601848227e-05, | |
| "loss": 1.4012, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 5.27, | |
| "learning_rate": 2.803895749552179e-05, | |
| "loss": 1.4003, | |
| "step": 148100 | |
| }, | |
| { | |
| "epoch": 5.27, | |
| "learning_rate": 2.8024128972561304e-05, | |
| "loss": 1.4017, | |
| "step": 148200 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "learning_rate": 2.8009300449600816e-05, | |
| "loss": 1.4006, | |
| "step": 148300 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "learning_rate": 2.799447192664033e-05, | |
| "loss": 1.4011, | |
| "step": 148400 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "learning_rate": 2.7979643403679846e-05, | |
| "loss": 1.3981, | |
| "step": 148500 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "learning_rate": 2.7964814880719365e-05, | |
| "loss": 1.3981, | |
| "step": 148600 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "learning_rate": 2.7949986357758877e-05, | |
| "loss": 1.4017, | |
| "step": 148700 | |
| }, | |
| { | |
| "epoch": 5.3, | |
| "learning_rate": 2.7935157834798392e-05, | |
| "loss": 1.3974, | |
| "step": 148800 | |
| }, | |
| { | |
| "epoch": 5.3, | |
| "learning_rate": 2.7920329311837907e-05, | |
| "loss": 1.399, | |
| "step": 148900 | |
| }, | |
| { | |
| "epoch": 5.3, | |
| "learning_rate": 2.7905500788877426e-05, | |
| "loss": 1.3965, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 5.31, | |
| "learning_rate": 2.7890672265916938e-05, | |
| "loss": 1.3995, | |
| "step": 149100 | |
| }, | |
| { | |
| "epoch": 5.31, | |
| "learning_rate": 2.7875843742956453e-05, | |
| "loss": 1.3926, | |
| "step": 149200 | |
| }, | |
| { | |
| "epoch": 5.31, | |
| "learning_rate": 2.786101521999597e-05, | |
| "loss": 1.3932, | |
| "step": 149300 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "learning_rate": 2.784618669703548e-05, | |
| "loss": 1.3966, | |
| "step": 149400 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "learning_rate": 2.7831358174075e-05, | |
| "loss": 1.3986, | |
| "step": 149500 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "learning_rate": 2.7816529651114514e-05, | |
| "loss": 1.3959, | |
| "step": 149600 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 2.780170112815403e-05, | |
| "loss": 1.3959, | |
| "step": 149700 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 2.778687260519354e-05, | |
| "loss": 1.3982, | |
| "step": 149800 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 2.7772044082233057e-05, | |
| "loss": 1.3925, | |
| "step": 149900 | |
| }, | |
| { | |
| "epoch": 5.34, | |
| "learning_rate": 2.7757215559272575e-05, | |
| "loss": 1.3927, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 5.34, | |
| "learning_rate": 2.774238703631209e-05, | |
| "loss": 1.3906, | |
| "step": 150100 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "learning_rate": 2.7727558513351603e-05, | |
| "loss": 1.3885, | |
| "step": 150200 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "learning_rate": 2.7712729990391118e-05, | |
| "loss": 1.3915, | |
| "step": 150300 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "learning_rate": 2.769790146743063e-05, | |
| "loss": 1.3892, | |
| "step": 150400 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "learning_rate": 2.7683072944470152e-05, | |
| "loss": 1.3881, | |
| "step": 150500 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "learning_rate": 2.7668244421509664e-05, | |
| "loss": 1.389, | |
| "step": 150600 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "learning_rate": 2.765341589854918e-05, | |
| "loss": 1.3808, | |
| "step": 150700 | |
| }, | |
| { | |
| "epoch": 5.37, | |
| "learning_rate": 2.7638587375588694e-05, | |
| "loss": 1.3872, | |
| "step": 150800 | |
| }, | |
| { | |
| "epoch": 5.37, | |
| "learning_rate": 2.7623758852628206e-05, | |
| "loss": 1.3853, | |
| "step": 150900 | |
| }, | |
| { | |
| "epoch": 5.37, | |
| "learning_rate": 2.7608930329667725e-05, | |
| "loss": 1.3855, | |
| "step": 151000 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "learning_rate": 2.759410180670724e-05, | |
| "loss": 1.389, | |
| "step": 151100 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "learning_rate": 2.7579273283746755e-05, | |
| "loss": 1.3837, | |
| "step": 151200 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "learning_rate": 2.7564444760786267e-05, | |
| "loss": 1.3848, | |
| "step": 151300 | |
| }, | |
| { | |
| "epoch": 5.39, | |
| "learning_rate": 2.7549616237825783e-05, | |
| "loss": 1.3872, | |
| "step": 151400 | |
| }, | |
| { | |
| "epoch": 5.39, | |
| "learning_rate": 2.75347877148653e-05, | |
| "loss": 1.3812, | |
| "step": 151500 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "learning_rate": 2.7519959191904817e-05, | |
| "loss": 1.3859, | |
| "step": 151600 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "learning_rate": 2.750513066894433e-05, | |
| "loss": 1.3824, | |
| "step": 151700 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "learning_rate": 2.7490302145983844e-05, | |
| "loss": 1.3857, | |
| "step": 151800 | |
| }, | |
| { | |
| "epoch": 5.41, | |
| "learning_rate": 2.7475473623023356e-05, | |
| "loss": 1.3829, | |
| "step": 151900 | |
| }, | |
| { | |
| "epoch": 5.41, | |
| "learning_rate": 2.7460645100062878e-05, | |
| "loss": 1.384, | |
| "step": 152000 | |
| }, | |
| { | |
| "epoch": 5.41, | |
| "learning_rate": 2.744581657710239e-05, | |
| "loss": 1.3846, | |
| "step": 152100 | |
| }, | |
| { | |
| "epoch": 5.42, | |
| "learning_rate": 2.7430988054141905e-05, | |
| "loss": 1.3823, | |
| "step": 152200 | |
| }, | |
| { | |
| "epoch": 5.42, | |
| "learning_rate": 2.7416159531181417e-05, | |
| "loss": 1.3836, | |
| "step": 152300 | |
| }, | |
| { | |
| "epoch": 5.42, | |
| "learning_rate": 2.7401331008220932e-05, | |
| "loss": 1.3819, | |
| "step": 152400 | |
| }, | |
| { | |
| "epoch": 5.43, | |
| "learning_rate": 2.738650248526045e-05, | |
| "loss": 1.3802, | |
| "step": 152500 | |
| }, | |
| { | |
| "epoch": 5.43, | |
| "learning_rate": 2.7371673962299966e-05, | |
| "loss": 1.3825, | |
| "step": 152600 | |
| }, | |
| { | |
| "epoch": 5.43, | |
| "learning_rate": 2.7356845439339478e-05, | |
| "loss": 1.385, | |
| "step": 152700 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "learning_rate": 2.7342016916378993e-05, | |
| "loss": 1.3845, | |
| "step": 152800 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "learning_rate": 2.7327188393418512e-05, | |
| "loss": 1.378, | |
| "step": 152900 | |
| }, | |
| { | |
| "epoch": 5.45, | |
| "learning_rate": 2.7312359870458027e-05, | |
| "loss": 1.3807, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 5.45, | |
| "learning_rate": 2.729753134749754e-05, | |
| "loss": 1.3773, | |
| "step": 153100 | |
| }, | |
| { | |
| "epoch": 5.45, | |
| "learning_rate": 2.7282702824537054e-05, | |
| "loss": 1.3797, | |
| "step": 153200 | |
| }, | |
| { | |
| "epoch": 5.46, | |
| "learning_rate": 2.726787430157657e-05, | |
| "loss": 1.3765, | |
| "step": 153300 | |
| }, | |
| { | |
| "epoch": 5.46, | |
| "learning_rate": 2.7253045778616088e-05, | |
| "loss": 1.3761, | |
| "step": 153400 | |
| }, | |
| { | |
| "epoch": 5.46, | |
| "learning_rate": 2.72382172556556e-05, | |
| "loss": 1.376, | |
| "step": 153500 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "learning_rate": 2.7223388732695115e-05, | |
| "loss": 1.3766, | |
| "step": 153600 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "learning_rate": 2.720856020973463e-05, | |
| "loss": 1.377, | |
| "step": 153700 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "learning_rate": 2.7193731686774143e-05, | |
| "loss": 1.3772, | |
| "step": 153800 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "learning_rate": 2.717890316381366e-05, | |
| "loss": 1.3705, | |
| "step": 153900 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "learning_rate": 2.7164074640853176e-05, | |
| "loss": 1.3737, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "eval_loss": 1.2714881896972656, | |
| "eval_runtime": 5266.973, | |
| "eval_samples_per_second": 606.993, | |
| "eval_steps_per_second": 0.395, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "learning_rate": 2.7149246117892692e-05, | |
| "loss": 1.3707, | |
| "step": 154100 | |
| }, | |
| { | |
| "epoch": 5.49, | |
| "learning_rate": 2.7134417594932204e-05, | |
| "loss": 1.3735, | |
| "step": 154200 | |
| }, | |
| { | |
| "epoch": 5.49, | |
| "learning_rate": 2.711958907197172e-05, | |
| "loss": 1.37, | |
| "step": 154300 | |
| }, | |
| { | |
| "epoch": 5.49, | |
| "learning_rate": 2.7104760549011238e-05, | |
| "loss": 1.3709, | |
| "step": 154400 | |
| }, | |
| { | |
| "epoch": 5.5, | |
| "learning_rate": 2.7089932026050753e-05, | |
| "loss": 1.3713, | |
| "step": 154500 | |
| }, | |
| { | |
| "epoch": 5.5, | |
| "learning_rate": 2.7075103503090265e-05, | |
| "loss": 1.3695, | |
| "step": 154600 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "learning_rate": 2.706027498012978e-05, | |
| "loss": 1.3724, | |
| "step": 154700 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "learning_rate": 2.7045446457169292e-05, | |
| "loss": 1.3718, | |
| "step": 154800 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "learning_rate": 2.7030617934208814e-05, | |
| "loss": 1.3705, | |
| "step": 154900 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "learning_rate": 2.7015789411248326e-05, | |
| "loss": 1.3695, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "learning_rate": 2.700096088828784e-05, | |
| "loss": 1.3681, | |
| "step": 155100 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "learning_rate": 2.6986132365327353e-05, | |
| "loss": 1.3727, | |
| "step": 155200 | |
| }, | |
| { | |
| "epoch": 5.53, | |
| "learning_rate": 2.697130384236687e-05, | |
| "loss": 1.3673, | |
| "step": 155300 | |
| }, | |
| { | |
| "epoch": 5.53, | |
| "learning_rate": 2.6956475319406387e-05, | |
| "loss": 1.3698, | |
| "step": 155400 | |
| }, | |
| { | |
| "epoch": 5.53, | |
| "learning_rate": 2.6941646796445902e-05, | |
| "loss": 1.3658, | |
| "step": 155500 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "learning_rate": 2.6926818273485414e-05, | |
| "loss": 1.3688, | |
| "step": 155600 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "learning_rate": 2.691198975052493e-05, | |
| "loss": 1.365, | |
| "step": 155700 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "learning_rate": 2.6897161227564445e-05, | |
| "loss": 1.3676, | |
| "step": 155800 | |
| }, | |
| { | |
| "epoch": 5.55, | |
| "learning_rate": 2.6882332704603963e-05, | |
| "loss": 1.3694, | |
| "step": 155900 | |
| }, | |
| { | |
| "epoch": 5.55, | |
| "learning_rate": 2.6867504181643475e-05, | |
| "loss": 1.3679, | |
| "step": 156000 | |
| }, | |
| { | |
| "epoch": 5.56, | |
| "learning_rate": 2.685267565868299e-05, | |
| "loss": 1.3646, | |
| "step": 156100 | |
| }, | |
| { | |
| "epoch": 5.56, | |
| "learning_rate": 2.6837847135722506e-05, | |
| "loss": 1.3682, | |
| "step": 156200 | |
| }, | |
| { | |
| "epoch": 5.56, | |
| "learning_rate": 2.6823018612762018e-05, | |
| "loss": 1.367, | |
| "step": 156300 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "learning_rate": 2.6808190089801536e-05, | |
| "loss": 1.3673, | |
| "step": 156400 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "learning_rate": 2.6793361566841052e-05, | |
| "loss": 1.3674, | |
| "step": 156500 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "learning_rate": 2.6778533043880567e-05, | |
| "loss": 1.3638, | |
| "step": 156600 | |
| }, | |
| { | |
| "epoch": 5.58, | |
| "learning_rate": 2.676370452092008e-05, | |
| "loss": 1.3681, | |
| "step": 156700 | |
| }, | |
| { | |
| "epoch": 5.58, | |
| "learning_rate": 2.6748875997959594e-05, | |
| "loss": 1.3589, | |
| "step": 156800 | |
| }, | |
| { | |
| "epoch": 5.58, | |
| "learning_rate": 2.6734047474999113e-05, | |
| "loss": 1.3589, | |
| "step": 156900 | |
| }, | |
| { | |
| "epoch": 5.59, | |
| "learning_rate": 2.6719218952038628e-05, | |
| "loss": 1.361, | |
| "step": 157000 | |
| }, | |
| { | |
| "epoch": 5.59, | |
| "learning_rate": 2.670439042907814e-05, | |
| "loss": 1.3622, | |
| "step": 157100 | |
| }, | |
| { | |
| "epoch": 5.59, | |
| "learning_rate": 2.6689561906117655e-05, | |
| "loss": 1.359, | |
| "step": 157200 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "learning_rate": 2.6674733383157174e-05, | |
| "loss": 1.3586, | |
| "step": 157300 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "learning_rate": 2.665990486019669e-05, | |
| "loss": 1.3584, | |
| "step": 157400 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "learning_rate": 2.66450763372362e-05, | |
| "loss": 1.3601, | |
| "step": 157500 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "learning_rate": 2.6630247814275716e-05, | |
| "loss": 1.3536, | |
| "step": 157600 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "learning_rate": 2.6615419291315232e-05, | |
| "loss": 1.3556, | |
| "step": 157700 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "learning_rate": 2.660059076835475e-05, | |
| "loss": 1.3578, | |
| "step": 157800 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "learning_rate": 2.6585762245394262e-05, | |
| "loss": 1.3576, | |
| "step": 157900 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "learning_rate": 2.6570933722433778e-05, | |
| "loss": 1.3573, | |
| "step": 158000 | |
| }, | |
| { | |
| "epoch": 5.63, | |
| "learning_rate": 2.6556105199473293e-05, | |
| "loss": 1.3575, | |
| "step": 158100 | |
| }, | |
| { | |
| "epoch": 5.63, | |
| "learning_rate": 2.6541276676512805e-05, | |
| "loss": 1.3539, | |
| "step": 158200 | |
| }, | |
| { | |
| "epoch": 5.63, | |
| "learning_rate": 2.6526448153552323e-05, | |
| "loss": 1.3535, | |
| "step": 158300 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "learning_rate": 2.651161963059184e-05, | |
| "loss": 1.3574, | |
| "step": 158400 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "learning_rate": 2.6496791107631354e-05, | |
| "loss": 1.3544, | |
| "step": 158500 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "learning_rate": 2.6481962584670866e-05, | |
| "loss": 1.3585, | |
| "step": 158600 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "learning_rate": 2.646713406171038e-05, | |
| "loss": 1.3563, | |
| "step": 158700 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "learning_rate": 2.64523055387499e-05, | |
| "loss": 1.3498, | |
| "step": 158800 | |
| }, | |
| { | |
| "epoch": 5.66, | |
| "learning_rate": 2.6437477015789415e-05, | |
| "loss": 1.3512, | |
| "step": 158900 | |
| }, | |
| { | |
| "epoch": 5.66, | |
| "learning_rate": 2.6422648492828927e-05, | |
| "loss": 1.3525, | |
| "step": 159000 | |
| }, | |
| { | |
| "epoch": 5.66, | |
| "learning_rate": 2.6407819969868442e-05, | |
| "loss": 1.3546, | |
| "step": 159100 | |
| }, | |
| { | |
| "epoch": 5.67, | |
| "learning_rate": 2.6392991446907954e-05, | |
| "loss": 1.351, | |
| "step": 159200 | |
| }, | |
| { | |
| "epoch": 5.67, | |
| "learning_rate": 2.6378162923947476e-05, | |
| "loss": 1.3531, | |
| "step": 159300 | |
| }, | |
| { | |
| "epoch": 5.67, | |
| "learning_rate": 2.6363334400986988e-05, | |
| "loss": 1.3488, | |
| "step": 159400 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "learning_rate": 2.6348505878026503e-05, | |
| "loss": 1.349, | |
| "step": 159500 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "learning_rate": 2.6333677355066015e-05, | |
| "loss": 1.3524, | |
| "step": 159600 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "learning_rate": 2.631884883210553e-05, | |
| "loss": 1.3487, | |
| "step": 159700 | |
| }, | |
| { | |
| "epoch": 5.69, | |
| "learning_rate": 2.630402030914505e-05, | |
| "loss": 1.3497, | |
| "step": 159800 | |
| }, | |
| { | |
| "epoch": 5.69, | |
| "learning_rate": 2.6289191786184565e-05, | |
| "loss": 1.3498, | |
| "step": 159900 | |
| }, | |
| { | |
| "epoch": 5.69, | |
| "learning_rate": 2.6274363263224076e-05, | |
| "loss": 1.3533, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "learning_rate": 2.6259534740263592e-05, | |
| "loss": 1.3474, | |
| "step": 160100 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "learning_rate": 2.6244706217303107e-05, | |
| "loss": 1.3488, | |
| "step": 160200 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "learning_rate": 2.6229877694342626e-05, | |
| "loss": 1.3496, | |
| "step": 160300 | |
| }, | |
| { | |
| "epoch": 5.71, | |
| "learning_rate": 2.6215049171382138e-05, | |
| "loss": 1.352, | |
| "step": 160400 | |
| }, | |
| { | |
| "epoch": 5.71, | |
| "learning_rate": 2.6200220648421653e-05, | |
| "loss": 1.3465, | |
| "step": 160500 | |
| }, | |
| { | |
| "epoch": 5.72, | |
| "learning_rate": 2.6185392125461168e-05, | |
| "loss": 1.3448, | |
| "step": 160600 | |
| }, | |
| { | |
| "epoch": 5.72, | |
| "learning_rate": 2.617056360250068e-05, | |
| "loss": 1.3499, | |
| "step": 160700 | |
| }, | |
| { | |
| "epoch": 5.72, | |
| "learning_rate": 2.61557350795402e-05, | |
| "loss": 1.3477, | |
| "step": 160800 | |
| }, | |
| { | |
| "epoch": 5.73, | |
| "learning_rate": 2.6140906556579714e-05, | |
| "loss": 1.3424, | |
| "step": 160900 | |
| }, | |
| { | |
| "epoch": 5.73, | |
| "learning_rate": 2.612607803361923e-05, | |
| "loss": 1.3457, | |
| "step": 161000 | |
| }, | |
| { | |
| "epoch": 5.73, | |
| "eval_loss": 1.243394374847412, | |
| "eval_runtime": 5266.0707, | |
| "eval_samples_per_second": 607.097, | |
| "eval_steps_per_second": 0.395, | |
| "step": 161000 | |
| }, | |
| { | |
| "epoch": 5.73, | |
| "learning_rate": 2.611124951065874e-05, | |
| "loss": 1.3487, | |
| "step": 161100 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 2.609642098769826e-05, | |
| "loss": 1.3396, | |
| "step": 161200 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 2.6081592464737775e-05, | |
| "loss": 1.3445, | |
| "step": 161300 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 2.606676394177729e-05, | |
| "loss": 1.3422, | |
| "step": 161400 | |
| }, | |
| { | |
| "epoch": 5.75, | |
| "learning_rate": 2.6051935418816802e-05, | |
| "loss": 1.3394, | |
| "step": 161500 | |
| }, | |
| { | |
| "epoch": 5.75, | |
| "learning_rate": 2.6037106895856318e-05, | |
| "loss": 1.3459, | |
| "step": 161600 | |
| }, | |
| { | |
| "epoch": 5.75, | |
| "learning_rate": 2.6022278372895836e-05, | |
| "loss": 1.338, | |
| "step": 161700 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "learning_rate": 2.600744984993535e-05, | |
| "loss": 1.3411, | |
| "step": 161800 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "learning_rate": 2.5992621326974863e-05, | |
| "loss": 1.3402, | |
| "step": 161900 | |
| }, | |
| { | |
| "epoch": 5.77, | |
| "learning_rate": 2.597779280401438e-05, | |
| "loss": 1.344, | |
| "step": 162000 | |
| }, | |
| { | |
| "epoch": 5.77, | |
| "learning_rate": 2.596296428105389e-05, | |
| "loss": 1.3379, | |
| "step": 162100 | |
| }, | |
| { | |
| "epoch": 5.77, | |
| "learning_rate": 2.5948135758093413e-05, | |
| "loss": 1.3384, | |
| "step": 162200 | |
| }, | |
| { | |
| "epoch": 5.78, | |
| "learning_rate": 2.5933307235132925e-05, | |
| "loss": 1.3445, | |
| "step": 162300 | |
| }, | |
| { | |
| "epoch": 5.78, | |
| "learning_rate": 2.591847871217244e-05, | |
| "loss": 1.338, | |
| "step": 162400 | |
| }, | |
| { | |
| "epoch": 5.78, | |
| "learning_rate": 2.590365018921195e-05, | |
| "loss": 1.334, | |
| "step": 162500 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "learning_rate": 2.5888821666251467e-05, | |
| "loss": 1.3393, | |
| "step": 162600 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "learning_rate": 2.5873993143290986e-05, | |
| "loss": 1.3389, | |
| "step": 162700 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "learning_rate": 2.58591646203305e-05, | |
| "loss": 1.3422, | |
| "step": 162800 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "learning_rate": 2.5844336097370013e-05, | |
| "loss": 1.3348, | |
| "step": 162900 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "learning_rate": 2.5829507574409528e-05, | |
| "loss": 1.3366, | |
| "step": 163000 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "learning_rate": 2.5814679051449043e-05, | |
| "loss": 1.3369, | |
| "step": 163100 | |
| }, | |
| { | |
| "epoch": 5.81, | |
| "learning_rate": 2.5799850528488562e-05, | |
| "loss": 1.3359, | |
| "step": 163200 | |
| }, | |
| { | |
| "epoch": 5.81, | |
| "learning_rate": 2.5785022005528074e-05, | |
| "loss": 1.3395, | |
| "step": 163300 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "learning_rate": 2.577019348256759e-05, | |
| "loss": 1.3401, | |
| "step": 163400 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "learning_rate": 2.5755364959607104e-05, | |
| "loss": 1.3355, | |
| "step": 163500 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "learning_rate": 2.5740536436646616e-05, | |
| "loss": 1.3383, | |
| "step": 163600 | |
| }, | |
| { | |
| "epoch": 5.83, | |
| "learning_rate": 2.572570791368614e-05, | |
| "loss": 1.3343, | |
| "step": 163700 | |
| }, | |
| { | |
| "epoch": 5.83, | |
| "learning_rate": 2.571087939072565e-05, | |
| "loss": 1.3327, | |
| "step": 163800 | |
| }, | |
| { | |
| "epoch": 5.83, | |
| "learning_rate": 2.5696050867765166e-05, | |
| "loss": 1.3324, | |
| "step": 163900 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "learning_rate": 2.5681222344804678e-05, | |
| "loss": 1.335, | |
| "step": 164000 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "learning_rate": 2.5666393821844193e-05, | |
| "loss": 1.3318, | |
| "step": 164100 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "learning_rate": 2.565156529888371e-05, | |
| "loss": 1.3382, | |
| "step": 164200 | |
| }, | |
| { | |
| "epoch": 5.85, | |
| "learning_rate": 2.5636736775923227e-05, | |
| "loss": 1.3324, | |
| "step": 164300 | |
| }, | |
| { | |
| "epoch": 5.85, | |
| "learning_rate": 2.562190825296274e-05, | |
| "loss": 1.3308, | |
| "step": 164400 | |
| }, | |
| { | |
| "epoch": 5.85, | |
| "learning_rate": 2.5607079730002254e-05, | |
| "loss": 1.3345, | |
| "step": 164500 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "learning_rate": 2.5592251207041766e-05, | |
| "loss": 1.3252, | |
| "step": 164600 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "learning_rate": 2.5577422684081288e-05, | |
| "loss": 1.3304, | |
| "step": 164700 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "learning_rate": 2.55625941611208e-05, | |
| "loss": 1.3318, | |
| "step": 164800 | |
| }, | |
| { | |
| "epoch": 5.87, | |
| "learning_rate": 2.5547765638160315e-05, | |
| "loss": 1.3314, | |
| "step": 164900 | |
| }, | |
| { | |
| "epoch": 5.87, | |
| "learning_rate": 2.553293711519983e-05, | |
| "loss": 1.3309, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "learning_rate": 2.551810859223935e-05, | |
| "loss": 1.3303, | |
| "step": 165100 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "learning_rate": 2.550328006927886e-05, | |
| "loss": 1.3324, | |
| "step": 165200 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "learning_rate": 2.5488451546318376e-05, | |
| "loss": 1.3338, | |
| "step": 165300 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "learning_rate": 2.547362302335789e-05, | |
| "loss": 1.3247, | |
| "step": 165400 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "learning_rate": 2.5458794500397403e-05, | |
| "loss": 1.3286, | |
| "step": 165500 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "learning_rate": 2.5443965977436922e-05, | |
| "loss": 1.3303, | |
| "step": 165600 | |
| }, | |
| { | |
| "epoch": 5.9, | |
| "learning_rate": 2.5429137454476437e-05, | |
| "loss": 1.3325, | |
| "step": 165700 | |
| }, | |
| { | |
| "epoch": 5.9, | |
| "learning_rate": 2.5414308931515953e-05, | |
| "loss": 1.3298, | |
| "step": 165800 | |
| }, | |
| { | |
| "epoch": 5.9, | |
| "learning_rate": 2.5399480408555464e-05, | |
| "loss": 1.3264, | |
| "step": 165900 | |
| }, | |
| { | |
| "epoch": 5.91, | |
| "learning_rate": 2.538465188559498e-05, | |
| "loss": 1.3209, | |
| "step": 166000 | |
| }, | |
| { | |
| "epoch": 5.91, | |
| "learning_rate": 2.53698233626345e-05, | |
| "loss": 1.3323, | |
| "step": 166100 | |
| }, | |
| { | |
| "epoch": 5.91, | |
| "learning_rate": 2.5354994839674014e-05, | |
| "loss": 1.325, | |
| "step": 166200 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "learning_rate": 2.5340166316713526e-05, | |
| "loss": 1.3292, | |
| "step": 166300 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "learning_rate": 2.532533779375304e-05, | |
| "loss": 1.325, | |
| "step": 166400 | |
| }, | |
| { | |
| "epoch": 5.93, | |
| "learning_rate": 2.5310509270792553e-05, | |
| "loss": 1.3289, | |
| "step": 166500 | |
| }, | |
| { | |
| "epoch": 5.93, | |
| "learning_rate": 2.5295680747832075e-05, | |
| "loss": 1.3288, | |
| "step": 166600 | |
| }, | |
| { | |
| "epoch": 5.93, | |
| "learning_rate": 2.5280852224871587e-05, | |
| "loss": 1.3311, | |
| "step": 166700 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "learning_rate": 2.5266023701911102e-05, | |
| "loss": 1.3238, | |
| "step": 166800 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "learning_rate": 2.5251195178950614e-05, | |
| "loss": 1.3244, | |
| "step": 166900 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "learning_rate": 2.523636665599013e-05, | |
| "loss": 1.3227, | |
| "step": 167000 | |
| }, | |
| { | |
| "epoch": 5.95, | |
| "learning_rate": 2.5221538133029648e-05, | |
| "loss": 1.3237, | |
| "step": 167100 | |
| }, | |
| { | |
| "epoch": 5.95, | |
| "learning_rate": 2.5206709610069163e-05, | |
| "loss": 1.3265, | |
| "step": 167200 | |
| }, | |
| { | |
| "epoch": 5.95, | |
| "learning_rate": 2.5191881087108675e-05, | |
| "loss": 1.3273, | |
| "step": 167300 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "learning_rate": 2.517705256414819e-05, | |
| "loss": 1.3249, | |
| "step": 167400 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "learning_rate": 2.5162224041187706e-05, | |
| "loss": 1.3199, | |
| "step": 167500 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "learning_rate": 2.5147395518227224e-05, | |
| "loss": 1.3194, | |
| "step": 167600 | |
| }, | |
| { | |
| "epoch": 5.97, | |
| "learning_rate": 2.5132566995266736e-05, | |
| "loss": 1.325, | |
| "step": 167700 | |
| }, | |
| { | |
| "epoch": 5.97, | |
| "learning_rate": 2.511773847230625e-05, | |
| "loss": 1.3222, | |
| "step": 167800 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "learning_rate": 2.5102909949345767e-05, | |
| "loss": 1.315, | |
| "step": 167900 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "learning_rate": 2.508808142638528e-05, | |
| "loss": 1.3229, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "eval_loss": 1.2230604887008667, | |
| "eval_runtime": 5266.4527, | |
| "eval_samples_per_second": 607.053, | |
| "eval_steps_per_second": 0.395, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "learning_rate": 2.5073252903424797e-05, | |
| "loss": 1.3212, | |
| "step": 168100 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "learning_rate": 2.5058424380464313e-05, | |
| "loss": 1.3205, | |
| "step": 168200 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "learning_rate": 2.5043595857503828e-05, | |
| "loss": 1.3239, | |
| "step": 168300 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "learning_rate": 2.502876733454334e-05, | |
| "loss": 1.3216, | |
| "step": 168400 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 2.5013938811582855e-05, | |
| "loss": 1.3163, | |
| "step": 168500 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 2.499911028862237e-05, | |
| "loss": 1.3194, | |
| "step": 168600 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 2.498428176566189e-05, | |
| "loss": 1.3145, | |
| "step": 168700 | |
| }, | |
| { | |
| "epoch": 6.01, | |
| "learning_rate": 2.49694532427014e-05, | |
| "loss": 1.3186, | |
| "step": 168800 | |
| }, | |
| { | |
| "epoch": 6.01, | |
| "learning_rate": 2.495462471974092e-05, | |
| "loss": 1.314, | |
| "step": 168900 | |
| }, | |
| { | |
| "epoch": 6.01, | |
| "learning_rate": 2.493979619678043e-05, | |
| "loss": 1.3172, | |
| "step": 169000 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "learning_rate": 2.4924967673819947e-05, | |
| "loss": 1.3172, | |
| "step": 169100 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "learning_rate": 2.4910139150859462e-05, | |
| "loss": 1.318, | |
| "step": 169200 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "learning_rate": 2.4895310627898977e-05, | |
| "loss": 1.3157, | |
| "step": 169300 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "learning_rate": 2.4880482104938493e-05, | |
| "loss": 1.3163, | |
| "step": 169400 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "learning_rate": 2.4865653581978008e-05, | |
| "loss": 1.3201, | |
| "step": 169500 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "learning_rate": 2.485082505901752e-05, | |
| "loss": 1.3154, | |
| "step": 169600 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "learning_rate": 2.483599653605704e-05, | |
| "loss": 1.3131, | |
| "step": 169700 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "learning_rate": 2.482116801309655e-05, | |
| "loss": 1.3172, | |
| "step": 169800 | |
| }, | |
| { | |
| "epoch": 6.05, | |
| "learning_rate": 2.480633949013607e-05, | |
| "loss": 1.3122, | |
| "step": 169900 | |
| }, | |
| { | |
| "epoch": 6.05, | |
| "learning_rate": 2.479151096717558e-05, | |
| "loss": 1.3191, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 6.05, | |
| "learning_rate": 2.47766824442151e-05, | |
| "loss": 1.3149, | |
| "step": 170100 | |
| }, | |
| { | |
| "epoch": 6.06, | |
| "learning_rate": 2.476185392125461e-05, | |
| "loss": 1.3182, | |
| "step": 170200 | |
| }, | |
| { | |
| "epoch": 6.06, | |
| "learning_rate": 2.4747025398294127e-05, | |
| "loss": 1.3152, | |
| "step": 170300 | |
| }, | |
| { | |
| "epoch": 6.06, | |
| "learning_rate": 2.4732196875333642e-05, | |
| "loss": 1.3085, | |
| "step": 170400 | |
| }, | |
| { | |
| "epoch": 6.07, | |
| "learning_rate": 2.4717368352373157e-05, | |
| "loss": 1.3178, | |
| "step": 170500 | |
| }, | |
| { | |
| "epoch": 6.07, | |
| "learning_rate": 2.4702539829412673e-05, | |
| "loss": 1.3162, | |
| "step": 170600 | |
| }, | |
| { | |
| "epoch": 6.07, | |
| "learning_rate": 2.4687711306452188e-05, | |
| "loss": 1.3108, | |
| "step": 170700 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "learning_rate": 2.4672882783491703e-05, | |
| "loss": 1.3183, | |
| "step": 170800 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "learning_rate": 2.465805426053122e-05, | |
| "loss": 1.3152, | |
| "step": 170900 | |
| }, | |
| { | |
| "epoch": 6.09, | |
| "learning_rate": 2.4643225737570734e-05, | |
| "loss": 1.3154, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 6.09, | |
| "learning_rate": 2.462839721461025e-05, | |
| "loss": 1.3075, | |
| "step": 171100 | |
| }, | |
| { | |
| "epoch": 6.09, | |
| "learning_rate": 2.4613568691649764e-05, | |
| "loss": 1.3091, | |
| "step": 171200 | |
| }, | |
| { | |
| "epoch": 6.1, | |
| "learning_rate": 2.4598740168689276e-05, | |
| "loss": 1.315, | |
| "step": 171300 | |
| }, | |
| { | |
| "epoch": 6.1, | |
| "learning_rate": 2.4583911645728795e-05, | |
| "loss": 1.3132, | |
| "step": 171400 | |
| }, | |
| { | |
| "epoch": 6.1, | |
| "learning_rate": 2.4569083122768307e-05, | |
| "loss": 1.3146, | |
| "step": 171500 | |
| }, | |
| { | |
| "epoch": 6.11, | |
| "learning_rate": 2.4554254599807825e-05, | |
| "loss": 1.3087, | |
| "step": 171600 | |
| }, | |
| { | |
| "epoch": 6.11, | |
| "learning_rate": 2.4539426076847337e-05, | |
| "loss": 1.309, | |
| "step": 171700 | |
| }, | |
| { | |
| "epoch": 6.11, | |
| "learning_rate": 2.4524597553886856e-05, | |
| "loss": 1.3085, | |
| "step": 171800 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "learning_rate": 2.4509769030926368e-05, | |
| "loss": 1.3101, | |
| "step": 171900 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "learning_rate": 2.4494940507965883e-05, | |
| "loss": 1.313, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "learning_rate": 2.44801119850054e-05, | |
| "loss": 1.3124, | |
| "step": 172100 | |
| }, | |
| { | |
| "epoch": 6.13, | |
| "learning_rate": 2.4465283462044914e-05, | |
| "loss": 1.3066, | |
| "step": 172200 | |
| }, | |
| { | |
| "epoch": 6.13, | |
| "learning_rate": 2.445045493908443e-05, | |
| "loss": 1.3072, | |
| "step": 172300 | |
| }, | |
| { | |
| "epoch": 6.14, | |
| "learning_rate": 2.4435626416123944e-05, | |
| "loss": 1.3081, | |
| "step": 172400 | |
| }, | |
| { | |
| "epoch": 6.14, | |
| "learning_rate": 2.442079789316346e-05, | |
| "loss": 1.3062, | |
| "step": 172500 | |
| }, | |
| { | |
| "epoch": 6.14, | |
| "learning_rate": 2.4405969370202975e-05, | |
| "loss": 1.3059, | |
| "step": 172600 | |
| }, | |
| { | |
| "epoch": 6.15, | |
| "learning_rate": 2.439114084724249e-05, | |
| "loss": 1.3053, | |
| "step": 172700 | |
| }, | |
| { | |
| "epoch": 6.15, | |
| "learning_rate": 2.4376312324282005e-05, | |
| "loss": 1.3128, | |
| "step": 172800 | |
| }, | |
| { | |
| "epoch": 6.15, | |
| "learning_rate": 2.436148380132152e-05, | |
| "loss": 1.3061, | |
| "step": 172900 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "learning_rate": 2.4346655278361032e-05, | |
| "loss": 1.3082, | |
| "step": 173000 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "learning_rate": 2.433182675540055e-05, | |
| "loss": 1.3055, | |
| "step": 173100 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "learning_rate": 2.4316998232440063e-05, | |
| "loss": 1.3003, | |
| "step": 173200 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "learning_rate": 2.4302169709479582e-05, | |
| "loss": 1.3058, | |
| "step": 173300 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "learning_rate": 2.4287341186519094e-05, | |
| "loss": 1.3028, | |
| "step": 173400 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "learning_rate": 2.427251266355861e-05, | |
| "loss": 1.3054, | |
| "step": 173500 | |
| }, | |
| { | |
| "epoch": 6.18, | |
| "learning_rate": 2.4257684140598124e-05, | |
| "loss": 1.3067, | |
| "step": 173600 | |
| }, | |
| { | |
| "epoch": 6.18, | |
| "learning_rate": 2.424285561763764e-05, | |
| "loss": 1.3015, | |
| "step": 173700 | |
| }, | |
| { | |
| "epoch": 6.19, | |
| "learning_rate": 2.4228027094677155e-05, | |
| "loss": 1.3067, | |
| "step": 173800 | |
| }, | |
| { | |
| "epoch": 6.19, | |
| "learning_rate": 2.421319857171667e-05, | |
| "loss": 1.3042, | |
| "step": 173900 | |
| }, | |
| { | |
| "epoch": 6.19, | |
| "learning_rate": 2.4198370048756185e-05, | |
| "loss": 1.305, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "learning_rate": 2.41835415257957e-05, | |
| "loss": 1.3004, | |
| "step": 174100 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "learning_rate": 2.4168713002835212e-05, | |
| "loss": 1.3073, | |
| "step": 174200 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "learning_rate": 2.415388447987473e-05, | |
| "loss": 1.3023, | |
| "step": 174300 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "learning_rate": 2.4139055956914243e-05, | |
| "loss": 1.3045, | |
| "step": 174400 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "learning_rate": 2.4124227433953762e-05, | |
| "loss": 1.3022, | |
| "step": 174500 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "learning_rate": 2.4109398910993274e-05, | |
| "loss": 1.3098, | |
| "step": 174600 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "learning_rate": 2.409457038803279e-05, | |
| "loss": 1.2989, | |
| "step": 174700 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "learning_rate": 2.4079741865072304e-05, | |
| "loss": 1.2992, | |
| "step": 174800 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "learning_rate": 2.406491334211182e-05, | |
| "loss": 1.3, | |
| "step": 174900 | |
| }, | |
| { | |
| "epoch": 6.23, | |
| "learning_rate": 2.4050084819151335e-05, | |
| "loss": 1.3005, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 6.23, | |
| "eval_loss": 1.2012759447097778, | |
| "eval_runtime": 5266.8158, | |
| "eval_samples_per_second": 607.011, | |
| "eval_steps_per_second": 0.395, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 6.23, | |
| "learning_rate": 2.403525629619085e-05, | |
| "loss": 1.3033, | |
| "step": 175100 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "learning_rate": 2.4020427773230365e-05, | |
| "loss": 1.2997, | |
| "step": 175200 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "learning_rate": 2.400559925026988e-05, | |
| "loss": 1.3008, | |
| "step": 175300 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "learning_rate": 2.3990770727309396e-05, | |
| "loss": 1.2992, | |
| "step": 175400 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "learning_rate": 2.397594220434891e-05, | |
| "loss": 1.3016, | |
| "step": 175500 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "learning_rate": 2.3961113681388426e-05, | |
| "loss": 1.3031, | |
| "step": 175600 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "learning_rate": 2.394628515842794e-05, | |
| "loss": 1.3023, | |
| "step": 175700 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "learning_rate": 2.3931456635467457e-05, | |
| "loss": 1.2968, | |
| "step": 175800 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "learning_rate": 2.391662811250697e-05, | |
| "loss": 1.2974, | |
| "step": 175900 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "learning_rate": 2.3901799589546488e-05, | |
| "loss": 1.2937, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "learning_rate": 2.3886971066586e-05, | |
| "loss": 1.2998, | |
| "step": 176100 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "learning_rate": 2.3872142543625518e-05, | |
| "loss": 1.2975, | |
| "step": 176200 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "learning_rate": 2.385731402066503e-05, | |
| "loss": 1.2968, | |
| "step": 176300 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "learning_rate": 2.3842485497704545e-05, | |
| "loss": 1.2966, | |
| "step": 176400 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "learning_rate": 2.382765697474406e-05, | |
| "loss": 1.2927, | |
| "step": 176500 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "learning_rate": 2.3812828451783576e-05, | |
| "loss": 1.2953, | |
| "step": 176600 | |
| }, | |
| { | |
| "epoch": 6.29, | |
| "learning_rate": 2.379799992882309e-05, | |
| "loss": 1.2927, | |
| "step": 176700 | |
| }, | |
| { | |
| "epoch": 6.29, | |
| "learning_rate": 2.3783171405862606e-05, | |
| "loss": 1.2981, | |
| "step": 176800 | |
| }, | |
| { | |
| "epoch": 6.3, | |
| "learning_rate": 2.3768342882902118e-05, | |
| "loss": 1.2937, | |
| "step": 176900 | |
| }, | |
| { | |
| "epoch": 6.3, | |
| "learning_rate": 2.3753514359941637e-05, | |
| "loss": 1.2939, | |
| "step": 177000 | |
| }, | |
| { | |
| "epoch": 6.3, | |
| "learning_rate": 2.373868583698115e-05, | |
| "loss": 1.2901, | |
| "step": 177100 | |
| }, | |
| { | |
| "epoch": 6.31, | |
| "learning_rate": 2.3723857314020668e-05, | |
| "loss": 1.2962, | |
| "step": 177200 | |
| }, | |
| { | |
| "epoch": 6.31, | |
| "learning_rate": 2.370902879106018e-05, | |
| "loss": 1.2964, | |
| "step": 177300 | |
| }, | |
| { | |
| "epoch": 6.31, | |
| "learning_rate": 2.3694200268099695e-05, | |
| "loss": 1.2917, | |
| "step": 177400 | |
| }, | |
| { | |
| "epoch": 6.32, | |
| "learning_rate": 2.367937174513921e-05, | |
| "loss": 1.2927, | |
| "step": 177500 | |
| }, | |
| { | |
| "epoch": 6.32, | |
| "learning_rate": 2.3664543222178725e-05, | |
| "loss": 1.2932, | |
| "step": 177600 | |
| }, | |
| { | |
| "epoch": 6.32, | |
| "learning_rate": 2.364971469921824e-05, | |
| "loss": 1.2884, | |
| "step": 177700 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "learning_rate": 2.3634886176257756e-05, | |
| "loss": 1.2955, | |
| "step": 177800 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "learning_rate": 2.3620057653297275e-05, | |
| "loss": 1.2967, | |
| "step": 177900 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "learning_rate": 2.3605229130336786e-05, | |
| "loss": 1.2925, | |
| "step": 178000 | |
| }, | |
| { | |
| "epoch": 6.34, | |
| "learning_rate": 2.35904006073763e-05, | |
| "loss": 1.2931, | |
| "step": 178100 | |
| }, | |
| { | |
| "epoch": 6.34, | |
| "learning_rate": 2.3575572084415817e-05, | |
| "loss": 1.2892, | |
| "step": 178200 | |
| }, | |
| { | |
| "epoch": 6.35, | |
| "learning_rate": 2.3560743561455332e-05, | |
| "loss": 1.2877, | |
| "step": 178300 | |
| }, | |
| { | |
| "epoch": 6.35, | |
| "learning_rate": 2.3545915038494848e-05, | |
| "loss": 1.2891, | |
| "step": 178400 | |
| }, | |
| { | |
| "epoch": 6.35, | |
| "learning_rate": 2.3531086515534363e-05, | |
| "loss": 1.2887, | |
| "step": 178500 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "learning_rate": 2.3516257992573875e-05, | |
| "loss": 1.2845, | |
| "step": 178600 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "learning_rate": 2.3501429469613393e-05, | |
| "loss": 1.2909, | |
| "step": 178700 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "learning_rate": 2.3486600946652905e-05, | |
| "loss": 1.288, | |
| "step": 178800 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "learning_rate": 2.3471772423692424e-05, | |
| "loss": 1.293, | |
| "step": 178900 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "learning_rate": 2.3456943900731936e-05, | |
| "loss": 1.2886, | |
| "step": 179000 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "learning_rate": 2.344211537777145e-05, | |
| "loss": 1.2937, | |
| "step": 179100 | |
| }, | |
| { | |
| "epoch": 6.38, | |
| "learning_rate": 2.3427286854810966e-05, | |
| "loss": 1.292, | |
| "step": 179200 | |
| }, | |
| { | |
| "epoch": 6.38, | |
| "learning_rate": 2.341245833185048e-05, | |
| "loss": 1.2916, | |
| "step": 179300 | |
| }, | |
| { | |
| "epoch": 6.38, | |
| "learning_rate": 2.3397629808889997e-05, | |
| "loss": 1.2883, | |
| "step": 179400 | |
| }, | |
| { | |
| "epoch": 6.39, | |
| "learning_rate": 2.3382801285929512e-05, | |
| "loss": 1.2859, | |
| "step": 179500 | |
| }, | |
| { | |
| "epoch": 6.39, | |
| "learning_rate": 2.3367972762969028e-05, | |
| "loss": 1.2873, | |
| "step": 179600 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "learning_rate": 2.3353144240008543e-05, | |
| "loss": 1.2869, | |
| "step": 179700 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "learning_rate": 2.3338315717048058e-05, | |
| "loss": 1.2862, | |
| "step": 179800 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "learning_rate": 2.3323487194087573e-05, | |
| "loss": 1.2856, | |
| "step": 179900 | |
| }, | |
| { | |
| "epoch": 6.41, | |
| "learning_rate": 2.330865867112709e-05, | |
| "loss": 1.2867, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 6.41, | |
| "learning_rate": 2.3293830148166604e-05, | |
| "loss": 1.2824, | |
| "step": 180100 | |
| }, | |
| { | |
| "epoch": 6.41, | |
| "learning_rate": 2.327900162520612e-05, | |
| "loss": 1.2836, | |
| "step": 180200 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "learning_rate": 2.326417310224563e-05, | |
| "loss": 1.2834, | |
| "step": 180300 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "learning_rate": 2.324934457928515e-05, | |
| "loss": 1.2855, | |
| "step": 180400 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "learning_rate": 2.323451605632466e-05, | |
| "loss": 1.2856, | |
| "step": 180500 | |
| }, | |
| { | |
| "epoch": 6.43, | |
| "learning_rate": 2.321968753336418e-05, | |
| "loss": 1.2844, | |
| "step": 180600 | |
| }, | |
| { | |
| "epoch": 6.43, | |
| "learning_rate": 2.3204859010403692e-05, | |
| "loss": 1.2845, | |
| "step": 180700 | |
| }, | |
| { | |
| "epoch": 6.43, | |
| "learning_rate": 2.3190030487443208e-05, | |
| "loss": 1.2849, | |
| "step": 180800 | |
| }, | |
| { | |
| "epoch": 6.44, | |
| "learning_rate": 2.3175201964482723e-05, | |
| "loss": 1.2853, | |
| "step": 180900 | |
| }, | |
| { | |
| "epoch": 6.44, | |
| "learning_rate": 2.3160373441522238e-05, | |
| "loss": 1.2836, | |
| "step": 181000 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "learning_rate": 2.3145544918561753e-05, | |
| "loss": 1.2805, | |
| "step": 181100 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "learning_rate": 2.313071639560127e-05, | |
| "loss": 1.2853, | |
| "step": 181200 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "learning_rate": 2.311588787264078e-05, | |
| "loss": 1.2846, | |
| "step": 181300 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "learning_rate": 2.31010593496803e-05, | |
| "loss": 1.2868, | |
| "step": 181400 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "learning_rate": 2.308623082671981e-05, | |
| "loss": 1.2813, | |
| "step": 181500 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "learning_rate": 2.307140230375933e-05, | |
| "loss": 1.2833, | |
| "step": 181600 | |
| }, | |
| { | |
| "epoch": 6.47, | |
| "learning_rate": 2.305657378079884e-05, | |
| "loss": 1.2839, | |
| "step": 181700 | |
| }, | |
| { | |
| "epoch": 6.47, | |
| "learning_rate": 2.304174525783836e-05, | |
| "loss": 1.2851, | |
| "step": 181800 | |
| }, | |
| { | |
| "epoch": 6.47, | |
| "learning_rate": 2.3026916734877872e-05, | |
| "loss": 1.2804, | |
| "step": 181900 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "learning_rate": 2.3012088211917387e-05, | |
| "loss": 1.2813, | |
| "step": 182000 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "eval_loss": 1.1860889196395874, | |
| "eval_runtime": 5267.0817, | |
| "eval_samples_per_second": 606.98, | |
| "eval_steps_per_second": 0.395, | |
| "step": 182000 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "learning_rate": 2.2997259688956903e-05, | |
| "loss": 1.2841, | |
| "step": 182100 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "learning_rate": 2.2982431165996418e-05, | |
| "loss": 1.2878, | |
| "step": 182200 | |
| }, | |
| { | |
| "epoch": 6.49, | |
| "learning_rate": 2.2967602643035933e-05, | |
| "loss": 1.2791, | |
| "step": 182300 | |
| }, | |
| { | |
| "epoch": 6.49, | |
| "learning_rate": 2.295277412007545e-05, | |
| "loss": 1.2807, | |
| "step": 182400 | |
| }, | |
| { | |
| "epoch": 6.49, | |
| "learning_rate": 2.2937945597114964e-05, | |
| "loss": 1.2833, | |
| "step": 182500 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "learning_rate": 2.292311707415448e-05, | |
| "loss": 1.2803, | |
| "step": 182600 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "learning_rate": 2.2908288551193994e-05, | |
| "loss": 1.2761, | |
| "step": 182700 | |
| }, | |
| { | |
| "epoch": 6.51, | |
| "learning_rate": 2.289346002823351e-05, | |
| "loss": 1.2797, | |
| "step": 182800 | |
| }, | |
| { | |
| "epoch": 6.51, | |
| "learning_rate": 2.2878631505273025e-05, | |
| "loss": 1.2803, | |
| "step": 182900 | |
| }, | |
| { | |
| "epoch": 6.51, | |
| "learning_rate": 2.2863802982312537e-05, | |
| "loss": 1.2855, | |
| "step": 183000 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "learning_rate": 2.2848974459352056e-05, | |
| "loss": 1.2768, | |
| "step": 183100 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "learning_rate": 2.2834145936391567e-05, | |
| "loss": 1.278, | |
| "step": 183200 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "learning_rate": 2.2819317413431086e-05, | |
| "loss": 1.2747, | |
| "step": 183300 | |
| }, | |
| { | |
| "epoch": 6.53, | |
| "learning_rate": 2.2804488890470598e-05, | |
| "loss": 1.2807, | |
| "step": 183400 | |
| }, | |
| { | |
| "epoch": 6.53, | |
| "learning_rate": 2.2789660367510113e-05, | |
| "loss": 1.2807, | |
| "step": 183500 | |
| }, | |
| { | |
| "epoch": 6.53, | |
| "learning_rate": 2.277483184454963e-05, | |
| "loss": 1.2776, | |
| "step": 183600 | |
| }, | |
| { | |
| "epoch": 6.54, | |
| "learning_rate": 2.2760003321589144e-05, | |
| "loss": 1.2819, | |
| "step": 183700 | |
| }, | |
| { | |
| "epoch": 6.54, | |
| "learning_rate": 2.274517479862866e-05, | |
| "loss": 1.2819, | |
| "step": 183800 | |
| }, | |
| { | |
| "epoch": 6.54, | |
| "learning_rate": 2.2730346275668174e-05, | |
| "loss": 1.2827, | |
| "step": 183900 | |
| }, | |
| { | |
| "epoch": 6.55, | |
| "learning_rate": 2.271551775270769e-05, | |
| "loss": 1.2778, | |
| "step": 184000 | |
| }, | |
| { | |
| "epoch": 6.55, | |
| "learning_rate": 2.2700689229747205e-05, | |
| "loss": 1.2801, | |
| "step": 184100 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "learning_rate": 2.2685860706786717e-05, | |
| "loss": 1.2756, | |
| "step": 184200 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "learning_rate": 2.2671032183826236e-05, | |
| "loss": 1.2737, | |
| "step": 184300 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "learning_rate": 2.2656203660865747e-05, | |
| "loss": 1.276, | |
| "step": 184400 | |
| }, | |
| { | |
| "epoch": 6.57, | |
| "learning_rate": 2.2641375137905266e-05, | |
| "loss": 1.2793, | |
| "step": 184500 | |
| }, | |
| { | |
| "epoch": 6.57, | |
| "learning_rate": 2.2626546614944778e-05, | |
| "loss": 1.2804, | |
| "step": 184600 | |
| }, | |
| { | |
| "epoch": 6.57, | |
| "learning_rate": 2.2611718091984293e-05, | |
| "loss": 1.2723, | |
| "step": 184700 | |
| }, | |
| { | |
| "epoch": 6.58, | |
| "learning_rate": 2.259688956902381e-05, | |
| "loss": 1.2749, | |
| "step": 184800 | |
| }, | |
| { | |
| "epoch": 6.58, | |
| "learning_rate": 2.2582061046063324e-05, | |
| "loss": 1.274, | |
| "step": 184900 | |
| }, | |
| { | |
| "epoch": 6.58, | |
| "learning_rate": 2.2567232523102843e-05, | |
| "loss": 1.2763, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 6.59, | |
| "learning_rate": 2.2552404000142354e-05, | |
| "loss": 1.2756, | |
| "step": 185100 | |
| }, | |
| { | |
| "epoch": 6.59, | |
| "learning_rate": 2.253757547718187e-05, | |
| "loss": 1.2792, | |
| "step": 185200 | |
| }, | |
| { | |
| "epoch": 6.59, | |
| "learning_rate": 2.2522746954221385e-05, | |
| "loss": 1.2746, | |
| "step": 185300 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "learning_rate": 2.25079184312609e-05, | |
| "loss": 1.274, | |
| "step": 185400 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "learning_rate": 2.2493089908300416e-05, | |
| "loss": 1.2751, | |
| "step": 185500 | |
| }, | |
| { | |
| "epoch": 6.61, | |
| "learning_rate": 2.247826138533993e-05, | |
| "loss": 1.2739, | |
| "step": 185600 | |
| }, | |
| { | |
| "epoch": 6.61, | |
| "learning_rate": 2.2463432862379446e-05, | |
| "loss": 1.2769, | |
| "step": 185700 | |
| }, | |
| { | |
| "epoch": 6.61, | |
| "learning_rate": 2.244860433941896e-05, | |
| "loss": 1.2763, | |
| "step": 185800 | |
| }, | |
| { | |
| "epoch": 6.62, | |
| "learning_rate": 2.2433775816458473e-05, | |
| "loss": 1.2711, | |
| "step": 185900 | |
| }, | |
| { | |
| "epoch": 6.62, | |
| "learning_rate": 2.2418947293497992e-05, | |
| "loss": 1.2722, | |
| "step": 186000 | |
| }, | |
| { | |
| "epoch": 6.62, | |
| "learning_rate": 2.2404118770537504e-05, | |
| "loss": 1.2724, | |
| "step": 186100 | |
| }, | |
| { | |
| "epoch": 6.63, | |
| "learning_rate": 2.2389290247577023e-05, | |
| "loss": 1.2692, | |
| "step": 186200 | |
| }, | |
| { | |
| "epoch": 6.63, | |
| "learning_rate": 2.2374461724616534e-05, | |
| "loss": 1.2714, | |
| "step": 186300 | |
| }, | |
| { | |
| "epoch": 6.63, | |
| "learning_rate": 2.235963320165605e-05, | |
| "loss": 1.2675, | |
| "step": 186400 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "learning_rate": 2.2344804678695565e-05, | |
| "loss": 1.2726, | |
| "step": 186500 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "learning_rate": 2.232997615573508e-05, | |
| "loss": 1.2715, | |
| "step": 186600 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "learning_rate": 2.2315147632774596e-05, | |
| "loss": 1.2685, | |
| "step": 186700 | |
| }, | |
| { | |
| "epoch": 6.65, | |
| "learning_rate": 2.230031910981411e-05, | |
| "loss": 1.2692, | |
| "step": 186800 | |
| }, | |
| { | |
| "epoch": 6.65, | |
| "learning_rate": 2.2285490586853626e-05, | |
| "loss": 1.2688, | |
| "step": 186900 | |
| }, | |
| { | |
| "epoch": 6.66, | |
| "learning_rate": 2.227066206389314e-05, | |
| "loss": 1.2696, | |
| "step": 187000 | |
| }, | |
| { | |
| "epoch": 6.66, | |
| "learning_rate": 2.2255833540932657e-05, | |
| "loss": 1.2709, | |
| "step": 187100 | |
| }, | |
| { | |
| "epoch": 6.66, | |
| "learning_rate": 2.2241005017972172e-05, | |
| "loss": 1.2692, | |
| "step": 187200 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "learning_rate": 2.2226176495011687e-05, | |
| "loss": 1.2714, | |
| "step": 187300 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "learning_rate": 2.22113479720512e-05, | |
| "loss": 1.2685, | |
| "step": 187400 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "learning_rate": 2.2196519449090718e-05, | |
| "loss": 1.2756, | |
| "step": 187500 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "learning_rate": 2.218169092613023e-05, | |
| "loss": 1.2669, | |
| "step": 187600 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "learning_rate": 2.216686240316975e-05, | |
| "loss": 1.2698, | |
| "step": 187700 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "learning_rate": 2.215203388020926e-05, | |
| "loss": 1.2722, | |
| "step": 187800 | |
| }, | |
| { | |
| "epoch": 6.69, | |
| "learning_rate": 2.213720535724878e-05, | |
| "loss": 1.2699, | |
| "step": 187900 | |
| }, | |
| { | |
| "epoch": 6.69, | |
| "learning_rate": 2.212237683428829e-05, | |
| "loss": 1.2684, | |
| "step": 188000 | |
| }, | |
| { | |
| "epoch": 6.69, | |
| "learning_rate": 2.2107548311327806e-05, | |
| "loss": 1.2669, | |
| "step": 188100 | |
| }, | |
| { | |
| "epoch": 6.7, | |
| "learning_rate": 2.209271978836732e-05, | |
| "loss": 1.2695, | |
| "step": 188200 | |
| }, | |
| { | |
| "epoch": 6.7, | |
| "learning_rate": 2.2077891265406837e-05, | |
| "loss": 1.2637, | |
| "step": 188300 | |
| }, | |
| { | |
| "epoch": 6.7, | |
| "learning_rate": 2.2063062742446352e-05, | |
| "loss": 1.2707, | |
| "step": 188400 | |
| }, | |
| { | |
| "epoch": 6.71, | |
| "learning_rate": 2.2048234219485867e-05, | |
| "loss": 1.2682, | |
| "step": 188500 | |
| }, | |
| { | |
| "epoch": 6.71, | |
| "learning_rate": 2.203340569652538e-05, | |
| "loss": 1.2688, | |
| "step": 188600 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "learning_rate": 2.2018577173564898e-05, | |
| "loss": 1.272, | |
| "step": 188700 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "learning_rate": 2.200374865060441e-05, | |
| "loss": 1.2665, | |
| "step": 188800 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "learning_rate": 2.198892012764393e-05, | |
| "loss": 1.2698, | |
| "step": 188900 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "learning_rate": 2.197409160468344e-05, | |
| "loss": 1.2679, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "eval_loss": 1.170892357826233, | |
| "eval_runtime": 5266.3993, | |
| "eval_samples_per_second": 607.059, | |
| "eval_steps_per_second": 0.395, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "learning_rate": 2.1959263081722956e-05, | |
| "loss": 1.2677, | |
| "step": 189100 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "learning_rate": 2.194443455876247e-05, | |
| "loss": 1.2689, | |
| "step": 189200 | |
| }, | |
| { | |
| "epoch": 6.74, | |
| "learning_rate": 2.1929606035801986e-05, | |
| "loss": 1.265, | |
| "step": 189300 | |
| }, | |
| { | |
| "epoch": 6.74, | |
| "learning_rate": 2.19147775128415e-05, | |
| "loss": 1.2661, | |
| "step": 189400 | |
| }, | |
| { | |
| "epoch": 6.74, | |
| "learning_rate": 2.1899948989881017e-05, | |
| "loss": 1.2663, | |
| "step": 189500 | |
| }, | |
| { | |
| "epoch": 6.75, | |
| "learning_rate": 2.1885120466920532e-05, | |
| "loss": 1.2661, | |
| "step": 189600 | |
| }, | |
| { | |
| "epoch": 6.75, | |
| "learning_rate": 2.1870291943960047e-05, | |
| "loss": 1.2632, | |
| "step": 189700 | |
| }, | |
| { | |
| "epoch": 6.75, | |
| "learning_rate": 2.1855463420999562e-05, | |
| "loss": 1.2658, | |
| "step": 189800 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "learning_rate": 2.1840634898039078e-05, | |
| "loss": 1.2675, | |
| "step": 189900 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "learning_rate": 2.1825806375078593e-05, | |
| "loss": 1.2598, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "learning_rate": 2.181097785211811e-05, | |
| "loss": 1.267, | |
| "step": 190100 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "learning_rate": 2.1796149329157624e-05, | |
| "loss": 1.2648, | |
| "step": 190200 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "learning_rate": 2.1781320806197136e-05, | |
| "loss": 1.2633, | |
| "step": 190300 | |
| }, | |
| { | |
| "epoch": 6.78, | |
| "learning_rate": 2.1766492283236654e-05, | |
| "loss": 1.2623, | |
| "step": 190400 | |
| }, | |
| { | |
| "epoch": 6.78, | |
| "learning_rate": 2.1751663760276166e-05, | |
| "loss": 1.2598, | |
| "step": 190500 | |
| }, | |
| { | |
| "epoch": 6.78, | |
| "learning_rate": 2.1736835237315685e-05, | |
| "loss": 1.2626, | |
| "step": 190600 | |
| }, | |
| { | |
| "epoch": 6.79, | |
| "learning_rate": 2.1722006714355197e-05, | |
| "loss": 1.262, | |
| "step": 190700 | |
| }, | |
| { | |
| "epoch": 6.79, | |
| "learning_rate": 2.1707178191394712e-05, | |
| "loss": 1.2679, | |
| "step": 190800 | |
| }, | |
| { | |
| "epoch": 6.79, | |
| "learning_rate": 2.1692349668434227e-05, | |
| "loss": 1.2631, | |
| "step": 190900 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "learning_rate": 2.1677521145473742e-05, | |
| "loss": 1.2628, | |
| "step": 191000 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "learning_rate": 2.1662692622513258e-05, | |
| "loss": 1.2652, | |
| "step": 191100 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "learning_rate": 2.1647864099552773e-05, | |
| "loss": 1.2665, | |
| "step": 191200 | |
| }, | |
| { | |
| "epoch": 6.81, | |
| "learning_rate": 2.1633035576592285e-05, | |
| "loss": 1.2591, | |
| "step": 191300 | |
| }, | |
| { | |
| "epoch": 6.81, | |
| "learning_rate": 2.1618207053631804e-05, | |
| "loss": 1.2633, | |
| "step": 191400 | |
| }, | |
| { | |
| "epoch": 6.82, | |
| "learning_rate": 2.1603378530671315e-05, | |
| "loss": 1.2594, | |
| "step": 191500 | |
| }, | |
| { | |
| "epoch": 6.82, | |
| "learning_rate": 2.1588550007710834e-05, | |
| "loss": 1.2628, | |
| "step": 191600 | |
| }, | |
| { | |
| "epoch": 6.82, | |
| "learning_rate": 2.1573721484750346e-05, | |
| "loss": 1.2587, | |
| "step": 191700 | |
| }, | |
| { | |
| "epoch": 6.83, | |
| "learning_rate": 2.1558892961789865e-05, | |
| "loss": 1.2597, | |
| "step": 191800 | |
| }, | |
| { | |
| "epoch": 6.83, | |
| "learning_rate": 2.1544064438829377e-05, | |
| "loss": 1.2604, | |
| "step": 191900 | |
| }, | |
| { | |
| "epoch": 6.83, | |
| "learning_rate": 2.1529235915868892e-05, | |
| "loss": 1.2609, | |
| "step": 192000 | |
| }, | |
| { | |
| "epoch": 6.84, | |
| "learning_rate": 2.151440739290841e-05, | |
| "loss": 1.2617, | |
| "step": 192100 | |
| }, | |
| { | |
| "epoch": 6.84, | |
| "learning_rate": 2.1499578869947922e-05, | |
| "loss": 1.2587, | |
| "step": 192200 | |
| }, | |
| { | |
| "epoch": 6.84, | |
| "learning_rate": 2.148475034698744e-05, | |
| "loss": 1.2614, | |
| "step": 192300 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "learning_rate": 2.1469921824026953e-05, | |
| "loss": 1.2599, | |
| "step": 192400 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "learning_rate": 2.145509330106647e-05, | |
| "loss": 1.2644, | |
| "step": 192500 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "learning_rate": 2.1440264778105984e-05, | |
| "loss": 1.2592, | |
| "step": 192600 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "learning_rate": 2.14254362551455e-05, | |
| "loss": 1.2598, | |
| "step": 192700 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "learning_rate": 2.1410607732185014e-05, | |
| "loss": 1.2573, | |
| "step": 192800 | |
| }, | |
| { | |
| "epoch": 6.87, | |
| "learning_rate": 2.139577920922453e-05, | |
| "loss": 1.2585, | |
| "step": 192900 | |
| }, | |
| { | |
| "epoch": 6.87, | |
| "learning_rate": 2.138095068626404e-05, | |
| "loss": 1.2591, | |
| "step": 193000 | |
| }, | |
| { | |
| "epoch": 6.87, | |
| "learning_rate": 2.136612216330356e-05, | |
| "loss": 1.2566, | |
| "step": 193100 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "learning_rate": 2.1351293640343072e-05, | |
| "loss": 1.2579, | |
| "step": 193200 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "learning_rate": 2.133646511738259e-05, | |
| "loss": 1.2511, | |
| "step": 193300 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "learning_rate": 2.1321636594422102e-05, | |
| "loss": 1.2584, | |
| "step": 193400 | |
| }, | |
| { | |
| "epoch": 6.89, | |
| "learning_rate": 2.1306808071461618e-05, | |
| "loss": 1.2591, | |
| "step": 193500 | |
| }, | |
| { | |
| "epoch": 6.89, | |
| "learning_rate": 2.1291979548501133e-05, | |
| "loss": 1.2577, | |
| "step": 193600 | |
| }, | |
| { | |
| "epoch": 6.89, | |
| "learning_rate": 2.1277151025540648e-05, | |
| "loss": 1.2577, | |
| "step": 193700 | |
| }, | |
| { | |
| "epoch": 6.9, | |
| "learning_rate": 2.1262322502580164e-05, | |
| "loss": 1.2569, | |
| "step": 193800 | |
| }, | |
| { | |
| "epoch": 6.9, | |
| "learning_rate": 2.124749397961968e-05, | |
| "loss": 1.2588, | |
| "step": 193900 | |
| }, | |
| { | |
| "epoch": 6.9, | |
| "learning_rate": 2.1232665456659194e-05, | |
| "loss": 1.255, | |
| "step": 194000 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "learning_rate": 2.121783693369871e-05, | |
| "loss": 1.2521, | |
| "step": 194100 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "learning_rate": 2.1203008410738225e-05, | |
| "loss": 1.2516, | |
| "step": 194200 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "learning_rate": 2.118817988777774e-05, | |
| "loss": 1.2533, | |
| "step": 194300 | |
| }, | |
| { | |
| "epoch": 6.92, | |
| "learning_rate": 2.1173351364817255e-05, | |
| "loss": 1.253, | |
| "step": 194400 | |
| }, | |
| { | |
| "epoch": 6.92, | |
| "learning_rate": 2.115852284185677e-05, | |
| "loss": 1.2543, | |
| "step": 194500 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "learning_rate": 2.1143694318896286e-05, | |
| "loss": 1.2535, | |
| "step": 194600 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "learning_rate": 2.1128865795935798e-05, | |
| "loss": 1.2546, | |
| "step": 194700 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "learning_rate": 2.1114037272975316e-05, | |
| "loss": 1.2518, | |
| "step": 194800 | |
| }, | |
| { | |
| "epoch": 6.94, | |
| "learning_rate": 2.1099208750014828e-05, | |
| "loss": 1.256, | |
| "step": 194900 | |
| }, | |
| { | |
| "epoch": 6.94, | |
| "learning_rate": 2.1084380227054347e-05, | |
| "loss": 1.2514, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 6.94, | |
| "learning_rate": 2.106955170409386e-05, | |
| "loss": 1.2542, | |
| "step": 195100 | |
| }, | |
| { | |
| "epoch": 6.95, | |
| "learning_rate": 2.1054723181133374e-05, | |
| "loss": 1.2504, | |
| "step": 195200 | |
| }, | |
| { | |
| "epoch": 6.95, | |
| "learning_rate": 2.103989465817289e-05, | |
| "loss": 1.2593, | |
| "step": 195300 | |
| }, | |
| { | |
| "epoch": 6.95, | |
| "learning_rate": 2.1025066135212405e-05, | |
| "loss": 1.2587, | |
| "step": 195400 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "learning_rate": 2.101023761225192e-05, | |
| "loss": 1.2552, | |
| "step": 195500 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "learning_rate": 2.0995409089291435e-05, | |
| "loss": 1.2563, | |
| "step": 195600 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "learning_rate": 2.098058056633095e-05, | |
| "loss": 1.256, | |
| "step": 195700 | |
| }, | |
| { | |
| "epoch": 6.97, | |
| "learning_rate": 2.0965752043370466e-05, | |
| "loss": 1.2545, | |
| "step": 195800 | |
| }, | |
| { | |
| "epoch": 6.97, | |
| "learning_rate": 2.0950923520409978e-05, | |
| "loss": 1.2573, | |
| "step": 195900 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "learning_rate": 2.0936094997449496e-05, | |
| "loss": 1.2489, | |
| "step": 196000 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "eval_loss": 1.157165765762329, | |
| "eval_runtime": 5266.6739, | |
| "eval_samples_per_second": 607.027, | |
| "eval_steps_per_second": 0.395, | |
| "step": 196000 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "learning_rate": 2.0921266474489008e-05, | |
| "loss": 1.2478, | |
| "step": 196100 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "learning_rate": 2.0906437951528527e-05, | |
| "loss": 1.2517, | |
| "step": 196200 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "learning_rate": 2.089160942856804e-05, | |
| "loss": 1.2536, | |
| "step": 196300 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "learning_rate": 2.0876780905607554e-05, | |
| "loss": 1.253, | |
| "step": 196400 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "learning_rate": 2.086195238264707e-05, | |
| "loss": 1.2536, | |
| "step": 196500 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 2.0847123859686585e-05, | |
| "loss": 1.2563, | |
| "step": 196600 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 2.08322953367261e-05, | |
| "loss": 1.2538, | |
| "step": 196700 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 2.0817466813765615e-05, | |
| "loss": 1.2497, | |
| "step": 196800 | |
| }, | |
| { | |
| "epoch": 7.01, | |
| "learning_rate": 2.080263829080513e-05, | |
| "loss": 1.2493, | |
| "step": 196900 | |
| }, | |
| { | |
| "epoch": 7.01, | |
| "learning_rate": 2.0787809767844646e-05, | |
| "loss": 1.247, | |
| "step": 197000 | |
| }, | |
| { | |
| "epoch": 7.01, | |
| "learning_rate": 2.077298124488416e-05, | |
| "loss": 1.2537, | |
| "step": 197100 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "learning_rate": 2.0758152721923676e-05, | |
| "loss": 1.2545, | |
| "step": 197200 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "learning_rate": 2.074332419896319e-05, | |
| "loss": 1.2509, | |
| "step": 197300 | |
| }, | |
| { | |
| "epoch": 7.03, | |
| "learning_rate": 2.0728495676002704e-05, | |
| "loss": 1.2503, | |
| "step": 197400 | |
| }, | |
| { | |
| "epoch": 7.03, | |
| "learning_rate": 2.0713667153042222e-05, | |
| "loss": 1.2514, | |
| "step": 197500 | |
| }, | |
| { | |
| "epoch": 7.03, | |
| "learning_rate": 2.0698838630081734e-05, | |
| "loss": 1.244, | |
| "step": 197600 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "learning_rate": 2.0684010107121253e-05, | |
| "loss": 1.2541, | |
| "step": 197700 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "learning_rate": 2.0669181584160765e-05, | |
| "loss": 1.2527, | |
| "step": 197800 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "learning_rate": 2.0654353061200283e-05, | |
| "loss": 1.2455, | |
| "step": 197900 | |
| }, | |
| { | |
| "epoch": 7.05, | |
| "learning_rate": 2.0639524538239795e-05, | |
| "loss": 1.249, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 7.05, | |
| "learning_rate": 2.062469601527931e-05, | |
| "loss": 1.2475, | |
| "step": 198100 | |
| }, | |
| { | |
| "epoch": 7.05, | |
| "learning_rate": 2.0609867492318826e-05, | |
| "loss": 1.245, | |
| "step": 198200 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "learning_rate": 2.059503896935834e-05, | |
| "loss": 1.2526, | |
| "step": 198300 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "learning_rate": 2.0580210446397856e-05, | |
| "loss": 1.2445, | |
| "step": 198400 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "learning_rate": 2.056538192343737e-05, | |
| "loss": 1.2496, | |
| "step": 198500 | |
| }, | |
| { | |
| "epoch": 7.07, | |
| "learning_rate": 2.0550553400476884e-05, | |
| "loss": 1.2455, | |
| "step": 198600 | |
| }, | |
| { | |
| "epoch": 7.07, | |
| "learning_rate": 2.0535724877516402e-05, | |
| "loss": 1.2488, | |
| "step": 198700 | |
| }, | |
| { | |
| "epoch": 7.07, | |
| "learning_rate": 2.0520896354555914e-05, | |
| "loss": 1.2534, | |
| "step": 198800 | |
| }, | |
| { | |
| "epoch": 7.08, | |
| "learning_rate": 2.0506067831595433e-05, | |
| "loss": 1.2483, | |
| "step": 198900 | |
| }, | |
| { | |
| "epoch": 7.08, | |
| "learning_rate": 2.0491239308634945e-05, | |
| "loss": 1.2498, | |
| "step": 199000 | |
| }, | |
| { | |
| "epoch": 7.09, | |
| "learning_rate": 2.047641078567446e-05, | |
| "loss": 1.2453, | |
| "step": 199100 | |
| }, | |
| { | |
| "epoch": 7.09, | |
| "learning_rate": 2.046158226271398e-05, | |
| "loss": 1.2446, | |
| "step": 199200 | |
| }, | |
| { | |
| "epoch": 7.09, | |
| "learning_rate": 2.044675373975349e-05, | |
| "loss": 1.2473, | |
| "step": 199300 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "learning_rate": 2.043192521679301e-05, | |
| "loss": 1.2422, | |
| "step": 199400 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "learning_rate": 2.041709669383252e-05, | |
| "loss": 1.2472, | |
| "step": 199500 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "learning_rate": 2.040226817087204e-05, | |
| "loss": 1.2426, | |
| "step": 199600 | |
| }, | |
| { | |
| "epoch": 7.11, | |
| "learning_rate": 2.038743964791155e-05, | |
| "loss": 1.2448, | |
| "step": 199700 | |
| }, | |
| { | |
| "epoch": 7.11, | |
| "learning_rate": 2.0372611124951067e-05, | |
| "loss": 1.2432, | |
| "step": 199800 | |
| }, | |
| { | |
| "epoch": 7.11, | |
| "learning_rate": 2.0357782601990582e-05, | |
| "loss": 1.243, | |
| "step": 199900 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "learning_rate": 2.0342954079030097e-05, | |
| "loss": 1.2458, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "learning_rate": 2.0328125556069613e-05, | |
| "loss": 1.2455, | |
| "step": 200100 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "learning_rate": 2.0313297033109128e-05, | |
| "loss": 1.2436, | |
| "step": 200200 | |
| }, | |
| { | |
| "epoch": 7.13, | |
| "learning_rate": 2.029846851014864e-05, | |
| "loss": 1.2458, | |
| "step": 200300 | |
| }, | |
| { | |
| "epoch": 7.13, | |
| "learning_rate": 2.028363998718816e-05, | |
| "loss": 1.2452, | |
| "step": 200400 | |
| }, | |
| { | |
| "epoch": 7.14, | |
| "learning_rate": 2.026881146422767e-05, | |
| "loss": 1.2451, | |
| "step": 200500 | |
| }, | |
| { | |
| "epoch": 7.14, | |
| "learning_rate": 2.025398294126719e-05, | |
| "loss": 1.2452, | |
| "step": 200600 | |
| }, | |
| { | |
| "epoch": 7.14, | |
| "learning_rate": 2.02391544183067e-05, | |
| "loss": 1.247, | |
| "step": 200700 | |
| }, | |
| { | |
| "epoch": 7.15, | |
| "learning_rate": 2.0224325895346216e-05, | |
| "loss": 1.2424, | |
| "step": 200800 | |
| }, | |
| { | |
| "epoch": 7.15, | |
| "learning_rate": 2.020949737238573e-05, | |
| "loss": 1.2414, | |
| "step": 200900 | |
| }, | |
| { | |
| "epoch": 7.15, | |
| "learning_rate": 2.0194668849425247e-05, | |
| "loss": 1.2474, | |
| "step": 201000 | |
| }, | |
| { | |
| "epoch": 7.16, | |
| "learning_rate": 2.0179840326464762e-05, | |
| "loss": 1.2442, | |
| "step": 201100 | |
| }, | |
| { | |
| "epoch": 7.16, | |
| "learning_rate": 2.0165011803504277e-05, | |
| "loss": 1.2418, | |
| "step": 201200 | |
| }, | |
| { | |
| "epoch": 7.16, | |
| "learning_rate": 2.0150183280543793e-05, | |
| "loss": 1.2459, | |
| "step": 201300 | |
| }, | |
| { | |
| "epoch": 7.17, | |
| "learning_rate": 2.0135354757583308e-05, | |
| "loss": 1.2449, | |
| "step": 201400 | |
| }, | |
| { | |
| "epoch": 7.17, | |
| "learning_rate": 2.0120526234622823e-05, | |
| "loss": 1.2432, | |
| "step": 201500 | |
| }, | |
| { | |
| "epoch": 7.17, | |
| "learning_rate": 2.010569771166234e-05, | |
| "loss": 1.2367, | |
| "step": 201600 | |
| }, | |
| { | |
| "epoch": 7.18, | |
| "learning_rate": 2.0090869188701854e-05, | |
| "loss": 1.2444, | |
| "step": 201700 | |
| }, | |
| { | |
| "epoch": 7.18, | |
| "learning_rate": 2.007604066574137e-05, | |
| "loss": 1.2402, | |
| "step": 201800 | |
| }, | |
| { | |
| "epoch": 7.19, | |
| "learning_rate": 2.0061212142780884e-05, | |
| "loss": 1.2437, | |
| "step": 201900 | |
| }, | |
| { | |
| "epoch": 7.19, | |
| "learning_rate": 2.0046383619820396e-05, | |
| "loss": 1.2435, | |
| "step": 202000 | |
| }, | |
| { | |
| "epoch": 7.19, | |
| "learning_rate": 2.0031555096859915e-05, | |
| "loss": 1.2444, | |
| "step": 202100 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "learning_rate": 2.0016726573899427e-05, | |
| "loss": 1.2407, | |
| "step": 202200 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "learning_rate": 2.0001898050938946e-05, | |
| "loss": 1.2428, | |
| "step": 202300 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "learning_rate": 1.9987069527978457e-05, | |
| "loss": 1.2359, | |
| "step": 202400 | |
| }, | |
| { | |
| "epoch": 7.21, | |
| "learning_rate": 1.9972241005017973e-05, | |
| "loss": 1.2366, | |
| "step": 202500 | |
| }, | |
| { | |
| "epoch": 7.21, | |
| "learning_rate": 1.9957412482057488e-05, | |
| "loss": 1.2422, | |
| "step": 202600 | |
| }, | |
| { | |
| "epoch": 7.21, | |
| "learning_rate": 1.9942583959097003e-05, | |
| "loss": 1.2437, | |
| "step": 202700 | |
| }, | |
| { | |
| "epoch": 7.22, | |
| "learning_rate": 1.992775543613652e-05, | |
| "loss": 1.2457, | |
| "step": 202800 | |
| }, | |
| { | |
| "epoch": 7.22, | |
| "learning_rate": 1.9912926913176034e-05, | |
| "loss": 1.2426, | |
| "step": 202900 | |
| }, | |
| { | |
| "epoch": 7.22, | |
| "learning_rate": 1.9898098390215546e-05, | |
| "loss": 1.2363, | |
| "step": 203000 | |
| }, | |
| { | |
| "epoch": 7.22, | |
| "eval_loss": 1.1457105875015259, | |
| "eval_runtime": 5266.5683, | |
| "eval_samples_per_second": 607.04, | |
| "eval_steps_per_second": 0.395, | |
| "step": 203000 | |
| }, | |
| { | |
| "epoch": 7.23, | |
| "learning_rate": 1.9883269867255064e-05, | |
| "loss": 1.2382, | |
| "step": 203100 | |
| }, | |
| { | |
| "epoch": 7.23, | |
| "learning_rate": 1.9868441344294576e-05, | |
| "loss": 1.2411, | |
| "step": 203200 | |
| }, | |
| { | |
| "epoch": 7.24, | |
| "learning_rate": 1.9853612821334095e-05, | |
| "loss": 1.2397, | |
| "step": 203300 | |
| }, | |
| { | |
| "epoch": 7.24, | |
| "learning_rate": 1.9838784298373607e-05, | |
| "loss": 1.2392, | |
| "step": 203400 | |
| }, | |
| { | |
| "epoch": 7.24, | |
| "learning_rate": 1.9823955775413126e-05, | |
| "loss": 1.2401, | |
| "step": 203500 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "learning_rate": 1.9809127252452637e-05, | |
| "loss": 1.2336, | |
| "step": 203600 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "learning_rate": 1.9794298729492153e-05, | |
| "loss": 1.2402, | |
| "step": 203700 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "learning_rate": 1.9779470206531668e-05, | |
| "loss": 1.2372, | |
| "step": 203800 | |
| }, | |
| { | |
| "epoch": 7.26, | |
| "learning_rate": 1.9764641683571183e-05, | |
| "loss": 1.2382, | |
| "step": 203900 | |
| }, | |
| { | |
| "epoch": 7.26, | |
| "learning_rate": 1.97498131606107e-05, | |
| "loss": 1.2372, | |
| "step": 204000 | |
| }, | |
| { | |
| "epoch": 7.26, | |
| "learning_rate": 1.9734984637650214e-05, | |
| "loss": 1.2382, | |
| "step": 204100 | |
| }, | |
| { | |
| "epoch": 7.27, | |
| "learning_rate": 1.972015611468973e-05, | |
| "loss": 1.2377, | |
| "step": 204200 | |
| }, | |
| { | |
| "epoch": 7.27, | |
| "learning_rate": 1.9705327591729244e-05, | |
| "loss": 1.2402, | |
| "step": 204300 | |
| }, | |
| { | |
| "epoch": 7.27, | |
| "learning_rate": 1.969049906876876e-05, | |
| "loss": 1.2396, | |
| "step": 204400 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "learning_rate": 1.9675670545808275e-05, | |
| "loss": 1.241, | |
| "step": 204500 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "learning_rate": 1.966084202284779e-05, | |
| "loss": 1.2342, | |
| "step": 204600 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "learning_rate": 1.9646013499887302e-05, | |
| "loss": 1.24, | |
| "step": 204700 | |
| }, | |
| { | |
| "epoch": 7.29, | |
| "learning_rate": 1.963118497692682e-05, | |
| "loss": 1.2401, | |
| "step": 204800 | |
| }, | |
| { | |
| "epoch": 7.29, | |
| "learning_rate": 1.9616356453966333e-05, | |
| "loss": 1.2378, | |
| "step": 204900 | |
| }, | |
| { | |
| "epoch": 7.3, | |
| "learning_rate": 1.960152793100585e-05, | |
| "loss": 1.2361, | |
| "step": 205000 | |
| }, | |
| { | |
| "epoch": 7.3, | |
| "learning_rate": 1.9586699408045363e-05, | |
| "loss": 1.2343, | |
| "step": 205100 | |
| }, | |
| { | |
| "epoch": 7.3, | |
| "learning_rate": 1.957187088508488e-05, | |
| "loss": 1.2387, | |
| "step": 205200 | |
| }, | |
| { | |
| "epoch": 7.31, | |
| "learning_rate": 1.9557042362124394e-05, | |
| "loss": 1.2374, | |
| "step": 205300 | |
| }, | |
| { | |
| "epoch": 7.31, | |
| "learning_rate": 1.954221383916391e-05, | |
| "loss": 1.2341, | |
| "step": 205400 | |
| }, | |
| { | |
| "epoch": 7.31, | |
| "learning_rate": 1.9527385316203424e-05, | |
| "loss": 1.2394, | |
| "step": 205500 | |
| }, | |
| { | |
| "epoch": 7.32, | |
| "learning_rate": 1.951255679324294e-05, | |
| "loss": 1.2364, | |
| "step": 205600 | |
| }, | |
| { | |
| "epoch": 7.32, | |
| "learning_rate": 1.9497728270282455e-05, | |
| "loss": 1.2341, | |
| "step": 205700 | |
| }, | |
| { | |
| "epoch": 7.32, | |
| "learning_rate": 1.948289974732197e-05, | |
| "loss": 1.2405, | |
| "step": 205800 | |
| }, | |
| { | |
| "epoch": 7.33, | |
| "learning_rate": 1.9468071224361482e-05, | |
| "loss": 1.2359, | |
| "step": 205900 | |
| }, | |
| { | |
| "epoch": 7.33, | |
| "learning_rate": 1.9453242701401e-05, | |
| "loss": 1.2335, | |
| "step": 206000 | |
| }, | |
| { | |
| "epoch": 7.33, | |
| "learning_rate": 1.9438414178440513e-05, | |
| "loss": 1.2394, | |
| "step": 206100 | |
| }, | |
| { | |
| "epoch": 7.34, | |
| "learning_rate": 1.942358565548003e-05, | |
| "loss": 1.2299, | |
| "step": 206200 | |
| }, | |
| { | |
| "epoch": 7.34, | |
| "learning_rate": 1.9408757132519547e-05, | |
| "loss": 1.2391, | |
| "step": 206300 | |
| }, | |
| { | |
| "epoch": 7.35, | |
| "learning_rate": 1.939392860955906e-05, | |
| "loss": 1.2398, | |
| "step": 206400 | |
| }, | |
| { | |
| "epoch": 7.35, | |
| "learning_rate": 1.9379100086598577e-05, | |
| "loss": 1.2369, | |
| "step": 206500 | |
| }, | |
| { | |
| "epoch": 7.35, | |
| "learning_rate": 1.936427156363809e-05, | |
| "loss": 1.2337, | |
| "step": 206600 | |
| }, | |
| { | |
| "epoch": 7.36, | |
| "learning_rate": 1.9349443040677608e-05, | |
| "loss": 1.2322, | |
| "step": 206700 | |
| }, | |
| { | |
| "epoch": 7.36, | |
| "learning_rate": 1.933461451771712e-05, | |
| "loss": 1.233, | |
| "step": 206800 | |
| }, | |
| { | |
| "epoch": 7.36, | |
| "learning_rate": 1.9319785994756635e-05, | |
| "loss": 1.2543, | |
| "step": 206900 | |
| }, | |
| { | |
| "epoch": 7.37, | |
| "learning_rate": 1.930495747179615e-05, | |
| "loss": 1.2308, | |
| "step": 207000 | |
| }, | |
| { | |
| "epoch": 7.37, | |
| "learning_rate": 1.9290128948835666e-05, | |
| "loss": 1.2373, | |
| "step": 207100 | |
| }, | |
| { | |
| "epoch": 7.37, | |
| "learning_rate": 1.927530042587518e-05, | |
| "loss": 1.2342, | |
| "step": 207200 | |
| }, | |
| { | |
| "epoch": 7.38, | |
| "learning_rate": 1.9260471902914696e-05, | |
| "loss": 1.2328, | |
| "step": 207300 | |
| }, | |
| { | |
| "epoch": 7.38, | |
| "learning_rate": 1.9245643379954208e-05, | |
| "loss": 1.2339, | |
| "step": 207400 | |
| }, | |
| { | |
| "epoch": 7.38, | |
| "learning_rate": 1.9230814856993727e-05, | |
| "loss": 1.231, | |
| "step": 207500 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "learning_rate": 1.921598633403324e-05, | |
| "loss": 1.2304, | |
| "step": 207600 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "learning_rate": 1.9201157811072757e-05, | |
| "loss": 1.2385, | |
| "step": 207700 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "learning_rate": 1.918632928811227e-05, | |
| "loss": 1.2326, | |
| "step": 207800 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "learning_rate": 1.9171500765151788e-05, | |
| "loss": 1.232, | |
| "step": 207900 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "learning_rate": 1.91566722421913e-05, | |
| "loss": 1.2331, | |
| "step": 208000 | |
| }, | |
| { | |
| "epoch": 7.41, | |
| "learning_rate": 1.9141843719230815e-05, | |
| "loss": 1.2306, | |
| "step": 208100 | |
| }, | |
| { | |
| "epoch": 7.41, | |
| "learning_rate": 1.912701519627033e-05, | |
| "loss": 1.233, | |
| "step": 208200 | |
| }, | |
| { | |
| "epoch": 7.41, | |
| "learning_rate": 1.9112186673309845e-05, | |
| "loss": 1.2325, | |
| "step": 208300 | |
| }, | |
| { | |
| "epoch": 7.42, | |
| "learning_rate": 1.909735815034936e-05, | |
| "loss": 1.2358, | |
| "step": 208400 | |
| }, | |
| { | |
| "epoch": 7.42, | |
| "learning_rate": 1.9082529627388876e-05, | |
| "loss": 1.2335, | |
| "step": 208500 | |
| }, | |
| { | |
| "epoch": 7.42, | |
| "learning_rate": 1.906770110442839e-05, | |
| "loss": 1.2319, | |
| "step": 208600 | |
| }, | |
| { | |
| "epoch": 7.43, | |
| "learning_rate": 1.9052872581467907e-05, | |
| "loss": 1.2331, | |
| "step": 208700 | |
| }, | |
| { | |
| "epoch": 7.43, | |
| "learning_rate": 1.9038044058507422e-05, | |
| "loss": 1.2252, | |
| "step": 208800 | |
| }, | |
| { | |
| "epoch": 7.43, | |
| "learning_rate": 1.9023215535546937e-05, | |
| "loss": 1.226, | |
| "step": 208900 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "learning_rate": 1.9008387012586452e-05, | |
| "loss": 1.231, | |
| "step": 209000 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "learning_rate": 1.8993558489625964e-05, | |
| "loss": 1.2307, | |
| "step": 209100 | |
| }, | |
| { | |
| "epoch": 7.45, | |
| "learning_rate": 1.8978729966665483e-05, | |
| "loss": 1.2359, | |
| "step": 209200 | |
| }, | |
| { | |
| "epoch": 7.45, | |
| "learning_rate": 1.8963901443704995e-05, | |
| "loss": 1.2324, | |
| "step": 209300 | |
| }, | |
| { | |
| "epoch": 7.45, | |
| "learning_rate": 1.8949072920744514e-05, | |
| "loss": 1.2297, | |
| "step": 209400 | |
| }, | |
| { | |
| "epoch": 7.46, | |
| "learning_rate": 1.8934244397784025e-05, | |
| "loss": 1.2305, | |
| "step": 209500 | |
| }, | |
| { | |
| "epoch": 7.46, | |
| "learning_rate": 1.8919415874823544e-05, | |
| "loss": 1.2325, | |
| "step": 209600 | |
| }, | |
| { | |
| "epoch": 7.46, | |
| "learning_rate": 1.8904587351863056e-05, | |
| "loss": 1.2364, | |
| "step": 209700 | |
| }, | |
| { | |
| "epoch": 7.47, | |
| "learning_rate": 1.888975882890257e-05, | |
| "loss": 1.2298, | |
| "step": 209800 | |
| }, | |
| { | |
| "epoch": 7.47, | |
| "learning_rate": 1.8874930305942087e-05, | |
| "loss": 1.2278, | |
| "step": 209900 | |
| }, | |
| { | |
| "epoch": 7.47, | |
| "learning_rate": 1.8860101782981602e-05, | |
| "loss": 1.2278, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 7.47, | |
| "eval_loss": 1.1346683502197266, | |
| "eval_runtime": 5266.1777, | |
| "eval_samples_per_second": 607.085, | |
| "eval_steps_per_second": 0.395, | |
| "step": 210000 | |
| } | |
| ], | |
| "max_steps": 337188, | |
| "num_train_epochs": 12, | |
| "total_flos": 5.933940663465429e+20, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |