Instructions to use Changahou/Llama8B_mathinstruct_SFT with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use Changahou/Llama8B_mathinstruct_SFT with PEFT:
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("/home/chenjh2/disk_chenjh/models/models/Llama-3-8b-Instruct")
model = PeftModel.from_pretrained(base_model, "Changahou/Llama8B_mathinstruct_SFT")
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 6252, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0023999520009599807, | |
| "grad_norm": 0.49803411960601807, | |
| "learning_rate": 4.999994949996767e-05, | |
| "loss": 0.9716, | |
| "num_input_tokens_seen": 54328, | |
| "step": 5, | |
| "train_runtime": 8.3772, | |
| "train_tokens_per_second": 6485.207 | |
| }, | |
| { | |
| "epoch": 0.004799904001919961, | |
| "grad_norm": 0.4587724804878235, | |
| "learning_rate": 4.9999744343936e-05, | |
| "loss": 0.9705, | |
| "num_input_tokens_seen": 108376, | |
| "step": 10, | |
| "train_runtime": 16.0234, | |
| "train_tokens_per_second": 6763.598 | |
| }, | |
| { | |
| "epoch": 0.007199856002879942, | |
| "grad_norm": 0.3823186159133911, | |
| "learning_rate": 4.999938137694701e-05, | |
| "loss": 0.8622, | |
| "num_input_tokens_seen": 163816, | |
| "step": 15, | |
| "train_runtime": 24.1319, | |
| "train_tokens_per_second": 6788.347 | |
| }, | |
| { | |
| "epoch": 0.009599808003839923, | |
| "grad_norm": 0.3449329733848572, | |
| "learning_rate": 4.999886060129194e-05, | |
| "loss": 0.8309, | |
| "num_input_tokens_seen": 225216, | |
| "step": 20, | |
| "train_runtime": 33.0832, | |
| "train_tokens_per_second": 6807.559 | |
| }, | |
| { | |
| "epoch": 0.011999760004799903, | |
| "grad_norm": 0.4700082242488861, | |
| "learning_rate": 4.999818202025819e-05, | |
| "loss": 0.8118, | |
| "num_input_tokens_seen": 279480, | |
| "step": 25, | |
| "train_runtime": 40.5384, | |
| "train_tokens_per_second": 6894.209 | |
| }, | |
| { | |
| "epoch": 0.014399712005759884, | |
| "grad_norm": 0.3817142844200134, | |
| "learning_rate": 4.999734563812929e-05, | |
| "loss": 0.7763, | |
| "num_input_tokens_seen": 337816, | |
| "step": 30, | |
| "train_runtime": 48.8916, | |
| "train_tokens_per_second": 6909.488 | |
| }, | |
| { | |
| "epoch": 0.016799664006719867, | |
| "grad_norm": 0.33569249510765076, | |
| "learning_rate": 4.9996351460184923e-05, | |
| "loss": 0.7919, | |
| "num_input_tokens_seen": 394952, | |
| "step": 35, | |
| "train_runtime": 57.353, | |
| "train_tokens_per_second": 6886.34 | |
| }, | |
| { | |
| "epoch": 0.019199616007679846, | |
| "grad_norm": 0.29491692781448364, | |
| "learning_rate": 4.9995199492700826e-05, | |
| "loss": 0.7095, | |
| "num_input_tokens_seen": 454608, | |
| "step": 40, | |
| "train_runtime": 65.8361, | |
| "train_tokens_per_second": 6905.143 | |
| }, | |
| { | |
| "epoch": 0.021599568008639828, | |
| "grad_norm": 0.3096805810928345, | |
| "learning_rate": 4.9993889742948806e-05, | |
| "loss": 0.7347, | |
| "num_input_tokens_seen": 510256, | |
| "step": 45, | |
| "train_runtime": 74.0444, | |
| "train_tokens_per_second": 6891.219 | |
| }, | |
| { | |
| "epoch": 0.023999520009599807, | |
| "grad_norm": 0.3358306884765625, | |
| "learning_rate": 4.9992422219196656e-05, | |
| "loss": 0.7461, | |
| "num_input_tokens_seen": 568112, | |
| "step": 50, | |
| "train_runtime": 82.4236, | |
| "train_tokens_per_second": 6892.587 | |
| }, | |
| { | |
| "epoch": 0.02639947201055979, | |
| "grad_norm": 0.3951747417449951, | |
| "learning_rate": 4.9990796930708125e-05, | |
| "loss": 0.7168, | |
| "num_input_tokens_seen": 625896, | |
| "step": 55, | |
| "train_runtime": 90.7851, | |
| "train_tokens_per_second": 6894.26 | |
| }, | |
| { | |
| "epoch": 0.02879942401151977, | |
| "grad_norm": 0.3642365038394928, | |
| "learning_rate": 4.9989013887742856e-05, | |
| "loss": 0.7117, | |
| "num_input_tokens_seen": 677856, | |
| "step": 60, | |
| "train_runtime": 98.2631, | |
| "train_tokens_per_second": 6898.377 | |
| }, | |
| { | |
| "epoch": 0.03119937601247975, | |
| "grad_norm": 0.3379388153553009, | |
| "learning_rate": 4.998707310155631e-05, | |
| "loss": 0.6441, | |
| "num_input_tokens_seen": 740792, | |
| "step": 65, | |
| "train_runtime": 107.5229, | |
| "train_tokens_per_second": 6889.624 | |
| }, | |
| { | |
| "epoch": 0.03359932801343973, | |
| "grad_norm": 0.46827253699302673, | |
| "learning_rate": 4.99849745843997e-05, | |
| "loss": 0.617, | |
| "num_input_tokens_seen": 795784, | |
| "step": 70, | |
| "train_runtime": 115.3119, | |
| "train_tokens_per_second": 6901.144 | |
| }, | |
| { | |
| "epoch": 0.03599928001439971, | |
| "grad_norm": 0.46408799290657043, | |
| "learning_rate": 4.998271834951993e-05, | |
| "loss": 0.685, | |
| "num_input_tokens_seen": 852016, | |
| "step": 75, | |
| "train_runtime": 123.2024, | |
| "train_tokens_per_second": 6915.578 | |
| }, | |
| { | |
| "epoch": 0.03839923201535969, | |
| "grad_norm": 0.4789453148841858, | |
| "learning_rate": 4.998030441115949e-05, | |
| "loss": 0.6505, | |
| "num_input_tokens_seen": 909224, | |
| "step": 80, | |
| "train_runtime": 131.6277, | |
| "train_tokens_per_second": 6907.542 | |
| }, | |
| { | |
| "epoch": 0.04079918401631968, | |
| "grad_norm": 0.40359923243522644, | |
| "learning_rate": 4.9977732784556355e-05, | |
| "loss": 0.6212, | |
| "num_input_tokens_seen": 959568, | |
| "step": 85, | |
| "train_runtime": 139.0702, | |
| "train_tokens_per_second": 6899.88 | |
| }, | |
| { | |
| "epoch": 0.043199136017279656, | |
| "grad_norm": 0.452709436416626, | |
| "learning_rate": 4.997500348594394e-05, | |
| "loss": 0.6978, | |
| "num_input_tokens_seen": 1010696, | |
| "step": 90, | |
| "train_runtime": 146.8828, | |
| "train_tokens_per_second": 6880.967 | |
| }, | |
| { | |
| "epoch": 0.045599088018239635, | |
| "grad_norm": 0.4287179112434387, | |
| "learning_rate": 4.997211653255096e-05, | |
| "loss": 0.6212, | |
| "num_input_tokens_seen": 1067912, | |
| "step": 95, | |
| "train_runtime": 155.302, | |
| "train_tokens_per_second": 6876.357 | |
| }, | |
| { | |
| "epoch": 0.047999040019199614, | |
| "grad_norm": 0.5242288112640381, | |
| "learning_rate": 4.996907194260129e-05, | |
| "loss": 0.6182, | |
| "num_input_tokens_seen": 1127264, | |
| "step": 100, | |
| "train_runtime": 164.3956, | |
| "train_tokens_per_second": 6857.02 | |
| }, | |
| { | |
| "epoch": 0.0503989920201596, | |
| "grad_norm": 0.31285974383354187, | |
| "learning_rate": 4.996586973531394e-05, | |
| "loss": 0.6254, | |
| "num_input_tokens_seen": 1183208, | |
| "step": 105, | |
| "train_runtime": 172.5905, | |
| "train_tokens_per_second": 6855.582 | |
| }, | |
| { | |
| "epoch": 0.05279894402111958, | |
| "grad_norm": 0.30165454745292664, | |
| "learning_rate": 4.9962509930902836e-05, | |
| "loss": 0.5758, | |
| "num_input_tokens_seen": 1243128, | |
| "step": 110, | |
| "train_runtime": 181.2314, | |
| "train_tokens_per_second": 6859.34 | |
| }, | |
| { | |
| "epoch": 0.05519889602207956, | |
| "grad_norm": 0.3959163725376129, | |
| "learning_rate": 4.9958992550576754e-05, | |
| "loss": 0.6427, | |
| "num_input_tokens_seen": 1294648, | |
| "step": 115, | |
| "train_runtime": 188.848, | |
| "train_tokens_per_second": 6855.503 | |
| }, | |
| { | |
| "epoch": 0.05759884802303954, | |
| "grad_norm": 0.44546279311180115, | |
| "learning_rate": 4.9955317616539174e-05, | |
| "loss": 0.6416, | |
| "num_input_tokens_seen": 1349136, | |
| "step": 120, | |
| "train_runtime": 196.8055, | |
| "train_tokens_per_second": 6855.173 | |
| }, | |
| { | |
| "epoch": 0.05999880002399952, | |
| "grad_norm": 0.5479788184165955, | |
| "learning_rate": 4.9951485151988126e-05, | |
| "loss": 0.6039, | |
| "num_input_tokens_seen": 1403304, | |
| "step": 125, | |
| "train_runtime": 204.9341, | |
| "train_tokens_per_second": 6847.585 | |
| }, | |
| { | |
| "epoch": 0.0623987520249595, | |
| "grad_norm": 0.46208852529525757, | |
| "learning_rate": 4.994749518111604e-05, | |
| "loss": 0.6365, | |
| "num_input_tokens_seen": 1460712, | |
| "step": 130, | |
| "train_runtime": 212.8501, | |
| "train_tokens_per_second": 6862.633 | |
| }, | |
| { | |
| "epoch": 0.06479870402591949, | |
| "grad_norm": 0.5154985189437866, | |
| "learning_rate": 4.9943347729109646e-05, | |
| "loss": 0.5757, | |
| "num_input_tokens_seen": 1516920, | |
| "step": 135, | |
| "train_runtime": 221.1296, | |
| "train_tokens_per_second": 6859.868 | |
| }, | |
| { | |
| "epoch": 0.06719865602687947, | |
| "grad_norm": 0.4509885311126709, | |
| "learning_rate": 4.993904282214972e-05, | |
| "loss": 0.6484, | |
| "num_input_tokens_seen": 1569296, | |
| "step": 140, | |
| "train_runtime": 228.9245, | |
| "train_tokens_per_second": 6855.081 | |
| }, | |
| { | |
| "epoch": 0.06959860802783945, | |
| "grad_norm": 0.47324448823928833, | |
| "learning_rate": 4.993458048741102e-05, | |
| "loss": 0.5967, | |
| "num_input_tokens_seen": 1627720, | |
| "step": 145, | |
| "train_runtime": 237.6306, | |
| "train_tokens_per_second": 6849.792 | |
| }, | |
| { | |
| "epoch": 0.07199856002879942, | |
| "grad_norm": 0.4491414427757263, | |
| "learning_rate": 4.992996075306203e-05, | |
| "loss": 0.6705, | |
| "num_input_tokens_seen": 1680600, | |
| "step": 150, | |
| "train_runtime": 245.5875, | |
| "train_tokens_per_second": 6843.181 | |
| }, | |
| { | |
| "epoch": 0.0743985120297594, | |
| "grad_norm": 0.5371958613395691, | |
| "learning_rate": 4.992518364826484e-05, | |
| "loss": 0.5925, | |
| "num_input_tokens_seen": 1732368, | |
| "step": 155, | |
| "train_runtime": 253.2225, | |
| "train_tokens_per_second": 6841.288 | |
| }, | |
| { | |
| "epoch": 0.07679846403071938, | |
| "grad_norm": 0.44730937480926514, | |
| "learning_rate": 4.9920249203174945e-05, | |
| "loss": 0.5695, | |
| "num_input_tokens_seen": 1794680, | |
| "step": 160, | |
| "train_runtime": 262.1391, | |
| "train_tokens_per_second": 6846.289 | |
| }, | |
| { | |
| "epoch": 0.07919841603167936, | |
| "grad_norm": 0.4398422837257385, | |
| "learning_rate": 4.9915157448941044e-05, | |
| "loss": 0.5549, | |
| "num_input_tokens_seen": 1854040, | |
| "step": 165, | |
| "train_runtime": 270.3386, | |
| "train_tokens_per_second": 6858.214 | |
| }, | |
| { | |
| "epoch": 0.08159836803263935, | |
| "grad_norm": 0.5156921148300171, | |
| "learning_rate": 4.9909908417704835e-05, | |
| "loss": 0.5701, | |
| "num_input_tokens_seen": 1908808, | |
| "step": 170, | |
| "train_runtime": 278.3256, | |
| "train_tokens_per_second": 6858.183 | |
| }, | |
| { | |
| "epoch": 0.08399832003359933, | |
| "grad_norm": 0.40140026807785034, | |
| "learning_rate": 4.990450214260086e-05, | |
| "loss": 0.5478, | |
| "num_input_tokens_seen": 1966184, | |
| "step": 175, | |
| "train_runtime": 286.7588, | |
| "train_tokens_per_second": 6856.577 | |
| }, | |
| { | |
| "epoch": 0.08639827203455931, | |
| "grad_norm": 0.5238102674484253, | |
| "learning_rate": 4.9898938657756234e-05, | |
| "loss": 0.5816, | |
| "num_input_tokens_seen": 2023280, | |
| "step": 180, | |
| "train_runtime": 295.3163, | |
| "train_tokens_per_second": 6851.231 | |
| }, | |
| { | |
| "epoch": 0.08879822403551929, | |
| "grad_norm": 0.4058316648006439, | |
| "learning_rate": 4.989321799829048e-05, | |
| "loss": 0.6243, | |
| "num_input_tokens_seen": 2079160, | |
| "step": 185, | |
| "train_runtime": 303.4477, | |
| "train_tokens_per_second": 6851.791 | |
| }, | |
| { | |
| "epoch": 0.09119817603647927, | |
| "grad_norm": 0.48315656185150146, | |
| "learning_rate": 4.988734020031527e-05, | |
| "loss": 0.5903, | |
| "num_input_tokens_seen": 2129480, | |
| "step": 190, | |
| "train_runtime": 310.6729, | |
| "train_tokens_per_second": 6854.413 | |
| }, | |
| { | |
| "epoch": 0.09359812803743925, | |
| "grad_norm": 0.49458763003349304, | |
| "learning_rate": 4.9881305300934225e-05, | |
| "loss": 0.5232, | |
| "num_input_tokens_seen": 2189160, | |
| "step": 195, | |
| "train_runtime": 319.1985, | |
| "train_tokens_per_second": 6858.302 | |
| }, | |
| { | |
| "epoch": 0.09599808003839923, | |
| "grad_norm": 0.3490532338619232, | |
| "learning_rate": 4.987511333824266e-05, | |
| "loss": 0.5846, | |
| "num_input_tokens_seen": 2247704, | |
| "step": 200, | |
| "train_runtime": 327.3152, | |
| "train_tokens_per_second": 6867.093 | |
| }, | |
| { | |
| "epoch": 0.0983980320393592, | |
| "grad_norm": 0.41308099031448364, | |
| "learning_rate": 4.986876435132736e-05, | |
| "loss": 0.589, | |
| "num_input_tokens_seen": 2307040, | |
| "step": 205, | |
| "train_runtime": 335.8335, | |
| "train_tokens_per_second": 6869.595 | |
| }, | |
| { | |
| "epoch": 0.1007979840403192, | |
| "grad_norm": 0.4715804159641266, | |
| "learning_rate": 4.9862258380266325e-05, | |
| "loss": 0.5737, | |
| "num_input_tokens_seen": 2357488, | |
| "step": 210, | |
| "train_runtime": 343.3468, | |
| "train_tokens_per_second": 6866.201 | |
| }, | |
| { | |
| "epoch": 0.10319793604127918, | |
| "grad_norm": 0.35753390192985535, | |
| "learning_rate": 4.985559546612851e-05, | |
| "loss": 0.6138, | |
| "num_input_tokens_seen": 2414064, | |
| "step": 215, | |
| "train_runtime": 351.1882, | |
| "train_tokens_per_second": 6873.99 | |
| }, | |
| { | |
| "epoch": 0.10559788804223916, | |
| "grad_norm": 0.44587111473083496, | |
| "learning_rate": 4.984877565097359e-05, | |
| "loss": 0.5923, | |
| "num_input_tokens_seen": 2472744, | |
| "step": 220, | |
| "train_runtime": 359.483, | |
| "train_tokens_per_second": 6878.611 | |
| }, | |
| { | |
| "epoch": 0.10799784004319914, | |
| "grad_norm": 0.47545069456100464, | |
| "learning_rate": 4.984179897785166e-05, | |
| "loss": 0.55, | |
| "num_input_tokens_seen": 2529024, | |
| "step": 225, | |
| "train_runtime": 367.4645, | |
| "train_tokens_per_second": 6882.364 | |
| }, | |
| { | |
| "epoch": 0.11039779204415912, | |
| "grad_norm": 0.5392165184020996, | |
| "learning_rate": 4.983466549080299e-05, | |
| "loss": 0.618, | |
| "num_input_tokens_seen": 2584864, | |
| "step": 230, | |
| "train_runtime": 375.6998, | |
| "train_tokens_per_second": 6880.132 | |
| }, | |
| { | |
| "epoch": 0.1127977440451191, | |
| "grad_norm": 0.6994487047195435, | |
| "learning_rate": 4.9827375234857735e-05, | |
| "loss": 0.6055, | |
| "num_input_tokens_seen": 2638696, | |
| "step": 235, | |
| "train_runtime": 383.3187, | |
| "train_tokens_per_second": 6883.818 | |
| }, | |
| { | |
| "epoch": 0.11519769604607907, | |
| "grad_norm": 0.5480724573135376, | |
| "learning_rate": 4.981992825603566e-05, | |
| "loss": 0.5962, | |
| "num_input_tokens_seen": 2699160, | |
| "step": 240, | |
| "train_runtime": 391.6683, | |
| "train_tokens_per_second": 6891.444 | |
| }, | |
| { | |
| "epoch": 0.11759764804703907, | |
| "grad_norm": 0.49630582332611084, | |
| "learning_rate": 4.981232460134584e-05, | |
| "loss": 0.5556, | |
| "num_input_tokens_seen": 2756440, | |
| "step": 245, | |
| "train_runtime": 400.0318, | |
| "train_tokens_per_second": 6890.551 | |
| }, | |
| { | |
| "epoch": 0.11999760004799905, | |
| "grad_norm": 0.48846226930618286, | |
| "learning_rate": 4.980456431878636e-05, | |
| "loss": 0.6064, | |
| "num_input_tokens_seen": 2811584, | |
| "step": 250, | |
| "train_runtime": 408.3053, | |
| "train_tokens_per_second": 6885.985 | |
| }, | |
| { | |
| "epoch": 0.12239755204895902, | |
| "grad_norm": 0.7514108419418335, | |
| "learning_rate": 4.9796647457344034e-05, | |
| "loss": 0.622, | |
| "num_input_tokens_seen": 2864600, | |
| "step": 255, | |
| "train_runtime": 416.1151, | |
| "train_tokens_per_second": 6884.152 | |
| }, | |
| { | |
| "epoch": 0.124797504049919, | |
| "grad_norm": 0.45766520500183105, | |
| "learning_rate": 4.9788574066994074e-05, | |
| "loss": 0.5792, | |
| "num_input_tokens_seen": 2920384, | |
| "step": 260, | |
| "train_runtime": 424.193, | |
| "train_tokens_per_second": 6884.564 | |
| }, | |
| { | |
| "epoch": 0.12719745605087898, | |
| "grad_norm": 0.40784621238708496, | |
| "learning_rate": 4.978034419869977e-05, | |
| "loss": 0.5464, | |
| "num_input_tokens_seen": 2976400, | |
| "step": 265, | |
| "train_runtime": 432.5949, | |
| "train_tokens_per_second": 6880.34 | |
| }, | |
| { | |
| "epoch": 0.12959740805183897, | |
| "grad_norm": 0.5691152811050415, | |
| "learning_rate": 4.977195790441219e-05, | |
| "loss": 0.5769, | |
| "num_input_tokens_seen": 3031640, | |
| "step": 270, | |
| "train_runtime": 440.75, | |
| "train_tokens_per_second": 6878.366 | |
| }, | |
| { | |
| "epoch": 0.13199736005279894, | |
| "grad_norm": 0.509024977684021, | |
| "learning_rate": 4.976341523706986e-05, | |
| "loss": 0.5853, | |
| "num_input_tokens_seen": 3088304, | |
| "step": 275, | |
| "train_runtime": 448.5804, | |
| "train_tokens_per_second": 6884.616 | |
| }, | |
| { | |
| "epoch": 0.13439731205375893, | |
| "grad_norm": 0.5476660132408142, | |
| "learning_rate": 4.975471625059837e-05, | |
| "loss": 0.5715, | |
| "num_input_tokens_seen": 3146984, | |
| "step": 280, | |
| "train_runtime": 456.8318, | |
| "train_tokens_per_second": 6888.716 | |
| }, | |
| { | |
| "epoch": 0.1367972640547189, | |
| "grad_norm": 0.41494348645210266, | |
| "learning_rate": 4.9745860999910093e-05, | |
| "loss": 0.5492, | |
| "num_input_tokens_seen": 3206416, | |
| "step": 285, | |
| "train_runtime": 465.4275, | |
| "train_tokens_per_second": 6889.185 | |
| }, | |
| { | |
| "epoch": 0.1391972160556789, | |
| "grad_norm": 0.4294047951698303, | |
| "learning_rate": 4.973684954090384e-05, | |
| "loss": 0.6008, | |
| "num_input_tokens_seen": 3263920, | |
| "step": 290, | |
| "train_runtime": 473.6647, | |
| "train_tokens_per_second": 6890.782 | |
| }, | |
| { | |
| "epoch": 0.14159716805663886, | |
| "grad_norm": 0.673201858997345, | |
| "learning_rate": 4.972768193046446e-05, | |
| "loss": 0.5588, | |
| "num_input_tokens_seen": 3318032, | |
| "step": 295, | |
| "train_runtime": 481.6549, | |
| "train_tokens_per_second": 6888.816 | |
| }, | |
| { | |
| "epoch": 0.14399712005759885, | |
| "grad_norm": 0.6196733117103577, | |
| "learning_rate": 4.971835822646254e-05, | |
| "loss": 0.5692, | |
| "num_input_tokens_seen": 3373136, | |
| "step": 300, | |
| "train_runtime": 489.8925, | |
| "train_tokens_per_second": 6885.461 | |
| }, | |
| { | |
| "epoch": 0.14639707205855884, | |
| "grad_norm": 0.5182610154151917, | |
| "learning_rate": 4.9708878487753976e-05, | |
| "loss": 0.5801, | |
| "num_input_tokens_seen": 3428032, | |
| "step": 305, | |
| "train_runtime": 497.9042, | |
| "train_tokens_per_second": 6884.923 | |
| }, | |
| { | |
| "epoch": 0.1487970240595188, | |
| "grad_norm": 0.5710193514823914, | |
| "learning_rate": 4.969924277417963e-05, | |
| "loss": 0.5601, | |
| "num_input_tokens_seen": 3482432, | |
| "step": 310, | |
| "train_runtime": 505.9162, | |
| "train_tokens_per_second": 6883.416 | |
| }, | |
| { | |
| "epoch": 0.1511969760604788, | |
| "grad_norm": 0.5431010127067566, | |
| "learning_rate": 4.968945114656499e-05, | |
| "loss": 0.6167, | |
| "num_input_tokens_seen": 3540200, | |
| "step": 315, | |
| "train_runtime": 513.822, | |
| "train_tokens_per_second": 6889.935 | |
| }, | |
| { | |
| "epoch": 0.15359692806143876, | |
| "grad_norm": 0.5962916016578674, | |
| "learning_rate": 4.967950366671973e-05, | |
| "loss": 0.5528, | |
| "num_input_tokens_seen": 3590376, | |
| "step": 320, | |
| "train_runtime": 521.033, | |
| "train_tokens_per_second": 6890.88 | |
| }, | |
| { | |
| "epoch": 0.15599688006239876, | |
| "grad_norm": 0.43872061371803284, | |
| "learning_rate": 4.966940039743734e-05, | |
| "loss": 0.582, | |
| "num_input_tokens_seen": 3650392, | |
| "step": 325, | |
| "train_runtime": 529.3248, | |
| "train_tokens_per_second": 6896.317 | |
| }, | |
| { | |
| "epoch": 0.15839683206335872, | |
| "grad_norm": 0.6549321413040161, | |
| "learning_rate": 4.965914140249475e-05, | |
| "loss": 0.6262, | |
| "num_input_tokens_seen": 3700960, | |
| "step": 330, | |
| "train_runtime": 537.0024, | |
| "train_tokens_per_second": 6891.887 | |
| }, | |
| { | |
| "epoch": 0.16079678406431872, | |
| "grad_norm": 0.49688732624053955, | |
| "learning_rate": 4.9648726746651875e-05, | |
| "loss": 0.555, | |
| "num_input_tokens_seen": 3757192, | |
| "step": 335, | |
| "train_runtime": 544.9852, | |
| "train_tokens_per_second": 6894.117 | |
| }, | |
| { | |
| "epoch": 0.1631967360652787, | |
| "grad_norm": 0.600683331489563, | |
| "learning_rate": 4.9638156495651265e-05, | |
| "loss": 0.5747, | |
| "num_input_tokens_seen": 3812168, | |
| "step": 340, | |
| "train_runtime": 552.5883, | |
| "train_tokens_per_second": 6898.749 | |
| }, | |
| { | |
| "epoch": 0.16559668806623867, | |
| "grad_norm": 0.506166398525238, | |
| "learning_rate": 4.9627430716217674e-05, | |
| "loss": 0.562, | |
| "num_input_tokens_seen": 3873432, | |
| "step": 345, | |
| "train_runtime": 561.3753, | |
| "train_tokens_per_second": 6899.898 | |
| }, | |
| { | |
| "epoch": 0.16799664006719867, | |
| "grad_norm": 0.5933504104614258, | |
| "learning_rate": 4.96165494760576e-05, | |
| "loss": 0.5751, | |
| "num_input_tokens_seen": 3928216, | |
| "step": 350, | |
| "train_runtime": 569.6058, | |
| "train_tokens_per_second": 6896.377 | |
| }, | |
| { | |
| "epoch": 0.17039659206815863, | |
| "grad_norm": 0.7012840509414673, | |
| "learning_rate": 4.96055128438589e-05, | |
| "loss": 0.5283, | |
| "num_input_tokens_seen": 3985672, | |
| "step": 355, | |
| "train_runtime": 578.024, | |
| "train_tokens_per_second": 6895.339 | |
| }, | |
| { | |
| "epoch": 0.17279654406911862, | |
| "grad_norm": 0.5886171460151672, | |
| "learning_rate": 4.959432088929036e-05, | |
| "loss": 0.5688, | |
| "num_input_tokens_seen": 4042336, | |
| "step": 360, | |
| "train_runtime": 586.1104, | |
| "train_tokens_per_second": 6896.885 | |
| }, | |
| { | |
| "epoch": 0.1751964960700786, | |
| "grad_norm": 0.6454927325248718, | |
| "learning_rate": 4.958297368300122e-05, | |
| "loss": 0.5236, | |
| "num_input_tokens_seen": 4097248, | |
| "step": 365, | |
| "train_runtime": 594.2204, | |
| "train_tokens_per_second": 6895.165 | |
| }, | |
| { | |
| "epoch": 0.17759644807103858, | |
| "grad_norm": 0.48636892437934875, | |
| "learning_rate": 4.957147129662074e-05, | |
| "loss": 0.5569, | |
| "num_input_tokens_seen": 4152816, | |
| "step": 370, | |
| "train_runtime": 602.2577, | |
| "train_tokens_per_second": 6895.413 | |
| }, | |
| { | |
| "epoch": 0.17999640007199855, | |
| "grad_norm": 0.5636932253837585, | |
| "learning_rate": 4.9559813802757785e-05, | |
| "loss": 0.5558, | |
| "num_input_tokens_seen": 4210824, | |
| "step": 375, | |
| "train_runtime": 610.6583, | |
| "train_tokens_per_second": 6895.549 | |
| }, | |
| { | |
| "epoch": 0.18239635207295854, | |
| "grad_norm": 0.4750101864337921, | |
| "learning_rate": 4.954800127500031e-05, | |
| "loss": 0.5055, | |
| "num_input_tokens_seen": 4263672, | |
| "step": 380, | |
| "train_runtime": 618.8445, | |
| "train_tokens_per_second": 6889.73 | |
| }, | |
| { | |
| "epoch": 0.18479630407391853, | |
| "grad_norm": 0.6123194694519043, | |
| "learning_rate": 4.953603378791493e-05, | |
| "loss": 0.5524, | |
| "num_input_tokens_seen": 4319024, | |
| "step": 385, | |
| "train_runtime": 626.7904, | |
| "train_tokens_per_second": 6890.699 | |
| }, | |
| { | |
| "epoch": 0.1871962560748785, | |
| "grad_norm": 0.49063947796821594, | |
| "learning_rate": 4.952391141704644e-05, | |
| "loss": 0.5653, | |
| "num_input_tokens_seen": 4377064, | |
| "step": 390, | |
| "train_runtime": 634.9033, | |
| "train_tokens_per_second": 6894.064 | |
| }, | |
| { | |
| "epoch": 0.1895962080758385, | |
| "grad_norm": 0.5559214949607849, | |
| "learning_rate": 4.951163423891735e-05, | |
| "loss": 0.6034, | |
| "num_input_tokens_seen": 4434984, | |
| "step": 395, | |
| "train_runtime": 643.2672, | |
| "train_tokens_per_second": 6894.466 | |
| }, | |
| { | |
| "epoch": 0.19199616007679846, | |
| "grad_norm": 0.3978354334831238, | |
| "learning_rate": 4.949920233102736e-05, | |
| "loss": 0.5667, | |
| "num_input_tokens_seen": 4492368, | |
| "step": 400, | |
| "train_runtime": 651.0435, | |
| "train_tokens_per_second": 6900.258 | |
| }, | |
| { | |
| "epoch": 0.19439611207775845, | |
| "grad_norm": 0.5354523658752441, | |
| "learning_rate": 4.948661577185295e-05, | |
| "loss": 0.5445, | |
| "num_input_tokens_seen": 4549008, | |
| "step": 405, | |
| "train_runtime": 659.5277, | |
| "train_tokens_per_second": 6897.372 | |
| }, | |
| { | |
| "epoch": 0.1967960640787184, | |
| "grad_norm": 0.4975457191467285, | |
| "learning_rate": 4.947387464084679e-05, | |
| "loss": 0.5462, | |
| "num_input_tokens_seen": 4609072, | |
| "step": 410, | |
| "train_runtime": 668.316, | |
| "train_tokens_per_second": 6896.546 | |
| }, | |
| { | |
| "epoch": 0.1991960160796784, | |
| "grad_norm": 0.5424690246582031, | |
| "learning_rate": 4.9460979018437314e-05, | |
| "loss": 0.5855, | |
| "num_input_tokens_seen": 4662560, | |
| "step": 415, | |
| "train_runtime": 676.3317, | |
| "train_tokens_per_second": 6893.895 | |
| }, | |
| { | |
| "epoch": 0.2015959680806384, | |
| "grad_norm": 0.5656135678291321, | |
| "learning_rate": 4.944792898602818e-05, | |
| "loss": 0.5909, | |
| "num_input_tokens_seen": 4719248, | |
| "step": 420, | |
| "train_runtime": 684.592, | |
| "train_tokens_per_second": 6893.519 | |
| }, | |
| { | |
| "epoch": 0.20399592008159836, | |
| "grad_norm": 0.4792700409889221, | |
| "learning_rate": 4.943472462599775e-05, | |
| "loss": 0.5211, | |
| "num_input_tokens_seen": 4774096, | |
| "step": 425, | |
| "train_runtime": 692.7133, | |
| "train_tokens_per_second": 6891.879 | |
| }, | |
| { | |
| "epoch": 0.20639587208255836, | |
| "grad_norm": 0.5212066173553467, | |
| "learning_rate": 4.942136602169858e-05, | |
| "loss": 0.5245, | |
| "num_input_tokens_seen": 4832616, | |
| "step": 430, | |
| "train_runtime": 700.7277, | |
| "train_tokens_per_second": 6896.567 | |
| }, | |
| { | |
| "epoch": 0.20879582408351832, | |
| "grad_norm": 0.5669515132904053, | |
| "learning_rate": 4.94078532574569e-05, | |
| "loss": 0.5304, | |
| "num_input_tokens_seen": 4887208, | |
| "step": 435, | |
| "train_runtime": 708.8314, | |
| "train_tokens_per_second": 6894.74 | |
| }, | |
| { | |
| "epoch": 0.21119577608447831, | |
| "grad_norm": 0.6369892358779907, | |
| "learning_rate": 4.939418641857209e-05, | |
| "loss": 0.5879, | |
| "num_input_tokens_seen": 4942504, | |
| "step": 440, | |
| "train_runtime": 716.88, | |
| "train_tokens_per_second": 6894.464 | |
| }, | |
| { | |
| "epoch": 0.21359572808543828, | |
| "grad_norm": 0.5132316946983337, | |
| "learning_rate": 4.938036559131608e-05, | |
| "loss": 0.5854, | |
| "num_input_tokens_seen": 4997880, | |
| "step": 445, | |
| "train_runtime": 724.9035, | |
| "train_tokens_per_second": 6894.546 | |
| }, | |
| { | |
| "epoch": 0.21599568008639827, | |
| "grad_norm": 0.5846990942955017, | |
| "learning_rate": 4.9366390862932896e-05, | |
| "loss": 0.5545, | |
| "num_input_tokens_seen": 5060096, | |
| "step": 450, | |
| "train_runtime": 733.2702, | |
| "train_tokens_per_second": 6900.725 | |
| }, | |
| { | |
| "epoch": 0.21839563208735827, | |
| "grad_norm": 0.5361617803573608, | |
| "learning_rate": 4.9352262321638056e-05, | |
| "loss": 0.528, | |
| "num_input_tokens_seen": 5120168, | |
| "step": 455, | |
| "train_runtime": 741.6463, | |
| "train_tokens_per_second": 6903.787 | |
| }, | |
| { | |
| "epoch": 0.22079558408831823, | |
| "grad_norm": 0.6068050265312195, | |
| "learning_rate": 4.9337980056618006e-05, | |
| "loss": 0.5462, | |
| "num_input_tokens_seen": 5175776, | |
| "step": 460, | |
| "train_runtime": 750.017, | |
| "train_tokens_per_second": 6900.878 | |
| }, | |
| { | |
| "epoch": 0.22319553608927822, | |
| "grad_norm": 0.6304349899291992, | |
| "learning_rate": 4.932354415802959e-05, | |
| "loss": 0.5399, | |
| "num_input_tokens_seen": 5232032, | |
| "step": 465, | |
| "train_runtime": 758.2013, | |
| "train_tokens_per_second": 6900.584 | |
| }, | |
| { | |
| "epoch": 0.2255954880902382, | |
| "grad_norm": 0.5615517497062683, | |
| "learning_rate": 4.9308954716999464e-05, | |
| "loss": 0.5224, | |
| "num_input_tokens_seen": 5292688, | |
| "step": 470, | |
| "train_runtime": 766.6597, | |
| "train_tokens_per_second": 6903.569 | |
| }, | |
| { | |
| "epoch": 0.22799544009119818, | |
| "grad_norm": 0.7061598896980286, | |
| "learning_rate": 4.92942118256235e-05, | |
| "loss": 0.5335, | |
| "num_input_tokens_seen": 5353096, | |
| "step": 475, | |
| "train_runtime": 775.3136, | |
| "train_tokens_per_second": 6904.427 | |
| }, | |
| { | |
| "epoch": 0.23039539209215815, | |
| "grad_norm": 0.6964676976203918, | |
| "learning_rate": 4.9279315576966265e-05, | |
| "loss": 0.4755, | |
| "num_input_tokens_seen": 5412360, | |
| "step": 480, | |
| "train_runtime": 784.1147, | |
| "train_tokens_per_second": 6902.511 | |
| }, | |
| { | |
| "epoch": 0.23279534409311814, | |
| "grad_norm": 0.6583765745162964, | |
| "learning_rate": 4.926426606506036e-05, | |
| "loss": 0.5725, | |
| "num_input_tokens_seen": 5466664, | |
| "step": 485, | |
| "train_runtime": 792.1938, | |
| "train_tokens_per_second": 6900.665 | |
| }, | |
| { | |
| "epoch": 0.23519529609407813, | |
| "grad_norm": 0.6751510500907898, | |
| "learning_rate": 4.924906338490586e-05, | |
| "loss": 0.5181, | |
| "num_input_tokens_seen": 5526480, | |
| "step": 490, | |
| "train_runtime": 800.9788, | |
| "train_tokens_per_second": 6899.658 | |
| }, | |
| { | |
| "epoch": 0.2375952480950381, | |
| "grad_norm": 0.5503116250038147, | |
| "learning_rate": 4.9233707632469746e-05, | |
| "loss": 0.5586, | |
| "num_input_tokens_seen": 5579704, | |
| "step": 495, | |
| "train_runtime": 808.8081, | |
| "train_tokens_per_second": 6898.674 | |
| }, | |
| { | |
| "epoch": 0.2399952000959981, | |
| "grad_norm": 0.5688736438751221, | |
| "learning_rate": 4.921819890468523e-05, | |
| "loss": 0.5465, | |
| "num_input_tokens_seen": 5633168, | |
| "step": 500, | |
| "train_runtime": 816.7042, | |
| "train_tokens_per_second": 6897.44 | |
| }, | |
| { | |
| "epoch": 0.24239515209695806, | |
| "grad_norm": 0.48173242807388306, | |
| "learning_rate": 4.9202537299451215e-05, | |
| "loss": 0.488, | |
| "num_input_tokens_seen": 5692232, | |
| "step": 505, | |
| "train_runtime": 825.1134, | |
| "train_tokens_per_second": 6898.727 | |
| }, | |
| { | |
| "epoch": 0.24479510409791805, | |
| "grad_norm": 0.5660738945007324, | |
| "learning_rate": 4.9186722915631626e-05, | |
| "loss": 0.5354, | |
| "num_input_tokens_seen": 5751464, | |
| "step": 510, | |
| "train_runtime": 833.8275, | |
| "train_tokens_per_second": 6897.666 | |
| }, | |
| { | |
| "epoch": 0.247195056098878, | |
| "grad_norm": 0.5903744697570801, | |
| "learning_rate": 4.9170755853054806e-05, | |
| "loss": 0.6093, | |
| "num_input_tokens_seen": 5811696, | |
| "step": 515, | |
| "train_runtime": 842.0821, | |
| "train_tokens_per_second": 6901.579 | |
| }, | |
| { | |
| "epoch": 0.249595008099838, | |
| "grad_norm": 0.6396485567092896, | |
| "learning_rate": 4.915463621251287e-05, | |
| "loss": 0.5436, | |
| "num_input_tokens_seen": 5867200, | |
| "step": 520, | |
| "train_runtime": 850.626, | |
| "train_tokens_per_second": 6897.509 | |
| }, | |
| { | |
| "epoch": 0.25199496010079797, | |
| "grad_norm": 0.5617818236351013, | |
| "learning_rate": 4.913836409576112e-05, | |
| "loss": 0.5537, | |
| "num_input_tokens_seen": 5924320, | |
| "step": 525, | |
| "train_runtime": 858.7807, | |
| "train_tokens_per_second": 6898.525 | |
| }, | |
| { | |
| "epoch": 0.25439491210175796, | |
| "grad_norm": 0.6151410937309265, | |
| "learning_rate": 4.912193960551732e-05, | |
| "loss": 0.5392, | |
| "num_input_tokens_seen": 5979680, | |
| "step": 530, | |
| "train_runtime": 866.7277, | |
| "train_tokens_per_second": 6899.145 | |
| }, | |
| { | |
| "epoch": 0.25679486410271796, | |
| "grad_norm": 0.6780862808227539, | |
| "learning_rate": 4.9105362845461114e-05, | |
| "loss": 0.5776, | |
| "num_input_tokens_seen": 6037568, | |
| "step": 535, | |
| "train_runtime": 874.7881, | |
| "train_tokens_per_second": 6901.749 | |
| }, | |
| { | |
| "epoch": 0.25919481610367795, | |
| "grad_norm": 0.6386091113090515, | |
| "learning_rate": 4.9088633920233345e-05, | |
| "loss": 0.5463, | |
| "num_input_tokens_seen": 6092712, | |
| "step": 540, | |
| "train_runtime": 883.1921, | |
| "train_tokens_per_second": 6898.513 | |
| }, | |
| { | |
| "epoch": 0.2615947681046379, | |
| "grad_norm": 0.49828580021858215, | |
| "learning_rate": 4.907175293543541e-05, | |
| "loss": 0.6055, | |
| "num_input_tokens_seen": 6147664, | |
| "step": 545, | |
| "train_runtime": 891.085, | |
| "train_tokens_per_second": 6899.077 | |
| }, | |
| { | |
| "epoch": 0.2639947201055979, | |
| "grad_norm": 0.5254030227661133, | |
| "learning_rate": 4.905471999762857e-05, | |
| "loss": 0.6124, | |
| "num_input_tokens_seen": 6199352, | |
| "step": 550, | |
| "train_runtime": 898.7767, | |
| "train_tokens_per_second": 6897.544 | |
| }, | |
| { | |
| "epoch": 0.2663946721065579, | |
| "grad_norm": 0.519650936126709, | |
| "learning_rate": 4.9037535214333287e-05, | |
| "loss": 0.5247, | |
| "num_input_tokens_seen": 6255144, | |
| "step": 555, | |
| "train_runtime": 906.8762, | |
| "train_tokens_per_second": 6897.462 | |
| }, | |
| { | |
| "epoch": 0.26879462410751787, | |
| "grad_norm": 0.568850040435791, | |
| "learning_rate": 4.9020198694028565e-05, | |
| "loss": 0.5647, | |
| "num_input_tokens_seen": 6306704, | |
| "step": 560, | |
| "train_runtime": 914.4502, | |
| "train_tokens_per_second": 6896.717 | |
| }, | |
| { | |
| "epoch": 0.2711945761084778, | |
| "grad_norm": 0.47335347533226013, | |
| "learning_rate": 4.900271054615123e-05, | |
| "loss": 0.4978, | |
| "num_input_tokens_seen": 6366360, | |
| "step": 565, | |
| "train_runtime": 923.5165, | |
| "train_tokens_per_second": 6893.607 | |
| }, | |
| { | |
| "epoch": 0.2735945281094378, | |
| "grad_norm": 0.6809021830558777, | |
| "learning_rate": 4.898507088109527e-05, | |
| "loss": 0.545, | |
| "num_input_tokens_seen": 6421288, | |
| "step": 570, | |
| "train_runtime": 931.4592, | |
| "train_tokens_per_second": 6893.794 | |
| }, | |
| { | |
| "epoch": 0.2759944801103978, | |
| "grad_norm": 0.41399407386779785, | |
| "learning_rate": 4.8967279810211114e-05, | |
| "loss": 0.5454, | |
| "num_input_tokens_seen": 6479424, | |
| "step": 575, | |
| "train_runtime": 939.8206, | |
| "train_tokens_per_second": 6894.32 | |
| }, | |
| { | |
| "epoch": 0.2783944321113578, | |
| "grad_norm": 0.6248930096626282, | |
| "learning_rate": 4.894933744580496e-05, | |
| "loss": 0.5506, | |
| "num_input_tokens_seen": 6534464, | |
| "step": 580, | |
| "train_runtime": 947.6162, | |
| "train_tokens_per_second": 6895.686 | |
| }, | |
| { | |
| "epoch": 0.2807943841123178, | |
| "grad_norm": 0.5835601687431335, | |
| "learning_rate": 4.893124390113802e-05, | |
| "loss": 0.5536, | |
| "num_input_tokens_seen": 6587088, | |
| "step": 585, | |
| "train_runtime": 955.2033, | |
| "train_tokens_per_second": 6896.006 | |
| }, | |
| { | |
| "epoch": 0.2831943361132777, | |
| "grad_norm": 0.6930661797523499, | |
| "learning_rate": 4.8912999290425854e-05, | |
| "loss": 0.5646, | |
| "num_input_tokens_seen": 6641552, | |
| "step": 590, | |
| "train_runtime": 963.1053, | |
| "train_tokens_per_second": 6895.977 | |
| }, | |
| { | |
| "epoch": 0.2855942881142377, | |
| "grad_norm": 0.6734236478805542, | |
| "learning_rate": 4.889460372883762e-05, | |
| "loss": 0.5492, | |
| "num_input_tokens_seen": 6695296, | |
| "step": 595, | |
| "train_runtime": 971.3483, | |
| "train_tokens_per_second": 6892.786 | |
| }, | |
| { | |
| "epoch": 0.2879942401151977, | |
| "grad_norm": 0.5208594799041748, | |
| "learning_rate": 4.887605733249535e-05, | |
| "loss": 0.5629, | |
| "num_input_tokens_seen": 6753000, | |
| "step": 600, | |
| "train_runtime": 979.3691, | |
| "train_tokens_per_second": 6895.255 | |
| }, | |
| { | |
| "epoch": 0.2903941921161577, | |
| "grad_norm": 0.5543494820594788, | |
| "learning_rate": 4.885736021847322e-05, | |
| "loss": 0.5165, | |
| "num_input_tokens_seen": 6808816, | |
| "step": 605, | |
| "train_runtime": 987.597, | |
| "train_tokens_per_second": 6894.326 | |
| }, | |
| { | |
| "epoch": 0.2927941441171177, | |
| "grad_norm": 0.4651249051094055, | |
| "learning_rate": 4.883851250479682e-05, | |
| "loss": 0.5292, | |
| "num_input_tokens_seen": 6866616, | |
| "step": 610, | |
| "train_runtime": 995.7307, | |
| "train_tokens_per_second": 6896.057 | |
| }, | |
| { | |
| "epoch": 0.2951940961180776, | |
| "grad_norm": 0.6964675188064575, | |
| "learning_rate": 4.881951431044241e-05, | |
| "loss": 0.5368, | |
| "num_input_tokens_seen": 6926136, | |
| "step": 615, | |
| "train_runtime": 1004.3343, | |
| "train_tokens_per_second": 6896.246 | |
| }, | |
| { | |
| "epoch": 0.2975940481190376, | |
| "grad_norm": 0.5867466330528259, | |
| "learning_rate": 4.8800365755336114e-05, | |
| "loss": 0.5104, | |
| "num_input_tokens_seen": 6982680, | |
| "step": 620, | |
| "train_runtime": 1012.6109, | |
| "train_tokens_per_second": 6895.719 | |
| }, | |
| { | |
| "epoch": 0.2999940001199976, | |
| "grad_norm": 0.7193952202796936, | |
| "learning_rate": 4.8781066960353264e-05, | |
| "loss": 0.5729, | |
| "num_input_tokens_seen": 7035152, | |
| "step": 625, | |
| "train_runtime": 1020.3607, | |
| "train_tokens_per_second": 6894.77 | |
| }, | |
| { | |
| "epoch": 0.3023939521209576, | |
| "grad_norm": 0.6436483860015869, | |
| "learning_rate": 4.876161804731756e-05, | |
| "loss": 0.5777, | |
| "num_input_tokens_seen": 7085976, | |
| "step": 630, | |
| "train_runtime": 1028.043, | |
| "train_tokens_per_second": 6892.684 | |
| }, | |
| { | |
| "epoch": 0.30479390412191754, | |
| "grad_norm": 0.8330582976341248, | |
| "learning_rate": 4.87420191390003e-05, | |
| "loss": 0.5729, | |
| "num_input_tokens_seen": 7139560, | |
| "step": 635, | |
| "train_runtime": 1035.6955, | |
| "train_tokens_per_second": 6893.493 | |
| }, | |
| { | |
| "epoch": 0.30719385612287753, | |
| "grad_norm": 0.5311642289161682, | |
| "learning_rate": 4.872227035911967e-05, | |
| "loss": 0.5212, | |
| "num_input_tokens_seen": 7194016, | |
| "step": 640, | |
| "train_runtime": 1043.4188, | |
| "train_tokens_per_second": 6894.658 | |
| }, | |
| { | |
| "epoch": 0.3095938081238375, | |
| "grad_norm": 0.5079819560050964, | |
| "learning_rate": 4.87023718323399e-05, | |
| "loss": 0.5227, | |
| "num_input_tokens_seen": 7249352, | |
| "step": 645, | |
| "train_runtime": 1051.5718, | |
| "train_tokens_per_second": 6893.825 | |
| }, | |
| { | |
| "epoch": 0.3119937601247975, | |
| "grad_norm": 0.5671476721763611, | |
| "learning_rate": 4.868232368427048e-05, | |
| "loss": 0.5057, | |
| "num_input_tokens_seen": 7312768, | |
| "step": 650, | |
| "train_runtime": 1060.8218, | |
| "train_tokens_per_second": 6893.494 | |
| }, | |
| { | |
| "epoch": 0.3143937121257575, | |
| "grad_norm": 0.5429338216781616, | |
| "learning_rate": 4.8662126041465414e-05, | |
| "loss": 0.522, | |
| "num_input_tokens_seen": 7371440, | |
| "step": 655, | |
| "train_runtime": 1068.911, | |
| "train_tokens_per_second": 6896.215 | |
| }, | |
| { | |
| "epoch": 0.31679366412671744, | |
| "grad_norm": 0.5430482625961304, | |
| "learning_rate": 4.864177903142237e-05, | |
| "loss": 0.5712, | |
| "num_input_tokens_seen": 7428856, | |
| "step": 660, | |
| "train_runtime": 1077.1698, | |
| "train_tokens_per_second": 6896.643 | |
| }, | |
| { | |
| "epoch": 0.31919361612767744, | |
| "grad_norm": 0.5577422380447388, | |
| "learning_rate": 4.862128278258191e-05, | |
| "loss": 0.5763, | |
| "num_input_tokens_seen": 7482928, | |
| "step": 665, | |
| "train_runtime": 1085.0793, | |
| "train_tokens_per_second": 6896.204 | |
| }, | |
| { | |
| "epoch": 0.32159356812863743, | |
| "grad_norm": 0.8080245852470398, | |
| "learning_rate": 4.8600637424326676e-05, | |
| "loss": 0.5921, | |
| "num_input_tokens_seen": 7537000, | |
| "step": 670, | |
| "train_runtime": 1092.9929, | |
| "train_tokens_per_second": 6895.744 | |
| }, | |
| { | |
| "epoch": 0.3239935201295974, | |
| "grad_norm": 0.5444366931915283, | |
| "learning_rate": 4.8579843086980536e-05, | |
| "loss": 0.5164, | |
| "num_input_tokens_seen": 7600512, | |
| "step": 675, | |
| "train_runtime": 1102.2798, | |
| "train_tokens_per_second": 6895.266 | |
| }, | |
| { | |
| "epoch": 0.3263934721305574, | |
| "grad_norm": 0.7307661771774292, | |
| "learning_rate": 4.855889990180781e-05, | |
| "loss": 0.4963, | |
| "num_input_tokens_seen": 7655032, | |
| "step": 680, | |
| "train_runtime": 1110.5484, | |
| "train_tokens_per_second": 6893.02 | |
| }, | |
| { | |
| "epoch": 0.32879342413151735, | |
| "grad_norm": 0.5061231851577759, | |
| "learning_rate": 4.853780800101241e-05, | |
| "loss": 0.5042, | |
| "num_input_tokens_seen": 7709432, | |
| "step": 685, | |
| "train_runtime": 1118.5898, | |
| "train_tokens_per_second": 6892.099 | |
| }, | |
| { | |
| "epoch": 0.33119337613247735, | |
| "grad_norm": 0.5457553863525391, | |
| "learning_rate": 4.851656751773702e-05, | |
| "loss": 0.5505, | |
| "num_input_tokens_seen": 7768248, | |
| "step": 690, | |
| "train_runtime": 1127.0452, | |
| "train_tokens_per_second": 6892.579 | |
| }, | |
| { | |
| "epoch": 0.33359332813343734, | |
| "grad_norm": 0.581109881401062, | |
| "learning_rate": 4.849517858606225e-05, | |
| "loss": 0.5219, | |
| "num_input_tokens_seen": 7821976, | |
| "step": 695, | |
| "train_runtime": 1135.0848, | |
| "train_tokens_per_second": 6891.094 | |
| }, | |
| { | |
| "epoch": 0.33599328013439733, | |
| "grad_norm": 0.6451846361160278, | |
| "learning_rate": 4.84736413410058e-05, | |
| "loss": 0.541, | |
| "num_input_tokens_seen": 7875264, | |
| "step": 700, | |
| "train_runtime": 1143.0269, | |
| "train_tokens_per_second": 6889.833 | |
| }, | |
| { | |
| "epoch": 0.33839323213535727, | |
| "grad_norm": 0.48146116733551025, | |
| "learning_rate": 4.8451955918521586e-05, | |
| "loss": 0.5666, | |
| "num_input_tokens_seen": 7929656, | |
| "step": 705, | |
| "train_runtime": 1150.8799, | |
| "train_tokens_per_second": 6890.081 | |
| }, | |
| { | |
| "epoch": 0.34079318413631726, | |
| "grad_norm": 0.5709965825080872, | |
| "learning_rate": 4.84301224554989e-05, | |
| "loss": 0.5295, | |
| "num_input_tokens_seen": 7980872, | |
| "step": 710, | |
| "train_runtime": 1158.4978, | |
| "train_tokens_per_second": 6888.983 | |
| }, | |
| { | |
| "epoch": 0.34319313613727725, | |
| "grad_norm": 0.6052954196929932, | |
| "learning_rate": 4.840814108976154e-05, | |
| "loss": 0.5509, | |
| "num_input_tokens_seen": 8037376, | |
| "step": 715, | |
| "train_runtime": 1166.5335, | |
| "train_tokens_per_second": 6889.966 | |
| }, | |
| { | |
| "epoch": 0.34559308813823725, | |
| "grad_norm": 0.5755806565284729, | |
| "learning_rate": 4.838601196006694e-05, | |
| "loss": 0.54, | |
| "num_input_tokens_seen": 8094024, | |
| "step": 720, | |
| "train_runtime": 1175.0556, | |
| "train_tokens_per_second": 6888.205 | |
| }, | |
| { | |
| "epoch": 0.34799304013919724, | |
| "grad_norm": 0.5676959753036499, | |
| "learning_rate": 4.8363735206105276e-05, | |
| "loss": 0.5663, | |
| "num_input_tokens_seen": 8152456, | |
| "step": 725, | |
| "train_runtime": 1183.2718, | |
| "train_tokens_per_second": 6889.758 | |
| }, | |
| { | |
| "epoch": 0.3503929921401572, | |
| "grad_norm": 0.7371501922607422, | |
| "learning_rate": 4.8341310968498656e-05, | |
| "loss": 0.5171, | |
| "num_input_tokens_seen": 8206424, | |
| "step": 730, | |
| "train_runtime": 1191.1851, | |
| "train_tokens_per_second": 6889.294 | |
| }, | |
| { | |
| "epoch": 0.35279294414111717, | |
| "grad_norm": 0.6847190260887146, | |
| "learning_rate": 4.831873938880012e-05, | |
| "loss": 0.5407, | |
| "num_input_tokens_seen": 8262160, | |
| "step": 735, | |
| "train_runtime": 1199.2457, | |
| "train_tokens_per_second": 6889.464 | |
| }, | |
| { | |
| "epoch": 0.35519289614207716, | |
| "grad_norm": 0.5282928347587585, | |
| "learning_rate": 4.829602060949282e-05, | |
| "loss": 0.5729, | |
| "num_input_tokens_seen": 8316480, | |
| "step": 740, | |
| "train_runtime": 1207.1347, | |
| "train_tokens_per_second": 6889.438 | |
| }, | |
| { | |
| "epoch": 0.35759284814303716, | |
| "grad_norm": 0.39273539185523987, | |
| "learning_rate": 4.827315477398914e-05, | |
| "loss": 0.4796, | |
| "num_input_tokens_seen": 8379024, | |
| "step": 745, | |
| "train_runtime": 1216.4818, | |
| "train_tokens_per_second": 6887.916 | |
| }, | |
| { | |
| "epoch": 0.3599928001439971, | |
| "grad_norm": 0.442878395318985, | |
| "learning_rate": 4.825014202662972e-05, | |
| "loss": 0.5178, | |
| "num_input_tokens_seen": 8436408, | |
| "step": 750, | |
| "train_runtime": 1224.6516, | |
| "train_tokens_per_second": 6888.823 | |
| }, | |
| { | |
| "epoch": 0.3623927521449571, | |
| "grad_norm": 0.5023097395896912, | |
| "learning_rate": 4.82269825126826e-05, | |
| "loss": 0.5436, | |
| "num_input_tokens_seen": 8494184, | |
| "step": 755, | |
| "train_runtime": 1233.1902, | |
| "train_tokens_per_second": 6887.975 | |
| }, | |
| { | |
| "epoch": 0.3647927041459171, | |
| "grad_norm": 0.6507300138473511, | |
| "learning_rate": 4.8203676378342263e-05, | |
| "loss": 0.5761, | |
| "num_input_tokens_seen": 8543600, | |
| "step": 760, | |
| "train_runtime": 1240.5356, | |
| "train_tokens_per_second": 6887.025 | |
| }, | |
| { | |
| "epoch": 0.36719265614687707, | |
| "grad_norm": 0.6500417590141296, | |
| "learning_rate": 4.818022377072876e-05, | |
| "loss": 0.5519, | |
| "num_input_tokens_seen": 8601672, | |
| "step": 765, | |
| "train_runtime": 1249.3942, | |
| "train_tokens_per_second": 6884.674 | |
| }, | |
| { | |
| "epoch": 0.36959260814783707, | |
| "grad_norm": 0.720543384552002, | |
| "learning_rate": 4.8156624837886744e-05, | |
| "loss": 0.5488, | |
| "num_input_tokens_seen": 8654824, | |
| "step": 770, | |
| "train_runtime": 1257.4098, | |
| "train_tokens_per_second": 6883.058 | |
| }, | |
| { | |
| "epoch": 0.371992560148797, | |
| "grad_norm": 0.5728187561035156, | |
| "learning_rate": 4.813287972878454e-05, | |
| "loss": 0.5093, | |
| "num_input_tokens_seen": 8709032, | |
| "step": 775, | |
| "train_runtime": 1265.5372, | |
| "train_tokens_per_second": 6881.688 | |
| }, | |
| { | |
| "epoch": 0.374392512149757, | |
| "grad_norm": 0.6271533966064453, | |
| "learning_rate": 4.810898859331322e-05, | |
| "loss": 0.5451, | |
| "num_input_tokens_seen": 8766264, | |
| "step": 780, | |
| "train_runtime": 1273.7019, | |
| "train_tokens_per_second": 6882.508 | |
| }, | |
| { | |
| "epoch": 0.376792464150717, | |
| "grad_norm": 0.5907756686210632, | |
| "learning_rate": 4.8084951582285634e-05, | |
| "loss": 0.4965, | |
| "num_input_tokens_seen": 8820344, | |
| "step": 785, | |
| "train_runtime": 1281.2863, | |
| "train_tokens_per_second": 6883.976 | |
| }, | |
| { | |
| "epoch": 0.379192416151677, | |
| "grad_norm": 0.5380600094795227, | |
| "learning_rate": 4.80607688474355e-05, | |
| "loss": 0.5298, | |
| "num_input_tokens_seen": 8881528, | |
| "step": 790, | |
| "train_runtime": 1289.7202, | |
| "train_tokens_per_second": 6886.399 | |
| }, | |
| { | |
| "epoch": 0.381592368152637, | |
| "grad_norm": 0.6812204718589783, | |
| "learning_rate": 4.803644054141639e-05, | |
| "loss": 0.5278, | |
| "num_input_tokens_seen": 8939712, | |
| "step": 795, | |
| "train_runtime": 1298.1298, | |
| "train_tokens_per_second": 6886.609 | |
| }, | |
| { | |
| "epoch": 0.3839923201535969, | |
| "grad_norm": 0.8065762519836426, | |
| "learning_rate": 4.8011966817800804e-05, | |
| "loss": 0.548, | |
| "num_input_tokens_seen": 8994888, | |
| "step": 800, | |
| "train_runtime": 1306.2424, | |
| "train_tokens_per_second": 6886.078 | |
| }, | |
| { | |
| "epoch": 0.3863922721545569, | |
| "grad_norm": 0.7721138596534729, | |
| "learning_rate": 4.79873478310792e-05, | |
| "loss": 0.5446, | |
| "num_input_tokens_seen": 9052200, | |
| "step": 805, | |
| "train_runtime": 1314.0422, | |
| "train_tokens_per_second": 6888.82 | |
| }, | |
| { | |
| "epoch": 0.3887922241555169, | |
| "grad_norm": 0.7508792281150818, | |
| "learning_rate": 4.796258373665899e-05, | |
| "loss": 0.5531, | |
| "num_input_tokens_seen": 9106936, | |
| "step": 810, | |
| "train_runtime": 1322.1708, | |
| "train_tokens_per_second": 6887.867 | |
| }, | |
| { | |
| "epoch": 0.3911921761564769, | |
| "grad_norm": 0.7303242087364197, | |
| "learning_rate": 4.793767469086361e-05, | |
| "loss": 0.5786, | |
| "num_input_tokens_seen": 9158400, | |
| "step": 815, | |
| "train_runtime": 1329.9099, | |
| "train_tokens_per_second": 6886.482 | |
| }, | |
| { | |
| "epoch": 0.3935921281574368, | |
| "grad_norm": 0.5493381023406982, | |
| "learning_rate": 4.791262085093147e-05, | |
| "loss": 0.5285, | |
| "num_input_tokens_seen": 9218552, | |
| "step": 820, | |
| "train_runtime": 1338.4057, | |
| "train_tokens_per_second": 6887.711 | |
| }, | |
| { | |
| "epoch": 0.3959920801583968, | |
| "grad_norm": 0.5721644163131714, | |
| "learning_rate": 4.788742237501499e-05, | |
| "loss": 0.5481, | |
| "num_input_tokens_seen": 9272768, | |
| "step": 825, | |
| "train_runtime": 1346.3952, | |
| "train_tokens_per_second": 6887.107 | |
| }, | |
| { | |
| "epoch": 0.3983920321593568, | |
| "grad_norm": 0.5689188241958618, | |
| "learning_rate": 4.786207942217965e-05, | |
| "loss": 0.5457, | |
| "num_input_tokens_seen": 9327048, | |
| "step": 830, | |
| "train_runtime": 1354.3004, | |
| "train_tokens_per_second": 6886.986 | |
| }, | |
| { | |
| "epoch": 0.4007919841603168, | |
| "grad_norm": 0.48985663056373596, | |
| "learning_rate": 4.783659215240289e-05, | |
| "loss": 0.5067, | |
| "num_input_tokens_seen": 9389344, | |
| "step": 835, | |
| "train_runtime": 1363.2987, | |
| "train_tokens_per_second": 6887.224 | |
| }, | |
| { | |
| "epoch": 0.4031919361612768, | |
| "grad_norm": 0.7661736011505127, | |
| "learning_rate": 4.78109607265732e-05, | |
| "loss": 0.5576, | |
| "num_input_tokens_seen": 9444656, | |
| "step": 840, | |
| "train_runtime": 1371.5402, | |
| "train_tokens_per_second": 6886.168 | |
| }, | |
| { | |
| "epoch": 0.40559188816223674, | |
| "grad_norm": 0.6617030501365662, | |
| "learning_rate": 4.778518530648899e-05, | |
| "loss": 0.5566, | |
| "num_input_tokens_seen": 9499464, | |
| "step": 845, | |
| "train_runtime": 1379.7517, | |
| "train_tokens_per_second": 6884.908 | |
| }, | |
| { | |
| "epoch": 0.40799184016319673, | |
| "grad_norm": 0.6450020670890808, | |
| "learning_rate": 4.77592660548577e-05, | |
| "loss": 0.5486, | |
| "num_input_tokens_seen": 9553432, | |
| "step": 850, | |
| "train_runtime": 1387.7923, | |
| "train_tokens_per_second": 6883.906 | |
| }, | |
| { | |
| "epoch": 0.4103917921641567, | |
| "grad_norm": 0.6538447737693787, | |
| "learning_rate": 4.7733203135294676e-05, | |
| "loss": 0.5289, | |
| "num_input_tokens_seen": 9608536, | |
| "step": 855, | |
| "train_runtime": 1396.0755, | |
| "train_tokens_per_second": 6882.533 | |
| }, | |
| { | |
| "epoch": 0.4127917441651167, | |
| "grad_norm": 0.5988488793373108, | |
| "learning_rate": 4.770699671232216e-05, | |
| "loss": 0.5261, | |
| "num_input_tokens_seen": 9661208, | |
| "step": 860, | |
| "train_runtime": 1403.8269, | |
| "train_tokens_per_second": 6882.051 | |
| }, | |
| { | |
| "epoch": 0.41519169616607665, | |
| "grad_norm": 0.5807068347930908, | |
| "learning_rate": 4.768064695136829e-05, | |
| "loss": 0.5306, | |
| "num_input_tokens_seen": 9721752, | |
| "step": 865, | |
| "train_runtime": 1412.2285, | |
| "train_tokens_per_second": 6883.98 | |
| }, | |
| { | |
| "epoch": 0.41759164816703664, | |
| "grad_norm": 0.48121166229248047, | |
| "learning_rate": 4.765415401876599e-05, | |
| "loss": 0.5549, | |
| "num_input_tokens_seen": 9779768, | |
| "step": 870, | |
| "train_runtime": 1420.4407, | |
| "train_tokens_per_second": 6885.024 | |
| }, | |
| { | |
| "epoch": 0.41999160016799664, | |
| "grad_norm": 0.565889835357666, | |
| "learning_rate": 4.7627518081751975e-05, | |
| "loss": 0.5355, | |
| "num_input_tokens_seen": 9835272, | |
| "step": 875, | |
| "train_runtime": 1428.7292, | |
| "train_tokens_per_second": 6883.93 | |
| }, | |
| { | |
| "epoch": 0.42239155216895663, | |
| "grad_norm": 0.7845768928527832, | |
| "learning_rate": 4.760073930846569e-05, | |
| "loss": 0.5411, | |
| "num_input_tokens_seen": 9890512, | |
| "step": 880, | |
| "train_runtime": 1436.5286, | |
| "train_tokens_per_second": 6885.009 | |
| }, | |
| { | |
| "epoch": 0.4247915041699166, | |
| "grad_norm": 0.6052142381668091, | |
| "learning_rate": 4.75738178679482e-05, | |
| "loss": 0.5432, | |
| "num_input_tokens_seen": 9944392, | |
| "step": 885, | |
| "train_runtime": 1444.2701, | |
| "train_tokens_per_second": 6885.41 | |
| }, | |
| { | |
| "epoch": 0.42719145617087656, | |
| "grad_norm": 0.6109101176261902, | |
| "learning_rate": 4.754675393014117e-05, | |
| "loss": 0.4997, | |
| "num_input_tokens_seen": 9999080, | |
| "step": 890, | |
| "train_runtime": 1452.4138, | |
| "train_tokens_per_second": 6884.457 | |
| }, | |
| { | |
| "epoch": 0.42959140817183655, | |
| "grad_norm": 0.8205054998397827, | |
| "learning_rate": 4.751954766588581e-05, | |
| "loss": 0.5276, | |
| "num_input_tokens_seen": 10053320, | |
| "step": 895, | |
| "train_runtime": 1460.9561, | |
| "train_tokens_per_second": 6881.329 | |
| }, | |
| { | |
| "epoch": 0.43199136017279655, | |
| "grad_norm": 0.6081852316856384, | |
| "learning_rate": 4.749219924692172e-05, | |
| "loss": 0.4801, | |
| "num_input_tokens_seen": 10112592, | |
| "step": 900, | |
| "train_runtime": 1469.5246, | |
| "train_tokens_per_second": 6881.54 | |
| }, | |
| { | |
| "epoch": 0.43439131217375654, | |
| "grad_norm": 0.6444746851921082, | |
| "learning_rate": 4.7464708845885877e-05, | |
| "loss": 0.4976, | |
| "num_input_tokens_seen": 10168072, | |
| "step": 905, | |
| "train_runtime": 1477.8554, | |
| "train_tokens_per_second": 6880.289 | |
| }, | |
| { | |
| "epoch": 0.43679126417471653, | |
| "grad_norm": 0.591349184513092, | |
| "learning_rate": 4.7437076636311514e-05, | |
| "loss": 0.5343, | |
| "num_input_tokens_seen": 10221648, | |
| "step": 910, | |
| "train_runtime": 1486.1153, | |
| "train_tokens_per_second": 6878.099 | |
| }, | |
| { | |
| "epoch": 0.43919121617567647, | |
| "grad_norm": 0.6491187810897827, | |
| "learning_rate": 4.7409302792627044e-05, | |
| "loss": 0.4946, | |
| "num_input_tokens_seen": 10284000, | |
| "step": 915, | |
| "train_runtime": 1494.8745, | |
| "train_tokens_per_second": 6879.507 | |
| }, | |
| { | |
| "epoch": 0.44159116817663646, | |
| "grad_norm": 0.6963967084884644, | |
| "learning_rate": 4.738138749015492e-05, | |
| "loss": 0.5109, | |
| "num_input_tokens_seen": 10340768, | |
| "step": 920, | |
| "train_runtime": 1502.9982, | |
| "train_tokens_per_second": 6880.094 | |
| }, | |
| { | |
| "epoch": 0.44399112017759645, | |
| "grad_norm": 0.4319298267364502, | |
| "learning_rate": 4.735333090511056e-05, | |
| "loss": 0.5082, | |
| "num_input_tokens_seen": 10400928, | |
| "step": 925, | |
| "train_runtime": 1511.7616, | |
| "train_tokens_per_second": 6880.006 | |
| }, | |
| { | |
| "epoch": 0.44639107217855645, | |
| "grad_norm": 0.6248960494995117, | |
| "learning_rate": 4.732513321460127e-05, | |
| "loss": 0.5612, | |
| "num_input_tokens_seen": 10456208, | |
| "step": 930, | |
| "train_runtime": 1519.9104, | |
| "train_tokens_per_second": 6879.49 | |
| }, | |
| { | |
| "epoch": 0.4487910241795164, | |
| "grad_norm": 0.7751626372337341, | |
| "learning_rate": 4.729679459662502e-05, | |
| "loss": 0.5253, | |
| "num_input_tokens_seen": 10513352, | |
| "step": 935, | |
| "train_runtime": 1528.6579, | |
| "train_tokens_per_second": 6877.505 | |
| }, | |
| { | |
| "epoch": 0.4511909761804764, | |
| "grad_norm": 0.5862913131713867, | |
| "learning_rate": 4.726831523006944e-05, | |
| "loss": 0.5403, | |
| "num_input_tokens_seen": 10568872, | |
| "step": 940, | |
| "train_runtime": 1537.6257, | |
| "train_tokens_per_second": 6873.501 | |
| }, | |
| { | |
| "epoch": 0.45359092818143637, | |
| "grad_norm": 0.7188037633895874, | |
| "learning_rate": 4.7239695294710586e-05, | |
| "loss": 0.5332, | |
| "num_input_tokens_seen": 10623984, | |
| "step": 945, | |
| "train_runtime": 1545.7364, | |
| "train_tokens_per_second": 6873.089 | |
| }, | |
| { | |
| "epoch": 0.45599088018239636, | |
| "grad_norm": 0.7903031706809998, | |
| "learning_rate": 4.7210934971211906e-05, | |
| "loss": 0.572, | |
| "num_input_tokens_seen": 10675064, | |
| "step": 950, | |
| "train_runtime": 1553.3218, | |
| "train_tokens_per_second": 6872.41 | |
| }, | |
| { | |
| "epoch": 0.45839083218335636, | |
| "grad_norm": 0.5360180139541626, | |
| "learning_rate": 4.718203444112301e-05, | |
| "loss": 0.4812, | |
| "num_input_tokens_seen": 10735624, | |
| "step": 955, | |
| "train_runtime": 1561.8181, | |
| "train_tokens_per_second": 6873.799 | |
| }, | |
| { | |
| "epoch": 0.4607907841843163, | |
| "grad_norm": 0.6711071133613586, | |
| "learning_rate": 4.7152993886878585e-05, | |
| "loss": 0.4681, | |
| "num_input_tokens_seen": 10790472, | |
| "step": 960, | |
| "train_runtime": 1569.6307, | |
| "train_tokens_per_second": 6874.529 | |
| }, | |
| { | |
| "epoch": 0.4631907361852763, | |
| "grad_norm": 0.6165657043457031, | |
| "learning_rate": 4.712381349179721e-05, | |
| "loss": 0.508, | |
| "num_input_tokens_seen": 10844896, | |
| "step": 965, | |
| "train_runtime": 1577.4118, | |
| "train_tokens_per_second": 6875.12 | |
| }, | |
| { | |
| "epoch": 0.4655906881862363, | |
| "grad_norm": 0.6834767460823059, | |
| "learning_rate": 4.709449344008021e-05, | |
| "loss": 0.4988, | |
| "num_input_tokens_seen": 10902552, | |
| "step": 970, | |
| "train_runtime": 1586.0811, | |
| "train_tokens_per_second": 6873.893 | |
| }, | |
| { | |
| "epoch": 0.46799064018719627, | |
| "grad_norm": 0.7366524338722229, | |
| "learning_rate": 4.706503391681049e-05, | |
| "loss": 0.5755, | |
| "num_input_tokens_seen": 10956224, | |
| "step": 975, | |
| "train_runtime": 1593.6535, | |
| "train_tokens_per_second": 6874.91 | |
| }, | |
| { | |
| "epoch": 0.47039059218815626, | |
| "grad_norm": 0.5903698205947876, | |
| "learning_rate": 4.7035435107951384e-05, | |
| "loss": 0.5283, | |
| "num_input_tokens_seen": 11011072, | |
| "step": 980, | |
| "train_runtime": 1601.6979, | |
| "train_tokens_per_second": 6874.625 | |
| }, | |
| { | |
| "epoch": 0.4727905441891162, | |
| "grad_norm": 0.631288468837738, | |
| "learning_rate": 4.700569720034545e-05, | |
| "loss": 0.4954, | |
| "num_input_tokens_seen": 11066344, | |
| "step": 985, | |
| "train_runtime": 1609.8085, | |
| "train_tokens_per_second": 6874.323 | |
| }, | |
| { | |
| "epoch": 0.4751904961900762, | |
| "grad_norm": 0.5448499917984009, | |
| "learning_rate": 4.697582038171332e-05, | |
| "loss": 0.5431, | |
| "num_input_tokens_seen": 11121472, | |
| "step": 990, | |
| "train_runtime": 1618.0718, | |
| "train_tokens_per_second": 6873.287 | |
| }, | |
| { | |
| "epoch": 0.4775904481910362, | |
| "grad_norm": 0.5397956967353821, | |
| "learning_rate": 4.694580484065248e-05, | |
| "loss": 0.4836, | |
| "num_input_tokens_seen": 11181736, | |
| "step": 995, | |
| "train_runtime": 1627.2301, | |
| "train_tokens_per_second": 6871.638 | |
| }, | |
| { | |
| "epoch": 0.4799904001919962, | |
| "grad_norm": 0.7059435248374939, | |
| "learning_rate": 4.6915650766636156e-05, | |
| "loss": 0.4765, | |
| "num_input_tokens_seen": 11241912, | |
| "step": 1000, | |
| "train_runtime": 1635.6606, | |
| "train_tokens_per_second": 6873.01 | |
| }, | |
| { | |
| "epoch": 0.4823903521929561, | |
| "grad_norm": 0.5551899075508118, | |
| "learning_rate": 4.6885358350011986e-05, | |
| "loss": 0.523, | |
| "num_input_tokens_seen": 11296568, | |
| "step": 1005, | |
| "train_runtime": 1644.0041, | |
| "train_tokens_per_second": 6871.375 | |
| }, | |
| { | |
| "epoch": 0.4847903041939161, | |
| "grad_norm": 0.659951388835907, | |
| "learning_rate": 4.6854927782000954e-05, | |
| "loss": 0.4891, | |
| "num_input_tokens_seen": 11351944, | |
| "step": 1010, | |
| "train_runtime": 1652.1239, | |
| "train_tokens_per_second": 6871.121 | |
| }, | |
| { | |
| "epoch": 0.4871902561948761, | |
| "grad_norm": 0.6763627529144287, | |
| "learning_rate": 4.6824359254696105e-05, | |
| "loss": 0.502, | |
| "num_input_tokens_seen": 11410584, | |
| "step": 1015, | |
| "train_runtime": 1661.21, | |
| "train_tokens_per_second": 6868.839 | |
| }, | |
| { | |
| "epoch": 0.4895902081958361, | |
| "grad_norm": 0.49618440866470337, | |
| "learning_rate": 4.6793652961061364e-05, | |
| "loss": 0.5451, | |
| "num_input_tokens_seen": 11465560, | |
| "step": 1020, | |
| "train_runtime": 1669.6454, | |
| "train_tokens_per_second": 6867.063 | |
| }, | |
| { | |
| "epoch": 0.4919901601967961, | |
| "grad_norm": 0.6427881717681885, | |
| "learning_rate": 4.676280909493028e-05, | |
| "loss": 0.5277, | |
| "num_input_tokens_seen": 11523960, | |
| "step": 1025, | |
| "train_runtime": 1678.2247, | |
| "train_tokens_per_second": 6866.756 | |
| }, | |
| { | |
| "epoch": 0.494390112197756, | |
| "grad_norm": 0.7086818218231201, | |
| "learning_rate": 4.673182785100485e-05, | |
| "loss": 0.4885, | |
| "num_input_tokens_seen": 11584904, | |
| "step": 1030, | |
| "train_runtime": 1687.3682, | |
| "train_tokens_per_second": 6865.665 | |
| }, | |
| { | |
| "epoch": 0.496790064198716, | |
| "grad_norm": 0.5998096466064453, | |
| "learning_rate": 4.6700709424854274e-05, | |
| "loss": 0.5266, | |
| "num_input_tokens_seen": 11642456, | |
| "step": 1035, | |
| "train_runtime": 1696.3396, | |
| "train_tokens_per_second": 6863.281 | |
| }, | |
| { | |
| "epoch": 0.499190016199676, | |
| "grad_norm": 0.6782186627388, | |
| "learning_rate": 4.66694540129137e-05, | |
| "loss": 0.5813, | |
| "num_input_tokens_seen": 11696912, | |
| "step": 1040, | |
| "train_runtime": 1704.0809, | |
| "train_tokens_per_second": 6864.059 | |
| }, | |
| { | |
| "epoch": 0.501589968200636, | |
| "grad_norm": 0.541053056716919, | |
| "learning_rate": 4.6638061812483005e-05, | |
| "loss": 0.4875, | |
| "num_input_tokens_seen": 11755104, | |
| "step": 1045, | |
| "train_runtime": 1712.4044, | |
| "train_tokens_per_second": 6864.677 | |
| }, | |
| { | |
| "epoch": 0.5039899202015959, | |
| "grad_norm": 0.6517828106880188, | |
| "learning_rate": 4.660653302172554e-05, | |
| "loss": 0.5367, | |
| "num_input_tokens_seen": 11810984, | |
| "step": 1050, | |
| "train_runtime": 1720.4999, | |
| "train_tokens_per_second": 6864.856 | |
| }, | |
| { | |
| "epoch": 0.5063898722025559, | |
| "grad_norm": 0.6961039900779724, | |
| "learning_rate": 4.6574867839666895e-05, | |
| "loss": 0.5314, | |
| "num_input_tokens_seen": 11862312, | |
| "step": 1055, | |
| "train_runtime": 1728.5831, | |
| "train_tokens_per_second": 6862.448 | |
| }, | |
| { | |
| "epoch": 0.5087898242035159, | |
| "grad_norm": 0.7300373911857605, | |
| "learning_rate": 4.654306646619361e-05, | |
| "loss": 0.5496, | |
| "num_input_tokens_seen": 11923072, | |
| "step": 1060, | |
| "train_runtime": 1737.4285, | |
| "train_tokens_per_second": 6862.482 | |
| }, | |
| { | |
| "epoch": 0.5111897762044759, | |
| "grad_norm": 0.7295413017272949, | |
| "learning_rate": 4.6511129102051954e-05, | |
| "loss": 0.5205, | |
| "num_input_tokens_seen": 11978568, | |
| "step": 1065, | |
| "train_runtime": 1745.4719, | |
| "train_tokens_per_second": 6862.653 | |
| }, | |
| { | |
| "epoch": 0.5135897282054359, | |
| "grad_norm": 0.5520017743110657, | |
| "learning_rate": 4.647905594884663e-05, | |
| "loss": 0.4768, | |
| "num_input_tokens_seen": 12035000, | |
| "step": 1070, | |
| "train_runtime": 1754.3574, | |
| "train_tokens_per_second": 6860.062 | |
| }, | |
| { | |
| "epoch": 0.5159896802063959, | |
| "grad_norm": 0.5629371404647827, | |
| "learning_rate": 4.6446847209039504e-05, | |
| "loss": 0.5136, | |
| "num_input_tokens_seen": 12096040, | |
| "step": 1075, | |
| "train_runtime": 1762.6751, | |
| "train_tokens_per_second": 6862.32 | |
| }, | |
| { | |
| "epoch": 0.5183896322073559, | |
| "grad_norm": 0.750357449054718, | |
| "learning_rate": 4.6414503085948334e-05, | |
| "loss": 0.5022, | |
| "num_input_tokens_seen": 12148448, | |
| "step": 1080, | |
| "train_runtime": 1770.3881, | |
| "train_tokens_per_second": 6862.025 | |
| }, | |
| { | |
| "epoch": 0.5207895842083158, | |
| "grad_norm": 0.9546124339103699, | |
| "learning_rate": 4.63820237837455e-05, | |
| "loss": 0.5196, | |
| "num_input_tokens_seen": 12207120, | |
| "step": 1085, | |
| "train_runtime": 1778.4216, | |
| "train_tokens_per_second": 6864.019 | |
| }, | |
| { | |
| "epoch": 0.5231895362092758, | |
| "grad_norm": 0.6891536712646484, | |
| "learning_rate": 4.634940950745668e-05, | |
| "loss": 0.5566, | |
| "num_input_tokens_seen": 12261136, | |
| "step": 1090, | |
| "train_runtime": 1786.6486, | |
| "train_tokens_per_second": 6862.646 | |
| }, | |
| { | |
| "epoch": 0.5255894882102358, | |
| "grad_norm": 0.7175304889678955, | |
| "learning_rate": 4.631666046295959e-05, | |
| "loss": 0.5483, | |
| "num_input_tokens_seen": 12313856, | |
| "step": 1095, | |
| "train_runtime": 1794.8084, | |
| "train_tokens_per_second": 6860.819 | |
| }, | |
| { | |
| "epoch": 0.5279894402111958, | |
| "grad_norm": 0.7148723602294922, | |
| "learning_rate": 4.628377685698268e-05, | |
| "loss": 0.5072, | |
| "num_input_tokens_seen": 12367984, | |
| "step": 1100, | |
| "train_runtime": 1802.927, | |
| "train_tokens_per_second": 6859.947 | |
| }, | |
| { | |
| "epoch": 0.5303893922121558, | |
| "grad_norm": 0.6276180148124695, | |
| "learning_rate": 4.6250758897103775e-05, | |
| "loss": 0.5316, | |
| "num_input_tokens_seen": 12422128, | |
| "step": 1105, | |
| "train_runtime": 1810.8688, | |
| "train_tokens_per_second": 6859.761 | |
| }, | |
| { | |
| "epoch": 0.5327893442131157, | |
| "grad_norm": 0.5570586919784546, | |
| "learning_rate": 4.621760679174887e-05, | |
| "loss": 0.4781, | |
| "num_input_tokens_seen": 12477576, | |
| "step": 1110, | |
| "train_runtime": 1818.8781, | |
| "train_tokens_per_second": 6860.04 | |
| }, | |
| { | |
| "epoch": 0.5351892962140757, | |
| "grad_norm": 0.46177980303764343, | |
| "learning_rate": 4.618432075019071e-05, | |
| "loss": 0.5028, | |
| "num_input_tokens_seen": 12536840, | |
| "step": 1115, | |
| "train_runtime": 1827.02, | |
| "train_tokens_per_second": 6861.906 | |
| }, | |
| { | |
| "epoch": 0.5375892482150357, | |
| "grad_norm": 0.8723595142364502, | |
| "learning_rate": 4.615090098254753e-05, | |
| "loss": 0.5637, | |
| "num_input_tokens_seen": 12592424, | |
| "step": 1120, | |
| "train_runtime": 1835.1133, | |
| "train_tokens_per_second": 6861.933 | |
| }, | |
| { | |
| "epoch": 0.5399892002159957, | |
| "grad_norm": 0.5950156450271606, | |
| "learning_rate": 4.6117347699781706e-05, | |
| "loss": 0.5276, | |
| "num_input_tokens_seen": 12650424, | |
| "step": 1125, | |
| "train_runtime": 1843.7548, | |
| "train_tokens_per_second": 6861.229 | |
| }, | |
| { | |
| "epoch": 0.5423891522169556, | |
| "grad_norm": 0.7282635569572449, | |
| "learning_rate": 4.608366111369843e-05, | |
| "loss": 0.518, | |
| "num_input_tokens_seen": 12706224, | |
| "step": 1130, | |
| "train_runtime": 1851.8221, | |
| "train_tokens_per_second": 6861.471 | |
| }, | |
| { | |
| "epoch": 0.5447891042179156, | |
| "grad_norm": 0.5508381724357605, | |
| "learning_rate": 4.6049841436944385e-05, | |
| "loss": 0.4956, | |
| "num_input_tokens_seen": 12767096, | |
| "step": 1135, | |
| "train_runtime": 1860.911, | |
| "train_tokens_per_second": 6860.67 | |
| }, | |
| { | |
| "epoch": 0.5471890562188756, | |
| "grad_norm": 0.57481849193573, | |
| "learning_rate": 4.6015888883006364e-05, | |
| "loss": 0.539, | |
| "num_input_tokens_seen": 12821808, | |
| "step": 1140, | |
| "train_runtime": 1868.849, | |
| "train_tokens_per_second": 6860.805 | |
| }, | |
| { | |
| "epoch": 0.5495890082198356, | |
| "grad_norm": 0.4912041425704956, | |
| "learning_rate": 4.598180366620996e-05, | |
| "loss": 0.5163, | |
| "num_input_tokens_seen": 12874928, | |
| "step": 1145, | |
| "train_runtime": 1876.9045, | |
| "train_tokens_per_second": 6859.661 | |
| }, | |
| { | |
| "epoch": 0.5519889602207956, | |
| "grad_norm": 0.666242778301239, | |
| "learning_rate": 4.594758600171821e-05, | |
| "loss": 0.5662, | |
| "num_input_tokens_seen": 12927848, | |
| "step": 1150, | |
| "train_runtime": 1884.5707, | |
| "train_tokens_per_second": 6859.837 | |
| }, | |
| { | |
| "epoch": 0.5543889122217556, | |
| "grad_norm": 0.6598814129829407, | |
| "learning_rate": 4.591323610553021e-05, | |
| "loss": 0.493, | |
| "num_input_tokens_seen": 12985640, | |
| "step": 1155, | |
| "train_runtime": 1892.6667, | |
| "train_tokens_per_second": 6861.028 | |
| }, | |
| { | |
| "epoch": 0.5567888642227156, | |
| "grad_norm": 0.8162060379981995, | |
| "learning_rate": 4.587875419447979e-05, | |
| "loss": 0.5289, | |
| "num_input_tokens_seen": 13041608, | |
| "step": 1160, | |
| "train_runtime": 1901.1294, | |
| "train_tokens_per_second": 6859.927 | |
| }, | |
| { | |
| "epoch": 0.5591888162236756, | |
| "grad_norm": 0.7061068415641785, | |
| "learning_rate": 4.5844140486234086e-05, | |
| "loss": 0.4997, | |
| "num_input_tokens_seen": 13094240, | |
| "step": 1165, | |
| "train_runtime": 1909.2382, | |
| "train_tokens_per_second": 6858.358 | |
| }, | |
| { | |
| "epoch": 0.5615887682246355, | |
| "grad_norm": 0.5444318056106567, | |
| "learning_rate": 4.580939519929226e-05, | |
| "loss": 0.5155, | |
| "num_input_tokens_seen": 13150544, | |
| "step": 1170, | |
| "train_runtime": 1917.2941, | |
| "train_tokens_per_second": 6858.908 | |
| }, | |
| { | |
| "epoch": 0.5639887202255955, | |
| "grad_norm": 0.5705589652061462, | |
| "learning_rate": 4.577451855298402e-05, | |
| "loss": 0.4927, | |
| "num_input_tokens_seen": 13211016, | |
| "step": 1175, | |
| "train_runtime": 1925.8239, | |
| "train_tokens_per_second": 6859.93 | |
| }, | |
| { | |
| "epoch": 0.5663886722265554, | |
| "grad_norm": 0.6715133190155029, | |
| "learning_rate": 4.5739510767468295e-05, | |
| "loss": 0.5525, | |
| "num_input_tokens_seen": 13269168, | |
| "step": 1180, | |
| "train_runtime": 1934.5386, | |
| "train_tokens_per_second": 6859.087 | |
| }, | |
| { | |
| "epoch": 0.5687886242275154, | |
| "grad_norm": 0.5893720388412476, | |
| "learning_rate": 4.570437206373183e-05, | |
| "loss": 0.5094, | |
| "num_input_tokens_seen": 13326336, | |
| "step": 1185, | |
| "train_runtime": 1942.8203, | |
| "train_tokens_per_second": 6859.274 | |
| }, | |
| { | |
| "epoch": 0.5711885762284754, | |
| "grad_norm": 0.5553702116012573, | |
| "learning_rate": 4.5669102663587795e-05, | |
| "loss": 0.5036, | |
| "num_input_tokens_seen": 13382784, | |
| "step": 1190, | |
| "train_runtime": 1950.7367, | |
| "train_tokens_per_second": 6860.374 | |
| }, | |
| { | |
| "epoch": 0.5735885282294354, | |
| "grad_norm": 0.9842544198036194, | |
| "learning_rate": 4.563370278967437e-05, | |
| "loss": 0.523, | |
| "num_input_tokens_seen": 13438016, | |
| "step": 1195, | |
| "train_runtime": 1958.7567, | |
| "train_tokens_per_second": 6860.482 | |
| }, | |
| { | |
| "epoch": 0.5759884802303954, | |
| "grad_norm": 0.7406736612319946, | |
| "learning_rate": 4.559817266545337e-05, | |
| "loss": 0.562, | |
| "num_input_tokens_seen": 13492904, | |
| "step": 1200, | |
| "train_runtime": 1966.6536, | |
| "train_tokens_per_second": 6860.844 | |
| }, | |
| { | |
| "epoch": 0.5783884322313554, | |
| "grad_norm": 0.6010822057723999, | |
| "learning_rate": 4.5562512515208816e-05, | |
| "loss": 0.5257, | |
| "num_input_tokens_seen": 13546992, | |
| "step": 1205, | |
| "train_runtime": 1974.2569, | |
| "train_tokens_per_second": 6861.818 | |
| }, | |
| { | |
| "epoch": 0.5807883842323154, | |
| "grad_norm": 0.5682114362716675, | |
| "learning_rate": 4.5526722564045486e-05, | |
| "loss": 0.5234, | |
| "num_input_tokens_seen": 13599704, | |
| "step": 1210, | |
| "train_runtime": 1982.0432, | |
| "train_tokens_per_second": 6861.457 | |
| }, | |
| { | |
| "epoch": 0.5831883362332754, | |
| "grad_norm": 0.7476803064346313, | |
| "learning_rate": 4.5490803037887556e-05, | |
| "loss": 0.4522, | |
| "num_input_tokens_seen": 13658840, | |
| "step": 1215, | |
| "train_runtime": 1990.3973, | |
| "train_tokens_per_second": 6862.369 | |
| }, | |
| { | |
| "epoch": 0.5855882882342354, | |
| "grad_norm": 0.8684011697769165, | |
| "learning_rate": 4.545475416347714e-05, | |
| "loss": 0.504, | |
| "num_input_tokens_seen": 13712920, | |
| "step": 1220, | |
| "train_runtime": 1998.5695, | |
| "train_tokens_per_second": 6861.367 | |
| }, | |
| { | |
| "epoch": 0.5879882402351952, | |
| "grad_norm": 0.6915135383605957, | |
| "learning_rate": 4.5418576168372864e-05, | |
| "loss": 0.5473, | |
| "num_input_tokens_seen": 13768056, | |
| "step": 1225, | |
| "train_runtime": 2006.2278, | |
| "train_tokens_per_second": 6862.658 | |
| }, | |
| { | |
| "epoch": 0.5903881922361552, | |
| "grad_norm": 0.6309444308280945, | |
| "learning_rate": 4.538226928094841e-05, | |
| "loss": 0.5321, | |
| "num_input_tokens_seen": 13826288, | |
| "step": 1230, | |
| "train_runtime": 2014.608, | |
| "train_tokens_per_second": 6863.016 | |
| }, | |
| { | |
| "epoch": 0.5927881442371152, | |
| "grad_norm": 0.7776080965995789, | |
| "learning_rate": 4.534583373039112e-05, | |
| "loss": 0.5578, | |
| "num_input_tokens_seen": 13880688, | |
| "step": 1235, | |
| "train_runtime": 2022.5528, | |
| "train_tokens_per_second": 6862.955 | |
| }, | |
| { | |
| "epoch": 0.5951880962380752, | |
| "grad_norm": 0.5800984501838684, | |
| "learning_rate": 4.530926974670052e-05, | |
| "loss": 0.5097, | |
| "num_input_tokens_seen": 13937072, | |
| "step": 1240, | |
| "train_runtime": 2030.7522, | |
| "train_tokens_per_second": 6863.01 | |
| }, | |
| { | |
| "epoch": 0.5975880482390352, | |
| "grad_norm": 0.6254319548606873, | |
| "learning_rate": 4.5272577560686834e-05, | |
| "loss": 0.5038, | |
| "num_input_tokens_seen": 13990528, | |
| "step": 1245, | |
| "train_runtime": 2038.6265, | |
| "train_tokens_per_second": 6862.723 | |
| }, | |
| { | |
| "epoch": 0.5999880002399952, | |
| "grad_norm": 0.7174450755119324, | |
| "learning_rate": 4.523575740396962e-05, | |
| "loss": 0.5304, | |
| "num_input_tokens_seen": 14044296, | |
| "step": 1250, | |
| "train_runtime": 2046.8343, | |
| "train_tokens_per_second": 6861.472 | |
| }, | |
| { | |
| "epoch": 0.6023879522409552, | |
| "grad_norm": 0.7481257915496826, | |
| "learning_rate": 4.5198809508976206e-05, | |
| "loss": 0.4927, | |
| "num_input_tokens_seen": 14102520, | |
| "step": 1255, | |
| "train_runtime": 2055.3394, | |
| "train_tokens_per_second": 6861.407 | |
| }, | |
| { | |
| "epoch": 0.6047879042419152, | |
| "grad_norm": 0.909005343914032, | |
| "learning_rate": 4.516173410894028e-05, | |
| "loss": 0.5067, | |
| "num_input_tokens_seen": 14153848, | |
| "step": 1260, | |
| "train_runtime": 2062.8941, | |
| "train_tokens_per_second": 6861.161 | |
| }, | |
| { | |
| "epoch": 0.6071878562428752, | |
| "grad_norm": 0.674818754196167, | |
| "learning_rate": 4.512453143790042e-05, | |
| "loss": 0.528, | |
| "num_input_tokens_seen": 14210416, | |
| "step": 1265, | |
| "train_runtime": 2071.062, | |
| "train_tokens_per_second": 6861.415 | |
| }, | |
| { | |
| "epoch": 0.6095878082438351, | |
| "grad_norm": 0.7137752771377563, | |
| "learning_rate": 4.508720173069859e-05, | |
| "loss": 0.5395, | |
| "num_input_tokens_seen": 14263360, | |
| "step": 1270, | |
| "train_runtime": 2079.097, | |
| "train_tokens_per_second": 6860.363 | |
| }, | |
| { | |
| "epoch": 0.6119877602447951, | |
| "grad_norm": 0.5564314723014832, | |
| "learning_rate": 4.5049745222978665e-05, | |
| "loss": 0.522, | |
| "num_input_tokens_seen": 14320200, | |
| "step": 1275, | |
| "train_runtime": 2087.1564, | |
| "train_tokens_per_second": 6861.105 | |
| }, | |
| { | |
| "epoch": 0.6143877122457551, | |
| "grad_norm": 0.7505349516868591, | |
| "learning_rate": 4.501216215118498e-05, | |
| "loss": 0.5303, | |
| "num_input_tokens_seen": 14376904, | |
| "step": 1280, | |
| "train_runtime": 2095.304, | |
| "train_tokens_per_second": 6861.488 | |
| }, | |
| { | |
| "epoch": 0.616787664246715, | |
| "grad_norm": 0.6077600121498108, | |
| "learning_rate": 4.497445275256076e-05, | |
| "loss": 0.5027, | |
| "num_input_tokens_seen": 14434888, | |
| "step": 1285, | |
| "train_runtime": 2103.8746, | |
| "train_tokens_per_second": 6861.097 | |
| }, | |
| { | |
| "epoch": 0.619187616247675, | |
| "grad_norm": 0.6120113730430603, | |
| "learning_rate": 4.4936617265146696e-05, | |
| "loss": 0.5192, | |
| "num_input_tokens_seen": 14489232, | |
| "step": 1290, | |
| "train_runtime": 2112.087, | |
| "train_tokens_per_second": 6860.149 | |
| }, | |
| { | |
| "epoch": 0.621587568248635, | |
| "grad_norm": 0.7720391750335693, | |
| "learning_rate": 4.489865592777941e-05, | |
| "loss": 0.5137, | |
| "num_input_tokens_seen": 14543200, | |
| "step": 1295, | |
| "train_runtime": 2119.9779, | |
| "train_tokens_per_second": 6860.072 | |
| }, | |
| { | |
| "epoch": 0.623987520249595, | |
| "grad_norm": 0.8337739706039429, | |
| "learning_rate": 4.486056898008996e-05, | |
| "loss": 0.5647, | |
| "num_input_tokens_seen": 14597160, | |
| "step": 1300, | |
| "train_runtime": 2127.8662, | |
| "train_tokens_per_second": 6859.999 | |
| }, | |
| { | |
| "epoch": 0.626387472250555, | |
| "grad_norm": 0.6936734914779663, | |
| "learning_rate": 4.48223566625023e-05, | |
| "loss": 0.5372, | |
| "num_input_tokens_seen": 14656120, | |
| "step": 1305, | |
| "train_runtime": 2136.0775, | |
| "train_tokens_per_second": 6861.23 | |
| }, | |
| { | |
| "epoch": 0.628787424251515, | |
| "grad_norm": 0.42849820852279663, | |
| "learning_rate": 4.47840192162318e-05, | |
| "loss": 0.4987, | |
| "num_input_tokens_seen": 14715168, | |
| "step": 1310, | |
| "train_runtime": 2144.4803, | |
| "train_tokens_per_second": 6861.881 | |
| }, | |
| { | |
| "epoch": 0.6311873762524749, | |
| "grad_norm": 0.6073727607727051, | |
| "learning_rate": 4.47455568832837e-05, | |
| "loss": 0.5242, | |
| "num_input_tokens_seen": 14771992, | |
| "step": 1315, | |
| "train_runtime": 2152.4662, | |
| "train_tokens_per_second": 6862.822 | |
| }, | |
| { | |
| "epoch": 0.6335873282534349, | |
| "grad_norm": 0.81267911195755, | |
| "learning_rate": 4.470696990645158e-05, | |
| "loss": 0.5488, | |
| "num_input_tokens_seen": 14827224, | |
| "step": 1320, | |
| "train_runtime": 2160.85, | |
| "train_tokens_per_second": 6861.755 | |
| }, | |
| { | |
| "epoch": 0.6359872802543949, | |
| "grad_norm": 0.9082570672035217, | |
| "learning_rate": 4.4668258529315855e-05, | |
| "loss": 0.5578, | |
| "num_input_tokens_seen": 14880216, | |
| "step": 1325, | |
| "train_runtime": 2168.5347, | |
| "train_tokens_per_second": 6861.876 | |
| }, | |
| { | |
| "epoch": 0.6383872322553549, | |
| "grad_norm": 0.4958833158016205, | |
| "learning_rate": 4.462942299624219e-05, | |
| "loss": 0.4897, | |
| "num_input_tokens_seen": 14938264, | |
| "step": 1330, | |
| "train_runtime": 2176.7759, | |
| "train_tokens_per_second": 6862.564 | |
| }, | |
| { | |
| "epoch": 0.6407871842563149, | |
| "grad_norm": 0.5597286224365234, | |
| "learning_rate": 4.459046355238e-05, | |
| "loss": 0.5071, | |
| "num_input_tokens_seen": 14996424, | |
| "step": 1335, | |
| "train_runtime": 2184.9625, | |
| "train_tokens_per_second": 6863.47 | |
| }, | |
| { | |
| "epoch": 0.6431871362572749, | |
| "grad_norm": 0.5538758635520935, | |
| "learning_rate": 4.455138044366088e-05, | |
| "loss": 0.5117, | |
| "num_input_tokens_seen": 15054880, | |
| "step": 1340, | |
| "train_runtime": 2193.2667, | |
| "train_tokens_per_second": 6864.136 | |
| }, | |
| { | |
| "epoch": 0.6455870882582349, | |
| "grad_norm": 0.6640130877494812, | |
| "learning_rate": 4.4512173916797085e-05, | |
| "loss": 0.4721, | |
| "num_input_tokens_seen": 15117888, | |
| "step": 1345, | |
| "train_runtime": 2202.1591, | |
| "train_tokens_per_second": 6865.03 | |
| }, | |
| { | |
| "epoch": 0.6479870402591948, | |
| "grad_norm": 0.8442539572715759, | |
| "learning_rate": 4.447284421927991e-05, | |
| "loss": 0.554, | |
| "num_input_tokens_seen": 15175016, | |
| "step": 1350, | |
| "train_runtime": 2211.0135, | |
| "train_tokens_per_second": 6863.376 | |
| }, | |
| { | |
| "epoch": 0.6503869922601548, | |
| "grad_norm": 0.7367165684700012, | |
| "learning_rate": 4.443339159937818e-05, | |
| "loss": 0.5125, | |
| "num_input_tokens_seen": 15230944, | |
| "step": 1355, | |
| "train_runtime": 2219.076, | |
| "train_tokens_per_second": 6863.642 | |
| }, | |
| { | |
| "epoch": 0.6527869442611148, | |
| "grad_norm": 0.6845333576202393, | |
| "learning_rate": 4.439381630613668e-05, | |
| "loss": 0.5286, | |
| "num_input_tokens_seen": 15287896, | |
| "step": 1360, | |
| "train_runtime": 2227.4192, | |
| "train_tokens_per_second": 6863.502 | |
| }, | |
| { | |
| "epoch": 0.6551868962620747, | |
| "grad_norm": 0.6416659355163574, | |
| "learning_rate": 4.435411858937456e-05, | |
| "loss": 0.6131, | |
| "num_input_tokens_seen": 15342584, | |
| "step": 1365, | |
| "train_runtime": 2235.1031, | |
| "train_tokens_per_second": 6864.374 | |
| }, | |
| { | |
| "epoch": 0.6575868482630347, | |
| "grad_norm": 0.5809879302978516, | |
| "learning_rate": 4.431429869968378e-05, | |
| "loss": 0.5062, | |
| "num_input_tokens_seen": 15404096, | |
| "step": 1370, | |
| "train_runtime": 2243.5171, | |
| "train_tokens_per_second": 6866.048 | |
| }, | |
| { | |
| "epoch": 0.6599868002639947, | |
| "grad_norm": 0.6339114308357239, | |
| "learning_rate": 4.427435688842748e-05, | |
| "loss": 0.4943, | |
| "num_input_tokens_seen": 15462616, | |
| "step": 1375, | |
| "train_runtime": 2251.8474, | |
| "train_tokens_per_second": 6866.636 | |
| }, | |
| { | |
| "epoch": 0.6623867522649547, | |
| "grad_norm": 0.4654648005962372, | |
| "learning_rate": 4.423429340773847e-05, | |
| "loss": 0.5096, | |
| "num_input_tokens_seen": 15519912, | |
| "step": 1380, | |
| "train_runtime": 2260.1318, | |
| "train_tokens_per_second": 6866.817 | |
| }, | |
| { | |
| "epoch": 0.6647867042659147, | |
| "grad_norm": 0.6752036809921265, | |
| "learning_rate": 4.41941085105176e-05, | |
| "loss": 0.5394, | |
| "num_input_tokens_seen": 15576136, | |
| "step": 1385, | |
| "train_runtime": 2268.2935, | |
| "train_tokens_per_second": 6866.896 | |
| }, | |
| { | |
| "epoch": 0.6671866562668747, | |
| "grad_norm": 0.5208489894866943, | |
| "learning_rate": 4.415380245043213e-05, | |
| "loss": 0.4537, | |
| "num_input_tokens_seen": 15633480, | |
| "step": 1390, | |
| "train_runtime": 2276.6508, | |
| "train_tokens_per_second": 6866.877 | |
| }, | |
| { | |
| "epoch": 0.6695866082678347, | |
| "grad_norm": 0.6454225778579712, | |
| "learning_rate": 4.4113375481914186e-05, | |
| "loss": 0.5155, | |
| "num_input_tokens_seen": 15688200, | |
| "step": 1395, | |
| "train_runtime": 2284.8437, | |
| "train_tokens_per_second": 6866.203 | |
| }, | |
| { | |
| "epoch": 0.6719865602687947, | |
| "grad_norm": 0.5845027565956116, | |
| "learning_rate": 4.407282786015913e-05, | |
| "loss": 0.5255, | |
| "num_input_tokens_seen": 15742392, | |
| "step": 1400, | |
| "train_runtime": 2292.7215, | |
| "train_tokens_per_second": 6866.247 | |
| }, | |
| { | |
| "epoch": 0.6743865122697547, | |
| "grad_norm": 0.9591690301895142, | |
| "learning_rate": 4.403215984112392e-05, | |
| "loss": 0.5122, | |
| "num_input_tokens_seen": 15799472, | |
| "step": 1405, | |
| "train_runtime": 2301.0926, | |
| "train_tokens_per_second": 6866.074 | |
| }, | |
| { | |
| "epoch": 0.6767864642707145, | |
| "grad_norm": 0.6333798766136169, | |
| "learning_rate": 4.3991371681525556e-05, | |
| "loss": 0.511, | |
| "num_input_tokens_seen": 15858960, | |
| "step": 1410, | |
| "train_runtime": 2309.9976, | |
| "train_tokens_per_second": 6865.358 | |
| }, | |
| { | |
| "epoch": 0.6791864162716745, | |
| "grad_norm": 0.5859664082527161, | |
| "learning_rate": 4.395046363883941e-05, | |
| "loss": 0.5375, | |
| "num_input_tokens_seen": 15915472, | |
| "step": 1415, | |
| "train_runtime": 2317.9598, | |
| "train_tokens_per_second": 6866.155 | |
| }, | |
| { | |
| "epoch": 0.6815863682726345, | |
| "grad_norm": 0.6732012629508972, | |
| "learning_rate": 4.390943597129761e-05, | |
| "loss": 0.5682, | |
| "num_input_tokens_seen": 15970752, | |
| "step": 1420, | |
| "train_runtime": 2325.6345, | |
| "train_tokens_per_second": 6867.267 | |
| }, | |
| { | |
| "epoch": 0.6839863202735945, | |
| "grad_norm": 0.7597581148147583, | |
| "learning_rate": 4.3868288937887445e-05, | |
| "loss": 0.5164, | |
| "num_input_tokens_seen": 16025456, | |
| "step": 1425, | |
| "train_runtime": 2333.8264, | |
| "train_tokens_per_second": 6866.601 | |
| }, | |
| { | |
| "epoch": 0.6863862722745545, | |
| "grad_norm": 0.7212057113647461, | |
| "learning_rate": 4.382702279834965e-05, | |
| "loss": 0.5524, | |
| "num_input_tokens_seen": 16075744, | |
| "step": 1430, | |
| "train_runtime": 2341.4051, | |
| "train_tokens_per_second": 6865.853 | |
| }, | |
| { | |
| "epoch": 0.6887862242755145, | |
| "grad_norm": 0.58528733253479, | |
| "learning_rate": 4.378563781317687e-05, | |
| "loss": 0.497, | |
| "num_input_tokens_seen": 16137672, | |
| "step": 1435, | |
| "train_runtime": 2350.3848, | |
| "train_tokens_per_second": 6865.97 | |
| }, | |
| { | |
| "epoch": 0.6911861762764745, | |
| "grad_norm": 0.570091962814331, | |
| "learning_rate": 4.374413424361195e-05, | |
| "loss": 0.4888, | |
| "num_input_tokens_seen": 16199088, | |
| "step": 1440, | |
| "train_runtime": 2358.886, | |
| "train_tokens_per_second": 6867.262 | |
| }, | |
| { | |
| "epoch": 0.6935861282774345, | |
| "grad_norm": 0.768666684627533, | |
| "learning_rate": 4.370251235164625e-05, | |
| "loss": 0.5343, | |
| "num_input_tokens_seen": 16253792, | |
| "step": 1445, | |
| "train_runtime": 2367.1689, | |
| "train_tokens_per_second": 6866.342 | |
| }, | |
| { | |
| "epoch": 0.6959860802783945, | |
| "grad_norm": 0.6287879347801208, | |
| "learning_rate": 4.366077240001813e-05, | |
| "loss": 0.4848, | |
| "num_input_tokens_seen": 16316608, | |
| "step": 1450, | |
| "train_runtime": 2376.0866, | |
| "train_tokens_per_second": 6867.009 | |
| }, | |
| { | |
| "epoch": 0.6983860322793544, | |
| "grad_norm": 0.74793541431427, | |
| "learning_rate": 4.361891465221112e-05, | |
| "loss": 0.4847, | |
| "num_input_tokens_seen": 16375648, | |
| "step": 1455, | |
| "train_runtime": 2384.4535, | |
| "train_tokens_per_second": 6867.673 | |
| }, | |
| { | |
| "epoch": 0.7007859842803144, | |
| "grad_norm": 0.6209436655044556, | |
| "learning_rate": 4.3576939372452394e-05, | |
| "loss": 0.5295, | |
| "num_input_tokens_seen": 16429360, | |
| "step": 1460, | |
| "train_runtime": 2392.6099, | |
| "train_tokens_per_second": 6866.711 | |
| }, | |
| { | |
| "epoch": 0.7031859362812743, | |
| "grad_norm": 0.7456108331680298, | |
| "learning_rate": 4.353484682571101e-05, | |
| "loss": 0.5144, | |
| "num_input_tokens_seen": 16480088, | |
| "step": 1465, | |
| "train_runtime": 2400.4701, | |
| "train_tokens_per_second": 6865.359 | |
| }, | |
| { | |
| "epoch": 0.7055858882822343, | |
| "grad_norm": 0.573098361492157, | |
| "learning_rate": 4.349263727769629e-05, | |
| "loss": 0.4636, | |
| "num_input_tokens_seen": 16538968, | |
| "step": 1470, | |
| "train_runtime": 2408.9134, | |
| "train_tokens_per_second": 6865.738 | |
| }, | |
| { | |
| "epoch": 0.7079858402831943, | |
| "grad_norm": 0.6599897146224976, | |
| "learning_rate": 4.3450310994856135e-05, | |
| "loss": 0.5415, | |
| "num_input_tokens_seen": 16595104, | |
| "step": 1475, | |
| "train_runtime": 2417.2231, | |
| "train_tokens_per_second": 6865.359 | |
| }, | |
| { | |
| "epoch": 0.7103857922841543, | |
| "grad_norm": 0.9016920328140259, | |
| "learning_rate": 4.3407868244375315e-05, | |
| "loss": 0.5367, | |
| "num_input_tokens_seen": 16650488, | |
| "step": 1480, | |
| "train_runtime": 2425.2913, | |
| "train_tokens_per_second": 6865.356 | |
| }, | |
| { | |
| "epoch": 0.7127857442851143, | |
| "grad_norm": 0.7661956548690796, | |
| "learning_rate": 4.3365309294173825e-05, | |
| "loss": 0.4729, | |
| "num_input_tokens_seen": 16701384, | |
| "step": 1485, | |
| "train_runtime": 2432.991, | |
| "train_tokens_per_second": 6864.548 | |
| }, | |
| { | |
| "epoch": 0.7151856962860743, | |
| "grad_norm": 0.8703396916389465, | |
| "learning_rate": 4.332263441290515e-05, | |
| "loss": 0.5373, | |
| "num_input_tokens_seen": 16754152, | |
| "step": 1490, | |
| "train_runtime": 2440.463, | |
| "train_tokens_per_second": 6865.153 | |
| }, | |
| { | |
| "epoch": 0.7175856482870343, | |
| "grad_norm": 0.633375883102417, | |
| "learning_rate": 4.3279843869954604e-05, | |
| "loss": 0.5037, | |
| "num_input_tokens_seen": 16809056, | |
| "step": 1495, | |
| "train_runtime": 2448.2645, | |
| "train_tokens_per_second": 6865.703 | |
| }, | |
| { | |
| "epoch": 0.7199856002879942, | |
| "grad_norm": 0.7101417779922485, | |
| "learning_rate": 4.3236937935437614e-05, | |
| "loss": 0.5324, | |
| "num_input_tokens_seen": 16859504, | |
| "step": 1500, | |
| "train_runtime": 2455.6516, | |
| "train_tokens_per_second": 6865.593 | |
| }, | |
| { | |
| "epoch": 0.7223855522889542, | |
| "grad_norm": 0.6423754692077637, | |
| "learning_rate": 4.3193916880198004e-05, | |
| "loss": 0.5109, | |
| "num_input_tokens_seen": 16919952, | |
| "step": 1505, | |
| "train_runtime": 2464.4089, | |
| "train_tokens_per_second": 6865.724 | |
| }, | |
| { | |
| "epoch": 0.7247855042899142, | |
| "grad_norm": 0.7076619863510132, | |
| "learning_rate": 4.3150780975806315e-05, | |
| "loss": 0.5425, | |
| "num_input_tokens_seen": 16976592, | |
| "step": 1510, | |
| "train_runtime": 2472.6158, | |
| "train_tokens_per_second": 6865.843 | |
| }, | |
| { | |
| "epoch": 0.7271854562908742, | |
| "grad_norm": 0.5288546085357666, | |
| "learning_rate": 4.310753049455806e-05, | |
| "loss": 0.515, | |
| "num_input_tokens_seen": 17034816, | |
| "step": 1515, | |
| "train_runtime": 2480.9341, | |
| "train_tokens_per_second": 6866.291 | |
| }, | |
| { | |
| "epoch": 0.7295854082918342, | |
| "grad_norm": 0.6262106895446777, | |
| "learning_rate": 4.3064165709472036e-05, | |
| "loss": 0.5271, | |
| "num_input_tokens_seen": 17088560, | |
| "step": 1520, | |
| "train_runtime": 2488.3235, | |
| "train_tokens_per_second": 6867.499 | |
| }, | |
| { | |
| "epoch": 0.7319853602927942, | |
| "grad_norm": 0.5250151753425598, | |
| "learning_rate": 4.3020686894288564e-05, | |
| "loss": 0.5055, | |
| "num_input_tokens_seen": 17144640, | |
| "step": 1525, | |
| "train_runtime": 2496.1311, | |
| "train_tokens_per_second": 6868.485 | |
| }, | |
| { | |
| "epoch": 0.7343853122937541, | |
| "grad_norm": 0.7805795669555664, | |
| "learning_rate": 4.2977094323467784e-05, | |
| "loss": 0.48, | |
| "num_input_tokens_seen": 17200416, | |
| "step": 1530, | |
| "train_runtime": 2504.3678, | |
| "train_tokens_per_second": 6868.167 | |
| }, | |
| { | |
| "epoch": 0.7367852642947141, | |
| "grad_norm": 0.7616066336631775, | |
| "learning_rate": 4.293338827218794e-05, | |
| "loss": 0.4972, | |
| "num_input_tokens_seen": 17256344, | |
| "step": 1535, | |
| "train_runtime": 2512.462, | |
| "train_tokens_per_second": 6868.3 | |
| }, | |
| { | |
| "epoch": 0.7391852162956741, | |
| "grad_norm": 0.7682455778121948, | |
| "learning_rate": 4.288956901634359e-05, | |
| "loss": 0.4691, | |
| "num_input_tokens_seen": 17314072, | |
| "step": 1540, | |
| "train_runtime": 2520.9232, | |
| "train_tokens_per_second": 6868.147 | |
| }, | |
| { | |
| "epoch": 0.741585168296634, | |
| "grad_norm": 0.7621558308601379, | |
| "learning_rate": 4.2845636832543914e-05, | |
| "loss": 0.4942, | |
| "num_input_tokens_seen": 17373728, | |
| "step": 1545, | |
| "train_runtime": 2529.3504, | |
| "train_tokens_per_second": 6868.85 | |
| }, | |
| { | |
| "epoch": 0.743985120297594, | |
| "grad_norm": 0.6085621118545532, | |
| "learning_rate": 4.2801591998110946e-05, | |
| "loss": 0.5119, | |
| "num_input_tokens_seen": 17425920, | |
| "step": 1550, | |
| "train_runtime": 2536.8035, | |
| "train_tokens_per_second": 6869.243 | |
| }, | |
| { | |
| "epoch": 0.746385072298554, | |
| "grad_norm": 0.6101738214492798, | |
| "learning_rate": 4.275743479107785e-05, | |
| "loss": 0.5201, | |
| "num_input_tokens_seen": 17480304, | |
| "step": 1555, | |
| "train_runtime": 2544.9492, | |
| "train_tokens_per_second": 6868.626 | |
| }, | |
| { | |
| "epoch": 0.748785024299514, | |
| "grad_norm": 0.6207472085952759, | |
| "learning_rate": 4.271316549018708e-05, | |
| "loss": 0.517, | |
| "num_input_tokens_seen": 17539776, | |
| "step": 1560, | |
| "train_runtime": 2553.0822, | |
| "train_tokens_per_second": 6870.04 | |
| }, | |
| { | |
| "epoch": 0.751184976300474, | |
| "grad_norm": 0.688941478729248, | |
| "learning_rate": 4.2668784374888756e-05, | |
| "loss": 0.4894, | |
| "num_input_tokens_seen": 17595928, | |
| "step": 1565, | |
| "train_runtime": 2561.6413, | |
| "train_tokens_per_second": 6869.005 | |
| }, | |
| { | |
| "epoch": 0.753584928301434, | |
| "grad_norm": 0.9783554673194885, | |
| "learning_rate": 4.262429172533878e-05, | |
| "loss": 0.5213, | |
| "num_input_tokens_seen": 17651664, | |
| "step": 1570, | |
| "train_runtime": 2569.4494, | |
| "train_tokens_per_second": 6869.824 | |
| }, | |
| { | |
| "epoch": 0.755984880302394, | |
| "grad_norm": 0.9513911604881287, | |
| "learning_rate": 4.257968782239714e-05, | |
| "loss": 0.506, | |
| "num_input_tokens_seen": 17703960, | |
| "step": 1575, | |
| "train_runtime": 2576.9625, | |
| "train_tokens_per_second": 6870.088 | |
| }, | |
| { | |
| "epoch": 0.758384832303354, | |
| "grad_norm": 0.7099276185035706, | |
| "learning_rate": 4.2534972947626094e-05, | |
| "loss": 0.5073, | |
| "num_input_tokens_seen": 17761448, | |
| "step": 1580, | |
| "train_runtime": 2585.427, | |
| "train_tokens_per_second": 6869.832 | |
| }, | |
| { | |
| "epoch": 0.760784784304314, | |
| "grad_norm": 0.5648279786109924, | |
| "learning_rate": 4.249014738328842e-05, | |
| "loss": 0.5265, | |
| "num_input_tokens_seen": 17817984, | |
| "step": 1585, | |
| "train_runtime": 2593.1431, | |
| "train_tokens_per_second": 6871.192 | |
| }, | |
| { | |
| "epoch": 0.763184736305274, | |
| "grad_norm": 0.6818917989730835, | |
| "learning_rate": 4.2445211412345615e-05, | |
| "loss": 0.5244, | |
| "num_input_tokens_seen": 17874768, | |
| "step": 1590, | |
| "train_runtime": 2601.224, | |
| "train_tokens_per_second": 6871.676 | |
| }, | |
| { | |
| "epoch": 0.7655846883062338, | |
| "grad_norm": 0.6163448691368103, | |
| "learning_rate": 4.240016531845612e-05, | |
| "loss": 0.5406, | |
| "num_input_tokens_seen": 17931864, | |
| "step": 1595, | |
| "train_runtime": 2609.5192, | |
| "train_tokens_per_second": 6871.712 | |
| }, | |
| { | |
| "epoch": 0.7679846403071938, | |
| "grad_norm": 0.6879476308822632, | |
| "learning_rate": 4.235500938597354e-05, | |
| "loss": 0.4871, | |
| "num_input_tokens_seen": 17985744, | |
| "step": 1600, | |
| "train_runtime": 2617.2291, | |
| "train_tokens_per_second": 6872.056 | |
| }, | |
| { | |
| "epoch": 0.7703845923081538, | |
| "grad_norm": 0.5437011122703552, | |
| "learning_rate": 4.230974389994483e-05, | |
| "loss": 0.5015, | |
| "num_input_tokens_seen": 18044152, | |
| "step": 1605, | |
| "train_runtime": 2625.4686, | |
| "train_tokens_per_second": 6872.736 | |
| }, | |
| { | |
| "epoch": 0.7727845443091138, | |
| "grad_norm": 0.5755176544189453, | |
| "learning_rate": 4.226436914610849e-05, | |
| "loss": 0.541, | |
| "num_input_tokens_seen": 18100976, | |
| "step": 1610, | |
| "train_runtime": 2633.5328, | |
| "train_tokens_per_second": 6873.268 | |
| }, | |
| { | |
| "epoch": 0.7751844963100738, | |
| "grad_norm": 0.6550777554512024, | |
| "learning_rate": 4.2218885410892785e-05, | |
| "loss": 0.5314, | |
| "num_input_tokens_seen": 18156240, | |
| "step": 1615, | |
| "train_runtime": 2641.1036, | |
| "train_tokens_per_second": 6874.49 | |
| }, | |
| { | |
| "epoch": 0.7775844483110338, | |
| "grad_norm": 0.6372175216674805, | |
| "learning_rate": 4.2173292981413914e-05, | |
| "loss": 0.4875, | |
| "num_input_tokens_seen": 18216472, | |
| "step": 1620, | |
| "train_runtime": 2649.6605, | |
| "train_tokens_per_second": 6875.021 | |
| }, | |
| { | |
| "epoch": 0.7799844003119938, | |
| "grad_norm": 0.5091462731361389, | |
| "learning_rate": 4.212759214547424e-05, | |
| "loss": 0.4954, | |
| "num_input_tokens_seen": 18271168, | |
| "step": 1625, | |
| "train_runtime": 2657.4608, | |
| "train_tokens_per_second": 6875.423 | |
| }, | |
| { | |
| "epoch": 0.7823843523129538, | |
| "grad_norm": 0.6974900960922241, | |
| "learning_rate": 4.2081783191560405e-05, | |
| "loss": 0.4939, | |
| "num_input_tokens_seen": 18326128, | |
| "step": 1630, | |
| "train_runtime": 2665.3267, | |
| "train_tokens_per_second": 6875.753 | |
| }, | |
| { | |
| "epoch": 0.7847843043139138, | |
| "grad_norm": 0.5476020574569702, | |
| "learning_rate": 4.203586640884156e-05, | |
| "loss": 0.4995, | |
| "num_input_tokens_seen": 18385280, | |
| "step": 1635, | |
| "train_runtime": 2673.6877, | |
| "train_tokens_per_second": 6876.375 | |
| }, | |
| { | |
| "epoch": 0.7871842563148737, | |
| "grad_norm": 0.5772519111633301, | |
| "learning_rate": 4.1989842087167534e-05, | |
| "loss": 0.5198, | |
| "num_input_tokens_seen": 18444000, | |
| "step": 1640, | |
| "train_runtime": 2682.4357, | |
| "train_tokens_per_second": 6875.84 | |
| }, | |
| { | |
| "epoch": 0.7895842083158336, | |
| "grad_norm": 0.6971266269683838, | |
| "learning_rate": 4.1943710517066984e-05, | |
| "loss": 0.4696, | |
| "num_input_tokens_seen": 18500344, | |
| "step": 1645, | |
| "train_runtime": 2690.5876, | |
| "train_tokens_per_second": 6875.949 | |
| }, | |
| { | |
| "epoch": 0.7919841603167936, | |
| "grad_norm": 0.7783945798873901, | |
| "learning_rate": 4.1897471989745575e-05, | |
| "loss": 0.4777, | |
| "num_input_tokens_seen": 18553136, | |
| "step": 1650, | |
| "train_runtime": 2698.2872, | |
| "train_tokens_per_second": 6875.894 | |
| }, | |
| { | |
| "epoch": 0.7943841123177536, | |
| "grad_norm": 0.7614520192146301, | |
| "learning_rate": 4.185112679708415e-05, | |
| "loss": 0.525, | |
| "num_input_tokens_seen": 18610264, | |
| "step": 1655, | |
| "train_runtime": 2706.4236, | |
| "train_tokens_per_second": 6876.331 | |
| }, | |
| { | |
| "epoch": 0.7967840643187136, | |
| "grad_norm": 0.5857712626457214, | |
| "learning_rate": 4.180467523163686e-05, | |
| "loss": 0.4906, | |
| "num_input_tokens_seen": 18670624, | |
| "step": 1660, | |
| "train_runtime": 2714.993, | |
| "train_tokens_per_second": 6876.859 | |
| }, | |
| { | |
| "epoch": 0.7991840163196736, | |
| "grad_norm": 0.5816935300827026, | |
| "learning_rate": 4.175811758662935e-05, | |
| "loss": 0.4851, | |
| "num_input_tokens_seen": 18727824, | |
| "step": 1665, | |
| "train_runtime": 2723.4951, | |
| "train_tokens_per_second": 6876.393 | |
| }, | |
| { | |
| "epoch": 0.8015839683206336, | |
| "grad_norm": 0.5751060843467712, | |
| "learning_rate": 4.1711454155956895e-05, | |
| "loss": 0.4694, | |
| "num_input_tokens_seen": 18785440, | |
| "step": 1670, | |
| "train_runtime": 2731.305, | |
| "train_tokens_per_second": 6877.826 | |
| }, | |
| { | |
| "epoch": 0.8039839203215936, | |
| "grad_norm": 0.8796506524085999, | |
| "learning_rate": 4.166468523418251e-05, | |
| "loss": 0.5254, | |
| "num_input_tokens_seen": 18839288, | |
| "step": 1675, | |
| "train_runtime": 2739.4392, | |
| "train_tokens_per_second": 6877.06 | |
| }, | |
| { | |
| "epoch": 0.8063838723225536, | |
| "grad_norm": 0.6676029562950134, | |
| "learning_rate": 4.1617811116535176e-05, | |
| "loss": 0.5521, | |
| "num_input_tokens_seen": 18893696, | |
| "step": 1680, | |
| "train_runtime": 2747.1069, | |
| "train_tokens_per_second": 6877.67 | |
| }, | |
| { | |
| "epoch": 0.8087838243235135, | |
| "grad_norm": 0.8193256258964539, | |
| "learning_rate": 4.1570832098907874e-05, | |
| "loss": 0.5444, | |
| "num_input_tokens_seen": 18946504, | |
| "step": 1685, | |
| "train_runtime": 2754.72, | |
| "train_tokens_per_second": 6877.833 | |
| }, | |
| { | |
| "epoch": 0.8111837763244735, | |
| "grad_norm": 0.5464473962783813, | |
| "learning_rate": 4.152374847785579e-05, | |
| "loss": 0.5321, | |
| "num_input_tokens_seen": 19003664, | |
| "step": 1690, | |
| "train_runtime": 2763.0844, | |
| "train_tokens_per_second": 6877.699 | |
| }, | |
| { | |
| "epoch": 0.8135837283254335, | |
| "grad_norm": 0.8191189169883728, | |
| "learning_rate": 4.1476560550594414e-05, | |
| "loss": 0.4826, | |
| "num_input_tokens_seen": 19056544, | |
| "step": 1695, | |
| "train_runtime": 2770.9361, | |
| "train_tokens_per_second": 6877.295 | |
| }, | |
| { | |
| "epoch": 0.8159836803263935, | |
| "grad_norm": 0.745058000087738, | |
| "learning_rate": 4.142926861499768e-05, | |
| "loss": 0.5543, | |
| "num_input_tokens_seen": 19107344, | |
| "step": 1700, | |
| "train_runtime": 2778.5593, | |
| "train_tokens_per_second": 6876.709 | |
| }, | |
| { | |
| "epoch": 0.8183836323273534, | |
| "grad_norm": 0.6147037744522095, | |
| "learning_rate": 4.138187296959606e-05, | |
| "loss": 0.505, | |
| "num_input_tokens_seen": 19162000, | |
| "step": 1705, | |
| "train_runtime": 2786.3906, | |
| "train_tokens_per_second": 6876.997 | |
| }, | |
| { | |
| "epoch": 0.8207835843283134, | |
| "grad_norm": 0.687018632888794, | |
| "learning_rate": 4.13343739135747e-05, | |
| "loss": 0.522, | |
| "num_input_tokens_seen": 19217512, | |
| "step": 1710, | |
| "train_runtime": 2794.2498, | |
| "train_tokens_per_second": 6877.521 | |
| }, | |
| { | |
| "epoch": 0.8231835363292734, | |
| "grad_norm": 0.6172505617141724, | |
| "learning_rate": 4.128677174677153e-05, | |
| "loss": 0.5411, | |
| "num_input_tokens_seen": 19276384, | |
| "step": 1715, | |
| "train_runtime": 2802.4832, | |
| "train_tokens_per_second": 6878.323 | |
| }, | |
| { | |
| "epoch": 0.8255834883302334, | |
| "grad_norm": 0.735072135925293, | |
| "learning_rate": 4.123906676967536e-05, | |
| "loss": 0.513, | |
| "num_input_tokens_seen": 19328432, | |
| "step": 1720, | |
| "train_runtime": 2810.1311, | |
| "train_tokens_per_second": 6878.125 | |
| }, | |
| { | |
| "epoch": 0.8279834403311934, | |
| "grad_norm": 0.9113159775733948, | |
| "learning_rate": 4.1191259283424e-05, | |
| "loss": 0.5244, | |
| "num_input_tokens_seen": 19384016, | |
| "step": 1725, | |
| "train_runtime": 2818.2045, | |
| "train_tokens_per_second": 6878.144 | |
| }, | |
| { | |
| "epoch": 0.8303833923321533, | |
| "grad_norm": 0.8989443778991699, | |
| "learning_rate": 4.1143349589802326e-05, | |
| "loss": 0.5471, | |
| "num_input_tokens_seen": 19442016, | |
| "step": 1730, | |
| "train_runtime": 2826.3519, | |
| "train_tokens_per_second": 6878.838 | |
| }, | |
| { | |
| "epoch": 0.8327833443331133, | |
| "grad_norm": 0.572564423084259, | |
| "learning_rate": 4.1095337991240436e-05, | |
| "loss": 0.5352, | |
| "num_input_tokens_seen": 19496880, | |
| "step": 1735, | |
| "train_runtime": 2834.1751, | |
| "train_tokens_per_second": 6879.208 | |
| }, | |
| { | |
| "epoch": 0.8351832963340733, | |
| "grad_norm": 0.4649478793144226, | |
| "learning_rate": 4.104722479081167e-05, | |
| "loss": 0.4709, | |
| "num_input_tokens_seen": 19555656, | |
| "step": 1740, | |
| "train_runtime": 2842.1514, | |
| "train_tokens_per_second": 6880.582 | |
| }, | |
| { | |
| "epoch": 0.8375832483350333, | |
| "grad_norm": 0.6450087428092957, | |
| "learning_rate": 4.099901029223075e-05, | |
| "loss": 0.5104, | |
| "num_input_tokens_seen": 19610352, | |
| "step": 1745, | |
| "train_runtime": 2849.9024, | |
| "train_tokens_per_second": 6881.061 | |
| }, | |
| { | |
| "epoch": 0.8399832003359933, | |
| "grad_norm": 0.7608988881111145, | |
| "learning_rate": 4.095069479985183e-05, | |
| "loss": 0.5151, | |
| "num_input_tokens_seen": 19666656, | |
| "step": 1750, | |
| "train_runtime": 2858.2857, | |
| "train_tokens_per_second": 6880.577 | |
| }, | |
| { | |
| "epoch": 0.8423831523369533, | |
| "grad_norm": 0.5766634345054626, | |
| "learning_rate": 4.090227861866659e-05, | |
| "loss": 0.5355, | |
| "num_input_tokens_seen": 19723528, | |
| "step": 1755, | |
| "train_runtime": 2866.3853, | |
| "train_tokens_per_second": 6880.976 | |
| }, | |
| { | |
| "epoch": 0.8447831043379133, | |
| "grad_norm": 0.8256959915161133, | |
| "learning_rate": 4.085376205430233e-05, | |
| "loss": 0.5475, | |
| "num_input_tokens_seen": 19775232, | |
| "step": 1760, | |
| "train_runtime": 2873.9931, | |
| "train_tokens_per_second": 6880.751 | |
| }, | |
| { | |
| "epoch": 0.8471830563388733, | |
| "grad_norm": 0.6020644903182983, | |
| "learning_rate": 4.080514541301998e-05, | |
| "loss": 0.5043, | |
| "num_input_tokens_seen": 19832592, | |
| "step": 1765, | |
| "train_runtime": 2881.8352, | |
| "train_tokens_per_second": 6881.931 | |
| }, | |
| { | |
| "epoch": 0.8495830083398332, | |
| "grad_norm": 0.6027383804321289, | |
| "learning_rate": 4.075642900171223e-05, | |
| "loss": 0.5501, | |
| "num_input_tokens_seen": 19886104, | |
| "step": 1770, | |
| "train_runtime": 2889.3788, | |
| "train_tokens_per_second": 6882.484 | |
| }, | |
| { | |
| "epoch": 0.8519829603407932, | |
| "grad_norm": 0.7463006377220154, | |
| "learning_rate": 4.070761312790157e-05, | |
| "loss": 0.5666, | |
| "num_input_tokens_seen": 19944808, | |
| "step": 1775, | |
| "train_runtime": 2897.8024, | |
| "train_tokens_per_second": 6882.736 | |
| }, | |
| { | |
| "epoch": 0.8543829123417531, | |
| "grad_norm": 0.5846840143203735, | |
| "learning_rate": 4.065869809973833e-05, | |
| "loss": 0.5026, | |
| "num_input_tokens_seen": 20000048, | |
| "step": 1780, | |
| "train_runtime": 2905.6359, | |
| "train_tokens_per_second": 6883.191 | |
| }, | |
| { | |
| "epoch": 0.8567828643427131, | |
| "grad_norm": 0.6461730599403381, | |
| "learning_rate": 4.060968422599879e-05, | |
| "loss": 0.4991, | |
| "num_input_tokens_seen": 20054800, | |
| "step": 1785, | |
| "train_runtime": 2913.7209, | |
| "train_tokens_per_second": 6882.883 | |
| }, | |
| { | |
| "epoch": 0.8591828163436731, | |
| "grad_norm": 0.7940958142280579, | |
| "learning_rate": 4.0560571816083156e-05, | |
| "loss": 0.5496, | |
| "num_input_tokens_seen": 20111120, | |
| "step": 1790, | |
| "train_runtime": 2921.8875, | |
| "train_tokens_per_second": 6882.921 | |
| }, | |
| { | |
| "epoch": 0.8615827683446331, | |
| "grad_norm": 0.6765144467353821, | |
| "learning_rate": 4.051136118001364e-05, | |
| "loss": 0.4827, | |
| "num_input_tokens_seen": 20165552, | |
| "step": 1795, | |
| "train_runtime": 2929.7258, | |
| "train_tokens_per_second": 6883.085 | |
| }, | |
| { | |
| "epoch": 0.8639827203455931, | |
| "grad_norm": 0.9223127365112305, | |
| "learning_rate": 4.046205262843254e-05, | |
| "loss": 0.4949, | |
| "num_input_tokens_seen": 20221072, | |
| "step": 1800, | |
| "train_runtime": 2938.3425, | |
| "train_tokens_per_second": 6881.796 | |
| }, | |
| { | |
| "epoch": 0.8663826723465531, | |
| "grad_norm": 0.5317054390907288, | |
| "learning_rate": 4.041264647260022e-05, | |
| "loss": 0.4844, | |
| "num_input_tokens_seen": 20277640, | |
| "step": 1805, | |
| "train_runtime": 2947.9518, | |
| "train_tokens_per_second": 6878.552 | |
| }, | |
| { | |
| "epoch": 0.8687826243475131, | |
| "grad_norm": 0.5232411623001099, | |
| "learning_rate": 4.036314302439319e-05, | |
| "loss": 0.4938, | |
| "num_input_tokens_seen": 20333328, | |
| "step": 1810, | |
| "train_runtime": 2955.884, | |
| "train_tokens_per_second": 6878.933 | |
| }, | |
| { | |
| "epoch": 0.8711825763484731, | |
| "grad_norm": 0.7968527674674988, | |
| "learning_rate": 4.031354259630209e-05, | |
| "loss": 0.5246, | |
| "num_input_tokens_seen": 20389752, | |
| "step": 1815, | |
| "train_runtime": 2963.7323, | |
| "train_tokens_per_second": 6879.755 | |
| }, | |
| { | |
| "epoch": 0.8735825283494331, | |
| "grad_norm": 0.5793075561523438, | |
| "learning_rate": 4.026384550142978e-05, | |
| "loss": 0.5467, | |
| "num_input_tokens_seen": 20447184, | |
| "step": 1820, | |
| "train_runtime": 2971.7237, | |
| "train_tokens_per_second": 6880.58 | |
| }, | |
| { | |
| "epoch": 0.875982480350393, | |
| "grad_norm": 0.6629696488380432, | |
| "learning_rate": 4.0214052053489304e-05, | |
| "loss": 0.4753, | |
| "num_input_tokens_seen": 20501512, | |
| "step": 1825, | |
| "train_runtime": 2979.5222, | |
| "train_tokens_per_second": 6880.805 | |
| }, | |
| { | |
| "epoch": 0.8783824323513529, | |
| "grad_norm": 0.6974778175354004, | |
| "learning_rate": 4.016416256680194e-05, | |
| "loss": 0.5134, | |
| "num_input_tokens_seen": 20556688, | |
| "step": 1830, | |
| "train_runtime": 2987.3905, | |
| "train_tokens_per_second": 6881.152 | |
| }, | |
| { | |
| "epoch": 0.8807823843523129, | |
| "grad_norm": 0.7780594825744629, | |
| "learning_rate": 4.011417735629522e-05, | |
| "loss": 0.4771, | |
| "num_input_tokens_seen": 20613504, | |
| "step": 1835, | |
| "train_runtime": 2995.7447, | |
| "train_tokens_per_second": 6880.928 | |
| }, | |
| { | |
| "epoch": 0.8831823363532729, | |
| "grad_norm": 0.6135735511779785, | |
| "learning_rate": 4.006409673750094e-05, | |
| "loss": 0.4904, | |
| "num_input_tokens_seen": 20670776, | |
| "step": 1840, | |
| "train_runtime": 3004.2957, | |
| "train_tokens_per_second": 6880.407 | |
| }, | |
| { | |
| "epoch": 0.8855822883542329, | |
| "grad_norm": 0.6567316651344299, | |
| "learning_rate": 4.0013921026553125e-05, | |
| "loss": 0.5172, | |
| "num_input_tokens_seen": 20726776, | |
| "step": 1845, | |
| "train_runtime": 3012.3296, | |
| "train_tokens_per_second": 6880.647 | |
| }, | |
| { | |
| "epoch": 0.8879822403551929, | |
| "grad_norm": 0.733647882938385, | |
| "learning_rate": 3.9963650540186116e-05, | |
| "loss": 0.5168, | |
| "num_input_tokens_seen": 20781792, | |
| "step": 1850, | |
| "train_runtime": 3020.8457, | |
| "train_tokens_per_second": 6879.462 | |
| }, | |
| { | |
| "epoch": 0.8903821923561529, | |
| "grad_norm": 0.7651314735412598, | |
| "learning_rate": 3.991328559573248e-05, | |
| "loss": 0.551, | |
| "num_input_tokens_seen": 20835512, | |
| "step": 1855, | |
| "train_runtime": 3028.6209, | |
| "train_tokens_per_second": 6879.538 | |
| }, | |
| { | |
| "epoch": 0.8927821443571129, | |
| "grad_norm": 0.7899940013885498, | |
| "learning_rate": 3.9862826511121085e-05, | |
| "loss": 0.5242, | |
| "num_input_tokens_seen": 20887216, | |
| "step": 1860, | |
| "train_runtime": 3036.1277, | |
| "train_tokens_per_second": 6879.558 | |
| }, | |
| { | |
| "epoch": 0.8951820963580729, | |
| "grad_norm": 0.6774663329124451, | |
| "learning_rate": 3.981227360487504e-05, | |
| "loss": 0.5273, | |
| "num_input_tokens_seen": 20943744, | |
| "step": 1865, | |
| "train_runtime": 3044.3369, | |
| "train_tokens_per_second": 6879.575 | |
| }, | |
| { | |
| "epoch": 0.8975820483590328, | |
| "grad_norm": 0.6696859002113342, | |
| "learning_rate": 3.976162719610972e-05, | |
| "loss": 0.5006, | |
| "num_input_tokens_seen": 20991568, | |
| "step": 1870, | |
| "train_runtime": 3053.2072, | |
| "train_tokens_per_second": 6875.252 | |
| }, | |
| { | |
| "epoch": 0.8999820003599928, | |
| "grad_norm": 0.7721266746520996, | |
| "learning_rate": 3.971088760453071e-05, | |
| "loss": 0.5214, | |
| "num_input_tokens_seen": 21047408, | |
| "step": 1875, | |
| "train_runtime": 3061.9813, | |
| "train_tokens_per_second": 6873.787 | |
| }, | |
| { | |
| "epoch": 0.9023819523609528, | |
| "grad_norm": 0.7528117299079895, | |
| "learning_rate": 3.966005515043183e-05, | |
| "loss": 0.5172, | |
| "num_input_tokens_seen": 21105344, | |
| "step": 1880, | |
| "train_runtime": 3070.238, | |
| "train_tokens_per_second": 6874.172 | |
| }, | |
| { | |
| "epoch": 0.9047819043619127, | |
| "grad_norm": 0.7893593311309814, | |
| "learning_rate": 3.960913015469311e-05, | |
| "loss": 0.5581, | |
| "num_input_tokens_seen": 21161704, | |
| "step": 1885, | |
| "train_runtime": 3078.4575, | |
| "train_tokens_per_second": 6874.126 | |
| }, | |
| { | |
| "epoch": 0.9071818563628727, | |
| "grad_norm": 0.6411826610565186, | |
| "learning_rate": 3.95581129387787e-05, | |
| "loss": 0.5006, | |
| "num_input_tokens_seen": 21220960, | |
| "step": 1890, | |
| "train_runtime": 3087.9925, | |
| "train_tokens_per_second": 6872.089 | |
| }, | |
| { | |
| "epoch": 0.9095818083638327, | |
| "grad_norm": 0.48201116919517517, | |
| "learning_rate": 3.950700382473494e-05, | |
| "loss": 0.5143, | |
| "num_input_tokens_seen": 21285456, | |
| "step": 1895, | |
| "train_runtime": 3097.6261, | |
| "train_tokens_per_second": 6871.538 | |
| }, | |
| { | |
| "epoch": 0.9119817603647927, | |
| "grad_norm": 0.7874345779418945, | |
| "learning_rate": 3.9455803135188265e-05, | |
| "loss": 0.5133, | |
| "num_input_tokens_seen": 21340656, | |
| "step": 1900, | |
| "train_runtime": 3105.618, | |
| "train_tokens_per_second": 6871.629 | |
| }, | |
| { | |
| "epoch": 0.9143817123657527, | |
| "grad_norm": 0.8059301972389221, | |
| "learning_rate": 3.940451119334315e-05, | |
| "loss": 0.4716, | |
| "num_input_tokens_seen": 21402256, | |
| "step": 1905, | |
| "train_runtime": 3114.7644, | |
| "train_tokens_per_second": 6871.228 | |
| }, | |
| { | |
| "epoch": 0.9167816643667127, | |
| "grad_norm": 0.5982013940811157, | |
| "learning_rate": 3.935312832298014e-05, | |
| "loss": 0.4752, | |
| "num_input_tokens_seen": 21456968, | |
| "step": 1910, | |
| "train_runtime": 3122.6252, | |
| "train_tokens_per_second": 6871.452 | |
| }, | |
| { | |
| "epoch": 0.9191816163676726, | |
| "grad_norm": 0.6114861965179443, | |
| "learning_rate": 3.9301654848453744e-05, | |
| "loss": 0.5358, | |
| "num_input_tokens_seen": 21510880, | |
| "step": 1915, | |
| "train_runtime": 3130.5306, | |
| "train_tokens_per_second": 6871.321 | |
| }, | |
| { | |
| "epoch": 0.9215815683686326, | |
| "grad_norm": 0.6739422678947449, | |
| "learning_rate": 3.9250091094690424e-05, | |
| "loss": 0.508, | |
| "num_input_tokens_seen": 21567176, | |
| "step": 1920, | |
| "train_runtime": 3139.4979, | |
| "train_tokens_per_second": 6869.626 | |
| }, | |
| { | |
| "epoch": 0.9239815203695926, | |
| "grad_norm": 0.9573784470558167, | |
| "learning_rate": 3.9198437387186514e-05, | |
| "loss": 0.4969, | |
| "num_input_tokens_seen": 21616728, | |
| "step": 1925, | |
| "train_runtime": 3147.1512, | |
| "train_tokens_per_second": 6868.665 | |
| }, | |
| { | |
| "epoch": 0.9263814723705526, | |
| "grad_norm": 0.6872597336769104, | |
| "learning_rate": 3.914669405200619e-05, | |
| "loss": 0.5231, | |
| "num_input_tokens_seen": 21669600, | |
| "step": 1930, | |
| "train_runtime": 3154.6855, | |
| "train_tokens_per_second": 6869.021 | |
| }, | |
| { | |
| "epoch": 0.9287814243715126, | |
| "grad_norm": 0.5402712225914001, | |
| "learning_rate": 3.909486141577941e-05, | |
| "loss": 0.5557, | |
| "num_input_tokens_seen": 21725144, | |
| "step": 1935, | |
| "train_runtime": 3162.9029, | |
| "train_tokens_per_second": 6868.736 | |
| }, | |
| { | |
| "epoch": 0.9311813763724726, | |
| "grad_norm": 0.5620856881141663, | |
| "learning_rate": 3.904293980569983e-05, | |
| "loss": 0.5202, | |
| "num_input_tokens_seen": 21780960, | |
| "step": 1940, | |
| "train_runtime": 3171.7075, | |
| "train_tokens_per_second": 6867.266 | |
| }, | |
| { | |
| "epoch": 0.9335813283734326, | |
| "grad_norm": 0.48633241653442383, | |
| "learning_rate": 3.899092954952276e-05, | |
| "loss": 0.4965, | |
| "num_input_tokens_seen": 21835904, | |
| "step": 1945, | |
| "train_runtime": 3180.9981, | |
| "train_tokens_per_second": 6864.482 | |
| }, | |
| { | |
| "epoch": 0.9359812803743925, | |
| "grad_norm": 0.6408486366271973, | |
| "learning_rate": 3.89388309755631e-05, | |
| "loss": 0.5271, | |
| "num_input_tokens_seen": 21890264, | |
| "step": 1950, | |
| "train_runtime": 3188.8619, | |
| "train_tokens_per_second": 6864.601 | |
| }, | |
| { | |
| "epoch": 0.9383812323753525, | |
| "grad_norm": 0.6832561492919922, | |
| "learning_rate": 3.888664441269324e-05, | |
| "loss": 0.513, | |
| "num_input_tokens_seen": 21943944, | |
| "step": 1955, | |
| "train_runtime": 3196.9004, | |
| "train_tokens_per_second": 6864.131 | |
| }, | |
| { | |
| "epoch": 0.9407811843763125, | |
| "grad_norm": 0.7224368453025818, | |
| "learning_rate": 3.8834370190341016e-05, | |
| "loss": 0.4975, | |
| "num_input_tokens_seen": 22000688, | |
| "step": 1960, | |
| "train_runtime": 3205.2356, | |
| "train_tokens_per_second": 6863.985 | |
| }, | |
| { | |
| "epoch": 0.9431811363772724, | |
| "grad_norm": 0.921877384185791, | |
| "learning_rate": 3.8782008638487585e-05, | |
| "loss": 0.5142, | |
| "num_input_tokens_seen": 22056928, | |
| "step": 1965, | |
| "train_runtime": 3213.437, | |
| "train_tokens_per_second": 6863.968 | |
| }, | |
| { | |
| "epoch": 0.9455810883782324, | |
| "grad_norm": 0.8015443682670593, | |
| "learning_rate": 3.872956008766541e-05, | |
| "loss": 0.5345, | |
| "num_input_tokens_seen": 22109984, | |
| "step": 1970, | |
| "train_runtime": 3221.3456, | |
| "train_tokens_per_second": 6863.586 | |
| }, | |
| { | |
| "epoch": 0.9479810403791924, | |
| "grad_norm": 0.60637366771698, | |
| "learning_rate": 3.867702486895611e-05, | |
| "loss": 0.519, | |
| "num_input_tokens_seen": 22167792, | |
| "step": 1975, | |
| "train_runtime": 3229.4918, | |
| "train_tokens_per_second": 6864.173 | |
| }, | |
| { | |
| "epoch": 0.9503809923801524, | |
| "grad_norm": 0.6260784268379211, | |
| "learning_rate": 3.86244033139884e-05, | |
| "loss": 0.4549, | |
| "num_input_tokens_seen": 22224944, | |
| "step": 1980, | |
| "train_runtime": 3237.4363, | |
| "train_tokens_per_second": 6864.983 | |
| }, | |
| { | |
| "epoch": 0.9527809443811124, | |
| "grad_norm": 0.7488238215446472, | |
| "learning_rate": 3.857169575493601e-05, | |
| "loss": 0.4988, | |
| "num_input_tokens_seen": 22280208, | |
| "step": 1985, | |
| "train_runtime": 3245.3144, | |
| "train_tokens_per_second": 6865.347 | |
| }, | |
| { | |
| "epoch": 0.9551808963820724, | |
| "grad_norm": 1.2673466205596924, | |
| "learning_rate": 3.851890252451553e-05, | |
| "loss": 0.5948, | |
| "num_input_tokens_seen": 22331688, | |
| "step": 1990, | |
| "train_runtime": 3252.7162, | |
| "train_tokens_per_second": 6865.551 | |
| }, | |
| { | |
| "epoch": 0.9575808483830324, | |
| "grad_norm": 0.7167654633522034, | |
| "learning_rate": 3.846602395598441e-05, | |
| "loss": 0.4765, | |
| "num_input_tokens_seen": 22391056, | |
| "step": 1995, | |
| "train_runtime": 3261.3251, | |
| "train_tokens_per_second": 6865.631 | |
| }, | |
| { | |
| "epoch": 0.9599808003839924, | |
| "grad_norm": 0.7767099142074585, | |
| "learning_rate": 3.8413060383138735e-05, | |
| "loss": 0.5067, | |
| "num_input_tokens_seen": 22442560, | |
| "step": 2000, | |
| "train_runtime": 3268.751, | |
| "train_tokens_per_second": 6865.791 | |
| }, | |
| { | |
| "epoch": 0.9623807523849524, | |
| "grad_norm": 0.6243239641189575, | |
| "learning_rate": 3.836001214031122e-05, | |
| "loss": 0.441, | |
| "num_input_tokens_seen": 22504640, | |
| "step": 2005, | |
| "train_runtime": 3277.3712, | |
| "train_tokens_per_second": 6866.674 | |
| }, | |
| { | |
| "epoch": 0.9647807043859122, | |
| "grad_norm": 0.7347325086593628, | |
| "learning_rate": 3.830687956236907e-05, | |
| "loss": 0.4923, | |
| "num_input_tokens_seen": 22565448, | |
| "step": 2010, | |
| "train_runtime": 3285.5854, | |
| "train_tokens_per_second": 6868.014 | |
| }, | |
| { | |
| "epoch": 0.9671806563868722, | |
| "grad_norm": 0.7760552167892456, | |
| "learning_rate": 3.8253662984711795e-05, | |
| "loss": 0.4971, | |
| "num_input_tokens_seen": 22618928, | |
| "step": 2015, | |
| "train_runtime": 3293.6417, | |
| "train_tokens_per_second": 6867.453 | |
| }, | |
| { | |
| "epoch": 0.9695806083878322, | |
| "grad_norm": 0.6205884218215942, | |
| "learning_rate": 3.820036274326922e-05, | |
| "loss": 0.4979, | |
| "num_input_tokens_seen": 22674720, | |
| "step": 2020, | |
| "train_runtime": 3301.4874, | |
| "train_tokens_per_second": 6868.032 | |
| }, | |
| { | |
| "epoch": 0.9719805603887922, | |
| "grad_norm": 0.7021058797836304, | |
| "learning_rate": 3.8146979174499265e-05, | |
| "loss": 0.48, | |
| "num_input_tokens_seen": 22734768, | |
| "step": 2025, | |
| "train_runtime": 3309.628, | |
| "train_tokens_per_second": 6869.282 | |
| }, | |
| { | |
| "epoch": 0.9743805123897522, | |
| "grad_norm": 0.8105769753456116, | |
| "learning_rate": 3.809351261538585e-05, | |
| "loss": 0.4802, | |
| "num_input_tokens_seen": 22792864, | |
| "step": 2030, | |
| "train_runtime": 3318.078, | |
| "train_tokens_per_second": 6869.297 | |
| }, | |
| { | |
| "epoch": 0.9767804643907122, | |
| "grad_norm": 0.7583296895027161, | |
| "learning_rate": 3.8039963403436806e-05, | |
| "loss": 0.5393, | |
| "num_input_tokens_seen": 22846392, | |
| "step": 2035, | |
| "train_runtime": 3326.837, | |
| "train_tokens_per_second": 6867.301 | |
| }, | |
| { | |
| "epoch": 0.9791804163916722, | |
| "grad_norm": 0.7417272925376892, | |
| "learning_rate": 3.798633187668166e-05, | |
| "loss": 0.5505, | |
| "num_input_tokens_seen": 22899608, | |
| "step": 2040, | |
| "train_runtime": 3337.0101, | |
| "train_tokens_per_second": 6862.313 | |
| }, | |
| { | |
| "epoch": 0.9815803683926322, | |
| "grad_norm": 0.6118446588516235, | |
| "learning_rate": 3.793261837366959e-05, | |
| "loss": 0.4829, | |
| "num_input_tokens_seen": 22960648, | |
| "step": 2045, | |
| "train_runtime": 3348.0559, | |
| "train_tokens_per_second": 6857.905 | |
| }, | |
| { | |
| "epoch": 0.9839803203935922, | |
| "grad_norm": 0.6822954416275024, | |
| "learning_rate": 3.7878823233467234e-05, | |
| "loss": 0.5252, | |
| "num_input_tokens_seen": 23017960, | |
| "step": 2050, | |
| "train_runtime": 3357.979, | |
| "train_tokens_per_second": 6854.706 | |
| }, | |
| { | |
| "epoch": 0.9863802723945521, | |
| "grad_norm": 0.8443323373794556, | |
| "learning_rate": 3.782494679565656e-05, | |
| "loss": 0.5098, | |
| "num_input_tokens_seen": 23073264, | |
| "step": 2055, | |
| "train_runtime": 3367.9787, | |
| "train_tokens_per_second": 6850.775 | |
| }, | |
| { | |
| "epoch": 0.988780224395512, | |
| "grad_norm": 0.8180744647979736, | |
| "learning_rate": 3.777098940033275e-05, | |
| "loss": 0.4722, | |
| "num_input_tokens_seen": 23130952, | |
| "step": 2060, | |
| "train_runtime": 3379.0655, | |
| "train_tokens_per_second": 6845.37 | |
| }, | |
| { | |
| "epoch": 0.991180176396472, | |
| "grad_norm": 1.0012092590332031, | |
| "learning_rate": 3.7716951388102e-05, | |
| "loss": 0.512, | |
| "num_input_tokens_seen": 23184912, | |
| "step": 2065, | |
| "train_runtime": 3390.0285, | |
| "train_tokens_per_second": 6839.15 | |
| }, | |
| { | |
| "epoch": 0.993580128397432, | |
| "grad_norm": 0.8469212651252747, | |
| "learning_rate": 3.766283310007943e-05, | |
| "loss": 0.5002, | |
| "num_input_tokens_seen": 23238656, | |
| "step": 2070, | |
| "train_runtime": 3398.1559, | |
| "train_tokens_per_second": 6838.608 | |
| }, | |
| { | |
| "epoch": 0.995980080398392, | |
| "grad_norm": 0.7020851969718933, | |
| "learning_rate": 3.7608634877886885e-05, | |
| "loss": 0.5014, | |
| "num_input_tokens_seen": 23293008, | |
| "step": 2075, | |
| "train_runtime": 3406.0069, | |
| "train_tokens_per_second": 6838.802 | |
| }, | |
| { | |
| "epoch": 0.998380032399352, | |
| "grad_norm": 0.9155061841011047, | |
| "learning_rate": 3.755435706365079e-05, | |
| "loss": 0.4932, | |
| "num_input_tokens_seen": 23349040, | |
| "step": 2080, | |
| "train_runtime": 3414.3354, | |
| "train_tokens_per_second": 6838.531 | |
| }, | |
| { | |
| "epoch": 1.000479990400192, | |
| "grad_norm": 0.7089964151382446, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 0.5376, | |
| "num_input_tokens_seen": 23400800, | |
| "step": 2085, | |
| "train_runtime": 3421.5018, | |
| "train_tokens_per_second": 6839.336 | |
| }, | |
| { | |
| "epoch": 1.002879942401152, | |
| "grad_norm": 0.5927316546440125, | |
| "learning_rate": 3.7445564030063646e-05, | |
| "loss": 0.4811, | |
| "num_input_tokens_seen": 23456048, | |
| "step": 2090, | |
| "train_runtime": 3429.5202, | |
| "train_tokens_per_second": 6839.455 | |
| }, | |
| { | |
| "epoch": 1.005279894402112, | |
| "grad_norm": 0.5862952470779419, | |
| "learning_rate": 3.739104949746893e-05, | |
| "loss": 0.4931, | |
| "num_input_tokens_seen": 23511576, | |
| "step": 2095, | |
| "train_runtime": 3437.359, | |
| "train_tokens_per_second": 6840.012 | |
| }, | |
| { | |
| "epoch": 1.0076798464030718, | |
| "grad_norm": 0.8004628419876099, | |
| "learning_rate": 3.7336456746339e-05, | |
| "loss": 0.4666, | |
| "num_input_tokens_seen": 23567088, | |
| "step": 2100, | |
| "train_runtime": 3445.3003, | |
| "train_tokens_per_second": 6840.358 | |
| }, | |
| { | |
| "epoch": 1.010079798404032, | |
| "grad_norm": 0.5078383088111877, | |
| "learning_rate": 3.728178612129075e-05, | |
| "loss": 0.4806, | |
| "num_input_tokens_seen": 23626528, | |
| "step": 2105, | |
| "train_runtime": 3454.0548, | |
| "train_tokens_per_second": 6840.23 | |
| }, | |
| { | |
| "epoch": 1.0124797504049918, | |
| "grad_norm": 0.8467037081718445, | |
| "learning_rate": 3.722703796743267e-05, | |
| "loss": 0.4856, | |
| "num_input_tokens_seen": 23681288, | |
| "step": 2110, | |
| "train_runtime": 3462.258, | |
| "train_tokens_per_second": 6839.839 | |
| }, | |
| { | |
| "epoch": 1.014879702405952, | |
| "grad_norm": 0.6897312998771667, | |
| "learning_rate": 3.7172212630362627e-05, | |
| "loss": 0.5198, | |
| "num_input_tokens_seen": 23740272, | |
| "step": 2115, | |
| "train_runtime": 3470.5143, | |
| "train_tokens_per_second": 6840.563 | |
| }, | |
| { | |
| "epoch": 1.0172796544069118, | |
| "grad_norm": 0.7425886392593384, | |
| "learning_rate": 3.7117310456165696e-05, | |
| "loss": 0.5217, | |
| "num_input_tokens_seen": 23796168, | |
| "step": 2120, | |
| "train_runtime": 3478.8621, | |
| "train_tokens_per_second": 6840.216 | |
| }, | |
| { | |
| "epoch": 1.019679606407872, | |
| "grad_norm": 0.7550194263458252, | |
| "learning_rate": 3.7062331791412045e-05, | |
| "loss": 0.5463, | |
| "num_input_tokens_seen": 23852288, | |
| "step": 2125, | |
| "train_runtime": 3486.8348, | |
| "train_tokens_per_second": 6840.67 | |
| }, | |
| { | |
| "epoch": 1.0220795584088318, | |
| "grad_norm": 0.5753782391548157, | |
| "learning_rate": 3.700727698315463e-05, | |
| "loss": 0.5069, | |
| "num_input_tokens_seen": 23906400, | |
| "step": 2130, | |
| "train_runtime": 3494.7803, | |
| "train_tokens_per_second": 6840.602 | |
| }, | |
| { | |
| "epoch": 1.024479510409792, | |
| "grad_norm": 0.7684709429740906, | |
| "learning_rate": 3.6952146378927095e-05, | |
| "loss": 0.4976, | |
| "num_input_tokens_seen": 23966288, | |
| "step": 2135, | |
| "train_runtime": 3503.1065, | |
| "train_tokens_per_second": 6841.439 | |
| }, | |
| { | |
| "epoch": 1.0268794624107518, | |
| "grad_norm": 0.8290258646011353, | |
| "learning_rate": 3.689694032674153e-05, | |
| "loss": 0.4863, | |
| "num_input_tokens_seen": 24019784, | |
| "step": 2140, | |
| "train_runtime": 3511.9759, | |
| "train_tokens_per_second": 6839.393 | |
| }, | |
| { | |
| "epoch": 1.0292794144117117, | |
| "grad_norm": 0.5777615904808044, | |
| "learning_rate": 3.684165917508628e-05, | |
| "loss": 0.5026, | |
| "num_input_tokens_seen": 24075104, | |
| "step": 2145, | |
| "train_runtime": 3522.5617, | |
| "train_tokens_per_second": 6834.544 | |
| }, | |
| { | |
| "epoch": 1.0316793664126718, | |
| "grad_norm": 0.8155114650726318, | |
| "learning_rate": 3.678630327292381e-05, | |
| "loss": 0.5197, | |
| "num_input_tokens_seen": 24125896, | |
| "step": 2150, | |
| "train_runtime": 3530.4751, | |
| "train_tokens_per_second": 6833.612 | |
| }, | |
| { | |
| "epoch": 1.0340793184136317, | |
| "grad_norm": 0.5378252267837524, | |
| "learning_rate": 3.673087296968838e-05, | |
| "loss": 0.4873, | |
| "num_input_tokens_seen": 24182088, | |
| "step": 2155, | |
| "train_runtime": 3538.664, | |
| "train_tokens_per_second": 6833.677 | |
| }, | |
| { | |
| "epoch": 1.0364792704145918, | |
| "grad_norm": 0.8574205040931702, | |
| "learning_rate": 3.667536861528396e-05, | |
| "loss": 0.515, | |
| "num_input_tokens_seen": 24242048, | |
| "step": 2160, | |
| "train_runtime": 3547.103, | |
| "train_tokens_per_second": 6834.323 | |
| }, | |
| { | |
| "epoch": 1.0388792224155516, | |
| "grad_norm": 0.8171690106391907, | |
| "learning_rate": 3.661979056008191e-05, | |
| "loss": 0.486, | |
| "num_input_tokens_seen": 24294336, | |
| "step": 2165, | |
| "train_runtime": 3554.7165, | |
| "train_tokens_per_second": 6834.395 | |
| }, | |
| { | |
| "epoch": 1.0412791744165117, | |
| "grad_norm": 0.7367947101593018, | |
| "learning_rate": 3.6564139154918895e-05, | |
| "loss": 0.5121, | |
| "num_input_tokens_seen": 24348872, | |
| "step": 2170, | |
| "train_runtime": 3562.3935, | |
| "train_tokens_per_second": 6834.975 | |
| }, | |
| { | |
| "epoch": 1.0436791264174716, | |
| "grad_norm": 0.718895673751831, | |
| "learning_rate": 3.6508414751094556e-05, | |
| "loss": 0.5462, | |
| "num_input_tokens_seen": 24402136, | |
| "step": 2175, | |
| "train_runtime": 3570.1249, | |
| "train_tokens_per_second": 6835.093 | |
| }, | |
| { | |
| "epoch": 1.0460790784184317, | |
| "grad_norm": 0.7847620248794556, | |
| "learning_rate": 3.6452617700369345e-05, | |
| "loss": 0.4975, | |
| "num_input_tokens_seen": 24451792, | |
| "step": 2180, | |
| "train_runtime": 3577.4533, | |
| "train_tokens_per_second": 6834.972 | |
| }, | |
| { | |
| "epoch": 1.0484790304193916, | |
| "grad_norm": 0.7218212485313416, | |
| "learning_rate": 3.639674835496232e-05, | |
| "loss": 0.568, | |
| "num_input_tokens_seen": 24508800, | |
| "step": 2185, | |
| "train_runtime": 3585.3931, | |
| "train_tokens_per_second": 6835.736 | |
| }, | |
| { | |
| "epoch": 1.0508789824203515, | |
| "grad_norm": 0.6216446161270142, | |
| "learning_rate": 3.634080706754887e-05, | |
| "loss": 0.5024, | |
| "num_input_tokens_seen": 24567000, | |
| "step": 2190, | |
| "train_runtime": 3593.4867, | |
| "train_tokens_per_second": 6836.536 | |
| }, | |
| { | |
| "epoch": 1.0532789344213116, | |
| "grad_norm": 0.7098725438117981, | |
| "learning_rate": 3.628479419125852e-05, | |
| "loss": 0.5057, | |
| "num_input_tokens_seen": 24629752, | |
| "step": 2195, | |
| "train_runtime": 3602.2113, | |
| "train_tokens_per_second": 6837.398 | |
| }, | |
| { | |
| "epoch": 1.0556788864222715, | |
| "grad_norm": 0.7154077887535095, | |
| "learning_rate": 3.6228710079672734e-05, | |
| "loss": 0.5329, | |
| "num_input_tokens_seen": 24685968, | |
| "step": 2200, | |
| "train_runtime": 3610.3704, | |
| "train_tokens_per_second": 6837.517 | |
| }, | |
| { | |
| "epoch": 1.0580788384232316, | |
| "grad_norm": 0.6186597347259521, | |
| "learning_rate": 3.6172555086822615e-05, | |
| "loss": 0.5114, | |
| "num_input_tokens_seen": 24745552, | |
| "step": 2205, | |
| "train_runtime": 3618.4119, | |
| "train_tokens_per_second": 6838.788 | |
| }, | |
| { | |
| "epoch": 1.0604787904241915, | |
| "grad_norm": 0.7932461500167847, | |
| "learning_rate": 3.6116329567186724e-05, | |
| "loss": 0.4939, | |
| "num_input_tokens_seen": 24799856, | |
| "step": 2210, | |
| "train_runtime": 3626.1603, | |
| "train_tokens_per_second": 6839.151 | |
| }, | |
| { | |
| "epoch": 1.0628787424251516, | |
| "grad_norm": 0.7647953629493713, | |
| "learning_rate": 3.6060033875688804e-05, | |
| "loss": 0.5289, | |
| "num_input_tokens_seen": 24853952, | |
| "step": 2215, | |
| "train_runtime": 3633.6609, | |
| "train_tokens_per_second": 6839.921 | |
| }, | |
| { | |
| "epoch": 1.0652786944261114, | |
| "grad_norm": 0.722197413444519, | |
| "learning_rate": 3.600366836769557e-05, | |
| "loss": 0.5015, | |
| "num_input_tokens_seen": 24911328, | |
| "step": 2220, | |
| "train_runtime": 3641.5303, | |
| "train_tokens_per_second": 6840.895 | |
| }, | |
| { | |
| "epoch": 1.0676786464270716, | |
| "grad_norm": 0.9403772354125977, | |
| "learning_rate": 3.5947233399014444e-05, | |
| "loss": 0.4982, | |
| "num_input_tokens_seen": 24967496, | |
| "step": 2225, | |
| "train_runtime": 3649.8212, | |
| "train_tokens_per_second": 6840.745 | |
| }, | |
| { | |
| "epoch": 1.0700785984280314, | |
| "grad_norm": 0.5855931639671326, | |
| "learning_rate": 3.589072932589134e-05, | |
| "loss": 0.4706, | |
| "num_input_tokens_seen": 25028408, | |
| "step": 2230, | |
| "train_runtime": 3658.1326, | |
| "train_tokens_per_second": 6841.854 | |
| }, | |
| { | |
| "epoch": 1.0724785504289913, | |
| "grad_norm": 0.7537211179733276, | |
| "learning_rate": 3.583415650500837e-05, | |
| "loss": 0.5351, | |
| "num_input_tokens_seen": 25082672, | |
| "step": 2235, | |
| "train_runtime": 3665.8181, | |
| "train_tokens_per_second": 6842.312 | |
| }, | |
| { | |
| "epoch": 1.0748785024299514, | |
| "grad_norm": 0.7052933573722839, | |
| "learning_rate": 3.577751529348163e-05, | |
| "loss": 0.5137, | |
| "num_input_tokens_seen": 25138272, | |
| "step": 2240, | |
| "train_runtime": 3673.8839, | |
| "train_tokens_per_second": 6842.424 | |
| }, | |
| { | |
| "epoch": 1.0772784544309113, | |
| "grad_norm": 0.6160354614257812, | |
| "learning_rate": 3.572080604885894e-05, | |
| "loss": 0.4984, | |
| "num_input_tokens_seen": 25198880, | |
| "step": 2245, | |
| "train_runtime": 3682.6208, | |
| "train_tokens_per_second": 6842.649 | |
| }, | |
| { | |
| "epoch": 1.0796784064318714, | |
| "grad_norm": 0.7151322960853577, | |
| "learning_rate": 3.566402912911755e-05, | |
| "loss": 0.4745, | |
| "num_input_tokens_seen": 25255672, | |
| "step": 2250, | |
| "train_runtime": 3691.127, | |
| "train_tokens_per_second": 6842.266 | |
| }, | |
| { | |
| "epoch": 1.0820783584328313, | |
| "grad_norm": 0.6750310063362122, | |
| "learning_rate": 3.560718489266194e-05, | |
| "loss": 0.4705, | |
| "num_input_tokens_seen": 25310096, | |
| "step": 2255, | |
| "train_runtime": 3698.9218, | |
| "train_tokens_per_second": 6842.561 | |
| }, | |
| { | |
| "epoch": 1.0844783104337914, | |
| "grad_norm": 0.7280714511871338, | |
| "learning_rate": 3.555027369832151e-05, | |
| "loss": 0.529, | |
| "num_input_tokens_seen": 25365416, | |
| "step": 2260, | |
| "train_runtime": 3706.9184, | |
| "train_tokens_per_second": 6842.723 | |
| }, | |
| { | |
| "epoch": 1.0868782624347513, | |
| "grad_norm": 0.7498377561569214, | |
| "learning_rate": 3.5493295905348334e-05, | |
| "loss": 0.4974, | |
| "num_input_tokens_seen": 25421480, | |
| "step": 2265, | |
| "train_runtime": 3715.1661, | |
| "train_tokens_per_second": 6842.623 | |
| }, | |
| { | |
| "epoch": 1.0892782144357114, | |
| "grad_norm": 0.7328541874885559, | |
| "learning_rate": 3.54362518734149e-05, | |
| "loss": 0.4618, | |
| "num_input_tokens_seen": 25482160, | |
| "step": 2270, | |
| "train_runtime": 3723.7211, | |
| "train_tokens_per_second": 6843.198 | |
| }, | |
| { | |
| "epoch": 1.0916781664366713, | |
| "grad_norm": 0.6172477006912231, | |
| "learning_rate": 3.537914196261181e-05, | |
| "loss": 0.5266, | |
| "num_input_tokens_seen": 25538416, | |
| "step": 2275, | |
| "train_runtime": 3731.9378, | |
| "train_tokens_per_second": 6843.205 | |
| }, | |
| { | |
| "epoch": 1.0940781184376314, | |
| "grad_norm": 0.5969734191894531, | |
| "learning_rate": 3.5321966533445547e-05, | |
| "loss": 0.5244, | |
| "num_input_tokens_seen": 25594328, | |
| "step": 2280, | |
| "train_runtime": 3739.9474, | |
| "train_tokens_per_second": 6843.499 | |
| }, | |
| { | |
| "epoch": 1.0964780704385912, | |
| "grad_norm": 0.9102872610092163, | |
| "learning_rate": 3.526472594683617e-05, | |
| "loss": 0.5011, | |
| "num_input_tokens_seen": 25647608, | |
| "step": 2285, | |
| "train_runtime": 3747.8696, | |
| "train_tokens_per_second": 6843.25 | |
| }, | |
| { | |
| "epoch": 1.0988780224395511, | |
| "grad_norm": 0.7734837532043457, | |
| "learning_rate": 3.5207420564115045e-05, | |
| "loss": 0.5229, | |
| "num_input_tokens_seen": 25702960, | |
| "step": 2290, | |
| "train_runtime": 3755.5877, | |
| "train_tokens_per_second": 6843.925 | |
| }, | |
| { | |
| "epoch": 1.1012779744405112, | |
| "grad_norm": 0.6865848898887634, | |
| "learning_rate": 3.515005074702256e-05, | |
| "loss": 0.5035, | |
| "num_input_tokens_seen": 25758120, | |
| "step": 2295, | |
| "train_runtime": 3763.673, | |
| "train_tokens_per_second": 6843.878 | |
| }, | |
| { | |
| "epoch": 1.1036779264414711, | |
| "grad_norm": 0.6671602129936218, | |
| "learning_rate": 3.509261685770585e-05, | |
| "loss": 0.4939, | |
| "num_input_tokens_seen": 25817024, | |
| "step": 2300, | |
| "train_runtime": 3772.0902, | |
| "train_tokens_per_second": 6844.222 | |
| }, | |
| { | |
| "epoch": 1.1060778784424312, | |
| "grad_norm": 0.6217396855354309, | |
| "learning_rate": 3.5035119258716495e-05, | |
| "loss": 0.5389, | |
| "num_input_tokens_seen": 25876744, | |
| "step": 2305, | |
| "train_runtime": 3780.9145, | |
| "train_tokens_per_second": 6844.044 | |
| }, | |
| { | |
| "epoch": 1.108477830443391, | |
| "grad_norm": 0.7444595098495483, | |
| "learning_rate": 3.497755831300828e-05, | |
| "loss": 0.49, | |
| "num_input_tokens_seen": 25928600, | |
| "step": 2310, | |
| "train_runtime": 3788.9853, | |
| "train_tokens_per_second": 6843.151 | |
| }, | |
| { | |
| "epoch": 1.1108777824443512, | |
| "grad_norm": 0.6591025590896606, | |
| "learning_rate": 3.491993438393481e-05, | |
| "loss": 0.4658, | |
| "num_input_tokens_seen": 25985192, | |
| "step": 2315, | |
| "train_runtime": 3797.2779, | |
| "train_tokens_per_second": 6843.11 | |
| }, | |
| { | |
| "epoch": 1.113277734445311, | |
| "grad_norm": 0.7887580394744873, | |
| "learning_rate": 3.486224783524731e-05, | |
| "loss": 0.5464, | |
| "num_input_tokens_seen": 26040520, | |
| "step": 2320, | |
| "train_runtime": 3804.9274, | |
| "train_tokens_per_second": 6843.894 | |
| }, | |
| { | |
| "epoch": 1.1156776864462712, | |
| "grad_norm": 0.8074533939361572, | |
| "learning_rate": 3.480449903109229e-05, | |
| "loss": 0.5227, | |
| "num_input_tokens_seen": 26093336, | |
| "step": 2325, | |
| "train_runtime": 3812.5053, | |
| "train_tokens_per_second": 6844.144 | |
| }, | |
| { | |
| "epoch": 1.118077638447231, | |
| "grad_norm": 0.7056359648704529, | |
| "learning_rate": 3.474668833600923e-05, | |
| "loss": 0.4759, | |
| "num_input_tokens_seen": 26148320, | |
| "step": 2330, | |
| "train_runtime": 3820.6134, | |
| "train_tokens_per_second": 6844.011 | |
| }, | |
| { | |
| "epoch": 1.120477590448191, | |
| "grad_norm": 0.841861367225647, | |
| "learning_rate": 3.4688816114928327e-05, | |
| "loss": 0.5181, | |
| "num_input_tokens_seen": 26206080, | |
| "step": 2335, | |
| "train_runtime": 3828.5922, | |
| "train_tokens_per_second": 6844.835 | |
| }, | |
| { | |
| "epoch": 1.122877542449151, | |
| "grad_norm": 0.6521568298339844, | |
| "learning_rate": 3.4630882733168116e-05, | |
| "loss": 0.4938, | |
| "num_input_tokens_seen": 26262688, | |
| "step": 2340, | |
| "train_runtime": 3836.5264, | |
| "train_tokens_per_second": 6845.434 | |
| }, | |
| { | |
| "epoch": 1.125277494450111, | |
| "grad_norm": 0.7665443420410156, | |
| "learning_rate": 3.4572888556433246e-05, | |
| "loss": 0.4681, | |
| "num_input_tokens_seen": 26321160, | |
| "step": 2345, | |
| "train_runtime": 3844.9857, | |
| "train_tokens_per_second": 6845.581 | |
| }, | |
| { | |
| "epoch": 1.127677446451071, | |
| "grad_norm": 0.616336464881897, | |
| "learning_rate": 3.451483395081212e-05, | |
| "loss": 0.4631, | |
| "num_input_tokens_seen": 26378192, | |
| "step": 2350, | |
| "train_runtime": 3853.2119, | |
| "train_tokens_per_second": 6845.767 | |
| }, | |
| { | |
| "epoch": 1.130077398452031, | |
| "grad_norm": 0.6478726863861084, | |
| "learning_rate": 3.445671928277461e-05, | |
| "loss": 0.4676, | |
| "num_input_tokens_seen": 26430848, | |
| "step": 2355, | |
| "train_runtime": 3861.022, | |
| "train_tokens_per_second": 6845.558 | |
| }, | |
| { | |
| "epoch": 1.132477350452991, | |
| "grad_norm": 0.6371597647666931, | |
| "learning_rate": 3.4398544919169715e-05, | |
| "loss": 0.4904, | |
| "num_input_tokens_seen": 26489064, | |
| "step": 2360, | |
| "train_runtime": 3868.9291, | |
| "train_tokens_per_second": 6846.614 | |
| }, | |
| { | |
| "epoch": 1.134877302453951, | |
| "grad_norm": 0.6929451823234558, | |
| "learning_rate": 3.4340311227223273e-05, | |
| "loss": 0.5352, | |
| "num_input_tokens_seen": 26543528, | |
| "step": 2365, | |
| "train_runtime": 3877.0017, | |
| "train_tokens_per_second": 6846.406 | |
| }, | |
| { | |
| "epoch": 1.137277254454911, | |
| "grad_norm": 0.9073979258537292, | |
| "learning_rate": 3.428201857453562e-05, | |
| "loss": 0.5051, | |
| "num_input_tokens_seen": 26596928, | |
| "step": 2370, | |
| "train_runtime": 3884.7443, | |
| "train_tokens_per_second": 6846.507 | |
| }, | |
| { | |
| "epoch": 1.139677206455871, | |
| "grad_norm": 0.7150000929832458, | |
| "learning_rate": 3.422366732907931e-05, | |
| "loss": 0.4361, | |
| "num_input_tokens_seen": 26654072, | |
| "step": 2375, | |
| "train_runtime": 3893.2295, | |
| "train_tokens_per_second": 6846.263 | |
| }, | |
| { | |
| "epoch": 1.1420771584568308, | |
| "grad_norm": 0.6671944260597229, | |
| "learning_rate": 3.416525785919673e-05, | |
| "loss": 0.488, | |
| "num_input_tokens_seen": 26707464, | |
| "step": 2380, | |
| "train_runtime": 3901.0068, | |
| "train_tokens_per_second": 6846.3 | |
| }, | |
| { | |
| "epoch": 1.1444771104577909, | |
| "grad_norm": 0.585337221622467, | |
| "learning_rate": 3.410679053359784e-05, | |
| "loss": 0.4326, | |
| "num_input_tokens_seen": 26766704, | |
| "step": 2385, | |
| "train_runtime": 3909.5898, | |
| "train_tokens_per_second": 6846.423 | |
| }, | |
| { | |
| "epoch": 1.1468770624587508, | |
| "grad_norm": 0.5534717440605164, | |
| "learning_rate": 3.404826572135779e-05, | |
| "loss": 0.4831, | |
| "num_input_tokens_seen": 26826328, | |
| "step": 2390, | |
| "train_runtime": 3918.5924, | |
| "train_tokens_per_second": 6845.909 | |
| }, | |
| { | |
| "epoch": 1.1492770144597109, | |
| "grad_norm": 0.5429486632347107, | |
| "learning_rate": 3.398968379191462e-05, | |
| "loss": 0.4909, | |
| "num_input_tokens_seen": 26880888, | |
| "step": 2395, | |
| "train_runtime": 3926.453, | |
| "train_tokens_per_second": 6846.099 | |
| }, | |
| { | |
| "epoch": 1.1516769664606707, | |
| "grad_norm": 0.8771390914916992, | |
| "learning_rate": 3.393104511506694e-05, | |
| "loss": 0.4903, | |
| "num_input_tokens_seen": 26937800, | |
| "step": 2400, | |
| "train_runtime": 3934.3502, | |
| "train_tokens_per_second": 6846.823 | |
| }, | |
| { | |
| "epoch": 1.1540769184616309, | |
| "grad_norm": 0.7701951861381531, | |
| "learning_rate": 3.387235006097155e-05, | |
| "loss": 0.4994, | |
| "num_input_tokens_seen": 26993776, | |
| "step": 2405, | |
| "train_runtime": 3942.0785, | |
| "train_tokens_per_second": 6847.6 | |
| }, | |
| { | |
| "epoch": 1.1564768704625907, | |
| "grad_norm": 0.5495705008506775, | |
| "learning_rate": 3.381359900014116e-05, | |
| "loss": 0.4745, | |
| "num_input_tokens_seen": 27053440, | |
| "step": 2410, | |
| "train_runtime": 3950.7471, | |
| "train_tokens_per_second": 6847.677 | |
| }, | |
| { | |
| "epoch": 1.1588768224635508, | |
| "grad_norm": 0.7725142240524292, | |
| "learning_rate": 3.375479230344199e-05, | |
| "loss": 0.5404, | |
| "num_input_tokens_seen": 27104744, | |
| "step": 2415, | |
| "train_runtime": 3958.6488, | |
| "train_tokens_per_second": 6846.969 | |
| }, | |
| { | |
| "epoch": 1.1612767744645107, | |
| "grad_norm": 1.0459918975830078, | |
| "learning_rate": 3.369593034209149e-05, | |
| "loss": 0.5069, | |
| "num_input_tokens_seen": 27159864, | |
| "step": 2420, | |
| "train_runtime": 3967.0288, | |
| "train_tokens_per_second": 6846.4 | |
| }, | |
| { | |
| "epoch": 1.1636767264654706, | |
| "grad_norm": 0.6602296829223633, | |
| "learning_rate": 3.363701348765597e-05, | |
| "loss": 0.4541, | |
| "num_input_tokens_seen": 27219344, | |
| "step": 2425, | |
| "train_runtime": 3976.0119, | |
| "train_tokens_per_second": 6845.891 | |
| }, | |
| { | |
| "epoch": 1.1660766784664307, | |
| "grad_norm": 0.5902988910675049, | |
| "learning_rate": 3.3578042112048226e-05, | |
| "loss": 0.4447, | |
| "num_input_tokens_seen": 27279536, | |
| "step": 2430, | |
| "train_runtime": 3984.8836, | |
| "train_tokens_per_second": 6845.755 | |
| }, | |
| { | |
| "epoch": 1.1684766304673906, | |
| "grad_norm": 0.9325588941574097, | |
| "learning_rate": 3.351901658752524e-05, | |
| "loss": 0.5227, | |
| "num_input_tokens_seen": 27336160, | |
| "step": 2435, | |
| "train_runtime": 3992.8679, | |
| "train_tokens_per_second": 6846.247 | |
| }, | |
| { | |
| "epoch": 1.1708765824683507, | |
| "grad_norm": 0.6601638793945312, | |
| "learning_rate": 3.34599372866858e-05, | |
| "loss": 0.4813, | |
| "num_input_tokens_seen": 27393304, | |
| "step": 2440, | |
| "train_runtime": 4001.2293, | |
| "train_tokens_per_second": 6846.222 | |
| }, | |
| { | |
| "epoch": 1.1732765344693106, | |
| "grad_norm": 0.8339878916740417, | |
| "learning_rate": 3.3400804582468154e-05, | |
| "loss": 0.5101, | |
| "num_input_tokens_seen": 27444632, | |
| "step": 2445, | |
| "train_runtime": 4008.6642, | |
| "train_tokens_per_second": 6846.329 | |
| }, | |
| { | |
| "epoch": 1.1756764864702707, | |
| "grad_norm": 0.8969867825508118, | |
| "learning_rate": 3.334161884814769e-05, | |
| "loss": 0.4709, | |
| "num_input_tokens_seen": 27502576, | |
| "step": 2450, | |
| "train_runtime": 4016.7436, | |
| "train_tokens_per_second": 6846.983 | |
| }, | |
| { | |
| "epoch": 1.1780764384712306, | |
| "grad_norm": 0.8373593091964722, | |
| "learning_rate": 3.3282380457334505e-05, | |
| "loss": 0.5498, | |
| "num_input_tokens_seen": 27559352, | |
| "step": 2455, | |
| "train_runtime": 4024.9244, | |
| "train_tokens_per_second": 6847.173 | |
| }, | |
| { | |
| "epoch": 1.1804763904721907, | |
| "grad_norm": 0.8110735416412354, | |
| "learning_rate": 3.3223089783971114e-05, | |
| "loss": 0.507, | |
| "num_input_tokens_seen": 27615472, | |
| "step": 2460, | |
| "train_runtime": 4032.7198, | |
| "train_tokens_per_second": 6847.853 | |
| }, | |
| { | |
| "epoch": 1.1828763424731505, | |
| "grad_norm": 0.7023930549621582, | |
| "learning_rate": 3.3163747202330066e-05, | |
| "loss": 0.498, | |
| "num_input_tokens_seen": 27671096, | |
| "step": 2465, | |
| "train_runtime": 4040.3448, | |
| "train_tokens_per_second": 6848.697 | |
| }, | |
| { | |
| "epoch": 1.1852762944741104, | |
| "grad_norm": 0.783581554889679, | |
| "learning_rate": 3.310435308701156e-05, | |
| "loss": 0.5188, | |
| "num_input_tokens_seen": 27722512, | |
| "step": 2470, | |
| "train_runtime": 4048.0115, | |
| "train_tokens_per_second": 6848.427 | |
| }, | |
| { | |
| "epoch": 1.1876762464750705, | |
| "grad_norm": 0.7718804478645325, | |
| "learning_rate": 3.304490781294114e-05, | |
| "loss": 0.4861, | |
| "num_input_tokens_seen": 27778280, | |
| "step": 2475, | |
| "train_runtime": 4055.8209, | |
| "train_tokens_per_second": 6848.991 | |
| }, | |
| { | |
| "epoch": 1.1900761984760304, | |
| "grad_norm": 0.5067981481552124, | |
| "learning_rate": 3.2985411755367246e-05, | |
| "loss": 0.4792, | |
| "num_input_tokens_seen": 27839424, | |
| "step": 2480, | |
| "train_runtime": 4064.5853, | |
| "train_tokens_per_second": 6849.266 | |
| }, | |
| { | |
| "epoch": 1.1924761504769905, | |
| "grad_norm": 0.7346833348274231, | |
| "learning_rate": 3.292586528985894e-05, | |
| "loss": 0.4599, | |
| "num_input_tokens_seen": 27894440, | |
| "step": 2485, | |
| "train_runtime": 4072.5002, | |
| "train_tokens_per_second": 6849.463 | |
| }, | |
| { | |
| "epoch": 1.1948761024779504, | |
| "grad_norm": 0.5885698199272156, | |
| "learning_rate": 3.2866268792303424e-05, | |
| "loss": 0.4936, | |
| "num_input_tokens_seen": 27959096, | |
| "step": 2490, | |
| "train_runtime": 4082.1306, | |
| "train_tokens_per_second": 6849.143 | |
| }, | |
| { | |
| "epoch": 1.1972760544789105, | |
| "grad_norm": 0.5944679975509644, | |
| "learning_rate": 3.2806622638903764e-05, | |
| "loss": 0.5008, | |
| "num_input_tokens_seen": 28010352, | |
| "step": 2495, | |
| "train_runtime": 4089.5284, | |
| "train_tokens_per_second": 6849.287 | |
| }, | |
| { | |
| "epoch": 1.1996760064798704, | |
| "grad_norm": 0.7197619080543518, | |
| "learning_rate": 3.274692720617649e-05, | |
| "loss": 0.5232, | |
| "num_input_tokens_seen": 28067424, | |
| "step": 2500, | |
| "train_runtime": 4098.1617, | |
| "train_tokens_per_second": 6848.784 | |
| }, | |
| { | |
| "epoch": 1.2020759584808305, | |
| "grad_norm": 0.71132493019104, | |
| "learning_rate": 3.2687182870949185e-05, | |
| "loss": 0.4749, | |
| "num_input_tokens_seen": 28126704, | |
| "step": 2505, | |
| "train_runtime": 4106.4308, | |
| "train_tokens_per_second": 6849.428 | |
| }, | |
| { | |
| "epoch": 1.2044759104817904, | |
| "grad_norm": 0.7117146849632263, | |
| "learning_rate": 3.2627390010358133e-05, | |
| "loss": 0.4965, | |
| "num_input_tokens_seen": 28184072, | |
| "step": 2510, | |
| "train_runtime": 4114.8063, | |
| "train_tokens_per_second": 6849.429 | |
| }, | |
| { | |
| "epoch": 1.2068758624827503, | |
| "grad_norm": 0.7712971568107605, | |
| "learning_rate": 3.256754900184593e-05, | |
| "loss": 0.489, | |
| "num_input_tokens_seen": 28237608, | |
| "step": 2515, | |
| "train_runtime": 4122.4987, | |
| "train_tokens_per_second": 6849.634 | |
| }, | |
| { | |
| "epoch": 1.2092758144837104, | |
| "grad_norm": 0.843129575252533, | |
| "learning_rate": 3.2507660223159115e-05, | |
| "loss": 0.449, | |
| "num_input_tokens_seen": 28299544, | |
| "step": 2520, | |
| "train_runtime": 4131.2681, | |
| "train_tokens_per_second": 6850.086 | |
| }, | |
| { | |
| "epoch": 1.2116757664846702, | |
| "grad_norm": 0.6665219068527222, | |
| "learning_rate": 3.2447724052345786e-05, | |
| "loss": 0.4269, | |
| "num_input_tokens_seen": 28357640, | |
| "step": 2525, | |
| "train_runtime": 4139.6319, | |
| "train_tokens_per_second": 6850.281 | |
| }, | |
| { | |
| "epoch": 1.2140757184856303, | |
| "grad_norm": 0.7961658835411072, | |
| "learning_rate": 3.238774086775317e-05, | |
| "loss": 0.4937, | |
| "num_input_tokens_seen": 28411848, | |
| "step": 2530, | |
| "train_runtime": 4147.578, | |
| "train_tokens_per_second": 6850.226 | |
| }, | |
| { | |
| "epoch": 1.2164756704865902, | |
| "grad_norm": 0.7647880911827087, | |
| "learning_rate": 3.2327711048025314e-05, | |
| "loss": 0.473, | |
| "num_input_tokens_seen": 28465072, | |
| "step": 2535, | |
| "train_runtime": 4155.7446, | |
| "train_tokens_per_second": 6849.572 | |
| }, | |
| { | |
| "epoch": 1.2188756224875503, | |
| "grad_norm": 0.7645636796951294, | |
| "learning_rate": 3.226763497210061e-05, | |
| "loss": 0.5217, | |
| "num_input_tokens_seen": 28513584, | |
| "step": 2540, | |
| "train_runtime": 4162.7633, | |
| "train_tokens_per_second": 6849.677 | |
| }, | |
| { | |
| "epoch": 1.2212755744885102, | |
| "grad_norm": 0.9397866725921631, | |
| "learning_rate": 3.2207513019209455e-05, | |
| "loss": 0.5058, | |
| "num_input_tokens_seen": 28569888, | |
| "step": 2545, | |
| "train_runtime": 4170.8063, | |
| "train_tokens_per_second": 6849.968 | |
| }, | |
| { | |
| "epoch": 1.2236755264894703, | |
| "grad_norm": 0.8510188460350037, | |
| "learning_rate": 3.2147345568871874e-05, | |
| "loss": 0.4699, | |
| "num_input_tokens_seen": 28623888, | |
| "step": 2550, | |
| "train_runtime": 4178.6198, | |
| "train_tokens_per_second": 6850.082 | |
| }, | |
| { | |
| "epoch": 1.2260754784904302, | |
| "grad_norm": 0.7524721622467041, | |
| "learning_rate": 3.208713300089504e-05, | |
| "loss": 0.4585, | |
| "num_input_tokens_seen": 28680088, | |
| "step": 2555, | |
| "train_runtime": 4187.0852, | |
| "train_tokens_per_second": 6849.655 | |
| }, | |
| { | |
| "epoch": 1.22847543049139, | |
| "grad_norm": 0.6238115429878235, | |
| "learning_rate": 3.2026875695370975e-05, | |
| "loss": 0.4872, | |
| "num_input_tokens_seen": 28733184, | |
| "step": 2560, | |
| "train_runtime": 4194.4934, | |
| "train_tokens_per_second": 6850.216 | |
| }, | |
| { | |
| "epoch": 1.2308753824923502, | |
| "grad_norm": 0.8195456862449646, | |
| "learning_rate": 3.1966574032674074e-05, | |
| "loss": 0.5134, | |
| "num_input_tokens_seen": 28787400, | |
| "step": 2565, | |
| "train_runtime": 4202.0819, | |
| "train_tokens_per_second": 6850.747 | |
| }, | |
| { | |
| "epoch": 1.23327533449331, | |
| "grad_norm": 0.7062321305274963, | |
| "learning_rate": 3.190622839345878e-05, | |
| "loss": 0.4758, | |
| "num_input_tokens_seen": 28840944, | |
| "step": 2570, | |
| "train_runtime": 4209.9012, | |
| "train_tokens_per_second": 6850.741 | |
| }, | |
| { | |
| "epoch": 1.2356752864942702, | |
| "grad_norm": 0.6290914416313171, | |
| "learning_rate": 3.184583915865709e-05, | |
| "loss": 0.5343, | |
| "num_input_tokens_seen": 28893352, | |
| "step": 2575, | |
| "train_runtime": 4217.2229, | |
| "train_tokens_per_second": 6851.275 | |
| }, | |
| { | |
| "epoch": 1.23807523849523, | |
| "grad_norm": 0.6599912643432617, | |
| "learning_rate": 3.178540670947624e-05, | |
| "loss": 0.4822, | |
| "num_input_tokens_seen": 28952544, | |
| "step": 2580, | |
| "train_runtime": 4225.8796, | |
| "train_tokens_per_second": 6851.247 | |
| }, | |
| { | |
| "epoch": 1.2404751904961901, | |
| "grad_norm": 0.6899898052215576, | |
| "learning_rate": 3.172493142739622e-05, | |
| "loss": 0.4529, | |
| "num_input_tokens_seen": 29007344, | |
| "step": 2585, | |
| "train_runtime": 4233.7269, | |
| "train_tokens_per_second": 6851.492 | |
| }, | |
| { | |
| "epoch": 1.24287514249715, | |
| "grad_norm": 0.8615679144859314, | |
| "learning_rate": 3.1664413694167424e-05, | |
| "loss": 0.5018, | |
| "num_input_tokens_seen": 29065880, | |
| "step": 2590, | |
| "train_runtime": 4242.1314, | |
| "train_tokens_per_second": 6851.716 | |
| }, | |
| { | |
| "epoch": 1.2452750944981101, | |
| "grad_norm": 0.829759955406189, | |
| "learning_rate": 3.160385389180822e-05, | |
| "loss": 0.5014, | |
| "num_input_tokens_seen": 29120600, | |
| "step": 2595, | |
| "train_runtime": 4250.6385, | |
| "train_tokens_per_second": 6850.877 | |
| }, | |
| { | |
| "epoch": 1.24767504649907, | |
| "grad_norm": 1.099179744720459, | |
| "learning_rate": 3.154325240260254e-05, | |
| "loss": 0.4823, | |
| "num_input_tokens_seen": 29174832, | |
| "step": 2600, | |
| "train_runtime": 4258.7641, | |
| "train_tokens_per_second": 6850.54 | |
| }, | |
| { | |
| "epoch": 1.25007499850003, | |
| "grad_norm": 0.7731813788414001, | |
| "learning_rate": 3.148260960909745e-05, | |
| "loss": 0.4527, | |
| "num_input_tokens_seen": 29228680, | |
| "step": 2605, | |
| "train_runtime": 4266.7683, | |
| "train_tokens_per_second": 6850.309 | |
| }, | |
| { | |
| "epoch": 1.25247495050099, | |
| "grad_norm": 0.7874563336372375, | |
| "learning_rate": 3.1421925894100745e-05, | |
| "loss": 0.5152, | |
| "num_input_tokens_seen": 29282976, | |
| "step": 2610, | |
| "train_runtime": 4274.5977, | |
| "train_tokens_per_second": 6850.464 | |
| }, | |
| { | |
| "epoch": 1.2548749025019499, | |
| "grad_norm": 0.6936095952987671, | |
| "learning_rate": 3.1361201640678554e-05, | |
| "loss": 0.5055, | |
| "num_input_tokens_seen": 29337384, | |
| "step": 2615, | |
| "train_runtime": 4282.725, | |
| "train_tokens_per_second": 6850.168 | |
| }, | |
| { | |
| "epoch": 1.25727485450291, | |
| "grad_norm": 0.8180893063545227, | |
| "learning_rate": 3.130043723215291e-05, | |
| "loss": 0.4808, | |
| "num_input_tokens_seen": 29398256, | |
| "step": 2620, | |
| "train_runtime": 4291.6094, | |
| "train_tokens_per_second": 6850.17 | |
| }, | |
| { | |
| "epoch": 1.2596748065038699, | |
| "grad_norm": 0.7401306629180908, | |
| "learning_rate": 3.123963305209932e-05, | |
| "loss": 0.5101, | |
| "num_input_tokens_seen": 29455288, | |
| "step": 2625, | |
| "train_runtime": 4299.6287, | |
| "train_tokens_per_second": 6850.659 | |
| }, | |
| { | |
| "epoch": 1.26207475850483, | |
| "grad_norm": 0.7376925349235535, | |
| "learning_rate": 3.1178789484344326e-05, | |
| "loss": 0.468, | |
| "num_input_tokens_seen": 29513208, | |
| "step": 2630, | |
| "train_runtime": 4308.0487, | |
| "train_tokens_per_second": 6850.714 | |
| }, | |
| { | |
| "epoch": 1.2644747105057899, | |
| "grad_norm": 0.7442266345024109, | |
| "learning_rate": 3.1117906912963124e-05, | |
| "loss": 0.5214, | |
| "num_input_tokens_seen": 29566424, | |
| "step": 2635, | |
| "train_runtime": 4315.7814, | |
| "train_tokens_per_second": 6850.77 | |
| }, | |
| { | |
| "epoch": 1.26687466250675, | |
| "grad_norm": 0.7198356986045837, | |
| "learning_rate": 3.105698572227712e-05, | |
| "loss": 0.5059, | |
| "num_input_tokens_seen": 29621112, | |
| "step": 2640, | |
| "train_runtime": 4324.1308, | |
| "train_tokens_per_second": 6850.189 | |
| }, | |
| { | |
| "epoch": 1.2692746145077098, | |
| "grad_norm": 0.6759196519851685, | |
| "learning_rate": 3.0996026296851516e-05, | |
| "loss": 0.4705, | |
| "num_input_tokens_seen": 29672896, | |
| "step": 2645, | |
| "train_runtime": 4331.5888, | |
| "train_tokens_per_second": 6850.349 | |
| }, | |
| { | |
| "epoch": 1.2716745665086697, | |
| "grad_norm": 0.659756600856781, | |
| "learning_rate": 3.093502902149285e-05, | |
| "loss": 0.4753, | |
| "num_input_tokens_seen": 29724344, | |
| "step": 2650, | |
| "train_runtime": 4339.2532, | |
| "train_tokens_per_second": 6850.106 | |
| }, | |
| { | |
| "epoch": 1.2740745185096298, | |
| "grad_norm": 0.7627817988395691, | |
| "learning_rate": 3.087399428124659e-05, | |
| "loss": 0.5218, | |
| "num_input_tokens_seen": 29779744, | |
| "step": 2655, | |
| "train_runtime": 4347.2112, | |
| "train_tokens_per_second": 6850.31 | |
| }, | |
| { | |
| "epoch": 1.2764744705105897, | |
| "grad_norm": 0.5417824387550354, | |
| "learning_rate": 3.081292246139473e-05, | |
| "loss": 0.4784, | |
| "num_input_tokens_seen": 29834824, | |
| "step": 2660, | |
| "train_runtime": 4355.3061, | |
| "train_tokens_per_second": 6850.224 | |
| }, | |
| { | |
| "epoch": 1.2788744225115498, | |
| "grad_norm": 0.7506272792816162, | |
| "learning_rate": 3.0751813947453265e-05, | |
| "loss": 0.4886, | |
| "num_input_tokens_seen": 29890520, | |
| "step": 2665, | |
| "train_runtime": 4362.9276, | |
| "train_tokens_per_second": 6851.024 | |
| }, | |
| { | |
| "epoch": 1.2812743745125097, | |
| "grad_norm": 0.6071366667747498, | |
| "learning_rate": 3.069066912516991e-05, | |
| "loss": 0.5277, | |
| "num_input_tokens_seen": 29945288, | |
| "step": 2670, | |
| "train_runtime": 4370.6908, | |
| "train_tokens_per_second": 6851.386 | |
| }, | |
| { | |
| "epoch": 1.2836743265134698, | |
| "grad_norm": 0.7744503021240234, | |
| "learning_rate": 3.0629488380521504e-05, | |
| "loss": 0.5158, | |
| "num_input_tokens_seen": 30001032, | |
| "step": 2675, | |
| "train_runtime": 4378.8355, | |
| "train_tokens_per_second": 6851.372 | |
| }, | |
| { | |
| "epoch": 1.2860742785144297, | |
| "grad_norm": 0.4839749336242676, | |
| "learning_rate": 3.056827209971167e-05, | |
| "loss": 0.5022, | |
| "num_input_tokens_seen": 30057416, | |
| "step": 2680, | |
| "train_runtime": 4387.5074, | |
| "train_tokens_per_second": 6850.682 | |
| }, | |
| { | |
| "epoch": 1.2884742305153898, | |
| "grad_norm": 0.5500566363334656, | |
| "learning_rate": 3.0507020669168367e-05, | |
| "loss": 0.4875, | |
| "num_input_tokens_seen": 30113512, | |
| "step": 2685, | |
| "train_runtime": 4395.5794, | |
| "train_tokens_per_second": 6850.863 | |
| }, | |
| { | |
| "epoch": 1.2908741825163497, | |
| "grad_norm": 0.7816157341003418, | |
| "learning_rate": 3.044573447554141e-05, | |
| "loss": 0.4872, | |
| "num_input_tokens_seen": 30171064, | |
| "step": 2690, | |
| "train_runtime": 4404.1038, | |
| "train_tokens_per_second": 6850.671 | |
| }, | |
| { | |
| "epoch": 1.2932741345173095, | |
| "grad_norm": 0.6968929767608643, | |
| "learning_rate": 3.038441390570008e-05, | |
| "loss": 0.4715, | |
| "num_input_tokens_seen": 30226872, | |
| "step": 2695, | |
| "train_runtime": 4412.2507, | |
| "train_tokens_per_second": 6850.67 | |
| }, | |
| { | |
| "epoch": 1.2956740865182697, | |
| "grad_norm": 0.8923588395118713, | |
| "learning_rate": 3.0323059346730666e-05, | |
| "loss": 0.5249, | |
| "num_input_tokens_seen": 30281784, | |
| "step": 2700, | |
| "train_runtime": 4420.3662, | |
| "train_tokens_per_second": 6850.515 | |
| }, | |
| { | |
| "epoch": 1.2980740385192295, | |
| "grad_norm": 0.9175417423248291, | |
| "learning_rate": 3.026167118593396e-05, | |
| "loss": 0.5334, | |
| "num_input_tokens_seen": 30336824, | |
| "step": 2705, | |
| "train_runtime": 4428.4152, | |
| "train_tokens_per_second": 6850.492 | |
| }, | |
| { | |
| "epoch": 1.3004739905201896, | |
| "grad_norm": 0.5945408344268799, | |
| "learning_rate": 3.0200249810822922e-05, | |
| "loss": 0.4795, | |
| "num_input_tokens_seen": 30391968, | |
| "step": 2710, | |
| "train_runtime": 4436.7566, | |
| "train_tokens_per_second": 6850.042 | |
| }, | |
| { | |
| "epoch": 1.3028739425211495, | |
| "grad_norm": 0.6741787195205688, | |
| "learning_rate": 3.0138795609120156e-05, | |
| "loss": 0.5054, | |
| "num_input_tokens_seen": 30448056, | |
| "step": 2715, | |
| "train_runtime": 4445.0926, | |
| "train_tokens_per_second": 6849.814 | |
| }, | |
| { | |
| "epoch": 1.3052738945221096, | |
| "grad_norm": 0.7565773129463196, | |
| "learning_rate": 3.0077308968755484e-05, | |
| "loss": 0.4871, | |
| "num_input_tokens_seen": 30509528, | |
| "step": 2720, | |
| "train_runtime": 4454.1899, | |
| "train_tokens_per_second": 6849.624 | |
| }, | |
| { | |
| "epoch": 1.3076738465230695, | |
| "grad_norm": 0.7174657583236694, | |
| "learning_rate": 3.0015790277863504e-05, | |
| "loss": 0.5235, | |
| "num_input_tokens_seen": 30564064, | |
| "step": 2725, | |
| "train_runtime": 4462.4576, | |
| "train_tokens_per_second": 6849.155 | |
| }, | |
| { | |
| "epoch": 1.3100737985240296, | |
| "grad_norm": 0.808497965335846, | |
| "learning_rate": 2.9954239924781114e-05, | |
| "loss": 0.5481, | |
| "num_input_tokens_seen": 30617256, | |
| "step": 2730, | |
| "train_runtime": 4469.9742, | |
| "train_tokens_per_second": 6849.538 | |
| }, | |
| { | |
| "epoch": 1.3124737505249895, | |
| "grad_norm": 0.7192595601081848, | |
| "learning_rate": 2.9892658298045105e-05, | |
| "loss": 0.4882, | |
| "num_input_tokens_seen": 30676776, | |
| "step": 2735, | |
| "train_runtime": 4478.1351, | |
| "train_tokens_per_second": 6850.346 | |
| }, | |
| { | |
| "epoch": 1.3148737025259494, | |
| "grad_norm": 0.7198320627212524, | |
| "learning_rate": 2.983104578638966e-05, | |
| "loss": 0.5133, | |
| "num_input_tokens_seen": 30729600, | |
| "step": 2740, | |
| "train_runtime": 4486.2754, | |
| "train_tokens_per_second": 6849.691 | |
| }, | |
| { | |
| "epoch": 1.3172736545269095, | |
| "grad_norm": 0.6649105548858643, | |
| "learning_rate": 2.976940277874395e-05, | |
| "loss": 0.4772, | |
| "num_input_tokens_seen": 30786720, | |
| "step": 2745, | |
| "train_runtime": 4494.0586, | |
| "train_tokens_per_second": 6850.538 | |
| }, | |
| { | |
| "epoch": 1.3196736065278696, | |
| "grad_norm": 0.8715736269950867, | |
| "learning_rate": 2.9707729664229623e-05, | |
| "loss": 0.5323, | |
| "num_input_tokens_seen": 30844488, | |
| "step": 2750, | |
| "train_runtime": 4502.1358, | |
| "train_tokens_per_second": 6851.079 | |
| }, | |
| { | |
| "epoch": 1.3220735585288295, | |
| "grad_norm": 0.7848823666572571, | |
| "learning_rate": 2.964602683215839e-05, | |
| "loss": 0.5318, | |
| "num_input_tokens_seen": 30901200, | |
| "step": 2755, | |
| "train_runtime": 4510.5455, | |
| "train_tokens_per_second": 6850.879 | |
| }, | |
| { | |
| "epoch": 1.3244735105297893, | |
| "grad_norm": 0.5609360337257385, | |
| "learning_rate": 2.958429467202956e-05, | |
| "loss": 0.4453, | |
| "num_input_tokens_seen": 30957496, | |
| "step": 2760, | |
| "train_runtime": 4519.3334, | |
| "train_tokens_per_second": 6850.014 | |
| }, | |
| { | |
| "epoch": 1.3268734625307494, | |
| "grad_norm": 0.8397387266159058, | |
| "learning_rate": 2.9522533573527568e-05, | |
| "loss": 0.4547, | |
| "num_input_tokens_seen": 31014440, | |
| "step": 2765, | |
| "train_runtime": 4527.735, | |
| "train_tokens_per_second": 6849.88 | |
| }, | |
| { | |
| "epoch": 1.3292734145317093, | |
| "grad_norm": 0.883388340473175, | |
| "learning_rate": 2.9460743926519524e-05, | |
| "loss": 0.4866, | |
| "num_input_tokens_seen": 31069232, | |
| "step": 2770, | |
| "train_runtime": 4535.7952, | |
| "train_tokens_per_second": 6849.787 | |
| }, | |
| { | |
| "epoch": 1.3316733665326694, | |
| "grad_norm": 0.6454315185546875, | |
| "learning_rate": 2.9398926121052757e-05, | |
| "loss": 0.4363, | |
| "num_input_tokens_seen": 31124192, | |
| "step": 2775, | |
| "train_runtime": 4543.7024, | |
| "train_tokens_per_second": 6849.963 | |
| }, | |
| { | |
| "epoch": 1.3340733185336293, | |
| "grad_norm": 0.8647413849830627, | |
| "learning_rate": 2.933708054735232e-05, | |
| "loss": 0.5387, | |
| "num_input_tokens_seen": 31181208, | |
| "step": 2780, | |
| "train_runtime": 4551.829, | |
| "train_tokens_per_second": 6850.259 | |
| }, | |
| { | |
| "epoch": 1.3364732705345892, | |
| "grad_norm": 0.8238906860351562, | |
| "learning_rate": 2.9275207595818587e-05, | |
| "loss": 0.4733, | |
| "num_input_tokens_seen": 31238792, | |
| "step": 2785, | |
| "train_runtime": 4560.1671, | |
| "train_tokens_per_second": 6850.361 | |
| }, | |
| { | |
| "epoch": 1.3388732225355493, | |
| "grad_norm": 0.8096624612808228, | |
| "learning_rate": 2.9213307657024747e-05, | |
| "loss": 0.4498, | |
| "num_input_tokens_seen": 31293408, | |
| "step": 2790, | |
| "train_runtime": 4568.3465, | |
| "train_tokens_per_second": 6850.051 | |
| }, | |
| { | |
| "epoch": 1.3412731745365094, | |
| "grad_norm": 0.6373225450515747, | |
| "learning_rate": 2.9151381121714326e-05, | |
| "loss": 0.4626, | |
| "num_input_tokens_seen": 31351360, | |
| "step": 2795, | |
| "train_runtime": 4576.4713, | |
| "train_tokens_per_second": 6850.553 | |
| }, | |
| { | |
| "epoch": 1.3436731265374693, | |
| "grad_norm": 0.9298360347747803, | |
| "learning_rate": 2.9089428380798765e-05, | |
| "loss": 0.5147, | |
| "num_input_tokens_seen": 31408064, | |
| "step": 2800, | |
| "train_runtime": 4584.763, | |
| "train_tokens_per_second": 6850.532 | |
| }, | |
| { | |
| "epoch": 1.3460730785384292, | |
| "grad_norm": 0.7824495434761047, | |
| "learning_rate": 2.9027449825354914e-05, | |
| "loss": 0.5005, | |
| "num_input_tokens_seen": 31465944, | |
| "step": 2805, | |
| "train_runtime": 4593.7143, | |
| "train_tokens_per_second": 6849.783 | |
| }, | |
| { | |
| "epoch": 1.3484730305393893, | |
| "grad_norm": 0.8347817063331604, | |
| "learning_rate": 2.8965445846622575e-05, | |
| "loss": 0.5212, | |
| "num_input_tokens_seen": 31519296, | |
| "step": 2810, | |
| "train_runtime": 4601.6577, | |
| "train_tokens_per_second": 6849.552 | |
| }, | |
| { | |
| "epoch": 1.3508729825403492, | |
| "grad_norm": 0.7829338312149048, | |
| "learning_rate": 2.8903416836002046e-05, | |
| "loss": 0.4881, | |
| "num_input_tokens_seen": 31575040, | |
| "step": 2815, | |
| "train_runtime": 4609.5566, | |
| "train_tokens_per_second": 6849.908 | |
| }, | |
| { | |
| "epoch": 1.3532729345413093, | |
| "grad_norm": 0.7527592182159424, | |
| "learning_rate": 2.8841363185051627e-05, | |
| "loss": 0.5284, | |
| "num_input_tokens_seen": 31627864, | |
| "step": 2820, | |
| "train_runtime": 4617.3734, | |
| "train_tokens_per_second": 6849.752 | |
| }, | |
| { | |
| "epoch": 1.3556728865422691, | |
| "grad_norm": 0.5921339988708496, | |
| "learning_rate": 2.877928528548518e-05, | |
| "loss": 0.5337, | |
| "num_input_tokens_seen": 31681448, | |
| "step": 2825, | |
| "train_runtime": 4625.135, | |
| "train_tokens_per_second": 6849.843 | |
| }, | |
| { | |
| "epoch": 1.358072838543229, | |
| "grad_norm": 0.8095146417617798, | |
| "learning_rate": 2.871718352916961e-05, | |
| "loss": 0.4355, | |
| "num_input_tokens_seen": 31734720, | |
| "step": 2830, | |
| "train_runtime": 4632.6583, | |
| "train_tokens_per_second": 6850.218 | |
| }, | |
| { | |
| "epoch": 1.3604727905441891, | |
| "grad_norm": 0.863218367099762, | |
| "learning_rate": 2.8655058308122435e-05, | |
| "loss": 0.522, | |
| "num_input_tokens_seen": 31786472, | |
| "step": 2835, | |
| "train_runtime": 4640.2065, | |
| "train_tokens_per_second": 6850.228 | |
| }, | |
| { | |
| "epoch": 1.3628727425451492, | |
| "grad_norm": 0.6763318181037903, | |
| "learning_rate": 2.8592910014509284e-05, | |
| "loss": 0.4825, | |
| "num_input_tokens_seen": 31842040, | |
| "step": 2840, | |
| "train_runtime": 4648.7432, | |
| "train_tokens_per_second": 6849.602 | |
| }, | |
| { | |
| "epoch": 1.3652726945461091, | |
| "grad_norm": 0.9902337789535522, | |
| "learning_rate": 2.853073904064144e-05, | |
| "loss": 0.4791, | |
| "num_input_tokens_seen": 31901936, | |
| "step": 2845, | |
| "train_runtime": 4657.7444, | |
| "train_tokens_per_second": 6849.224 | |
| }, | |
| { | |
| "epoch": 1.367672646547069, | |
| "grad_norm": 0.607513427734375, | |
| "learning_rate": 2.8468545778973365e-05, | |
| "loss": 0.4962, | |
| "num_input_tokens_seen": 31955760, | |
| "step": 2850, | |
| "train_runtime": 4665.9209, | |
| "train_tokens_per_second": 6848.757 | |
| }, | |
| { | |
| "epoch": 1.370072598548029, | |
| "grad_norm": 0.7585775256156921, | |
| "learning_rate": 2.8406330622100185e-05, | |
| "loss": 0.5193, | |
| "num_input_tokens_seen": 32012936, | |
| "step": 2855, | |
| "train_runtime": 4674.1143, | |
| "train_tokens_per_second": 6848.984 | |
| }, | |
| { | |
| "epoch": 1.372472550548989, | |
| "grad_norm": 0.6520575284957886, | |
| "learning_rate": 2.834409396275526e-05, | |
| "loss": 0.4838, | |
| "num_input_tokens_seen": 32075400, | |
| "step": 2860, | |
| "train_runtime": 4683.1148, | |
| "train_tokens_per_second": 6849.159 | |
| }, | |
| { | |
| "epoch": 1.374872502549949, | |
| "grad_norm": 0.7430661916732788, | |
| "learning_rate": 2.8281836193807677e-05, | |
| "loss": 0.5193, | |
| "num_input_tokens_seen": 32127560, | |
| "step": 2865, | |
| "train_runtime": 4690.6625, | |
| "train_tokens_per_second": 6849.258 | |
| }, | |
| { | |
| "epoch": 1.377272454550909, | |
| "grad_norm": 0.6538442373275757, | |
| "learning_rate": 2.821955770825978e-05, | |
| "loss": 0.563, | |
| "num_input_tokens_seen": 32182368, | |
| "step": 2870, | |
| "train_runtime": 4698.3261, | |
| "train_tokens_per_second": 6849.752 | |
| }, | |
| { | |
| "epoch": 1.3796724065518688, | |
| "grad_norm": 0.6958315968513489, | |
| "learning_rate": 2.81572588992447e-05, | |
| "loss": 0.4983, | |
| "num_input_tokens_seen": 32238704, | |
| "step": 2875, | |
| "train_runtime": 4706.8956, | |
| "train_tokens_per_second": 6849.25 | |
| }, | |
| { | |
| "epoch": 1.382072358552829, | |
| "grad_norm": 0.5171172618865967, | |
| "learning_rate": 2.809494016002382e-05, | |
| "loss": 0.4887, | |
| "num_input_tokens_seen": 32299312, | |
| "step": 2880, | |
| "train_runtime": 4717.4351, | |
| "train_tokens_per_second": 6846.795 | |
| }, | |
| { | |
| "epoch": 1.384472310553789, | |
| "grad_norm": 0.7386242151260376, | |
| "learning_rate": 2.8032601883984373e-05, | |
| "loss": 0.4676, | |
| "num_input_tokens_seen": 32353968, | |
| "step": 2885, | |
| "train_runtime": 4727.1468, | |
| "train_tokens_per_second": 6844.291 | |
| }, | |
| { | |
| "epoch": 1.386872262554749, | |
| "grad_norm": 0.6488030552864075, | |
| "learning_rate": 2.7970244464636907e-05, | |
| "loss": 0.5187, | |
| "num_input_tokens_seen": 32408248, | |
| "step": 2890, | |
| "train_runtime": 4737.0735, | |
| "train_tokens_per_second": 6841.407 | |
| }, | |
| { | |
| "epoch": 1.3892722145557088, | |
| "grad_norm": 0.7091050744056702, | |
| "learning_rate": 2.7907868295612805e-05, | |
| "loss": 0.5009, | |
| "num_input_tokens_seen": 32461008, | |
| "step": 2895, | |
| "train_runtime": 4746.6232, | |
| "train_tokens_per_second": 6838.758 | |
| }, | |
| { | |
| "epoch": 1.391672166556669, | |
| "grad_norm": 0.735463559627533, | |
| "learning_rate": 2.7845473770661816e-05, | |
| "loss": 0.4448, | |
| "num_input_tokens_seen": 32519744, | |
| "step": 2900, | |
| "train_runtime": 4756.731, | |
| "train_tokens_per_second": 6836.574 | |
| }, | |
| { | |
| "epoch": 1.3940721185576288, | |
| "grad_norm": 0.8551938533782959, | |
| "learning_rate": 2.7783061283649547e-05, | |
| "loss": 0.4562, | |
| "num_input_tokens_seen": 32575104, | |
| "step": 2905, | |
| "train_runtime": 4767.5045, | |
| "train_tokens_per_second": 6832.737 | |
| }, | |
| { | |
| "epoch": 1.396472070558589, | |
| "grad_norm": 0.8265554904937744, | |
| "learning_rate": 2.7720631228555003e-05, | |
| "loss": 0.4771, | |
| "num_input_tokens_seen": 32633880, | |
| "step": 2910, | |
| "train_runtime": 4778.118, | |
| "train_tokens_per_second": 6829.861 | |
| }, | |
| { | |
| "epoch": 1.3988720225595488, | |
| "grad_norm": 0.7008459568023682, | |
| "learning_rate": 2.7658183999468096e-05, | |
| "loss": 0.5213, | |
| "num_input_tokens_seen": 32687728, | |
| "step": 2915, | |
| "train_runtime": 4787.6745, | |
| "train_tokens_per_second": 6827.475 | |
| }, | |
| { | |
| "epoch": 1.4012719745605087, | |
| "grad_norm": 0.714462399482727, | |
| "learning_rate": 2.759571999058712e-05, | |
| "loss": 0.4879, | |
| "num_input_tokens_seen": 32744776, | |
| "step": 2920, | |
| "train_runtime": 4798.5825, | |
| "train_tokens_per_second": 6823.843 | |
| }, | |
| { | |
| "epoch": 1.4036719265614688, | |
| "grad_norm": 0.7445899248123169, | |
| "learning_rate": 2.7533239596216326e-05, | |
| "loss": 0.4801, | |
| "num_input_tokens_seen": 32802640, | |
| "step": 2925, | |
| "train_runtime": 4809.0391, | |
| "train_tokens_per_second": 6821.038 | |
| }, | |
| { | |
| "epoch": 1.4060718785624289, | |
| "grad_norm": 0.7316624522209167, | |
| "learning_rate": 2.747074321076336e-05, | |
| "loss": 0.4811, | |
| "num_input_tokens_seen": 32858848, | |
| "step": 2930, | |
| "train_runtime": 4819.753, | |
| "train_tokens_per_second": 6817.538 | |
| }, | |
| { | |
| "epoch": 1.4084718305633888, | |
| "grad_norm": 0.8229737877845764, | |
| "learning_rate": 2.7408231228736854e-05, | |
| "loss": 0.4749, | |
| "num_input_tokens_seen": 32915328, | |
| "step": 2935, | |
| "train_runtime": 4829.6875, | |
| "train_tokens_per_second": 6815.209 | |
| }, | |
| { | |
| "epoch": 1.4108717825643486, | |
| "grad_norm": 0.6625364422798157, | |
| "learning_rate": 2.7345704044743857e-05, | |
| "loss": 0.5214, | |
| "num_input_tokens_seen": 32970256, | |
| "step": 2940, | |
| "train_runtime": 4839.5418, | |
| "train_tokens_per_second": 6812.681 | |
| }, | |
| { | |
| "epoch": 1.4132717345653087, | |
| "grad_norm": 0.7320582270622253, | |
| "learning_rate": 2.7283162053487406e-05, | |
| "loss": 0.5137, | |
| "num_input_tokens_seen": 33024728, | |
| "step": 2945, | |
| "train_runtime": 4849.3505, | |
| "train_tokens_per_second": 6810.134 | |
| }, | |
| { | |
| "epoch": 1.4156716865662686, | |
| "grad_norm": 0.8458564281463623, | |
| "learning_rate": 2.7220605649763997e-05, | |
| "loss": 0.4864, | |
| "num_input_tokens_seen": 33083776, | |
| "step": 2950, | |
| "train_runtime": 4859.7251, | |
| "train_tokens_per_second": 6807.746 | |
| }, | |
| { | |
| "epoch": 1.4180716385672287, | |
| "grad_norm": 0.6681801676750183, | |
| "learning_rate": 2.71580352284611e-05, | |
| "loss": 0.4656, | |
| "num_input_tokens_seen": 33141792, | |
| "step": 2955, | |
| "train_runtime": 4870.089, | |
| "train_tokens_per_second": 6805.172 | |
| }, | |
| { | |
| "epoch": 1.4204715905681886, | |
| "grad_norm": 0.5828260779380798, | |
| "learning_rate": 2.7095451184554684e-05, | |
| "loss": 0.4626, | |
| "num_input_tokens_seen": 33200320, | |
| "step": 2960, | |
| "train_runtime": 4879.7888, | |
| "train_tokens_per_second": 6803.639 | |
| }, | |
| { | |
| "epoch": 1.4228715425691485, | |
| "grad_norm": 0.6321309208869934, | |
| "learning_rate": 2.7032853913106702e-05, | |
| "loss": 0.5166, | |
| "num_input_tokens_seen": 33258192, | |
| "step": 2965, | |
| "train_runtime": 4889.401, | |
| "train_tokens_per_second": 6802.1 | |
| }, | |
| { | |
| "epoch": 1.4252714945701086, | |
| "grad_norm": 0.5766092538833618, | |
| "learning_rate": 2.697024380926261e-05, | |
| "loss": 0.4709, | |
| "num_input_tokens_seen": 33315416, | |
| "step": 2970, | |
| "train_runtime": 4899.761, | |
| "train_tokens_per_second": 6799.396 | |
| }, | |
| { | |
| "epoch": 1.4276714465710687, | |
| "grad_norm": 0.5863097906112671, | |
| "learning_rate": 2.6907621268248867e-05, | |
| "loss": 0.4682, | |
| "num_input_tokens_seen": 33374248, | |
| "step": 2975, | |
| "train_runtime": 4910.9171, | |
| "train_tokens_per_second": 6795.93 | |
| }, | |
| { | |
| "epoch": 1.4300713985720286, | |
| "grad_norm": 0.6625893115997314, | |
| "learning_rate": 2.6844986685370438e-05, | |
| "loss": 0.4795, | |
| "num_input_tokens_seen": 33430576, | |
| "step": 2980, | |
| "train_runtime": 4920.8367, | |
| "train_tokens_per_second": 6793.677 | |
| }, | |
| { | |
| "epoch": 1.4324713505729885, | |
| "grad_norm": 0.889992356300354, | |
| "learning_rate": 2.6782340456008304e-05, | |
| "loss": 0.5081, | |
| "num_input_tokens_seen": 33481872, | |
| "step": 2985, | |
| "train_runtime": 4930.6268, | |
| "train_tokens_per_second": 6790.591 | |
| }, | |
| { | |
| "epoch": 1.4348713025739486, | |
| "grad_norm": 0.8572867512702942, | |
| "learning_rate": 2.6719682975616972e-05, | |
| "loss": 0.5238, | |
| "num_input_tokens_seen": 33535608, | |
| "step": 2990, | |
| "train_runtime": 4940.3628, | |
| "train_tokens_per_second": 6788.086 | |
| }, | |
| { | |
| "epoch": 1.4372712545749085, | |
| "grad_norm": 0.7185449600219727, | |
| "learning_rate": 2.6657014639721963e-05, | |
| "loss": 0.4628, | |
| "num_input_tokens_seen": 33595176, | |
| "step": 2995, | |
| "train_runtime": 4950.583, | |
| "train_tokens_per_second": 6786.105 | |
| }, | |
| { | |
| "epoch": 1.4396712065758686, | |
| "grad_norm": 0.6952937245368958, | |
| "learning_rate": 2.659433584391733e-05, | |
| "loss": 0.4726, | |
| "num_input_tokens_seen": 33655192, | |
| "step": 3000, | |
| "train_runtime": 4960.7955, | |
| "train_tokens_per_second": 6784.233 | |
| }, | |
| { | |
| "epoch": 1.4420711585768284, | |
| "grad_norm": 0.5073747634887695, | |
| "learning_rate": 2.6531646983863135e-05, | |
| "loss": 0.5086, | |
| "num_input_tokens_seen": 33710344, | |
| "step": 3005, | |
| "train_runtime": 4971.2496, | |
| "train_tokens_per_second": 6781.06 | |
| }, | |
| { | |
| "epoch": 1.4444711105777883, | |
| "grad_norm": 0.5523395538330078, | |
| "learning_rate": 2.6468948455283006e-05, | |
| "loss": 0.4855, | |
| "num_input_tokens_seen": 33762880, | |
| "step": 3010, | |
| "train_runtime": 4981.002, | |
| "train_tokens_per_second": 6778.331 | |
| }, | |
| { | |
| "epoch": 1.4468710625787484, | |
| "grad_norm": 0.7493255138397217, | |
| "learning_rate": 2.6406240653961562e-05, | |
| "loss": 0.5121, | |
| "num_input_tokens_seen": 33814912, | |
| "step": 3015, | |
| "train_runtime": 4990.9252, | |
| "train_tokens_per_second": 6775.279 | |
| }, | |
| { | |
| "epoch": 1.4492710145797085, | |
| "grad_norm": 0.7933918833732605, | |
| "learning_rate": 2.6343523975741995e-05, | |
| "loss": 0.4822, | |
| "num_input_tokens_seen": 33869336, | |
| "step": 3020, | |
| "train_runtime": 5000.7837, | |
| "train_tokens_per_second": 6772.806 | |
| }, | |
| { | |
| "epoch": 1.4516709665806684, | |
| "grad_norm": 0.827980101108551, | |
| "learning_rate": 2.628079881652351e-05, | |
| "loss": 0.5094, | |
| "num_input_tokens_seen": 33921376, | |
| "step": 3025, | |
| "train_runtime": 5010.3271, | |
| "train_tokens_per_second": 6770.292 | |
| }, | |
| { | |
| "epoch": 1.4540709185816283, | |
| "grad_norm": 0.7234380841255188, | |
| "learning_rate": 2.6218065572258847e-05, | |
| "loss": 0.4494, | |
| "num_input_tokens_seen": 33979216, | |
| "step": 3030, | |
| "train_runtime": 5021.1603, | |
| "train_tokens_per_second": 6767.204 | |
| }, | |
| { | |
| "epoch": 1.4564708705825884, | |
| "grad_norm": 0.6564066410064697, | |
| "learning_rate": 2.6155324638951795e-05, | |
| "loss": 0.5281, | |
| "num_input_tokens_seen": 34036320, | |
| "step": 3035, | |
| "train_runtime": 5032.1108, | |
| "train_tokens_per_second": 6763.826 | |
| }, | |
| { | |
| "epoch": 1.4588708225835483, | |
| "grad_norm": 0.9267168045043945, | |
| "learning_rate": 2.6092576412654668e-05, | |
| "loss": 0.5001, | |
| "num_input_tokens_seen": 34090128, | |
| "step": 3040, | |
| "train_runtime": 5042.1218, | |
| "train_tokens_per_second": 6761.068 | |
| }, | |
| { | |
| "epoch": 1.4612707745845084, | |
| "grad_norm": 0.6622974276542664, | |
| "learning_rate": 2.602982128946583e-05, | |
| "loss": 0.4876, | |
| "num_input_tokens_seen": 34148400, | |
| "step": 3045, | |
| "train_runtime": 5052.2931, | |
| "train_tokens_per_second": 6758.99 | |
| }, | |
| { | |
| "epoch": 1.4636707265854683, | |
| "grad_norm": 0.6938877105712891, | |
| "learning_rate": 2.596705966552718e-05, | |
| "loss": 0.4316, | |
| "num_input_tokens_seen": 34205656, | |
| "step": 3050, | |
| "train_runtime": 5063.4654, | |
| "train_tokens_per_second": 6755.385 | |
| }, | |
| { | |
| "epoch": 1.4660706785864281, | |
| "grad_norm": 1.1527178287506104, | |
| "learning_rate": 2.5904291937021623e-05, | |
| "loss": 0.5168, | |
| "num_input_tokens_seen": 34256136, | |
| "step": 3055, | |
| "train_runtime": 5073.3962, | |
| "train_tokens_per_second": 6752.111 | |
| }, | |
| { | |
| "epoch": 1.4684706305873882, | |
| "grad_norm": 0.8553231358528137, | |
| "learning_rate": 2.5841518500170647e-05, | |
| "loss": 0.4756, | |
| "num_input_tokens_seen": 34311976, | |
| "step": 3060, | |
| "train_runtime": 5083.9773, | |
| "train_tokens_per_second": 6749.042 | |
| }, | |
| { | |
| "epoch": 1.4708705825883484, | |
| "grad_norm": 0.6087079644203186, | |
| "learning_rate": 2.5778739751231747e-05, | |
| "loss": 0.4665, | |
| "num_input_tokens_seen": 34370640, | |
| "step": 3065, | |
| "train_runtime": 5094.5141, | |
| "train_tokens_per_second": 6746.598 | |
| }, | |
| { | |
| "epoch": 1.4732705345893082, | |
| "grad_norm": 0.7348918318748474, | |
| "learning_rate": 2.5715956086495947e-05, | |
| "loss": 0.4652, | |
| "num_input_tokens_seen": 34421432, | |
| "step": 3070, | |
| "train_runtime": 5103.6348, | |
| "train_tokens_per_second": 6744.494 | |
| }, | |
| { | |
| "epoch": 1.4756704865902681, | |
| "grad_norm": 1.1253235340118408, | |
| "learning_rate": 2.565316790228532e-05, | |
| "loss": 0.4909, | |
| "num_input_tokens_seen": 34478304, | |
| "step": 3075, | |
| "train_runtime": 5113.6496, | |
| "train_tokens_per_second": 6742.406 | |
| }, | |
| { | |
| "epoch": 1.4780704385912282, | |
| "grad_norm": 0.7545915842056274, | |
| "learning_rate": 2.5590375594950443e-05, | |
| "loss": 0.4865, | |
| "num_input_tokens_seen": 34532640, | |
| "step": 3080, | |
| "train_runtime": 5123.1565, | |
| "train_tokens_per_second": 6740.501 | |
| }, | |
| { | |
| "epoch": 1.480470390592188, | |
| "grad_norm": 0.8254991769790649, | |
| "learning_rate": 2.5527579560867947e-05, | |
| "loss": 0.503, | |
| "num_input_tokens_seen": 34597280, | |
| "step": 3085, | |
| "train_runtime": 5135.0435, | |
| "train_tokens_per_second": 6737.485 | |
| }, | |
| { | |
| "epoch": 1.4828703425931482, | |
| "grad_norm": 0.7427690625190735, | |
| "learning_rate": 2.546478019643797e-05, | |
| "loss": 0.4799, | |
| "num_input_tokens_seen": 34654488, | |
| "step": 3090, | |
| "train_runtime": 5145.9423, | |
| "train_tokens_per_second": 6734.333 | |
| }, | |
| { | |
| "epoch": 1.485270294594108, | |
| "grad_norm": 0.6483776569366455, | |
| "learning_rate": 2.540197789808168e-05, | |
| "loss": 0.4463, | |
| "num_input_tokens_seen": 34716120, | |
| "step": 3095, | |
| "train_runtime": 5158.485, | |
| "train_tokens_per_second": 6729.906 | |
| }, | |
| { | |
| "epoch": 1.487670246595068, | |
| "grad_norm": 0.5190485715866089, | |
| "learning_rate": 2.5339173062238774e-05, | |
| "loss": 0.4597, | |
| "num_input_tokens_seen": 34777640, | |
| "step": 3100, | |
| "train_runtime": 5171.0585, | |
| "train_tokens_per_second": 6725.439 | |
| }, | |
| { | |
| "epoch": 1.490070198596028, | |
| "grad_norm": 0.5749461054801941, | |
| "learning_rate": 2.5276366085364937e-05, | |
| "loss": 0.5084, | |
| "num_input_tokens_seen": 34831992, | |
| "step": 3105, | |
| "train_runtime": 5181.3994, | |
| "train_tokens_per_second": 6722.507 | |
| }, | |
| { | |
| "epoch": 1.4924701505969882, | |
| "grad_norm": 0.7715994119644165, | |
| "learning_rate": 2.52135573639294e-05, | |
| "loss": 0.4786, | |
| "num_input_tokens_seen": 34894736, | |
| "step": 3110, | |
| "train_runtime": 5191.5337, | |
| "train_tokens_per_second": 6721.47 | |
| }, | |
| { | |
| "epoch": 1.494870102597948, | |
| "grad_norm": 0.9101441502571106, | |
| "learning_rate": 2.5150747294412398e-05, | |
| "loss": 0.5175, | |
| "num_input_tokens_seen": 34951296, | |
| "step": 3115, | |
| "train_runtime": 5201.6456, | |
| "train_tokens_per_second": 6719.277 | |
| }, | |
| { | |
| "epoch": 1.497270054598908, | |
| "grad_norm": 0.7418543696403503, | |
| "learning_rate": 2.508793627330267e-05, | |
| "loss": 0.451, | |
| "num_input_tokens_seen": 35006168, | |
| "step": 3120, | |
| "train_runtime": 5211.4651, | |
| "train_tokens_per_second": 6717.145 | |
| }, | |
| { | |
| "epoch": 1.499670006599868, | |
| "grad_norm": 0.7147541642189026, | |
| "learning_rate": 2.502512469709497e-05, | |
| "loss": 0.5077, | |
| "num_input_tokens_seen": 35059176, | |
| "step": 3125, | |
| "train_runtime": 5221.3263, | |
| "train_tokens_per_second": 6714.611 | |
| }, | |
| { | |
| "epoch": 1.5020699586008281, | |
| "grad_norm": 0.5535465478897095, | |
| "learning_rate": 2.4962312962287544e-05, | |
| "loss": 0.4924, | |
| "num_input_tokens_seen": 35114264, | |
| "step": 3130, | |
| "train_runtime": 5230.6201, | |
| "train_tokens_per_second": 6713.212 | |
| }, | |
| { | |
| "epoch": 1.504469910601788, | |
| "grad_norm": 0.7213118672370911, | |
| "learning_rate": 2.4899501465379644e-05, | |
| "loss": 0.5004, | |
| "num_input_tokens_seen": 35168424, | |
| "step": 3135, | |
| "train_runtime": 5241.0072, | |
| "train_tokens_per_second": 6710.242 | |
| }, | |
| { | |
| "epoch": 1.506869862602748, | |
| "grad_norm": 0.7794874310493469, | |
| "learning_rate": 2.4836690602869044e-05, | |
| "loss": 0.5145, | |
| "num_input_tokens_seen": 35224296, | |
| "step": 3140, | |
| "train_runtime": 5250.7072, | |
| "train_tokens_per_second": 6708.486 | |
| }, | |
| { | |
| "epoch": 1.5092698146037078, | |
| "grad_norm": 0.9129291772842407, | |
| "learning_rate": 2.4773880771249477e-05, | |
| "loss": 0.4889, | |
| "num_input_tokens_seen": 35280088, | |
| "step": 3145, | |
| "train_runtime": 5261.3252, | |
| "train_tokens_per_second": 6705.552 | |
| }, | |
| { | |
| "epoch": 1.511669766604668, | |
| "grad_norm": 0.7600094079971313, | |
| "learning_rate": 2.4711072367008176e-05, | |
| "loss": 0.4967, | |
| "num_input_tokens_seen": 35340720, | |
| "step": 3150, | |
| "train_runtime": 5271.563, | |
| "train_tokens_per_second": 6704.031 | |
| }, | |
| { | |
| "epoch": 1.514069718605628, | |
| "grad_norm": 0.5989595055580139, | |
| "learning_rate": 2.4648265786623388e-05, | |
| "loss": 0.4843, | |
| "num_input_tokens_seen": 35397240, | |
| "step": 3155, | |
| "train_runtime": 5282.0778, | |
| "train_tokens_per_second": 6701.386 | |
| }, | |
| { | |
| "epoch": 1.5164696706065879, | |
| "grad_norm": 0.6885458827018738, | |
| "learning_rate": 2.4585461426561818e-05, | |
| "loss": 0.5011, | |
| "num_input_tokens_seen": 35460504, | |
| "step": 3160, | |
| "train_runtime": 5293.3254, | |
| "train_tokens_per_second": 6699.098 | |
| }, | |
| { | |
| "epoch": 1.5188696226075478, | |
| "grad_norm": 0.5150988698005676, | |
| "learning_rate": 2.452265968327618e-05, | |
| "loss": 0.512, | |
| "num_input_tokens_seen": 35517032, | |
| "step": 3165, | |
| "train_runtime": 5303.2586, | |
| "train_tokens_per_second": 6697.209 | |
| }, | |
| { | |
| "epoch": 1.5212695746085079, | |
| "grad_norm": 0.7029662132263184, | |
| "learning_rate": 2.4459860953202635e-05, | |
| "loss": 0.4807, | |
| "num_input_tokens_seen": 35567328, | |
| "step": 3170, | |
| "train_runtime": 5312.0452, | |
| "train_tokens_per_second": 6695.6 | |
| }, | |
| { | |
| "epoch": 1.523669526609468, | |
| "grad_norm": 0.6837257742881775, | |
| "learning_rate": 2.4397065632758374e-05, | |
| "loss": 0.4578, | |
| "num_input_tokens_seen": 35622032, | |
| "step": 3175, | |
| "train_runtime": 5321.4999, | |
| "train_tokens_per_second": 6693.983 | |
| }, | |
| { | |
| "epoch": 1.5260694786104279, | |
| "grad_norm": 0.7105430364608765, | |
| "learning_rate": 2.4334274118339014e-05, | |
| "loss": 0.512, | |
| "num_input_tokens_seen": 35684184, | |
| "step": 3180, | |
| "train_runtime": 5331.4522, | |
| "train_tokens_per_second": 6693.145 | |
| }, | |
| { | |
| "epoch": 1.5284694306113877, | |
| "grad_norm": 0.788021445274353, | |
| "learning_rate": 2.4271486806316173e-05, | |
| "loss": 0.5011, | |
| "num_input_tokens_seen": 35741544, | |
| "step": 3185, | |
| "train_runtime": 5341.311, | |
| "train_tokens_per_second": 6691.53 | |
| }, | |
| { | |
| "epoch": 1.5308693826123476, | |
| "grad_norm": 0.8190677165985107, | |
| "learning_rate": 2.420870409303495e-05, | |
| "loss": 0.4627, | |
| "num_input_tokens_seen": 35797096, | |
| "step": 3190, | |
| "train_runtime": 5350.6319, | |
| "train_tokens_per_second": 6690.256 | |
| }, | |
| { | |
| "epoch": 1.5332693346133077, | |
| "grad_norm": 0.9217768907546997, | |
| "learning_rate": 2.4145926374811395e-05, | |
| "loss": 0.4672, | |
| "num_input_tokens_seen": 35849520, | |
| "step": 3195, | |
| "train_runtime": 5360.1483, | |
| "train_tokens_per_second": 6688.158 | |
| }, | |
| { | |
| "epoch": 1.5356692866142678, | |
| "grad_norm": 0.729516327381134, | |
| "learning_rate": 2.4083154047930014e-05, | |
| "loss": 0.4645, | |
| "num_input_tokens_seen": 35908672, | |
| "step": 3200, | |
| "train_runtime": 5371.5509, | |
| "train_tokens_per_second": 6684.973 | |
| }, | |
| { | |
| "epoch": 1.5380692386152277, | |
| "grad_norm": 0.7882852554321289, | |
| "learning_rate": 2.4020387508641322e-05, | |
| "loss": 0.4833, | |
| "num_input_tokens_seen": 35963328, | |
| "step": 3205, | |
| "train_runtime": 5382.0522, | |
| "train_tokens_per_second": 6682.085 | |
| }, | |
| { | |
| "epoch": 1.5404691906161876, | |
| "grad_norm": 0.6502909660339355, | |
| "learning_rate": 2.3957627153159277e-05, | |
| "loss": 0.4763, | |
| "num_input_tokens_seen": 36021192, | |
| "step": 3210, | |
| "train_runtime": 5392.0941, | |
| "train_tokens_per_second": 6680.372 | |
| }, | |
| { | |
| "epoch": 1.5428691426171477, | |
| "grad_norm": 0.8590161204338074, | |
| "learning_rate": 2.3894873377658788e-05, | |
| "loss": 0.4768, | |
| "num_input_tokens_seen": 36078448, | |
| "step": 3215, | |
| "train_runtime": 5402.6273, | |
| "train_tokens_per_second": 6677.945 | |
| }, | |
| { | |
| "epoch": 1.5452690946181078, | |
| "grad_norm": 1.034970760345459, | |
| "learning_rate": 2.383212657827324e-05, | |
| "loss": 0.502, | |
| "num_input_tokens_seen": 36132656, | |
| "step": 3220, | |
| "train_runtime": 5412.5546, | |
| "train_tokens_per_second": 6675.712 | |
| }, | |
| { | |
| "epoch": 1.5476690466190677, | |
| "grad_norm": 0.5326734185218811, | |
| "learning_rate": 2.3769387151092e-05, | |
| "loss": 0.4883, | |
| "num_input_tokens_seen": 36191712, | |
| "step": 3225, | |
| "train_runtime": 5422.7637, | |
| "train_tokens_per_second": 6674.034 | |
| }, | |
| { | |
| "epoch": 1.5500689986200276, | |
| "grad_norm": 0.9736510515213013, | |
| "learning_rate": 2.370665549215787e-05, | |
| "loss": 0.5341, | |
| "num_input_tokens_seen": 36245160, | |
| "step": 3230, | |
| "train_runtime": 5432.9922, | |
| "train_tokens_per_second": 6671.307 | |
| }, | |
| { | |
| "epoch": 1.5524689506209874, | |
| "grad_norm": 0.6917448043823242, | |
| "learning_rate": 2.3643931997464617e-05, | |
| "loss": 0.4849, | |
| "num_input_tokens_seen": 36303576, | |
| "step": 3235, | |
| "train_runtime": 5443.3631, | |
| "train_tokens_per_second": 6669.328 | |
| }, | |
| { | |
| "epoch": 1.5548689026219475, | |
| "grad_norm": 0.9082401394844055, | |
| "learning_rate": 2.35812170629545e-05, | |
| "loss": 0.4583, | |
| "num_input_tokens_seen": 36360840, | |
| "step": 3240, | |
| "train_runtime": 5453.245, | |
| "train_tokens_per_second": 6667.744 | |
| }, | |
| { | |
| "epoch": 1.5572688546229077, | |
| "grad_norm": 0.6470857262611389, | |
| "learning_rate": 2.351851108451571e-05, | |
| "loss": 0.4604, | |
| "num_input_tokens_seen": 36422200, | |
| "step": 3245, | |
| "train_runtime": 5463.4424, | |
| "train_tokens_per_second": 6666.529 | |
| }, | |
| { | |
| "epoch": 1.5596688066238675, | |
| "grad_norm": 0.8061736822128296, | |
| "learning_rate": 2.34558144579799e-05, | |
| "loss": 0.5048, | |
| "num_input_tokens_seen": 36476632, | |
| "step": 3250, | |
| "train_runtime": 5473.1542, | |
| "train_tokens_per_second": 6664.645 | |
| }, | |
| { | |
| "epoch": 1.5620687586248274, | |
| "grad_norm": 0.7560340762138367, | |
| "learning_rate": 2.339312757911973e-05, | |
| "loss": 0.5113, | |
| "num_input_tokens_seen": 36529792, | |
| "step": 3255, | |
| "train_runtime": 5482.3009, | |
| "train_tokens_per_second": 6663.223 | |
| }, | |
| { | |
| "epoch": 1.5644687106257875, | |
| "grad_norm": 0.7179074883460999, | |
| "learning_rate": 2.3330450843646296e-05, | |
| "loss": 0.5005, | |
| "num_input_tokens_seen": 36586016, | |
| "step": 3260, | |
| "train_runtime": 5492.5745, | |
| "train_tokens_per_second": 6660.996 | |
| }, | |
| { | |
| "epoch": 1.5668686626267476, | |
| "grad_norm": 0.5973109602928162, | |
| "learning_rate": 2.3267784647206658e-05, | |
| "loss": 0.4804, | |
| "num_input_tokens_seen": 36641112, | |
| "step": 3265, | |
| "train_runtime": 5502.2894, | |
| "train_tokens_per_second": 6659.248 | |
| }, | |
| { | |
| "epoch": 1.5692686146277075, | |
| "grad_norm": 0.9687879681587219, | |
| "learning_rate": 2.3205129385381355e-05, | |
| "loss": 0.4928, | |
| "num_input_tokens_seen": 36697088, | |
| "step": 3270, | |
| "train_runtime": 5512.5707, | |
| "train_tokens_per_second": 6656.983 | |
| }, | |
| { | |
| "epoch": 1.5716685666286674, | |
| "grad_norm": 0.6984615325927734, | |
| "learning_rate": 2.3142485453681925e-05, | |
| "loss": 0.4872, | |
| "num_input_tokens_seen": 36755920, | |
| "step": 3275, | |
| "train_runtime": 5523.731, | |
| "train_tokens_per_second": 6654.184 | |
| }, | |
| { | |
| "epoch": 1.5740685186296273, | |
| "grad_norm": 0.7793405652046204, | |
| "learning_rate": 2.307985324754835e-05, | |
| "loss": 0.5391, | |
| "num_input_tokens_seen": 36811304, | |
| "step": 3280, | |
| "train_runtime": 5534.0048, | |
| "train_tokens_per_second": 6651.838 | |
| }, | |
| { | |
| "epoch": 1.5764684706305874, | |
| "grad_norm": 0.7121679782867432, | |
| "learning_rate": 2.3017233162346608e-05, | |
| "loss": 0.4955, | |
| "num_input_tokens_seen": 36868680, | |
| "step": 3285, | |
| "train_runtime": 5543.499, | |
| "train_tokens_per_second": 6650.796 | |
| }, | |
| { | |
| "epoch": 1.5788684226315475, | |
| "grad_norm": 0.9568763375282288, | |
| "learning_rate": 2.295462559336618e-05, | |
| "loss": 0.4775, | |
| "num_input_tokens_seen": 36925400, | |
| "step": 3290, | |
| "train_runtime": 5553.1982, | |
| "train_tokens_per_second": 6649.394 | |
| }, | |
| { | |
| "epoch": 1.5812683746325074, | |
| "grad_norm": 0.5952507257461548, | |
| "learning_rate": 2.2892030935817517e-05, | |
| "loss": 0.457, | |
| "num_input_tokens_seen": 36984032, | |
| "step": 3295, | |
| "train_runtime": 5563.7199, | |
| "train_tokens_per_second": 6647.357 | |
| }, | |
| { | |
| "epoch": 1.5836683266334672, | |
| "grad_norm": 0.8516509532928467, | |
| "learning_rate": 2.2829449584829558e-05, | |
| "loss": 0.5231, | |
| "num_input_tokens_seen": 37038928, | |
| "step": 3300, | |
| "train_runtime": 5573.6606, | |
| "train_tokens_per_second": 6645.35 | |
| }, | |
| { | |
| "epoch": 1.5860682786344273, | |
| "grad_norm": 0.569814920425415, | |
| "learning_rate": 2.2766881935447275e-05, | |
| "loss": 0.5044, | |
| "num_input_tokens_seen": 37092208, | |
| "step": 3305, | |
| "train_runtime": 5583.51, | |
| "train_tokens_per_second": 6643.17 | |
| }, | |
| { | |
| "epoch": 1.5884682306353874, | |
| "grad_norm": 0.8386396169662476, | |
| "learning_rate": 2.2704328382629138e-05, | |
| "loss": 0.4753, | |
| "num_input_tokens_seen": 37147680, | |
| "step": 3310, | |
| "train_runtime": 5592.6848, | |
| "train_tokens_per_second": 6642.191 | |
| }, | |
| { | |
| "epoch": 1.5908681826363473, | |
| "grad_norm": 0.7655364871025085, | |
| "learning_rate": 2.264178932124462e-05, | |
| "loss": 0.4796, | |
| "num_input_tokens_seen": 37203656, | |
| "step": 3315, | |
| "train_runtime": 5601.9649, | |
| "train_tokens_per_second": 6641.18 | |
| }, | |
| { | |
| "epoch": 1.5932681346373072, | |
| "grad_norm": 0.8739466071128845, | |
| "learning_rate": 2.257926514607171e-05, | |
| "loss": 0.4852, | |
| "num_input_tokens_seen": 37263520, | |
| "step": 3320, | |
| "train_runtime": 5612.1576, | |
| "train_tokens_per_second": 6639.785 | |
| }, | |
| { | |
| "epoch": 1.595668086638267, | |
| "grad_norm": 0.6632476449012756, | |
| "learning_rate": 2.2516756251794463e-05, | |
| "loss": 0.5121, | |
| "num_input_tokens_seen": 37318192, | |
| "step": 3325, | |
| "train_runtime": 5621.7888, | |
| "train_tokens_per_second": 6638.135 | |
| }, | |
| { | |
| "epoch": 1.5980680386392272, | |
| "grad_norm": 0.7768703699111938, | |
| "learning_rate": 2.245426303300044e-05, | |
| "loss": 0.5128, | |
| "num_input_tokens_seen": 37374224, | |
| "step": 3330, | |
| "train_runtime": 5631.8308, | |
| "train_tokens_per_second": 6636.248 | |
| }, | |
| { | |
| "epoch": 1.6004679906401873, | |
| "grad_norm": 0.7217375636100769, | |
| "learning_rate": 2.2391785884178256e-05, | |
| "loss": 0.4835, | |
| "num_input_tokens_seen": 37435240, | |
| "step": 3335, | |
| "train_runtime": 5642.3272, | |
| "train_tokens_per_second": 6634.716 | |
| }, | |
| { | |
| "epoch": 1.6028679426411472, | |
| "grad_norm": 0.5615156888961792, | |
| "learning_rate": 2.2329325199715114e-05, | |
| "loss": 0.4575, | |
| "num_input_tokens_seen": 37492120, | |
| "step": 3340, | |
| "train_runtime": 5652.3686, | |
| "train_tokens_per_second": 6632.993 | |
| }, | |
| { | |
| "epoch": 1.605267894642107, | |
| "grad_norm": 0.826392650604248, | |
| "learning_rate": 2.226688137389425e-05, | |
| "loss": 0.4922, | |
| "num_input_tokens_seen": 37548408, | |
| "step": 3345, | |
| "train_runtime": 5662.4517, | |
| "train_tokens_per_second": 6631.122 | |
| }, | |
| { | |
| "epoch": 1.6076678466430672, | |
| "grad_norm": 0.589180052280426, | |
| "learning_rate": 2.220445480089248e-05, | |
| "loss": 0.4807, | |
| "num_input_tokens_seen": 37610280, | |
| "step": 3350, | |
| "train_runtime": 5674.3947, | |
| "train_tokens_per_second": 6628.069 | |
| }, | |
| { | |
| "epoch": 1.6100677986440273, | |
| "grad_norm": 0.8704653978347778, | |
| "learning_rate": 2.214204587477774e-05, | |
| "loss": 0.5322, | |
| "num_input_tokens_seen": 37668512, | |
| "step": 3355, | |
| "train_runtime": 5684.5435, | |
| "train_tokens_per_second": 6626.48 | |
| }, | |
| { | |
| "epoch": 1.6124677506449872, | |
| "grad_norm": 0.7563439607620239, | |
| "learning_rate": 2.207965498950655e-05, | |
| "loss": 0.4843, | |
| "num_input_tokens_seen": 37727112, | |
| "step": 3360, | |
| "train_runtime": 5694.2908, | |
| "train_tokens_per_second": 6625.428 | |
| }, | |
| { | |
| "epoch": 1.614867702645947, | |
| "grad_norm": 0.7133488059043884, | |
| "learning_rate": 2.2017282538921556e-05, | |
| "loss": 0.4732, | |
| "num_input_tokens_seen": 37780192, | |
| "step": 3365, | |
| "train_runtime": 5703.5817, | |
| "train_tokens_per_second": 6623.942 | |
| }, | |
| { | |
| "epoch": 1.617267654646907, | |
| "grad_norm": 0.8156766295433044, | |
| "learning_rate": 2.1954928916749006e-05, | |
| "loss": 0.5115, | |
| "num_input_tokens_seen": 37839376, | |
| "step": 3370, | |
| "train_runtime": 5713.8648, | |
| "train_tokens_per_second": 6622.379 | |
| }, | |
| { | |
| "epoch": 1.619667606647867, | |
| "grad_norm": 0.7063591480255127, | |
| "learning_rate": 2.1892594516596343e-05, | |
| "loss": 0.5177, | |
| "num_input_tokens_seen": 37894296, | |
| "step": 3375, | |
| "train_runtime": 5723.4986, | |
| "train_tokens_per_second": 6620.827 | |
| }, | |
| { | |
| "epoch": 1.6220675586488271, | |
| "grad_norm": 0.8170085549354553, | |
| "learning_rate": 2.183027973194964e-05, | |
| "loss": 0.4848, | |
| "num_input_tokens_seen": 37951552, | |
| "step": 3380, | |
| "train_runtime": 5733.3985, | |
| "train_tokens_per_second": 6619.382 | |
| }, | |
| { | |
| "epoch": 1.624467510649787, | |
| "grad_norm": 0.6729702353477478, | |
| "learning_rate": 2.176798495617114e-05, | |
| "loss": 0.4927, | |
| "num_input_tokens_seen": 38011968, | |
| "step": 3385, | |
| "train_runtime": 5743.0143, | |
| "train_tokens_per_second": 6618.818 | |
| }, | |
| { | |
| "epoch": 1.6268674626507469, | |
| "grad_norm": 0.7593095898628235, | |
| "learning_rate": 2.1705710582496815e-05, | |
| "loss": 0.4888, | |
| "num_input_tokens_seen": 38067280, | |
| "step": 3390, | |
| "train_runtime": 5752.7516, | |
| "train_tokens_per_second": 6617.23 | |
| }, | |
| { | |
| "epoch": 1.629267414651707, | |
| "grad_norm": 1.1748439073562622, | |
| "learning_rate": 2.1643457004033807e-05, | |
| "loss": 0.5178, | |
| "num_input_tokens_seen": 38124912, | |
| "step": 3395, | |
| "train_runtime": 5763.3474, | |
| "train_tokens_per_second": 6615.064 | |
| }, | |
| { | |
| "epoch": 1.631667366652667, | |
| "grad_norm": 0.8947390913963318, | |
| "learning_rate": 2.1581224613758005e-05, | |
| "loss": 0.5112, | |
| "num_input_tokens_seen": 38178808, | |
| "step": 3400, | |
| "train_runtime": 5772.7591, | |
| "train_tokens_per_second": 6613.615 | |
| }, | |
| { | |
| "epoch": 1.634067318653627, | |
| "grad_norm": 0.702033519744873, | |
| "learning_rate": 2.1519013804511562e-05, | |
| "loss": 0.5106, | |
| "num_input_tokens_seen": 38233976, | |
| "step": 3405, | |
| "train_runtime": 5782.5071, | |
| "train_tokens_per_second": 6612.007 | |
| }, | |
| { | |
| "epoch": 1.6364672706545869, | |
| "grad_norm": 0.9868459105491638, | |
| "learning_rate": 2.145682496900039e-05, | |
| "loss": 0.501, | |
| "num_input_tokens_seen": 38291736, | |
| "step": 3410, | |
| "train_runtime": 5792.3708, | |
| "train_tokens_per_second": 6610.719 | |
| }, | |
| { | |
| "epoch": 1.6388672226555467, | |
| "grad_norm": 1.0660921335220337, | |
| "learning_rate": 2.1394658499791684e-05, | |
| "loss": 0.4836, | |
| "num_input_tokens_seen": 38347056, | |
| "step": 3415, | |
| "train_runtime": 5800.6961, | |
| "train_tokens_per_second": 6610.768 | |
| }, | |
| { | |
| "epoch": 1.6412671746565068, | |
| "grad_norm": 0.809270441532135, | |
| "learning_rate": 2.1332514789311448e-05, | |
| "loss": 0.5138, | |
| "num_input_tokens_seen": 38399184, | |
| "step": 3420, | |
| "train_runtime": 5808.2869, | |
| "train_tokens_per_second": 6611.103 | |
| }, | |
| { | |
| "epoch": 1.643667126657467, | |
| "grad_norm": 0.7200763821601868, | |
| "learning_rate": 2.1270394229842044e-05, | |
| "loss": 0.4522, | |
| "num_input_tokens_seen": 38456896, | |
| "step": 3425, | |
| "train_runtime": 5816.1423, | |
| "train_tokens_per_second": 6612.097 | |
| }, | |
| { | |
| "epoch": 1.6460670786584268, | |
| "grad_norm": 0.8460598587989807, | |
| "learning_rate": 2.1208297213519686e-05, | |
| "loss": 0.4847, | |
| "num_input_tokens_seen": 38512168, | |
| "step": 3430, | |
| "train_runtime": 5823.8311, | |
| "train_tokens_per_second": 6612.858 | |
| }, | |
| { | |
| "epoch": 1.6484670306593867, | |
| "grad_norm": 0.7235488891601562, | |
| "learning_rate": 2.1146224132331944e-05, | |
| "loss": 0.4733, | |
| "num_input_tokens_seen": 38573240, | |
| "step": 3435, | |
| "train_runtime": 5832.0444, | |
| "train_tokens_per_second": 6614.017 | |
| }, | |
| { | |
| "epoch": 1.6508669826603468, | |
| "grad_norm": 0.8452171087265015, | |
| "learning_rate": 2.1084175378115344e-05, | |
| "loss": 0.5236, | |
| "num_input_tokens_seen": 38624080, | |
| "step": 3440, | |
| "train_runtime": 5839.2065, | |
| "train_tokens_per_second": 6614.611 | |
| }, | |
| { | |
| "epoch": 1.653266934661307, | |
| "grad_norm": 0.7488996982574463, | |
| "learning_rate": 2.1022151342552815e-05, | |
| "loss": 0.5226, | |
| "num_input_tokens_seen": 38679488, | |
| "step": 3445, | |
| "train_runtime": 5846.9076, | |
| "train_tokens_per_second": 6615.375 | |
| }, | |
| { | |
| "epoch": 1.6556668866622668, | |
| "grad_norm": 0.7845451235771179, | |
| "learning_rate": 2.0960152417171243e-05, | |
| "loss": 0.4533, | |
| "num_input_tokens_seen": 38736136, | |
| "step": 3450, | |
| "train_runtime": 5855.1703, | |
| "train_tokens_per_second": 6615.715 | |
| }, | |
| { | |
| "epoch": 1.6580668386632267, | |
| "grad_norm": 0.9303568005561829, | |
| "learning_rate": 2.089817899333904e-05, | |
| "loss": 0.483, | |
| "num_input_tokens_seen": 38788592, | |
| "step": 3455, | |
| "train_runtime": 5862.705, | |
| "train_tokens_per_second": 6616.16 | |
| }, | |
| { | |
| "epoch": 1.6604667906641866, | |
| "grad_norm": 0.7032025456428528, | |
| "learning_rate": 2.083623146226362e-05, | |
| "loss": 0.4556, | |
| "num_input_tokens_seen": 38846528, | |
| "step": 3460, | |
| "train_runtime": 5870.8119, | |
| "train_tokens_per_second": 6616.892 | |
| }, | |
| { | |
| "epoch": 1.6628667426651467, | |
| "grad_norm": 1.0094935894012451, | |
| "learning_rate": 2.0774310214988942e-05, | |
| "loss": 0.545, | |
| "num_input_tokens_seen": 38896768, | |
| "step": 3465, | |
| "train_runtime": 5879.1312, | |
| "train_tokens_per_second": 6616.074 | |
| }, | |
| { | |
| "epoch": 1.6652666946661068, | |
| "grad_norm": 0.8336009979248047, | |
| "learning_rate": 2.071241564239305e-05, | |
| "loss": 0.4741, | |
| "num_input_tokens_seen": 38952672, | |
| "step": 3470, | |
| "train_runtime": 5888.8317, | |
| "train_tokens_per_second": 6614.669 | |
| }, | |
| { | |
| "epoch": 1.6676666466670667, | |
| "grad_norm": 0.6727505326271057, | |
| "learning_rate": 2.0650548135185618e-05, | |
| "loss": 0.4831, | |
| "num_input_tokens_seen": 39007376, | |
| "step": 3475, | |
| "train_runtime": 5898.9169, | |
| "train_tokens_per_second": 6612.634 | |
| }, | |
| { | |
| "epoch": 1.6700665986680265, | |
| "grad_norm": 0.7282326221466064, | |
| "learning_rate": 2.0588708083905468e-05, | |
| "loss": 0.5174, | |
| "num_input_tokens_seen": 39064568, | |
| "step": 3480, | |
| "train_runtime": 5909.1279, | |
| "train_tokens_per_second": 6610.886 | |
| }, | |
| { | |
| "epoch": 1.6724665506689866, | |
| "grad_norm": 0.6648644208908081, | |
| "learning_rate": 2.0526895878918077e-05, | |
| "loss": 0.5055, | |
| "num_input_tokens_seen": 39117320, | |
| "step": 3485, | |
| "train_runtime": 5918.494, | |
| "train_tokens_per_second": 6609.337 | |
| }, | |
| { | |
| "epoch": 1.6748665026699467, | |
| "grad_norm": 0.8427759408950806, | |
| "learning_rate": 2.0465111910413192e-05, | |
| "loss": 0.5316, | |
| "num_input_tokens_seen": 39171840, | |
| "step": 3490, | |
| "train_runtime": 5927.2143, | |
| "train_tokens_per_second": 6608.811 | |
| }, | |
| { | |
| "epoch": 1.6772664546709066, | |
| "grad_norm": 0.6149888634681702, | |
| "learning_rate": 2.040335656840228e-05, | |
| "loss": 0.4517, | |
| "num_input_tokens_seen": 39226624, | |
| "step": 3495, | |
| "train_runtime": 5935.062, | |
| "train_tokens_per_second": 6609.303 | |
| }, | |
| { | |
| "epoch": 1.6796664066718665, | |
| "grad_norm": 0.9388527870178223, | |
| "learning_rate": 2.03416302427161e-05, | |
| "loss": 0.5067, | |
| "num_input_tokens_seen": 39284168, | |
| "step": 3500, | |
| "train_runtime": 5942.9844, | |
| "train_tokens_per_second": 6610.175 | |
| }, | |
| { | |
| "epoch": 1.6820663586728264, | |
| "grad_norm": 0.8548518419265747, | |
| "learning_rate": 2.027993332300227e-05, | |
| "loss": 0.5064, | |
| "num_input_tokens_seen": 39340120, | |
| "step": 3505, | |
| "train_runtime": 5951.1485, | |
| "train_tokens_per_second": 6610.509 | |
| }, | |
| { | |
| "epoch": 1.6844663106737865, | |
| "grad_norm": 0.6581935882568359, | |
| "learning_rate": 2.021826619872278e-05, | |
| "loss": 0.4523, | |
| "num_input_tokens_seen": 39399136, | |
| "step": 3510, | |
| "train_runtime": 5959.3451, | |
| "train_tokens_per_second": 6611.32 | |
| }, | |
| { | |
| "epoch": 1.6868662626747466, | |
| "grad_norm": 0.6218190789222717, | |
| "learning_rate": 2.0156629259151515e-05, | |
| "loss": 0.4804, | |
| "num_input_tokens_seen": 39456808, | |
| "step": 3515, | |
| "train_runtime": 5967.3525, | |
| "train_tokens_per_second": 6612.113 | |
| }, | |
| { | |
| "epoch": 1.6892662146757065, | |
| "grad_norm": 0.8073654174804688, | |
| "learning_rate": 2.0095022893371826e-05, | |
| "loss": 0.4838, | |
| "num_input_tokens_seen": 39516000, | |
| "step": 3520, | |
| "train_runtime": 5975.9682, | |
| "train_tokens_per_second": 6612.485 | |
| }, | |
| { | |
| "epoch": 1.6916661666766664, | |
| "grad_norm": 0.7715812921524048, | |
| "learning_rate": 2.0033447490274083e-05, | |
| "loss": 0.4669, | |
| "num_input_tokens_seen": 39569280, | |
| "step": 3525, | |
| "train_runtime": 5983.6596, | |
| "train_tokens_per_second": 6612.89 | |
| }, | |
| { | |
| "epoch": 1.6940661186776265, | |
| "grad_norm": 0.8139777183532715, | |
| "learning_rate": 1.99719034385532e-05, | |
| "loss": 0.5031, | |
| "num_input_tokens_seen": 39625464, | |
| "step": 3530, | |
| "train_runtime": 5991.822, | |
| "train_tokens_per_second": 6613.258 | |
| }, | |
| { | |
| "epoch": 1.6964660706785866, | |
| "grad_norm": 0.7577908635139465, | |
| "learning_rate": 1.9910391126706158e-05, | |
| "loss": 0.4991, | |
| "num_input_tokens_seen": 39676928, | |
| "step": 3535, | |
| "train_runtime": 5999.8126, | |
| "train_tokens_per_second": 6613.028 | |
| }, | |
| { | |
| "epoch": 1.6988660226795465, | |
| "grad_norm": 0.5273564457893372, | |
| "learning_rate": 1.9848910943029624e-05, | |
| "loss": 0.4548, | |
| "num_input_tokens_seen": 39734168, | |
| "step": 3540, | |
| "train_runtime": 6008.5552, | |
| "train_tokens_per_second": 6612.932 | |
| }, | |
| { | |
| "epoch": 1.7012659746805063, | |
| "grad_norm": 0.8542927503585815, | |
| "learning_rate": 1.978746327561741e-05, | |
| "loss": 0.4886, | |
| "num_input_tokens_seen": 39795520, | |
| "step": 3545, | |
| "train_runtime": 6017.0289, | |
| "train_tokens_per_second": 6613.816 | |
| }, | |
| { | |
| "epoch": 1.7036659266814662, | |
| "grad_norm": 0.6213528513908386, | |
| "learning_rate": 1.972604851235811e-05, | |
| "loss": 0.4737, | |
| "num_input_tokens_seen": 39851264, | |
| "step": 3550, | |
| "train_runtime": 6025.5762, | |
| "train_tokens_per_second": 6613.685 | |
| }, | |
| { | |
| "epoch": 1.7060658786824263, | |
| "grad_norm": 0.7265267372131348, | |
| "learning_rate": 1.9664667040932577e-05, | |
| "loss": 0.5013, | |
| "num_input_tokens_seen": 39904120, | |
| "step": 3555, | |
| "train_runtime": 6033.0567, | |
| "train_tokens_per_second": 6614.246 | |
| }, | |
| { | |
| "epoch": 1.7084658306833864, | |
| "grad_norm": 0.8746877312660217, | |
| "learning_rate": 1.9603319248811542e-05, | |
| "loss": 0.4541, | |
| "num_input_tokens_seen": 39957104, | |
| "step": 3560, | |
| "train_runtime": 6040.7403, | |
| "train_tokens_per_second": 6614.604 | |
| }, | |
| { | |
| "epoch": 1.7108657826843463, | |
| "grad_norm": 0.690990686416626, | |
| "learning_rate": 1.9542005523253103e-05, | |
| "loss": 0.5057, | |
| "num_input_tokens_seen": 40014640, | |
| "step": 3565, | |
| "train_runtime": 6048.7384, | |
| "train_tokens_per_second": 6615.37 | |
| }, | |
| { | |
| "epoch": 1.7132657346853062, | |
| "grad_norm": 0.5996572375297546, | |
| "learning_rate": 1.948072625130032e-05, | |
| "loss": 0.5071, | |
| "num_input_tokens_seen": 40071928, | |
| "step": 3570, | |
| "train_runtime": 6056.481, | |
| "train_tokens_per_second": 6616.371 | |
| }, | |
| { | |
| "epoch": 1.7156656866862663, | |
| "grad_norm": 1.0447416305541992, | |
| "learning_rate": 1.9419481819778785e-05, | |
| "loss": 0.5099, | |
| "num_input_tokens_seen": 40125856, | |
| "step": 3575, | |
| "train_runtime": 6063.7113, | |
| "train_tokens_per_second": 6617.376 | |
| }, | |
| { | |
| "epoch": 1.7180656386872264, | |
| "grad_norm": 1.0107308626174927, | |
| "learning_rate": 1.9358272615294153e-05, | |
| "loss": 0.4823, | |
| "num_input_tokens_seen": 40181760, | |
| "step": 3580, | |
| "train_runtime": 6071.812, | |
| "train_tokens_per_second": 6617.754 | |
| }, | |
| { | |
| "epoch": 1.7204655906881863, | |
| "grad_norm": 0.7742976546287537, | |
| "learning_rate": 1.9297099024229675e-05, | |
| "loss": 0.5261, | |
| "num_input_tokens_seen": 40236472, | |
| "step": 3585, | |
| "train_runtime": 6079.4422, | |
| "train_tokens_per_second": 6618.448 | |
| }, | |
| { | |
| "epoch": 1.7228655426891462, | |
| "grad_norm": 0.7820068597793579, | |
| "learning_rate": 1.923596143274385e-05, | |
| "loss": 0.4674, | |
| "num_input_tokens_seen": 40295104, | |
| "step": 3590, | |
| "train_runtime": 6087.6682, | |
| "train_tokens_per_second": 6619.136 | |
| }, | |
| { | |
| "epoch": 1.725265494690106, | |
| "grad_norm": 0.6710221171379089, | |
| "learning_rate": 1.9174860226767876e-05, | |
| "loss": 0.5175, | |
| "num_input_tokens_seen": 40345800, | |
| "step": 3595, | |
| "train_runtime": 6095.1949, | |
| "train_tokens_per_second": 6619.28 | |
| }, | |
| { | |
| "epoch": 1.7276654466910661, | |
| "grad_norm": 0.7176735401153564, | |
| "learning_rate": 1.91137957920033e-05, | |
| "loss": 0.5171, | |
| "num_input_tokens_seen": 40402256, | |
| "step": 3600, | |
| "train_runtime": 6103.2553, | |
| "train_tokens_per_second": 6619.788 | |
| }, | |
| { | |
| "epoch": 1.7300653986920262, | |
| "grad_norm": 0.9111002087593079, | |
| "learning_rate": 1.905276851391954e-05, | |
| "loss": 0.4883, | |
| "num_input_tokens_seen": 40458888, | |
| "step": 3605, | |
| "train_runtime": 6111.7843, | |
| "train_tokens_per_second": 6619.816 | |
| }, | |
| { | |
| "epoch": 1.7324653506929861, | |
| "grad_norm": 0.7179924845695496, | |
| "learning_rate": 1.899177877775146e-05, | |
| "loss": 0.4852, | |
| "num_input_tokens_seen": 40516112, | |
| "step": 3610, | |
| "train_runtime": 6120.0523, | |
| "train_tokens_per_second": 6620.223 | |
| }, | |
| { | |
| "epoch": 1.734865302693946, | |
| "grad_norm": 0.7747234106063843, | |
| "learning_rate": 1.8930826968496943e-05, | |
| "loss": 0.5067, | |
| "num_input_tokens_seen": 40572824, | |
| "step": 3615, | |
| "train_runtime": 6128.2202, | |
| "train_tokens_per_second": 6620.654 | |
| }, | |
| { | |
| "epoch": 1.7372652546949061, | |
| "grad_norm": 0.7451600432395935, | |
| "learning_rate": 1.8869913470914448e-05, | |
| "loss": 0.4881, | |
| "num_input_tokens_seen": 40631656, | |
| "step": 3620, | |
| "train_runtime": 6136.6832, | |
| "train_tokens_per_second": 6621.11 | |
| }, | |
| { | |
| "epoch": 1.7396652066958662, | |
| "grad_norm": 0.9544029235839844, | |
| "learning_rate": 1.880903866952062e-05, | |
| "loss": 0.5206, | |
| "num_input_tokens_seen": 40687064, | |
| "step": 3625, | |
| "train_runtime": 6144.9437, | |
| "train_tokens_per_second": 6621.227 | |
| }, | |
| { | |
| "epoch": 1.742065158696826, | |
| "grad_norm": 0.7754983901977539, | |
| "learning_rate": 1.8748202948587813e-05, | |
| "loss": 0.4979, | |
| "num_input_tokens_seen": 40743400, | |
| "step": 3630, | |
| "train_runtime": 6153.1589, | |
| "train_tokens_per_second": 6621.542 | |
| }, | |
| { | |
| "epoch": 1.744465110697786, | |
| "grad_norm": 0.7278411388397217, | |
| "learning_rate": 1.8687406692141673e-05, | |
| "loss": 0.4632, | |
| "num_input_tokens_seen": 40802376, | |
| "step": 3635, | |
| "train_runtime": 6161.8706, | |
| "train_tokens_per_second": 6621.751 | |
| }, | |
| { | |
| "epoch": 1.7468650626987459, | |
| "grad_norm": 0.6943597793579102, | |
| "learning_rate": 1.8626650283958762e-05, | |
| "loss": 0.4851, | |
| "num_input_tokens_seen": 40854616, | |
| "step": 3640, | |
| "train_runtime": 6169.6683, | |
| "train_tokens_per_second": 6621.85 | |
| }, | |
| { | |
| "epoch": 1.749265014699706, | |
| "grad_norm": 0.8194776177406311, | |
| "learning_rate": 1.8565934107564068e-05, | |
| "loss": 0.4573, | |
| "num_input_tokens_seen": 40911032, | |
| "step": 3645, | |
| "train_runtime": 6178.2227, | |
| "train_tokens_per_second": 6621.812 | |
| }, | |
| { | |
| "epoch": 1.751664966700666, | |
| "grad_norm": 0.8596030473709106, | |
| "learning_rate": 1.8505258546228623e-05, | |
| "loss": 0.4862, | |
| "num_input_tokens_seen": 40970312, | |
| "step": 3650, | |
| "train_runtime": 6186.5562, | |
| "train_tokens_per_second": 6622.475 | |
| }, | |
| { | |
| "epoch": 1.754064918701626, | |
| "grad_norm": 0.6645076274871826, | |
| "learning_rate": 1.8444623982967098e-05, | |
| "loss": 0.4606, | |
| "num_input_tokens_seen": 41028576, | |
| "step": 3655, | |
| "train_runtime": 6195.0286, | |
| "train_tokens_per_second": 6622.823 | |
| }, | |
| { | |
| "epoch": 1.7564648707025858, | |
| "grad_norm": 0.668375551700592, | |
| "learning_rate": 1.8384030800535332e-05, | |
| "loss": 0.4504, | |
| "num_input_tokens_seen": 41088352, | |
| "step": 3660, | |
| "train_runtime": 6203.7002, | |
| "train_tokens_per_second": 6623.201 | |
| }, | |
| { | |
| "epoch": 1.758864822703546, | |
| "grad_norm": 0.6859973669052124, | |
| "learning_rate": 1.832347938142796e-05, | |
| "loss": 0.5408, | |
| "num_input_tokens_seen": 41144096, | |
| "step": 3665, | |
| "train_runtime": 6211.4168, | |
| "train_tokens_per_second": 6623.947 | |
| }, | |
| { | |
| "epoch": 1.761264774704506, | |
| "grad_norm": 0.8838623762130737, | |
| "learning_rate": 1.8262970107875994e-05, | |
| "loss": 0.4798, | |
| "num_input_tokens_seen": 41199488, | |
| "step": 3670, | |
| "train_runtime": 6219.0044, | |
| "train_tokens_per_second": 6624.772 | |
| }, | |
| { | |
| "epoch": 1.763664726705466, | |
| "grad_norm": 0.8268917202949524, | |
| "learning_rate": 1.8202503361844393e-05, | |
| "loss": 0.5226, | |
| "num_input_tokens_seen": 41254392, | |
| "step": 3675, | |
| "train_runtime": 6226.8544, | |
| "train_tokens_per_second": 6625.238 | |
| }, | |
| { | |
| "epoch": 1.7660646787064258, | |
| "grad_norm": 0.9109818339347839, | |
| "learning_rate": 1.8142079525029672e-05, | |
| "loss": 0.5196, | |
| "num_input_tokens_seen": 41310952, | |
| "step": 3680, | |
| "train_runtime": 6234.9064, | |
| "train_tokens_per_second": 6625.753 | |
| }, | |
| { | |
| "epoch": 1.7684646307073857, | |
| "grad_norm": 0.8743447661399841, | |
| "learning_rate": 1.808169897885745e-05, | |
| "loss": 0.4813, | |
| "num_input_tokens_seen": 41363784, | |
| "step": 3685, | |
| "train_runtime": 6242.8579, | |
| "train_tokens_per_second": 6625.777 | |
| }, | |
| { | |
| "epoch": 1.7708645827083458, | |
| "grad_norm": 0.8028547763824463, | |
| "learning_rate": 1.802136210448012e-05, | |
| "loss": 0.4864, | |
| "num_input_tokens_seen": 41418736, | |
| "step": 3690, | |
| "train_runtime": 6250.665, | |
| "train_tokens_per_second": 6626.293 | |
| }, | |
| { | |
| "epoch": 1.773264534709306, | |
| "grad_norm": 0.8359841108322144, | |
| "learning_rate": 1.796106928277437e-05, | |
| "loss": 0.451, | |
| "num_input_tokens_seen": 41480096, | |
| "step": 3695, | |
| "train_runtime": 6259.4151, | |
| "train_tokens_per_second": 6626.833 | |
| }, | |
| { | |
| "epoch": 1.7756644867102658, | |
| "grad_norm": 0.6087771654129028, | |
| "learning_rate": 1.7900820894338786e-05, | |
| "loss": 0.4405, | |
| "num_input_tokens_seen": 41535640, | |
| "step": 3700, | |
| "train_runtime": 6267.1679, | |
| "train_tokens_per_second": 6627.498 | |
| }, | |
| { | |
| "epoch": 1.7780644387112257, | |
| "grad_norm": 0.7156651020050049, | |
| "learning_rate": 1.7840617319491527e-05, | |
| "loss": 0.51, | |
| "num_input_tokens_seen": 41592104, | |
| "step": 3705, | |
| "train_runtime": 6275.4346, | |
| "train_tokens_per_second": 6627.765 | |
| }, | |
| { | |
| "epoch": 1.7804643907121858, | |
| "grad_norm": 0.7992216348648071, | |
| "learning_rate": 1.7780458938267807e-05, | |
| "loss": 0.4488, | |
| "num_input_tokens_seen": 41649776, | |
| "step": 3710, | |
| "train_runtime": 6283.7454, | |
| "train_tokens_per_second": 6628.177 | |
| }, | |
| { | |
| "epoch": 1.7828643427131459, | |
| "grad_norm": 0.7933105230331421, | |
| "learning_rate": 1.772034613041758e-05, | |
| "loss": 0.4581, | |
| "num_input_tokens_seen": 41707280, | |
| "step": 3715, | |
| "train_runtime": 6291.9245, | |
| "train_tokens_per_second": 6628.7 | |
| }, | |
| { | |
| "epoch": 1.7852642947141057, | |
| "grad_norm": 0.8297272324562073, | |
| "learning_rate": 1.7660279275403124e-05, | |
| "loss": 0.4598, | |
| "num_input_tokens_seen": 41765768, | |
| "step": 3720, | |
| "train_runtime": 6300.2081, | |
| "train_tokens_per_second": 6629.268 | |
| }, | |
| { | |
| "epoch": 1.7876642467150656, | |
| "grad_norm": 0.6287772059440613, | |
| "learning_rate": 1.7600258752396626e-05, | |
| "loss": 0.4783, | |
| "num_input_tokens_seen": 41819576, | |
| "step": 3725, | |
| "train_runtime": 6308.2419, | |
| "train_tokens_per_second": 6629.355 | |
| }, | |
| { | |
| "epoch": 1.7900641987160257, | |
| "grad_norm": 0.7246582508087158, | |
| "learning_rate": 1.754028494027782e-05, | |
| "loss": 0.4821, | |
| "num_input_tokens_seen": 41876528, | |
| "step": 3730, | |
| "train_runtime": 6316.3849, | |
| "train_tokens_per_second": 6629.825 | |
| }, | |
| { | |
| "epoch": 1.7924641507169856, | |
| "grad_norm": 0.752740204334259, | |
| "learning_rate": 1.748035821763154e-05, | |
| "loss": 0.4984, | |
| "num_input_tokens_seen": 41933488, | |
| "step": 3735, | |
| "train_runtime": 6324.4895, | |
| "train_tokens_per_second": 6630.336 | |
| }, | |
| { | |
| "epoch": 1.7948641027179457, | |
| "grad_norm": 0.7370868921279907, | |
| "learning_rate": 1.7420478962745424e-05, | |
| "loss": 0.4707, | |
| "num_input_tokens_seen": 41989264, | |
| "step": 3740, | |
| "train_runtime": 6332.3923, | |
| "train_tokens_per_second": 6630.869 | |
| }, | |
| { | |
| "epoch": 1.7972640547189056, | |
| "grad_norm": 0.5607179999351501, | |
| "learning_rate": 1.736064755360742e-05, | |
| "loss": 0.5113, | |
| "num_input_tokens_seen": 42045264, | |
| "step": 3745, | |
| "train_runtime": 6340.5688, | |
| "train_tokens_per_second": 6631.15 | |
| }, | |
| { | |
| "epoch": 1.7996640067198655, | |
| "grad_norm": 0.851588785648346, | |
| "learning_rate": 1.7300864367903462e-05, | |
| "loss": 0.4807, | |
| "num_input_tokens_seen": 42103712, | |
| "step": 3750, | |
| "train_runtime": 6348.5367, | |
| "train_tokens_per_second": 6632.034 | |
| }, | |
| { | |
| "epoch": 1.8020639587208256, | |
| "grad_norm": 0.6969419717788696, | |
| "learning_rate": 1.7241129783015108e-05, | |
| "loss": 0.5129, | |
| "num_input_tokens_seen": 42156568, | |
| "step": 3755, | |
| "train_runtime": 6356.2935, | |
| "train_tokens_per_second": 6632.256 | |
| }, | |
| { | |
| "epoch": 1.8044639107217857, | |
| "grad_norm": 0.705589771270752, | |
| "learning_rate": 1.7181444176017077e-05, | |
| "loss": 0.4709, | |
| "num_input_tokens_seen": 42214056, | |
| "step": 3760, | |
| "train_runtime": 6364.5049, | |
| "train_tokens_per_second": 6632.732 | |
| }, | |
| { | |
| "epoch": 1.8068638627227456, | |
| "grad_norm": 0.9332826733589172, | |
| "learning_rate": 1.7121807923674926e-05, | |
| "loss": 0.4609, | |
| "num_input_tokens_seen": 42270872, | |
| "step": 3765, | |
| "train_runtime": 6372.8289, | |
| "train_tokens_per_second": 6632.984 | |
| }, | |
| { | |
| "epoch": 1.8092638147237055, | |
| "grad_norm": 0.6459842324256897, | |
| "learning_rate": 1.7062221402442678e-05, | |
| "loss": 0.5136, | |
| "num_input_tokens_seen": 42324392, | |
| "step": 3770, | |
| "train_runtime": 6380.6203, | |
| "train_tokens_per_second": 6633.272 | |
| }, | |
| { | |
| "epoch": 1.8116637667246656, | |
| "grad_norm": 0.8273303508758545, | |
| "learning_rate": 1.7002684988460417e-05, | |
| "loss": 0.465, | |
| "num_input_tokens_seen": 42381736, | |
| "step": 3775, | |
| "train_runtime": 6388.9298, | |
| "train_tokens_per_second": 6633.621 | |
| }, | |
| { | |
| "epoch": 1.8140637187256254, | |
| "grad_norm": 0.6155418157577515, | |
| "learning_rate": 1.694319905755193e-05, | |
| "loss": 0.4924, | |
| "num_input_tokens_seen": 42442312, | |
| "step": 3780, | |
| "train_runtime": 6399.8787, | |
| "train_tokens_per_second": 6631.737 | |
| }, | |
| { | |
| "epoch": 1.8164636707265855, | |
| "grad_norm": 1.0188329219818115, | |
| "learning_rate": 1.6883763985222305e-05, | |
| "loss": 0.468, | |
| "num_input_tokens_seen": 42496896, | |
| "step": 3785, | |
| "train_runtime": 6409.4045, | |
| "train_tokens_per_second": 6630.397 | |
| }, | |
| { | |
| "epoch": 1.8188636227275454, | |
| "grad_norm": 0.604070782661438, | |
| "learning_rate": 1.6824380146655633e-05, | |
| "loss": 0.5271, | |
| "num_input_tokens_seen": 42554600, | |
| "step": 3790, | |
| "train_runtime": 6419.249, | |
| "train_tokens_per_second": 6629.218 | |
| }, | |
| { | |
| "epoch": 1.8212635747285053, | |
| "grad_norm": 0.7463460564613342, | |
| "learning_rate": 1.6765047916712545e-05, | |
| "loss": 0.5052, | |
| "num_input_tokens_seen": 42611168, | |
| "step": 3795, | |
| "train_runtime": 6429.2745, | |
| "train_tokens_per_second": 6627.679 | |
| }, | |
| { | |
| "epoch": 1.8236635267294654, | |
| "grad_norm": 0.6504276990890503, | |
| "learning_rate": 1.6705767669927914e-05, | |
| "loss": 0.4572, | |
| "num_input_tokens_seen": 42668344, | |
| "step": 3800, | |
| "train_runtime": 6440.1221, | |
| "train_tokens_per_second": 6625.394 | |
| }, | |
| { | |
| "epoch": 1.8260634787304255, | |
| "grad_norm": 0.8336795568466187, | |
| "learning_rate": 1.6646539780508478e-05, | |
| "loss": 0.4514, | |
| "num_input_tokens_seen": 42725880, | |
| "step": 3805, | |
| "train_runtime": 6450.437, | |
| "train_tokens_per_second": 6623.719 | |
| }, | |
| { | |
| "epoch": 1.8284634307313854, | |
| "grad_norm": 0.6106321215629578, | |
| "learning_rate": 1.658736462233045e-05, | |
| "loss": 0.4553, | |
| "num_input_tokens_seen": 42785824, | |
| "step": 3810, | |
| "train_runtime": 6460.6963, | |
| "train_tokens_per_second": 6622.479 | |
| }, | |
| { | |
| "epoch": 1.8308633827323453, | |
| "grad_norm": 0.9887316823005676, | |
| "learning_rate": 1.6528242568937174e-05, | |
| "loss": 0.5347, | |
| "num_input_tokens_seen": 42840440, | |
| "step": 3815, | |
| "train_runtime": 6470.5401, | |
| "train_tokens_per_second": 6620.845 | |
| }, | |
| { | |
| "epoch": 1.8332633347333054, | |
| "grad_norm": 0.6800510287284851, | |
| "learning_rate": 1.6469173993536787e-05, | |
| "loss": 0.5028, | |
| "num_input_tokens_seen": 42893576, | |
| "step": 3820, | |
| "train_runtime": 6480.2024, | |
| "train_tokens_per_second": 6619.172 | |
| }, | |
| { | |
| "epoch": 1.8356632867342653, | |
| "grad_norm": 0.5527476668357849, | |
| "learning_rate": 1.641015926899985e-05, | |
| "loss": 0.4997, | |
| "num_input_tokens_seen": 42952744, | |
| "step": 3825, | |
| "train_runtime": 6490.1332, | |
| "train_tokens_per_second": 6618.161 | |
| }, | |
| { | |
| "epoch": 1.8380632387352254, | |
| "grad_norm": 0.833662211894989, | |
| "learning_rate": 1.6351198767856978e-05, | |
| "loss": 0.5076, | |
| "num_input_tokens_seen": 43010768, | |
| "step": 3830, | |
| "train_runtime": 6498.8469, | |
| "train_tokens_per_second": 6618.215 | |
| }, | |
| { | |
| "epoch": 1.8404631907361853, | |
| "grad_norm": 0.8122771978378296, | |
| "learning_rate": 1.6292292862296482e-05, | |
| "loss": 0.4789, | |
| "num_input_tokens_seen": 43067120, | |
| "step": 3835, | |
| "train_runtime": 6506.9502, | |
| "train_tokens_per_second": 6618.634 | |
| }, | |
| { | |
| "epoch": 1.8428631427371451, | |
| "grad_norm": 0.7453281283378601, | |
| "learning_rate": 1.6233441924162085e-05, | |
| "loss": 0.472, | |
| "num_input_tokens_seen": 43124944, | |
| "step": 3840, | |
| "train_runtime": 6514.9238, | |
| "train_tokens_per_second": 6619.409 | |
| }, | |
| { | |
| "epoch": 1.8452630947381052, | |
| "grad_norm": 0.7798519730567932, | |
| "learning_rate": 1.617464632495048e-05, | |
| "loss": 0.4968, | |
| "num_input_tokens_seen": 43181496, | |
| "step": 3845, | |
| "train_runtime": 6522.7215, | |
| "train_tokens_per_second": 6620.165 | |
| }, | |
| { | |
| "epoch": 1.8476630467390653, | |
| "grad_norm": 0.770413339138031, | |
| "learning_rate": 1.611590643580906e-05, | |
| "loss": 0.4799, | |
| "num_input_tokens_seen": 43236224, | |
| "step": 3850, | |
| "train_runtime": 6531.17, | |
| "train_tokens_per_second": 6619.981 | |
| }, | |
| { | |
| "epoch": 1.8500629987400252, | |
| "grad_norm": 0.7712330222129822, | |
| "learning_rate": 1.6057222627533554e-05, | |
| "loss": 0.4825, | |
| "num_input_tokens_seen": 43291464, | |
| "step": 3855, | |
| "train_runtime": 6539.3507, | |
| "train_tokens_per_second": 6620.147 | |
| }, | |
| { | |
| "epoch": 1.852462950740985, | |
| "grad_norm": 0.667767345905304, | |
| "learning_rate": 1.599859527056566e-05, | |
| "loss": 0.4525, | |
| "num_input_tokens_seen": 43349520, | |
| "step": 3860, | |
| "train_runtime": 6547.7333, | |
| "train_tokens_per_second": 6620.538 | |
| }, | |
| { | |
| "epoch": 1.8548629027419452, | |
| "grad_norm": 0.8143635988235474, | |
| "learning_rate": 1.594002473499073e-05, | |
| "loss": 0.4601, | |
| "num_input_tokens_seen": 43410208, | |
| "step": 3865, | |
| "train_runtime": 6556.4635, | |
| "train_tokens_per_second": 6620.979 | |
| }, | |
| { | |
| "epoch": 1.857262854742905, | |
| "grad_norm": 0.6884592771530151, | |
| "learning_rate": 1.588151139053544e-05, | |
| "loss": 0.4458, | |
| "num_input_tokens_seen": 43469344, | |
| "step": 3870, | |
| "train_runtime": 6565.0785, | |
| "train_tokens_per_second": 6621.298 | |
| }, | |
| { | |
| "epoch": 1.8596628067438652, | |
| "grad_norm": 0.8038159608840942, | |
| "learning_rate": 1.5823055606565458e-05, | |
| "loss": 0.4859, | |
| "num_input_tokens_seen": 43526440, | |
| "step": 3875, | |
| "train_runtime": 6573.964, | |
| "train_tokens_per_second": 6621.034 | |
| }, | |
| { | |
| "epoch": 1.862062758744825, | |
| "grad_norm": 0.6315177083015442, | |
| "learning_rate": 1.5764657752083072e-05, | |
| "loss": 0.4795, | |
| "num_input_tokens_seen": 43583936, | |
| "step": 3880, | |
| "train_runtime": 6582.4382, | |
| "train_tokens_per_second": 6621.245 | |
| }, | |
| { | |
| "epoch": 1.864462710745785, | |
| "grad_norm": 0.7281184792518616, | |
| "learning_rate": 1.5706318195724894e-05, | |
| "loss": 0.4707, | |
| "num_input_tokens_seen": 43639480, | |
| "step": 3885, | |
| "train_runtime": 6590.3977, | |
| "train_tokens_per_second": 6621.676 | |
| }, | |
| { | |
| "epoch": 1.866862662746745, | |
| "grad_norm": 0.8681549429893494, | |
| "learning_rate": 1.5648037305759566e-05, | |
| "loss": 0.4557, | |
| "num_input_tokens_seen": 43690520, | |
| "step": 3890, | |
| "train_runtime": 6598.0076, | |
| "train_tokens_per_second": 6621.775 | |
| }, | |
| { | |
| "epoch": 1.8692626147477052, | |
| "grad_norm": 0.9573807120323181, | |
| "learning_rate": 1.5589815450085355e-05, | |
| "loss": 0.4621, | |
| "num_input_tokens_seen": 43749480, | |
| "step": 3895, | |
| "train_runtime": 6606.515, | |
| "train_tokens_per_second": 6622.172 | |
| }, | |
| { | |
| "epoch": 1.871662566748665, | |
| "grad_norm": 0.9825738072395325, | |
| "learning_rate": 1.5531652996227885e-05, | |
| "loss": 0.4627, | |
| "num_input_tokens_seen": 43799824, | |
| "step": 3900, | |
| "train_runtime": 6614.0046, | |
| "train_tokens_per_second": 6622.285 | |
| }, | |
| { | |
| "epoch": 1.874062518749625, | |
| "grad_norm": 0.8160600662231445, | |
| "learning_rate": 1.5473550311337833e-05, | |
| "loss": 0.4806, | |
| "num_input_tokens_seen": 43858032, | |
| "step": 3905, | |
| "train_runtime": 6622.3127, | |
| "train_tokens_per_second": 6622.767 | |
| }, | |
| { | |
| "epoch": 1.876462470750585, | |
| "grad_norm": 0.8037713766098022, | |
| "learning_rate": 1.541550776218855e-05, | |
| "loss": 0.4767, | |
| "num_input_tokens_seen": 43914232, | |
| "step": 3910, | |
| "train_runtime": 6630.3703, | |
| "train_tokens_per_second": 6623.194 | |
| }, | |
| { | |
| "epoch": 1.878862422751545, | |
| "grad_norm": 0.8697477579116821, | |
| "learning_rate": 1.535752571517379e-05, | |
| "loss": 0.4582, | |
| "num_input_tokens_seen": 43970744, | |
| "step": 3915, | |
| "train_runtime": 6638.2775, | |
| "train_tokens_per_second": 6623.818 | |
| }, | |
| { | |
| "epoch": 1.881262374752505, | |
| "grad_norm": 0.6897442936897278, | |
| "learning_rate": 1.529960453630538e-05, | |
| "loss": 0.4725, | |
| "num_input_tokens_seen": 44028408, | |
| "step": 3920, | |
| "train_runtime": 6646.2538, | |
| "train_tokens_per_second": 6624.545 | |
| }, | |
| { | |
| "epoch": 1.883662326753465, | |
| "grad_norm": 0.7267577052116394, | |
| "learning_rate": 1.5241744591210954e-05, | |
| "loss": 0.4661, | |
| "num_input_tokens_seen": 44085968, | |
| "step": 3925, | |
| "train_runtime": 6654.4818, | |
| "train_tokens_per_second": 6625.004 | |
| }, | |
| { | |
| "epoch": 1.8860622787544248, | |
| "grad_norm": 0.6550572514533997, | |
| "learning_rate": 1.5183946245131563e-05, | |
| "loss": 0.5171, | |
| "num_input_tokens_seen": 44143360, | |
| "step": 3930, | |
| "train_runtime": 6662.7155, | |
| "train_tokens_per_second": 6625.431 | |
| }, | |
| { | |
| "epoch": 1.8884622307553849, | |
| "grad_norm": 0.8330610394477844, | |
| "learning_rate": 1.5126209862919427e-05, | |
| "loss": 0.4935, | |
| "num_input_tokens_seen": 44193864, | |
| "step": 3935, | |
| "train_runtime": 6669.9997, | |
| "train_tokens_per_second": 6625.767 | |
| }, | |
| { | |
| "epoch": 1.890862182756345, | |
| "grad_norm": 0.8436587452888489, | |
| "learning_rate": 1.506853580903564e-05, | |
| "loss": 0.5181, | |
| "num_input_tokens_seen": 44249464, | |
| "step": 3940, | |
| "train_runtime": 6677.7583, | |
| "train_tokens_per_second": 6626.395 | |
| }, | |
| { | |
| "epoch": 1.8932621347573049, | |
| "grad_norm": 0.8945364356040955, | |
| "learning_rate": 1.5010924447547808e-05, | |
| "loss": 0.445, | |
| "num_input_tokens_seen": 44306480, | |
| "step": 3945, | |
| "train_runtime": 6685.9167, | |
| "train_tokens_per_second": 6626.837 | |
| }, | |
| { | |
| "epoch": 1.8956620867582648, | |
| "grad_norm": 0.7293525338172913, | |
| "learning_rate": 1.4953376142127828e-05, | |
| "loss": 0.4933, | |
| "num_input_tokens_seen": 44363776, | |
| "step": 3950, | |
| "train_runtime": 6693.7184, | |
| "train_tokens_per_second": 6627.673 | |
| }, | |
| { | |
| "epoch": 1.8980620387592249, | |
| "grad_norm": 0.8093637228012085, | |
| "learning_rate": 1.4895891256049548e-05, | |
| "loss": 0.4952, | |
| "num_input_tokens_seen": 44419016, | |
| "step": 3955, | |
| "train_runtime": 6701.9236, | |
| "train_tokens_per_second": 6627.801 | |
| }, | |
| { | |
| "epoch": 1.900461990760185, | |
| "grad_norm": 0.8808810710906982, | |
| "learning_rate": 1.483847015218647e-05, | |
| "loss": 0.5036, | |
| "num_input_tokens_seen": 44473296, | |
| "step": 3960, | |
| "train_runtime": 6709.8008, | |
| "train_tokens_per_second": 6628.11 | |
| }, | |
| { | |
| "epoch": 1.9028619427611448, | |
| "grad_norm": 0.606708288192749, | |
| "learning_rate": 1.4781113193009466e-05, | |
| "loss": 0.4709, | |
| "num_input_tokens_seen": 44533064, | |
| "step": 3965, | |
| "train_runtime": 6718.435, | |
| "train_tokens_per_second": 6628.488 | |
| }, | |
| { | |
| "epoch": 1.9052618947621047, | |
| "grad_norm": 0.7501396536827087, | |
| "learning_rate": 1.472382074058451e-05, | |
| "loss": 0.487, | |
| "num_input_tokens_seen": 44591088, | |
| "step": 3970, | |
| "train_runtime": 6726.4717, | |
| "train_tokens_per_second": 6629.194 | |
| }, | |
| { | |
| "epoch": 1.9076618467630646, | |
| "grad_norm": 0.7472719550132751, | |
| "learning_rate": 1.4666593156570376e-05, | |
| "loss": 0.4822, | |
| "num_input_tokens_seen": 44639864, | |
| "step": 3975, | |
| "train_runtime": 6733.726, | |
| "train_tokens_per_second": 6629.296 | |
| }, | |
| { | |
| "epoch": 1.9100617987640247, | |
| "grad_norm": 0.9028266668319702, | |
| "learning_rate": 1.460943080221635e-05, | |
| "loss": 0.4792, | |
| "num_input_tokens_seen": 44697568, | |
| "step": 3980, | |
| "train_runtime": 6742.2116, | |
| "train_tokens_per_second": 6629.511 | |
| }, | |
| { | |
| "epoch": 1.9124617507649848, | |
| "grad_norm": 0.6775950193405151, | |
| "learning_rate": 1.4552334038359938e-05, | |
| "loss": 0.4861, | |
| "num_input_tokens_seen": 44750848, | |
| "step": 3985, | |
| "train_runtime": 6749.8459, | |
| "train_tokens_per_second": 6629.907 | |
| }, | |
| { | |
| "epoch": 1.9148617027659447, | |
| "grad_norm": 0.7115968465805054, | |
| "learning_rate": 1.4495303225424656e-05, | |
| "loss": 0.4546, | |
| "num_input_tokens_seen": 44804648, | |
| "step": 3990, | |
| "train_runtime": 6758.0597, | |
| "train_tokens_per_second": 6629.809 | |
| }, | |
| { | |
| "epoch": 1.9172616547669046, | |
| "grad_norm": 0.8527563214302063, | |
| "learning_rate": 1.4438338723417654e-05, | |
| "loss": 0.5007, | |
| "num_input_tokens_seen": 44860632, | |
| "step": 3995, | |
| "train_runtime": 6766.2816, | |
| "train_tokens_per_second": 6630.027 | |
| }, | |
| { | |
| "epoch": 1.9196616067678647, | |
| "grad_norm": 0.8954775333404541, | |
| "learning_rate": 1.4381440891927512e-05, | |
| "loss": 0.5301, | |
| "num_input_tokens_seen": 44913712, | |
| "step": 4000, | |
| "train_runtime": 6774.2353, | |
| "train_tokens_per_second": 6630.078 | |
| }, | |
| { | |
| "epoch": 1.9220615587688248, | |
| "grad_norm": 0.7284995317459106, | |
| "learning_rate": 1.432461009012196e-05, | |
| "loss": 0.5028, | |
| "num_input_tokens_seen": 44970992, | |
| "step": 4005, | |
| "train_runtime": 6782.3775, | |
| "train_tokens_per_second": 6630.565 | |
| }, | |
| { | |
| "epoch": 1.9244615107697847, | |
| "grad_norm": 1.017869472503662, | |
| "learning_rate": 1.4267846676745598e-05, | |
| "loss": 0.4618, | |
| "num_input_tokens_seen": 45024328, | |
| "step": 4010, | |
| "train_runtime": 6790.5882, | |
| "train_tokens_per_second": 6630.402 | |
| }, | |
| { | |
| "epoch": 1.9268614627707445, | |
| "grad_norm": 0.7588083148002625, | |
| "learning_rate": 1.4211151010117627e-05, | |
| "loss": 0.5078, | |
| "num_input_tokens_seen": 45082296, | |
| "step": 4015, | |
| "train_runtime": 6798.9435, | |
| "train_tokens_per_second": 6630.78 | |
| }, | |
| { | |
| "epoch": 1.9292614147717044, | |
| "grad_norm": 0.66818767786026, | |
| "learning_rate": 1.4154523448129597e-05, | |
| "loss": 0.4823, | |
| "num_input_tokens_seen": 45137992, | |
| "step": 4020, | |
| "train_runtime": 6806.9385, | |
| "train_tokens_per_second": 6631.174 | |
| }, | |
| { | |
| "epoch": 1.9316613667726645, | |
| "grad_norm": 0.700678825378418, | |
| "learning_rate": 1.4097964348243172e-05, | |
| "loss": 0.4639, | |
| "num_input_tokens_seen": 45197208, | |
| "step": 4025, | |
| "train_runtime": 6815.6104, | |
| "train_tokens_per_second": 6631.425 | |
| }, | |
| { | |
| "epoch": 1.9340613187736246, | |
| "grad_norm": 0.8906050324440002, | |
| "learning_rate": 1.4041474067487814e-05, | |
| "loss": 0.4599, | |
| "num_input_tokens_seen": 45256040, | |
| "step": 4030, | |
| "train_runtime": 6824.0323, | |
| "train_tokens_per_second": 6631.862 | |
| }, | |
| { | |
| "epoch": 1.9364612707745845, | |
| "grad_norm": 0.8205930590629578, | |
| "learning_rate": 1.3985052962458593e-05, | |
| "loss": 0.4903, | |
| "num_input_tokens_seen": 45311968, | |
| "step": 4035, | |
| "train_runtime": 6831.8772, | |
| "train_tokens_per_second": 6632.433 | |
| }, | |
| { | |
| "epoch": 1.9388612227755444, | |
| "grad_norm": 0.9148489832878113, | |
| "learning_rate": 1.3928701389313897e-05, | |
| "loss": 0.4939, | |
| "num_input_tokens_seen": 45361584, | |
| "step": 4040, | |
| "train_runtime": 6839.5045, | |
| "train_tokens_per_second": 6632.291 | |
| }, | |
| { | |
| "epoch": 1.9412611747765045, | |
| "grad_norm": 1.021208643913269, | |
| "learning_rate": 1.3872419703773187e-05, | |
| "loss": 0.4876, | |
| "num_input_tokens_seen": 45421616, | |
| "step": 4045, | |
| "train_runtime": 6848.0389, | |
| "train_tokens_per_second": 6632.792 | |
| }, | |
| { | |
| "epoch": 1.9436611267774646, | |
| "grad_norm": 0.8669795393943787, | |
| "learning_rate": 1.3816208261114755e-05, | |
| "loss": 0.5142, | |
| "num_input_tokens_seen": 45475784, | |
| "step": 4050, | |
| "train_runtime": 6855.5247, | |
| "train_tokens_per_second": 6633.451 | |
| }, | |
| { | |
| "epoch": 1.9460610787784245, | |
| "grad_norm": 1.084006428718567, | |
| "learning_rate": 1.3760067416173511e-05, | |
| "loss": 0.4949, | |
| "num_input_tokens_seen": 45529816, | |
| "step": 4055, | |
| "train_runtime": 6863.458, | |
| "train_tokens_per_second": 6633.655 | |
| }, | |
| { | |
| "epoch": 1.9484610307793844, | |
| "grad_norm": 0.639717161655426, | |
| "learning_rate": 1.3703997523338688e-05, | |
| "loss": 0.4917, | |
| "num_input_tokens_seen": 45585432, | |
| "step": 4060, | |
| "train_runtime": 6870.9893, | |
| "train_tokens_per_second": 6634.479 | |
| }, | |
| { | |
| "epoch": 1.9508609827803443, | |
| "grad_norm": 0.7942274808883667, | |
| "learning_rate": 1.3647998936551643e-05, | |
| "loss": 0.4542, | |
| "num_input_tokens_seen": 45642256, | |
| "step": 4065, | |
| "train_runtime": 6879.1089, | |
| "train_tokens_per_second": 6634.908 | |
| }, | |
| { | |
| "epoch": 1.9532609347813044, | |
| "grad_norm": 0.7706002593040466, | |
| "learning_rate": 1.3592072009303603e-05, | |
| "loss": 0.4767, | |
| "num_input_tokens_seen": 45700704, | |
| "step": 4070, | |
| "train_runtime": 6887.1919, | |
| "train_tokens_per_second": 6635.608 | |
| }, | |
| { | |
| "epoch": 1.9556608867822645, | |
| "grad_norm": 0.6891798377037048, | |
| "learning_rate": 1.3536217094633471e-05, | |
| "loss": 0.4649, | |
| "num_input_tokens_seen": 45754672, | |
| "step": 4075, | |
| "train_runtime": 6895.2959, | |
| "train_tokens_per_second": 6635.636 | |
| }, | |
| { | |
| "epoch": 1.9580608387832243, | |
| "grad_norm": 0.6927337646484375, | |
| "learning_rate": 1.3480434545125562e-05, | |
| "loss": 0.4794, | |
| "num_input_tokens_seen": 45805360, | |
| "step": 4080, | |
| "train_runtime": 6902.6999, | |
| "train_tokens_per_second": 6635.861 | |
| }, | |
| { | |
| "epoch": 1.9604607907841842, | |
| "grad_norm": 0.7922900319099426, | |
| "learning_rate": 1.3424724712907355e-05, | |
| "loss": 0.5073, | |
| "num_input_tokens_seen": 45859408, | |
| "step": 4085, | |
| "train_runtime": 6910.3792, | |
| "train_tokens_per_second": 6636.309 | |
| }, | |
| { | |
| "epoch": 1.9628607427851443, | |
| "grad_norm": 0.5073052048683167, | |
| "learning_rate": 1.3369087949647352e-05, | |
| "loss": 0.4844, | |
| "num_input_tokens_seen": 45915912, | |
| "step": 4090, | |
| "train_runtime": 6918.4066, | |
| "train_tokens_per_second": 6636.776 | |
| }, | |
| { | |
| "epoch": 1.9652606947861044, | |
| "grad_norm": 0.805068850517273, | |
| "learning_rate": 1.3313524606552763e-05, | |
| "loss": 0.4683, | |
| "num_input_tokens_seen": 45972424, | |
| "step": 4095, | |
| "train_runtime": 6926.7284, | |
| "train_tokens_per_second": 6636.961 | |
| }, | |
| { | |
| "epoch": 1.9676606467870643, | |
| "grad_norm": 0.7410593628883362, | |
| "learning_rate": 1.3258035034367338e-05, | |
| "loss": 0.4847, | |
| "num_input_tokens_seen": 46029616, | |
| "step": 4100, | |
| "train_runtime": 6934.891, | |
| "train_tokens_per_second": 6637.396 | |
| }, | |
| { | |
| "epoch": 1.9700605987880242, | |
| "grad_norm": 0.9381468296051025, | |
| "learning_rate": 1.3202619583369189e-05, | |
| "loss": 0.5131, | |
| "num_input_tokens_seen": 46087816, | |
| "step": 4105, | |
| "train_runtime": 6943.9707, | |
| "train_tokens_per_second": 6637.098 | |
| }, | |
| { | |
| "epoch": 1.972460550788984, | |
| "grad_norm": 0.7725812792778015, | |
| "learning_rate": 1.3147278603368487e-05, | |
| "loss": 0.496, | |
| "num_input_tokens_seen": 46141504, | |
| "step": 4110, | |
| "train_runtime": 6951.6679, | |
| "train_tokens_per_second": 6637.472 | |
| }, | |
| { | |
| "epoch": 1.9748605027899442, | |
| "grad_norm": 0.9349031448364258, | |
| "learning_rate": 1.3092012443705332e-05, | |
| "loss": 0.4513, | |
| "num_input_tokens_seen": 46202072, | |
| "step": 4115, | |
| "train_runtime": 6960.2643, | |
| "train_tokens_per_second": 6637.977 | |
| }, | |
| { | |
| "epoch": 1.9772604547909043, | |
| "grad_norm": 0.5486748218536377, | |
| "learning_rate": 1.3036821453247506e-05, | |
| "loss": 0.4997, | |
| "num_input_tokens_seen": 46258400, | |
| "step": 4120, | |
| "train_runtime": 6968.5186, | |
| "train_tokens_per_second": 6638.197 | |
| }, | |
| { | |
| "epoch": 1.9796604067918642, | |
| "grad_norm": 0.8410947322845459, | |
| "learning_rate": 1.2981705980388295e-05, | |
| "loss": 0.5062, | |
| "num_input_tokens_seen": 46309656, | |
| "step": 4125, | |
| "train_runtime": 6975.9975, | |
| "train_tokens_per_second": 6638.428 | |
| }, | |
| { | |
| "epoch": 1.982060358792824, | |
| "grad_norm": 0.6465336680412292, | |
| "learning_rate": 1.2926666373044294e-05, | |
| "loss": 0.4891, | |
| "num_input_tokens_seen": 46366888, | |
| "step": 4130, | |
| "train_runtime": 6984.2364, | |
| "train_tokens_per_second": 6638.791 | |
| }, | |
| { | |
| "epoch": 1.9844603107937842, | |
| "grad_norm": 0.6658479571342468, | |
| "learning_rate": 1.2871702978653163e-05, | |
| "loss": 0.5002, | |
| "num_input_tokens_seen": 46419304, | |
| "step": 4135, | |
| "train_runtime": 6991.7902, | |
| "train_tokens_per_second": 6639.116 | |
| }, | |
| { | |
| "epoch": 1.9868602627947443, | |
| "grad_norm": 0.8227950930595398, | |
| "learning_rate": 1.28168161441715e-05, | |
| "loss": 0.5105, | |
| "num_input_tokens_seen": 46469520, | |
| "step": 4140, | |
| "train_runtime": 6999.0924, | |
| "train_tokens_per_second": 6639.364 | |
| }, | |
| { | |
| "epoch": 1.9892602147957041, | |
| "grad_norm": 1.1198500394821167, | |
| "learning_rate": 1.27620062160726e-05, | |
| "loss": 0.5154, | |
| "num_input_tokens_seen": 46523240, | |
| "step": 4145, | |
| "train_runtime": 7007.0718, | |
| "train_tokens_per_second": 6639.47 | |
| }, | |
| { | |
| "epoch": 1.991660166796664, | |
| "grad_norm": 0.8290591835975647, | |
| "learning_rate": 1.2707273540344274e-05, | |
| "loss": 0.5361, | |
| "num_input_tokens_seen": 46577712, | |
| "step": 4150, | |
| "train_runtime": 7015.0992, | |
| "train_tokens_per_second": 6639.637 | |
| }, | |
| { | |
| "epoch": 1.994060118797624, | |
| "grad_norm": 0.6306242346763611, | |
| "learning_rate": 1.265261846248672e-05, | |
| "loss": 0.4873, | |
| "num_input_tokens_seen": 46629984, | |
| "step": 4155, | |
| "train_runtime": 7022.7592, | |
| "train_tokens_per_second": 6639.838 | |
| }, | |
| { | |
| "epoch": 1.996460070798584, | |
| "grad_norm": 0.8492105007171631, | |
| "learning_rate": 1.2598041327510254e-05, | |
| "loss": 0.4779, | |
| "num_input_tokens_seen": 46689664, | |
| "step": 4160, | |
| "train_runtime": 7031.9599, | |
| "train_tokens_per_second": 6639.637 | |
| }, | |
| { | |
| "epoch": 1.9988600227995441, | |
| "grad_norm": 0.8231053352355957, | |
| "learning_rate": 1.25435424799332e-05, | |
| "loss": 0.4451, | |
| "num_input_tokens_seen": 46752192, | |
| "step": 4165, | |
| "train_runtime": 7041.1099, | |
| "train_tokens_per_second": 6639.89 | |
| }, | |
| { | |
| "epoch": 2.000959980800384, | |
| "grad_norm": 0.6937538385391235, | |
| "learning_rate": 1.2489122263779684e-05, | |
| "loss": 0.4431, | |
| "num_input_tokens_seen": 46800120, | |
| "step": 4170, | |
| "train_runtime": 7048.3249, | |
| "train_tokens_per_second": 6639.893 | |
| }, | |
| { | |
| "epoch": 2.003359932801344, | |
| "grad_norm": 0.5429336428642273, | |
| "learning_rate": 1.2434781022577476e-05, | |
| "loss": 0.4561, | |
| "num_input_tokens_seen": 46859352, | |
| "step": 4175, | |
| "train_runtime": 7056.8347, | |
| "train_tokens_per_second": 6640.279 | |
| }, | |
| { | |
| "epoch": 2.005759884802304, | |
| "grad_norm": 0.7788823843002319, | |
| "learning_rate": 1.2380519099355831e-05, | |
| "loss": 0.4531, | |
| "num_input_tokens_seen": 46918656, | |
| "step": 4180, | |
| "train_runtime": 7065.0446, | |
| "train_tokens_per_second": 6640.957 | |
| }, | |
| { | |
| "epoch": 2.008159836803264, | |
| "grad_norm": 0.7995026111602783, | |
| "learning_rate": 1.2326336836643274e-05, | |
| "loss": 0.5048, | |
| "num_input_tokens_seen": 46976896, | |
| "step": 4185, | |
| "train_runtime": 7073.7177, | |
| "train_tokens_per_second": 6641.048 | |
| }, | |
| { | |
| "epoch": 2.010559788804224, | |
| "grad_norm": 0.7401773929595947, | |
| "learning_rate": 1.227223457646551e-05, | |
| "loss": 0.4846, | |
| "num_input_tokens_seen": 47033584, | |
| "step": 4190, | |
| "train_runtime": 7081.7376, | |
| "train_tokens_per_second": 6641.532 | |
| }, | |
| { | |
| "epoch": 2.012959740805184, | |
| "grad_norm": 1.0051988363265991, | |
| "learning_rate": 1.22182126603432e-05, | |
| "loss": 0.497, | |
| "num_input_tokens_seen": 47084560, | |
| "step": 4195, | |
| "train_runtime": 7089.243, | |
| "train_tokens_per_second": 6641.691 | |
| }, | |
| { | |
| "epoch": 2.0153596928061437, | |
| "grad_norm": 0.7586055994033813, | |
| "learning_rate": 1.2164271429289837e-05, | |
| "loss": 0.4671, | |
| "num_input_tokens_seen": 47141040, | |
| "step": 4200, | |
| "train_runtime": 7097.5162, | |
| "train_tokens_per_second": 6641.907 | |
| }, | |
| { | |
| "epoch": 2.017759644807104, | |
| "grad_norm": 0.6509086489677429, | |
| "learning_rate": 1.2110411223809612e-05, | |
| "loss": 0.4329, | |
| "num_input_tokens_seen": 47198656, | |
| "step": 4205, | |
| "train_runtime": 7107.6834, | |
| "train_tokens_per_second": 6640.512 | |
| }, | |
| { | |
| "epoch": 2.020159596808064, | |
| "grad_norm": 0.7223982810974121, | |
| "learning_rate": 1.2056632383895217e-05, | |
| "loss": 0.4903, | |
| "num_input_tokens_seen": 47255504, | |
| "step": 4210, | |
| "train_runtime": 7117.033, | |
| "train_tokens_per_second": 6639.776 | |
| }, | |
| { | |
| "epoch": 2.0225595488090238, | |
| "grad_norm": 0.9436632990837097, | |
| "learning_rate": 1.2002935249025732e-05, | |
| "loss": 0.4788, | |
| "num_input_tokens_seen": 47307728, | |
| "step": 4215, | |
| "train_runtime": 7126.5999, | |
| "train_tokens_per_second": 6638.191 | |
| }, | |
| { | |
| "epoch": 2.0249595008099837, | |
| "grad_norm": 0.7383816838264465, | |
| "learning_rate": 1.1949320158164466e-05, | |
| "loss": 0.4692, | |
| "num_input_tokens_seen": 47365504, | |
| "step": 4220, | |
| "train_runtime": 7136.5388, | |
| "train_tokens_per_second": 6637.041 | |
| }, | |
| { | |
| "epoch": 2.027359452810944, | |
| "grad_norm": 0.8641635775566101, | |
| "learning_rate": 1.1895787449756834e-05, | |
| "loss": 0.4565, | |
| "num_input_tokens_seen": 47424664, | |
| "step": 4225, | |
| "train_runtime": 7147.349, | |
| "train_tokens_per_second": 6635.28 | |
| }, | |
| { | |
| "epoch": 2.029759404811904, | |
| "grad_norm": 0.8401957750320435, | |
| "learning_rate": 1.1842337461728232e-05, | |
| "loss": 0.5177, | |
| "num_input_tokens_seen": 47482624, | |
| "step": 4230, | |
| "train_runtime": 7158.1241, | |
| "train_tokens_per_second": 6633.389 | |
| }, | |
| { | |
| "epoch": 2.0321593568128637, | |
| "grad_norm": 0.7083563208580017, | |
| "learning_rate": 1.1788970531481832e-05, | |
| "loss": 0.4509, | |
| "num_input_tokens_seen": 47541264, | |
| "step": 4235, | |
| "train_runtime": 7168.9418, | |
| "train_tokens_per_second": 6631.559 | |
| }, | |
| { | |
| "epoch": 2.0345593088138236, | |
| "grad_norm": 0.7770140171051025, | |
| "learning_rate": 1.1735686995896559e-05, | |
| "loss": 0.5111, | |
| "num_input_tokens_seen": 47596256, | |
| "step": 4240, | |
| "train_runtime": 7178.4941, | |
| "train_tokens_per_second": 6630.396 | |
| }, | |
| { | |
| "epoch": 2.0369592608147835, | |
| "grad_norm": 0.8754630088806152, | |
| "learning_rate": 1.1682487191324868e-05, | |
| "loss": 0.5576, | |
| "num_input_tokens_seen": 47649808, | |
| "step": 4245, | |
| "train_runtime": 7188.2139, | |
| "train_tokens_per_second": 6628.88 | |
| }, | |
| { | |
| "epoch": 2.039359212815744, | |
| "grad_norm": 0.6423441767692566, | |
| "learning_rate": 1.1629371453590671e-05, | |
| "loss": 0.4836, | |
| "num_input_tokens_seen": 47709328, | |
| "step": 4250, | |
| "train_runtime": 7198.5845, | |
| "train_tokens_per_second": 6627.599 | |
| }, | |
| { | |
| "epoch": 2.0417591648167037, | |
| "grad_norm": 0.7070155143737793, | |
| "learning_rate": 1.1576340117987233e-05, | |
| "loss": 0.5057, | |
| "num_input_tokens_seen": 47765800, | |
| "step": 4255, | |
| "train_runtime": 7209.1424, | |
| "train_tokens_per_second": 6625.726 | |
| }, | |
| { | |
| "epoch": 2.0441591168176636, | |
| "grad_norm": 0.8831612467765808, | |
| "learning_rate": 1.1523393519274996e-05, | |
| "loss": 0.4447, | |
| "num_input_tokens_seen": 47820320, | |
| "step": 4260, | |
| "train_runtime": 7218.3344, | |
| "train_tokens_per_second": 6624.841 | |
| }, | |
| { | |
| "epoch": 2.0465590688186235, | |
| "grad_norm": 0.6510924100875854, | |
| "learning_rate": 1.1470531991679523e-05, | |
| "loss": 0.5101, | |
| "num_input_tokens_seen": 47876928, | |
| "step": 4265, | |
| "train_runtime": 7228.3009, | |
| "train_tokens_per_second": 6623.538 | |
| }, | |
| { | |
| "epoch": 2.048959020819584, | |
| "grad_norm": 0.6335709691047668, | |
| "learning_rate": 1.1417755868889343e-05, | |
| "loss": 0.4432, | |
| "num_input_tokens_seen": 47933280, | |
| "step": 4270, | |
| "train_runtime": 7237.6205, | |
| "train_tokens_per_second": 6622.795 | |
| }, | |
| { | |
| "epoch": 2.0513589728205437, | |
| "grad_norm": 0.7883151769638062, | |
| "learning_rate": 1.1365065484053895e-05, | |
| "loss": 0.4606, | |
| "num_input_tokens_seen": 47991280, | |
| "step": 4275, | |
| "train_runtime": 7247.4539, | |
| "train_tokens_per_second": 6621.812 | |
| }, | |
| { | |
| "epoch": 2.0537589248215036, | |
| "grad_norm": 0.8296838998794556, | |
| "learning_rate": 1.1312461169781383e-05, | |
| "loss": 0.4669, | |
| "num_input_tokens_seen": 48045896, | |
| "step": 4280, | |
| "train_runtime": 7257.2601, | |
| "train_tokens_per_second": 6620.391 | |
| }, | |
| { | |
| "epoch": 2.0561588768224635, | |
| "grad_norm": 0.8068815469741821, | |
| "learning_rate": 1.1259943258136682e-05, | |
| "loss": 0.4849, | |
| "num_input_tokens_seen": 48105824, | |
| "step": 4285, | |
| "train_runtime": 7268.0346, | |
| "train_tokens_per_second": 6618.822 | |
| }, | |
| { | |
| "epoch": 2.0585588288234233, | |
| "grad_norm": 0.977588415145874, | |
| "learning_rate": 1.1207512080639273e-05, | |
| "loss": 0.4956, | |
| "num_input_tokens_seen": 48160632, | |
| "step": 4290, | |
| "train_runtime": 7277.9569, | |
| "train_tokens_per_second": 6617.329 | |
| }, | |
| { | |
| "epoch": 2.0609587808243837, | |
| "grad_norm": 0.7364087700843811, | |
| "learning_rate": 1.1155167968261105e-05, | |
| "loss": 0.4357, | |
| "num_input_tokens_seen": 48217992, | |
| "step": 4295, | |
| "train_runtime": 7288.3331, | |
| "train_tokens_per_second": 6615.778 | |
| }, | |
| { | |
| "epoch": 2.0633587328253435, | |
| "grad_norm": 0.757265031337738, | |
| "learning_rate": 1.1102911251424526e-05, | |
| "loss": 0.4907, | |
| "num_input_tokens_seen": 48276216, | |
| "step": 4300, | |
| "train_runtime": 7298.7103, | |
| "train_tokens_per_second": 6614.349 | |
| }, | |
| { | |
| "epoch": 2.0657586848263034, | |
| "grad_norm": 0.773041844367981, | |
| "learning_rate": 1.1050742260000226e-05, | |
| "loss": 0.4687, | |
| "num_input_tokens_seen": 48331296, | |
| "step": 4305, | |
| "train_runtime": 7308.4104, | |
| "train_tokens_per_second": 6613.106 | |
| }, | |
| { | |
| "epoch": 2.0681586368272633, | |
| "grad_norm": 1.1142570972442627, | |
| "learning_rate": 1.0998661323305107e-05, | |
| "loss": 0.4574, | |
| "num_input_tokens_seen": 48387368, | |
| "step": 4310, | |
| "train_runtime": 7317.8081, | |
| "train_tokens_per_second": 6612.276 | |
| }, | |
| { | |
| "epoch": 2.0705585888282236, | |
| "grad_norm": 1.0279673337936401, | |
| "learning_rate": 1.094666877010023e-05, | |
| "loss": 0.5004, | |
| "num_input_tokens_seen": 48440296, | |
| "step": 4315, | |
| "train_runtime": 7327.8587, | |
| "train_tokens_per_second": 6610.43 | |
| }, | |
| { | |
| "epoch": 2.0729585408291835, | |
| "grad_norm": 0.9261734485626221, | |
| "learning_rate": 1.0894764928588721e-05, | |
| "loss": 0.4747, | |
| "num_input_tokens_seen": 48492496, | |
| "step": 4320, | |
| "train_runtime": 7336.9344, | |
| "train_tokens_per_second": 6609.368 | |
| }, | |
| { | |
| "epoch": 2.0753584928301434, | |
| "grad_norm": 1.1111286878585815, | |
| "learning_rate": 1.0842950126413742e-05, | |
| "loss": 0.5137, | |
| "num_input_tokens_seen": 48549184, | |
| "step": 4325, | |
| "train_runtime": 7346.6107, | |
| "train_tokens_per_second": 6608.378 | |
| }, | |
| { | |
| "epoch": 2.0777584448311033, | |
| "grad_norm": 0.8526914119720459, | |
| "learning_rate": 1.0791224690656384e-05, | |
| "loss": 0.4573, | |
| "num_input_tokens_seen": 48601016, | |
| "step": 4330, | |
| "train_runtime": 7354.8806, | |
| "train_tokens_per_second": 6607.995 | |
| }, | |
| { | |
| "epoch": 2.080158396832063, | |
| "grad_norm": 0.5850500464439392, | |
| "learning_rate": 1.0739588947833593e-05, | |
| "loss": 0.4814, | |
| "num_input_tokens_seen": 48655504, | |
| "step": 4335, | |
| "train_runtime": 7363.7381, | |
| "train_tokens_per_second": 6607.446 | |
| }, | |
| { | |
| "epoch": 2.0825583488330235, | |
| "grad_norm": 1.0572696924209595, | |
| "learning_rate": 1.068804322389616e-05, | |
| "loss": 0.4997, | |
| "num_input_tokens_seen": 48708616, | |
| "step": 4340, | |
| "train_runtime": 7372.6454, | |
| "train_tokens_per_second": 6606.667 | |
| }, | |
| { | |
| "epoch": 2.0849583008339834, | |
| "grad_norm": 0.5862051844596863, | |
| "learning_rate": 1.06365878442266e-05, | |
| "loss": 0.4459, | |
| "num_input_tokens_seen": 48769440, | |
| "step": 4345, | |
| "train_runtime": 7382.0777, | |
| "train_tokens_per_second": 6606.465 | |
| }, | |
| { | |
| "epoch": 2.0873582528349433, | |
| "grad_norm": 0.7404434680938721, | |
| "learning_rate": 1.0585223133637143e-05, | |
| "loss": 0.4882, | |
| "num_input_tokens_seen": 48827720, | |
| "step": 4350, | |
| "train_runtime": 7391.0584, | |
| "train_tokens_per_second": 6606.323 | |
| }, | |
| { | |
| "epoch": 2.089758204835903, | |
| "grad_norm": 0.7802624106407166, | |
| "learning_rate": 1.053394941636768e-05, | |
| "loss": 0.5322, | |
| "num_input_tokens_seen": 48879552, | |
| "step": 4355, | |
| "train_runtime": 7398.7138, | |
| "train_tokens_per_second": 6606.493 | |
| }, | |
| { | |
| "epoch": 2.0921581568368635, | |
| "grad_norm": 0.7315226197242737, | |
| "learning_rate": 1.0482767016083694e-05, | |
| "loss": 0.4515, | |
| "num_input_tokens_seen": 48932848, | |
| "step": 4360, | |
| "train_runtime": 7406.1993, | |
| "train_tokens_per_second": 6607.012 | |
| }, | |
| { | |
| "epoch": 2.0945581088378233, | |
| "grad_norm": 0.967128574848175, | |
| "learning_rate": 1.0431676255874232e-05, | |
| "loss": 0.5213, | |
| "num_input_tokens_seen": 48989744, | |
| "step": 4365, | |
| "train_runtime": 7414.1239, | |
| "train_tokens_per_second": 6607.624 | |
| }, | |
| { | |
| "epoch": 2.0969580608387832, | |
| "grad_norm": 0.731792151927948, | |
| "learning_rate": 1.0380677458249852e-05, | |
| "loss": 0.4821, | |
| "num_input_tokens_seen": 49043888, | |
| "step": 4370, | |
| "train_runtime": 7421.75, | |
| "train_tokens_per_second": 6608.13 | |
| }, | |
| { | |
| "epoch": 2.099358012839743, | |
| "grad_norm": 0.8551647067070007, | |
| "learning_rate": 1.0329770945140618e-05, | |
| "loss": 0.5018, | |
| "num_input_tokens_seen": 49099976, | |
| "step": 4375, | |
| "train_runtime": 7429.6538, | |
| "train_tokens_per_second": 6608.649 | |
| }, | |
| { | |
| "epoch": 2.101757964840703, | |
| "grad_norm": 0.8482736945152283, | |
| "learning_rate": 1.0278957037894048e-05, | |
| "loss": 0.5266, | |
| "num_input_tokens_seen": 49158168, | |
| "step": 4380, | |
| "train_runtime": 7437.7108, | |
| "train_tokens_per_second": 6609.314 | |
| }, | |
| { | |
| "epoch": 2.1041579168416633, | |
| "grad_norm": 0.8070186376571655, | |
| "learning_rate": 1.0228236057273063e-05, | |
| "loss": 0.4906, | |
| "num_input_tokens_seen": 49209920, | |
| "step": 4385, | |
| "train_runtime": 7445.1797, | |
| "train_tokens_per_second": 6609.635 | |
| }, | |
| { | |
| "epoch": 2.106557868842623, | |
| "grad_norm": 0.7493661046028137, | |
| "learning_rate": 1.0177608323454008e-05, | |
| "loss": 0.5067, | |
| "num_input_tokens_seen": 49262384, | |
| "step": 4390, | |
| "train_runtime": 7452.9186, | |
| "train_tokens_per_second": 6609.811 | |
| }, | |
| { | |
| "epoch": 2.108957820843583, | |
| "grad_norm": 0.7874744534492493, | |
| "learning_rate": 1.0127074156024594e-05, | |
| "loss": 0.4642, | |
| "num_input_tokens_seen": 49315632, | |
| "step": 4395, | |
| "train_runtime": 7460.8462, | |
| "train_tokens_per_second": 6609.925 | |
| }, | |
| { | |
| "epoch": 2.111357772844543, | |
| "grad_norm": 0.9224854707717896, | |
| "learning_rate": 1.0076633873981883e-05, | |
| "loss": 0.4984, | |
| "num_input_tokens_seen": 49371384, | |
| "step": 4400, | |
| "train_runtime": 7468.9769, | |
| "train_tokens_per_second": 6610.194 | |
| }, | |
| { | |
| "epoch": 2.1137577248455033, | |
| "grad_norm": 0.8540477156639099, | |
| "learning_rate": 1.0026287795730319e-05, | |
| "loss": 0.4767, | |
| "num_input_tokens_seen": 49426056, | |
| "step": 4405, | |
| "train_runtime": 7477.3027, | |
| "train_tokens_per_second": 6610.145 | |
| }, | |
| { | |
| "epoch": 2.116157676846463, | |
| "grad_norm": 1.0904680490493774, | |
| "learning_rate": 9.976036239079656e-06, | |
| "loss": 0.491, | |
| "num_input_tokens_seen": 49483160, | |
| "step": 4410, | |
| "train_runtime": 7485.9905, | |
| "train_tokens_per_second": 6610.102 | |
| }, | |
| { | |
| "epoch": 2.118557628847423, | |
| "grad_norm": 0.5771769881248474, | |
| "learning_rate": 9.925879521242978e-06, | |
| "loss": 0.4566, | |
| "num_input_tokens_seen": 49537568, | |
| "step": 4415, | |
| "train_runtime": 7494.7254, | |
| "train_tokens_per_second": 6609.657 | |
| }, | |
| { | |
| "epoch": 2.120957580848383, | |
| "grad_norm": 0.765743613243103, | |
| "learning_rate": 9.87581795883473e-06, | |
| "loss": 0.4878, | |
| "num_input_tokens_seen": 49594120, | |
| "step": 4420, | |
| "train_runtime": 7503.2956, | |
| "train_tokens_per_second": 6609.645 | |
| }, | |
| { | |
| "epoch": 2.123357532849343, | |
| "grad_norm": 0.8731431365013123, | |
| "learning_rate": 9.825851867868646e-06, | |
| "loss": 0.4871, | |
| "num_input_tokens_seen": 49647944, | |
| "step": 4425, | |
| "train_runtime": 7511.5023, | |
| "train_tokens_per_second": 6609.589 | |
| }, | |
| { | |
| "epoch": 2.125757484850303, | |
| "grad_norm": 0.9633266925811768, | |
| "learning_rate": 9.775981563755835e-06, | |
| "loss": 0.4747, | |
| "num_input_tokens_seen": 49702848, | |
| "step": 4430, | |
| "train_runtime": 7520.7688, | |
| "train_tokens_per_second": 6608.746 | |
| }, | |
| { | |
| "epoch": 2.128157436851263, | |
| "grad_norm": 0.8484842777252197, | |
| "learning_rate": 9.726207361302716e-06, | |
| "loss": 0.4871, | |
| "num_input_tokens_seen": 49754336, | |
| "step": 4435, | |
| "train_runtime": 7530.7578, | |
| "train_tokens_per_second": 6606.817 | |
| }, | |
| { | |
| "epoch": 2.130557388852223, | |
| "grad_norm": 0.6933907270431519, | |
| "learning_rate": 9.676529574709104e-06, | |
| "loss": 0.4813, | |
| "num_input_tokens_seen": 49818104, | |
| "step": 4440, | |
| "train_runtime": 7543.2161, | |
| "train_tokens_per_second": 6604.359 | |
| }, | |
| { | |
| "epoch": 2.132957340853183, | |
| "grad_norm": 0.8864620327949524, | |
| "learning_rate": 9.62694851756616e-06, | |
| "loss": 0.5196, | |
| "num_input_tokens_seen": 49872640, | |
| "step": 4445, | |
| "train_runtime": 7553.5538, | |
| "train_tokens_per_second": 6602.54 | |
| }, | |
| { | |
| "epoch": 2.135357292854143, | |
| "grad_norm": 0.6627900004386902, | |
| "learning_rate": 9.577464502854432e-06, | |
| "loss": 0.441, | |
| "num_input_tokens_seen": 49929176, | |
| "step": 4450, | |
| "train_runtime": 7563.6578, | |
| "train_tokens_per_second": 6601.194 | |
| }, | |
| { | |
| "epoch": 2.137757244855103, | |
| "grad_norm": 0.8925694823265076, | |
| "learning_rate": 9.528077842941929e-06, | |
| "loss": 0.4755, | |
| "num_input_tokens_seen": 49984040, | |
| "step": 4455, | |
| "train_runtime": 7574.4434, | |
| "train_tokens_per_second": 6599.038 | |
| }, | |
| { | |
| "epoch": 2.140157196856063, | |
| "grad_norm": 0.7881972789764404, | |
| "learning_rate": 9.478788849582071e-06, | |
| "loss": 0.4841, | |
| "num_input_tokens_seen": 50036368, | |
| "step": 4460, | |
| "train_runtime": 7584.774, | |
| "train_tokens_per_second": 6596.949 | |
| }, | |
| { | |
| "epoch": 2.1425571488570228, | |
| "grad_norm": 0.7480626106262207, | |
| "learning_rate": 9.42959783391176e-06, | |
| "loss": 0.4813, | |
| "num_input_tokens_seen": 50091376, | |
| "step": 4465, | |
| "train_runtime": 7594.7525, | |
| "train_tokens_per_second": 6595.524 | |
| }, | |
| { | |
| "epoch": 2.1449571008579826, | |
| "grad_norm": 0.8503336310386658, | |
| "learning_rate": 9.38050510644944e-06, | |
| "loss": 0.4844, | |
| "num_input_tokens_seen": 50148472, | |
| "step": 4470, | |
| "train_runtime": 7604.4881, | |
| "train_tokens_per_second": 6594.589 | |
| }, | |
| { | |
| "epoch": 2.147357052858943, | |
| "grad_norm": 0.896701991558075, | |
| "learning_rate": 9.331510977093077e-06, | |
| "loss": 0.4784, | |
| "num_input_tokens_seen": 50202392, | |
| "step": 4475, | |
| "train_runtime": 7614.8511, | |
| "train_tokens_per_second": 6592.695 | |
| }, | |
| { | |
| "epoch": 2.149757004859903, | |
| "grad_norm": 0.7483791708946228, | |
| "learning_rate": 9.282615755118266e-06, | |
| "loss": 0.4473, | |
| "num_input_tokens_seen": 50262048, | |
| "step": 4480, | |
| "train_runtime": 7625.4864, | |
| "train_tokens_per_second": 6591.324 | |
| }, | |
| { | |
| "epoch": 2.1521569568608627, | |
| "grad_norm": 0.8028972148895264, | |
| "learning_rate": 9.23381974917622e-06, | |
| "loss": 0.4611, | |
| "num_input_tokens_seen": 50318512, | |
| "step": 4485, | |
| "train_runtime": 7635.9972, | |
| "train_tokens_per_second": 6589.645 | |
| }, | |
| { | |
| "epoch": 2.1545569088618226, | |
| "grad_norm": 0.7019287347793579, | |
| "learning_rate": 9.185123267291881e-06, | |
| "loss": 0.4622, | |
| "num_input_tokens_seen": 50371472, | |
| "step": 4490, | |
| "train_runtime": 7645.9049, | |
| "train_tokens_per_second": 6588.033 | |
| }, | |
| { | |
| "epoch": 2.156956860862783, | |
| "grad_norm": 0.849296510219574, | |
| "learning_rate": 9.136526616861921e-06, | |
| "loss": 0.501, | |
| "num_input_tokens_seen": 50425888, | |
| "step": 4495, | |
| "train_runtime": 7656.303, | |
| "train_tokens_per_second": 6586.193 | |
| }, | |
| { | |
| "epoch": 2.159356812863743, | |
| "grad_norm": 0.5608788728713989, | |
| "learning_rate": 9.088030104652829e-06, | |
| "loss": 0.4828, | |
| "num_input_tokens_seen": 50484136, | |
| "step": 4500, | |
| "train_runtime": 7666.3853, | |
| "train_tokens_per_second": 6585.129 | |
| }, | |
| { | |
| "epoch": 2.1617567648647027, | |
| "grad_norm": 0.7533180713653564, | |
| "learning_rate": 9.03963403679899e-06, | |
| "loss": 0.463, | |
| "num_input_tokens_seen": 50540376, | |
| "step": 4505, | |
| "train_runtime": 7676.9709, | |
| "train_tokens_per_second": 6583.375 | |
| }, | |
| { | |
| "epoch": 2.1641567168656626, | |
| "grad_norm": 0.8343721032142639, | |
| "learning_rate": 8.99133871880071e-06, | |
| "loss": 0.4948, | |
| "num_input_tokens_seen": 50594968, | |
| "step": 4510, | |
| "train_runtime": 7687.0369, | |
| "train_tokens_per_second": 6581.856 | |
| }, | |
| { | |
| "epoch": 2.1665566688666225, | |
| "grad_norm": 1.0494121313095093, | |
| "learning_rate": 8.943144455522314e-06, | |
| "loss": 0.4919, | |
| "num_input_tokens_seen": 50649296, | |
| "step": 4515, | |
| "train_runtime": 7697.4192, | |
| "train_tokens_per_second": 6580.036 | |
| }, | |
| { | |
| "epoch": 2.168956620867583, | |
| "grad_norm": 0.8824997544288635, | |
| "learning_rate": 8.895051551190248e-06, | |
| "loss": 0.4279, | |
| "num_input_tokens_seen": 50706696, | |
| "step": 4520, | |
| "train_runtime": 7707.9222, | |
| "train_tokens_per_second": 6578.517 | |
| }, | |
| { | |
| "epoch": 2.1713565728685427, | |
| "grad_norm": 0.8693490028381348, | |
| "learning_rate": 8.847060309391084e-06, | |
| "loss": 0.4776, | |
| "num_input_tokens_seen": 50758984, | |
| "step": 4525, | |
| "train_runtime": 7717.5559, | |
| "train_tokens_per_second": 6577.08 | |
| }, | |
| { | |
| "epoch": 2.1737565248695025, | |
| "grad_norm": 0.6775808334350586, | |
| "learning_rate": 8.799171033069695e-06, | |
| "loss": 0.4821, | |
| "num_input_tokens_seen": 50812536, | |
| "step": 4530, | |
| "train_runtime": 7727.2348, | |
| "train_tokens_per_second": 6575.772 | |
| }, | |
| { | |
| "epoch": 2.1761564768704624, | |
| "grad_norm": 0.7019457817077637, | |
| "learning_rate": 8.75138402452725e-06, | |
| "loss": 0.4698, | |
| "num_input_tokens_seen": 50867192, | |
| "step": 4535, | |
| "train_runtime": 7737.0022, | |
| "train_tokens_per_second": 6574.535 | |
| }, | |
| { | |
| "epoch": 2.1785564288714228, | |
| "grad_norm": 0.6866047978401184, | |
| "learning_rate": 8.7036995854194e-06, | |
| "loss": 0.4612, | |
| "num_input_tokens_seen": 50925384, | |
| "step": 4540, | |
| "train_runtime": 7746.4582, | |
| "train_tokens_per_second": 6574.022 | |
| }, | |
| { | |
| "epoch": 2.1809563808723826, | |
| "grad_norm": 0.605133593082428, | |
| "learning_rate": 8.656118016754292e-06, | |
| "loss": 0.4939, | |
| "num_input_tokens_seen": 50983216, | |
| "step": 4545, | |
| "train_runtime": 7757.2379, | |
| "train_tokens_per_second": 6572.341 | |
| }, | |
| { | |
| "epoch": 2.1833563328733425, | |
| "grad_norm": 0.6981828212738037, | |
| "learning_rate": 8.608639618890702e-06, | |
| "loss": 0.5204, | |
| "num_input_tokens_seen": 51038664, | |
| "step": 4550, | |
| "train_runtime": 7767.547, | |
| "train_tokens_per_second": 6570.757 | |
| }, | |
| { | |
| "epoch": 2.1857562848743024, | |
| "grad_norm": 0.8705071806907654, | |
| "learning_rate": 8.561264691536172e-06, | |
| "loss": 0.4907, | |
| "num_input_tokens_seen": 51096648, | |
| "step": 4555, | |
| "train_runtime": 7777.3381, | |
| "train_tokens_per_second": 6569.94 | |
| }, | |
| { | |
| "epoch": 2.1881562368752627, | |
| "grad_norm": 0.7312107682228088, | |
| "learning_rate": 8.51399353374506e-06, | |
| "loss": 0.5114, | |
| "num_input_tokens_seen": 51152456, | |
| "step": 4560, | |
| "train_runtime": 7787.2126, | |
| "train_tokens_per_second": 6568.776 | |
| }, | |
| { | |
| "epoch": 2.1905561888762226, | |
| "grad_norm": 0.8138951063156128, | |
| "learning_rate": 8.466826443916667e-06, | |
| "loss": 0.4822, | |
| "num_input_tokens_seen": 51207840, | |
| "step": 4565, | |
| "train_runtime": 7796.4469, | |
| "train_tokens_per_second": 6568.1 | |
| }, | |
| { | |
| "epoch": 2.1929561408771825, | |
| "grad_norm": 0.6703912019729614, | |
| "learning_rate": 8.4197637197934e-06, | |
| "loss": 0.4849, | |
| "num_input_tokens_seen": 51261448, | |
| "step": 4570, | |
| "train_runtime": 7806.184, | |
| "train_tokens_per_second": 6566.774 | |
| }, | |
| { | |
| "epoch": 2.1953560928781424, | |
| "grad_norm": 0.9687227010726929, | |
| "learning_rate": 8.37280565845884e-06, | |
| "loss": 0.467, | |
| "num_input_tokens_seen": 51317720, | |
| "step": 4575, | |
| "train_runtime": 7816.3502, | |
| "train_tokens_per_second": 6565.433 | |
| }, | |
| { | |
| "epoch": 2.1977560448791023, | |
| "grad_norm": 0.8064000606536865, | |
| "learning_rate": 8.325952556335878e-06, | |
| "loss": 0.4851, | |
| "num_input_tokens_seen": 51372576, | |
| "step": 4580, | |
| "train_runtime": 7825.9422, | |
| "train_tokens_per_second": 6564.395 | |
| }, | |
| { | |
| "epoch": 2.2001559968800626, | |
| "grad_norm": 0.8729395866394043, | |
| "learning_rate": 8.279204709184843e-06, | |
| "loss": 0.5434, | |
| "num_input_tokens_seen": 51422552, | |
| "step": 4585, | |
| "train_runtime": 7835.905, | |
| "train_tokens_per_second": 6562.427 | |
| }, | |
| { | |
| "epoch": 2.2025559488810225, | |
| "grad_norm": 0.898769199848175, | |
| "learning_rate": 8.232562412101674e-06, | |
| "loss": 0.5217, | |
| "num_input_tokens_seen": 51477960, | |
| "step": 4590, | |
| "train_runtime": 7846.3182, | |
| "train_tokens_per_second": 6560.779 | |
| }, | |
| { | |
| "epoch": 2.2049559008819823, | |
| "grad_norm": 0.9951900243759155, | |
| "learning_rate": 8.186025959515995e-06, | |
| "loss": 0.4839, | |
| "num_input_tokens_seen": 51537952, | |
| "step": 4595, | |
| "train_runtime": 7856.7191, | |
| "train_tokens_per_second": 6559.73 | |
| }, | |
| { | |
| "epoch": 2.2073558528829422, | |
| "grad_norm": 0.8248569965362549, | |
| "learning_rate": 8.139595645189282e-06, | |
| "loss": 0.4497, | |
| "num_input_tokens_seen": 51592688, | |
| "step": 4600, | |
| "train_runtime": 7866.4031, | |
| "train_tokens_per_second": 6558.612 | |
| }, | |
| { | |
| "epoch": 2.209755804883902, | |
| "grad_norm": 0.8907241821289062, | |
| "learning_rate": 8.09327176221305e-06, | |
| "loss": 0.4774, | |
| "num_input_tokens_seen": 51645280, | |
| "step": 4605, | |
| "train_runtime": 7876.1364, | |
| "train_tokens_per_second": 6557.185 | |
| }, | |
| { | |
| "epoch": 2.2121557568848624, | |
| "grad_norm": 0.6718706488609314, | |
| "learning_rate": 8.047054603006931e-06, | |
| "loss": 0.5308, | |
| "num_input_tokens_seen": 51698536, | |
| "step": 4610, | |
| "train_runtime": 7886.3852, | |
| "train_tokens_per_second": 6555.416 | |
| }, | |
| { | |
| "epoch": 2.2145557088858223, | |
| "grad_norm": 0.6906898617744446, | |
| "learning_rate": 8.000944459316864e-06, | |
| "loss": 0.4422, | |
| "num_input_tokens_seen": 51756256, | |
| "step": 4615, | |
| "train_runtime": 7897.4196, | |
| "train_tokens_per_second": 6553.565 | |
| }, | |
| { | |
| "epoch": 2.216955660886782, | |
| "grad_norm": 0.7952353954315186, | |
| "learning_rate": 7.954941622213272e-06, | |
| "loss": 0.5049, | |
| "num_input_tokens_seen": 51813256, | |
| "step": 4620, | |
| "train_runtime": 7907.2916, | |
| "train_tokens_per_second": 6552.592 | |
| }, | |
| { | |
| "epoch": 2.219355612887742, | |
| "grad_norm": 0.7251629829406738, | |
| "learning_rate": 7.909046382089203e-06, | |
| "loss": 0.4541, | |
| "num_input_tokens_seen": 51867560, | |
| "step": 4625, | |
| "train_runtime": 7917.3897, | |
| "train_tokens_per_second": 6551.093 | |
| }, | |
| { | |
| "epoch": 2.2217555648887024, | |
| "grad_norm": 0.7001914978027344, | |
| "learning_rate": 7.863259028658485e-06, | |
| "loss": 0.4918, | |
| "num_input_tokens_seen": 51920280, | |
| "step": 4630, | |
| "train_runtime": 7927.2271, | |
| "train_tokens_per_second": 6549.614 | |
| }, | |
| { | |
| "epoch": 2.2241555168896623, | |
| "grad_norm": 0.722760021686554, | |
| "learning_rate": 7.817579850953904e-06, | |
| "loss": 0.4356, | |
| "num_input_tokens_seen": 51975984, | |
| "step": 4635, | |
| "train_runtime": 7939.1498, | |
| "train_tokens_per_second": 6546.795 | |
| }, | |
| { | |
| "epoch": 2.226555468890622, | |
| "grad_norm": 0.8394641876220703, | |
| "learning_rate": 7.77200913732542e-06, | |
| "loss": 0.5007, | |
| "num_input_tokens_seen": 52031784, | |
| "step": 4640, | |
| "train_runtime": 7948.6393, | |
| "train_tokens_per_second": 6545.999 | |
| }, | |
| { | |
| "epoch": 2.228955420891582, | |
| "grad_norm": 0.8581427335739136, | |
| "learning_rate": 7.72654717543828e-06, | |
| "loss": 0.4482, | |
| "num_input_tokens_seen": 52086728, | |
| "step": 4645, | |
| "train_runtime": 7958.8142, | |
| "train_tokens_per_second": 6544.534 | |
| }, | |
| { | |
| "epoch": 2.2313553728925424, | |
| "grad_norm": 0.8242650032043457, | |
| "learning_rate": 7.681194252271242e-06, | |
| "loss": 0.4219, | |
| "num_input_tokens_seen": 52143544, | |
| "step": 4650, | |
| "train_runtime": 7968.3907, | |
| "train_tokens_per_second": 6543.799 | |
| }, | |
| { | |
| "epoch": 2.2337553248935023, | |
| "grad_norm": 0.7680621147155762, | |
| "learning_rate": 7.635950654114782e-06, | |
| "loss": 0.4771, | |
| "num_input_tokens_seen": 52203016, | |
| "step": 4655, | |
| "train_runtime": 7978.6952, | |
| "train_tokens_per_second": 6542.801 | |
| }, | |
| { | |
| "epoch": 2.236155276894462, | |
| "grad_norm": 0.6597278118133545, | |
| "learning_rate": 7.5908166665692285e-06, | |
| "loss": 0.4791, | |
| "num_input_tokens_seen": 52258320, | |
| "step": 4660, | |
| "train_runtime": 7988.4947, | |
| "train_tokens_per_second": 6541.698 | |
| }, | |
| { | |
| "epoch": 2.238555228895422, | |
| "grad_norm": 0.8721866011619568, | |
| "learning_rate": 7.545792574543003e-06, | |
| "loss": 0.4895, | |
| "num_input_tokens_seen": 52313336, | |
| "step": 4665, | |
| "train_runtime": 7998.6775, | |
| "train_tokens_per_second": 6540.248 | |
| }, | |
| { | |
| "epoch": 2.240955180896382, | |
| "grad_norm": 1.1070098876953125, | |
| "learning_rate": 7.500878662250818e-06, | |
| "loss": 0.5019, | |
| "num_input_tokens_seen": 52366728, | |
| "step": 4670, | |
| "train_runtime": 8008.484, | |
| "train_tokens_per_second": 6538.906 | |
| }, | |
| { | |
| "epoch": 2.2433551328973422, | |
| "grad_norm": 0.6862952709197998, | |
| "learning_rate": 7.456075213211883e-06, | |
| "loss": 0.4622, | |
| "num_input_tokens_seen": 52423136, | |
| "step": 4675, | |
| "train_runtime": 8018.9005, | |
| "train_tokens_per_second": 6537.447 | |
| }, | |
| { | |
| "epoch": 2.245755084898302, | |
| "grad_norm": 0.7063257098197937, | |
| "learning_rate": 7.411382510248091e-06, | |
| "loss": 0.4422, | |
| "num_input_tokens_seen": 52480088, | |
| "step": 4680, | |
| "train_runtime": 8028.8285, | |
| "train_tokens_per_second": 6536.456 | |
| }, | |
| { | |
| "epoch": 2.248155036899262, | |
| "grad_norm": 0.7958875894546509, | |
| "learning_rate": 7.366800835482246e-06, | |
| "loss": 0.4774, | |
| "num_input_tokens_seen": 52538696, | |
| "step": 4685, | |
| "train_runtime": 8038.5124, | |
| "train_tokens_per_second": 6535.873 | |
| }, | |
| { | |
| "epoch": 2.250554988900222, | |
| "grad_norm": 0.7092862725257874, | |
| "learning_rate": 7.3223304703363135e-06, | |
| "loss": 0.4537, | |
| "num_input_tokens_seen": 52598800, | |
| "step": 4690, | |
| "train_runtime": 8049.8427, | |
| "train_tokens_per_second": 6534.14 | |
| }, | |
| { | |
| "epoch": 2.2529549409011818, | |
| "grad_norm": 0.6956859230995178, | |
| "learning_rate": 7.277971695529592e-06, | |
| "loss": 0.4435, | |
| "num_input_tokens_seen": 52657280, | |
| "step": 4695, | |
| "train_runtime": 8060.2392, | |
| "train_tokens_per_second": 6532.967 | |
| }, | |
| { | |
| "epoch": 2.255354892902142, | |
| "grad_norm": 0.6482681632041931, | |
| "learning_rate": 7.233724791076968e-06, | |
| "loss": 0.455, | |
| "num_input_tokens_seen": 52713952, | |
| "step": 4700, | |
| "train_runtime": 8070.1937, | |
| "train_tokens_per_second": 6531.931 | |
| }, | |
| { | |
| "epoch": 2.257754844903102, | |
| "grad_norm": 0.7593861222267151, | |
| "learning_rate": 7.189590036287167e-06, | |
| "loss": 0.4506, | |
| "num_input_tokens_seen": 52772688, | |
| "step": 4705, | |
| "train_runtime": 8080.8866, | |
| "train_tokens_per_second": 6530.557 | |
| }, | |
| { | |
| "epoch": 2.260154796904062, | |
| "grad_norm": 0.8229504823684692, | |
| "learning_rate": 7.145567709760942e-06, | |
| "loss": 0.4944, | |
| "num_input_tokens_seen": 52829984, | |
| "step": 4710, | |
| "train_runtime": 8091.297, | |
| "train_tokens_per_second": 6529.236 | |
| }, | |
| { | |
| "epoch": 2.2625547489050217, | |
| "grad_norm": 0.7563186287879944, | |
| "learning_rate": 7.1016580893893514e-06, | |
| "loss": 0.485, | |
| "num_input_tokens_seen": 52888368, | |
| "step": 4715, | |
| "train_runtime": 8102.4796, | |
| "train_tokens_per_second": 6527.43 | |
| }, | |
| { | |
| "epoch": 2.264954700905982, | |
| "grad_norm": 0.8408580422401428, | |
| "learning_rate": 7.057861452352005e-06, | |
| "loss": 0.4722, | |
| "num_input_tokens_seen": 52945664, | |
| "step": 4720, | |
| "train_runtime": 8112.5815, | |
| "train_tokens_per_second": 6526.364 | |
| }, | |
| { | |
| "epoch": 2.267354652906942, | |
| "grad_norm": 0.791147768497467, | |
| "learning_rate": 7.014178075115305e-06, | |
| "loss": 0.5043, | |
| "num_input_tokens_seen": 53001096, | |
| "step": 4725, | |
| "train_runtime": 8122.5542, | |
| "train_tokens_per_second": 6525.176 | |
| }, | |
| { | |
| "epoch": 2.269754604907902, | |
| "grad_norm": 0.8713123798370361, | |
| "learning_rate": 6.9706082334306895e-06, | |
| "loss": 0.4978, | |
| "num_input_tokens_seen": 53054936, | |
| "step": 4730, | |
| "train_runtime": 8132.2978, | |
| "train_tokens_per_second": 6523.979 | |
| }, | |
| { | |
| "epoch": 2.2721545569088617, | |
| "grad_norm": 0.9158002734184265, | |
| "learning_rate": 6.927152202332898e-06, | |
| "loss": 0.4493, | |
| "num_input_tokens_seen": 53115032, | |
| "step": 4735, | |
| "train_runtime": 8142.6092, | |
| "train_tokens_per_second": 6523.097 | |
| }, | |
| { | |
| "epoch": 2.274554508909822, | |
| "grad_norm": 0.8470547795295715, | |
| "learning_rate": 6.883810256138268e-06, | |
| "loss": 0.5082, | |
| "num_input_tokens_seen": 53168048, | |
| "step": 4740, | |
| "train_runtime": 8152.4189, | |
| "train_tokens_per_second": 6521.751 | |
| }, | |
| { | |
| "epoch": 2.276954460910782, | |
| "grad_norm": 0.8152704834938049, | |
| "learning_rate": 6.8405826684429495e-06, | |
| "loss": 0.4622, | |
| "num_input_tokens_seen": 53228112, | |
| "step": 4745, | |
| "train_runtime": 8163.4113, | |
| "train_tokens_per_second": 6520.327 | |
| }, | |
| { | |
| "epoch": 2.279354412911742, | |
| "grad_norm": 1.1918436288833618, | |
| "learning_rate": 6.7974697121212044e-06, | |
| "loss": 0.475, | |
| "num_input_tokens_seen": 53282056, | |
| "step": 4750, | |
| "train_runtime": 8172.6885, | |
| "train_tokens_per_second": 6519.526 | |
| }, | |
| { | |
| "epoch": 2.2817543649127017, | |
| "grad_norm": 0.8063285946846008, | |
| "learning_rate": 6.754471659323708e-06, | |
| "loss": 0.4444, | |
| "num_input_tokens_seen": 53342728, | |
| "step": 4755, | |
| "train_runtime": 8181.7917, | |
| "train_tokens_per_second": 6519.688 | |
| }, | |
| { | |
| "epoch": 2.2841543169136616, | |
| "grad_norm": 0.8364700078964233, | |
| "learning_rate": 6.711588781475786e-06, | |
| "loss": 0.4833, | |
| "num_input_tokens_seen": 53397656, | |
| "step": 4760, | |
| "train_runtime": 8189.9068, | |
| "train_tokens_per_second": 6519.935 | |
| }, | |
| { | |
| "epoch": 2.286554268914622, | |
| "grad_norm": 0.8302350640296936, | |
| "learning_rate": 6.668821349275714e-06, | |
| "loss": 0.4532, | |
| "num_input_tokens_seen": 53452736, | |
| "step": 4765, | |
| "train_runtime": 8198.4472, | |
| "train_tokens_per_second": 6519.861 | |
| }, | |
| { | |
| "epoch": 2.2889542209155818, | |
| "grad_norm": 0.7638778686523438, | |
| "learning_rate": 6.626169632693041e-06, | |
| "loss": 0.4679, | |
| "num_input_tokens_seen": 53510640, | |
| "step": 4770, | |
| "train_runtime": 8207.0649, | |
| "train_tokens_per_second": 6520.07 | |
| }, | |
| { | |
| "epoch": 2.2913541729165416, | |
| "grad_norm": 0.6307675242424011, | |
| "learning_rate": 6.5836339009668564e-06, | |
| "loss": 0.4336, | |
| "num_input_tokens_seen": 53568536, | |
| "step": 4775, | |
| "train_runtime": 8216.2863, | |
| "train_tokens_per_second": 6519.799 | |
| }, | |
| { | |
| "epoch": 2.2937541249175015, | |
| "grad_norm": 0.7008303999900818, | |
| "learning_rate": 6.541214422604078e-06, | |
| "loss": 0.4903, | |
| "num_input_tokens_seen": 53623272, | |
| "step": 4780, | |
| "train_runtime": 8224.518, | |
| "train_tokens_per_second": 6519.929 | |
| }, | |
| { | |
| "epoch": 2.2961540769184614, | |
| "grad_norm": 0.7568659782409668, | |
| "learning_rate": 6.49891146537778e-06, | |
| "loss": 0.4665, | |
| "num_input_tokens_seen": 53680840, | |
| "step": 4785, | |
| "train_runtime": 8233.1619, | |
| "train_tokens_per_second": 6520.076 | |
| }, | |
| { | |
| "epoch": 2.2985540289194217, | |
| "grad_norm": 0.7729014158248901, | |
| "learning_rate": 6.456725296325511e-06, | |
| "loss": 0.4648, | |
| "num_input_tokens_seen": 53736888, | |
| "step": 4790, | |
| "train_runtime": 8241.812, | |
| "train_tokens_per_second": 6520.033 | |
| }, | |
| { | |
| "epoch": 2.3009539809203816, | |
| "grad_norm": 0.8767671585083008, | |
| "learning_rate": 6.414656181747578e-06, | |
| "loss": 0.4426, | |
| "num_input_tokens_seen": 53793888, | |
| "step": 4795, | |
| "train_runtime": 8250.382, | |
| "train_tokens_per_second": 6520.169 | |
| }, | |
| { | |
| "epoch": 2.3033539329213415, | |
| "grad_norm": 0.5542830228805542, | |
| "learning_rate": 6.3727043872053775e-06, | |
| "loss": 0.4942, | |
| "num_input_tokens_seen": 53853120, | |
| "step": 4800, | |
| "train_runtime": 8259.364, | |
| "train_tokens_per_second": 6520.25 | |
| }, | |
| { | |
| "epoch": 2.3057538849223014, | |
| "grad_norm": 0.677183985710144, | |
| "learning_rate": 6.330870177519749e-06, | |
| "loss": 0.4601, | |
| "num_input_tokens_seen": 53911008, | |
| "step": 4805, | |
| "train_runtime": 8268.0332, | |
| "train_tokens_per_second": 6520.415 | |
| }, | |
| { | |
| "epoch": 2.3081538369232617, | |
| "grad_norm": 0.6295929551124573, | |
| "learning_rate": 6.2891538167692525e-06, | |
| "loss": 0.4975, | |
| "num_input_tokens_seen": 53970856, | |
| "step": 4810, | |
| "train_runtime": 8276.673, | |
| "train_tokens_per_second": 6520.839 | |
| }, | |
| { | |
| "epoch": 2.3105537889242216, | |
| "grad_norm": 0.6823136806488037, | |
| "learning_rate": 6.247555568288524e-06, | |
| "loss": 0.5108, | |
| "num_input_tokens_seen": 54024760, | |
| "step": 4815, | |
| "train_runtime": 8284.8494, | |
| "train_tokens_per_second": 6520.91 | |
| }, | |
| { | |
| "epoch": 2.3129537409251815, | |
| "grad_norm": 1.1955187320709229, | |
| "learning_rate": 6.2060756946666385e-06, | |
| "loss": 0.4972, | |
| "num_input_tokens_seen": 54079992, | |
| "step": 4820, | |
| "train_runtime": 8293.4716, | |
| "train_tokens_per_second": 6520.791 | |
| }, | |
| { | |
| "epoch": 2.3153536929261413, | |
| "grad_norm": 0.5726960301399231, | |
| "learning_rate": 6.164714457745416e-06, | |
| "loss": 0.4765, | |
| "num_input_tokens_seen": 54137056, | |
| "step": 4825, | |
| "train_runtime": 8302.0452, | |
| "train_tokens_per_second": 6520.93 | |
| }, | |
| { | |
| "epoch": 2.3177536449271017, | |
| "grad_norm": 0.8014964461326599, | |
| "learning_rate": 6.123472118617779e-06, | |
| "loss": 0.502, | |
| "num_input_tokens_seen": 54187216, | |
| "step": 4830, | |
| "train_runtime": 8309.8904, | |
| "train_tokens_per_second": 6520.81 | |
| }, | |
| { | |
| "epoch": 2.3201535969280616, | |
| "grad_norm": 0.6722724437713623, | |
| "learning_rate": 6.082348937626103e-06, | |
| "loss": 0.5223, | |
| "num_input_tokens_seen": 54243408, | |
| "step": 4835, | |
| "train_runtime": 8318.422, | |
| "train_tokens_per_second": 6520.877 | |
| }, | |
| { | |
| "epoch": 2.3225535489290214, | |
| "grad_norm": 0.7219895720481873, | |
| "learning_rate": 6.041345174360602e-06, | |
| "loss": 0.4379, | |
| "num_input_tokens_seen": 54300888, | |
| "step": 4840, | |
| "train_runtime": 8327.0808, | |
| "train_tokens_per_second": 6520.999 | |
| }, | |
| { | |
| "epoch": 2.3249535009299813, | |
| "grad_norm": 0.5452620983123779, | |
| "learning_rate": 6.0004610876576385e-06, | |
| "loss": 0.425, | |
| "num_input_tokens_seen": 54359080, | |
| "step": 4845, | |
| "train_runtime": 8335.9015, | |
| "train_tokens_per_second": 6521.08 | |
| }, | |
| { | |
| "epoch": 2.327353452930941, | |
| "grad_norm": 0.7828608751296997, | |
| "learning_rate": 5.9596969355981165e-06, | |
| "loss": 0.4783, | |
| "num_input_tokens_seen": 54414784, | |
| "step": 4850, | |
| "train_runtime": 8343.8457, | |
| "train_tokens_per_second": 6521.547 | |
| }, | |
| { | |
| "epoch": 2.3297534049319015, | |
| "grad_norm": 0.7745143175125122, | |
| "learning_rate": 5.9190529755058786e-06, | |
| "loss": 0.4625, | |
| "num_input_tokens_seen": 54469544, | |
| "step": 4855, | |
| "train_runtime": 8352.9742, | |
| "train_tokens_per_second": 6520.976 | |
| }, | |
| { | |
| "epoch": 2.3321533569328614, | |
| "grad_norm": 0.7965600490570068, | |
| "learning_rate": 5.878529463946028e-06, | |
| "loss": 0.4517, | |
| "num_input_tokens_seen": 54525088, | |
| "step": 4860, | |
| "train_runtime": 8362.2759, | |
| "train_tokens_per_second": 6520.365 | |
| }, | |
| { | |
| "epoch": 2.3345533089338213, | |
| "grad_norm": 0.7234916090965271, | |
| "learning_rate": 5.838126656723353e-06, | |
| "loss": 0.4848, | |
| "num_input_tokens_seen": 54581656, | |
| "step": 4865, | |
| "train_runtime": 8372.4358, | |
| "train_tokens_per_second": 6519.209 | |
| }, | |
| { | |
| "epoch": 2.336953260934781, | |
| "grad_norm": 0.8496655225753784, | |
| "learning_rate": 5.797844808880681e-06, | |
| "loss": 0.4535, | |
| "num_input_tokens_seen": 54633656, | |
| "step": 4870, | |
| "train_runtime": 8381.8667, | |
| "train_tokens_per_second": 6518.077 | |
| }, | |
| { | |
| "epoch": 2.339353212935741, | |
| "grad_norm": 0.8986937999725342, | |
| "learning_rate": 5.757684174697306e-06, | |
| "loss": 0.5149, | |
| "num_input_tokens_seen": 54688552, | |
| "step": 4875, | |
| "train_runtime": 8392.2449, | |
| "train_tokens_per_second": 6516.558 | |
| }, | |
| { | |
| "epoch": 2.3417531649367014, | |
| "grad_norm": 0.8993620276451111, | |
| "learning_rate": 5.717645007687333e-06, | |
| "loss": 0.4811, | |
| "num_input_tokens_seen": 54745736, | |
| "step": 4880, | |
| "train_runtime": 8401.6978, | |
| "train_tokens_per_second": 6516.032 | |
| }, | |
| { | |
| "epoch": 2.3441531169376613, | |
| "grad_norm": 0.8470688462257385, | |
| "learning_rate": 5.677727560598117e-06, | |
| "loss": 0.4531, | |
| "num_input_tokens_seen": 54801056, | |
| "step": 4885, | |
| "train_runtime": 8411.9299, | |
| "train_tokens_per_second": 6514.683 | |
| }, | |
| { | |
| "epoch": 2.346553068938621, | |
| "grad_norm": 0.7177883982658386, | |
| "learning_rate": 5.637932085408665e-06, | |
| "loss": 0.428, | |
| "num_input_tokens_seen": 54862792, | |
| "step": 4890, | |
| "train_runtime": 8422.5464, | |
| "train_tokens_per_second": 6513.801 | |
| }, | |
| { | |
| "epoch": 2.348953020939581, | |
| "grad_norm": 0.9984344840049744, | |
| "learning_rate": 5.598258833328024e-06, | |
| "loss": 0.5082, | |
| "num_input_tokens_seen": 54917120, | |
| "step": 4895, | |
| "train_runtime": 8432.5181, | |
| "train_tokens_per_second": 6512.541 | |
| }, | |
| { | |
| "epoch": 2.3513529729405414, | |
| "grad_norm": 0.7532204985618591, | |
| "learning_rate": 5.558708054793702e-06, | |
| "loss": 0.4747, | |
| "num_input_tokens_seen": 54970952, | |
| "step": 4900, | |
| "train_runtime": 8442.5045, | |
| "train_tokens_per_second": 6511.214 | |
| }, | |
| { | |
| "epoch": 2.3537529249415012, | |
| "grad_norm": 0.9301844835281372, | |
| "learning_rate": 5.519279999470114e-06, | |
| "loss": 0.4653, | |
| "num_input_tokens_seen": 55030344, | |
| "step": 4905, | |
| "train_runtime": 8453.3379, | |
| "train_tokens_per_second": 6509.895 | |
| }, | |
| { | |
| "epoch": 2.356152876942461, | |
| "grad_norm": 0.7001831531524658, | |
| "learning_rate": 5.47997491624696e-06, | |
| "loss": 0.4505, | |
| "num_input_tokens_seen": 55089240, | |
| "step": 4910, | |
| "train_runtime": 8463.8354, | |
| "train_tokens_per_second": 6508.78 | |
| }, | |
| { | |
| "epoch": 2.358552828943421, | |
| "grad_norm": 1.0007083415985107, | |
| "learning_rate": 5.440793053237703e-06, | |
| "loss": 0.4951, | |
| "num_input_tokens_seen": 55145288, | |
| "step": 4915, | |
| "train_runtime": 8472.6075, | |
| "train_tokens_per_second": 6508.656 | |
| }, | |
| { | |
| "epoch": 2.3609527809443813, | |
| "grad_norm": 0.807292103767395, | |
| "learning_rate": 5.401734657777949e-06, | |
| "loss": 0.4555, | |
| "num_input_tokens_seen": 55202104, | |
| "step": 4920, | |
| "train_runtime": 8481.8958, | |
| "train_tokens_per_second": 6508.227 | |
| }, | |
| { | |
| "epoch": 2.363352732945341, | |
| "grad_norm": 0.8415015339851379, | |
| "learning_rate": 5.362799976423946e-06, | |
| "loss": 0.4936, | |
| "num_input_tokens_seen": 55259704, | |
| "step": 4925, | |
| "train_runtime": 8490.9011, | |
| "train_tokens_per_second": 6508.108 | |
| }, | |
| { | |
| "epoch": 2.365752684946301, | |
| "grad_norm": 0.6624288558959961, | |
| "learning_rate": 5.323989254950973e-06, | |
| "loss": 0.4645, | |
| "num_input_tokens_seen": 55317744, | |
| "step": 4930, | |
| "train_runtime": 8500.053, | |
| "train_tokens_per_second": 6507.929 | |
| }, | |
| { | |
| "epoch": 2.368152636947261, | |
| "grad_norm": 0.8374559283256531, | |
| "learning_rate": 5.285302738351813e-06, | |
| "loss": 0.4797, | |
| "num_input_tokens_seen": 55372296, | |
| "step": 4935, | |
| "train_runtime": 8507.8541, | |
| "train_tokens_per_second": 6508.374 | |
| }, | |
| { | |
| "epoch": 2.370552588948221, | |
| "grad_norm": 0.5884356498718262, | |
| "learning_rate": 5.246740670835227e-06, | |
| "loss": 0.4606, | |
| "num_input_tokens_seen": 55433904, | |
| "step": 4940, | |
| "train_runtime": 8517.3387, | |
| "train_tokens_per_second": 6508.36 | |
| }, | |
| { | |
| "epoch": 2.372952540949181, | |
| "grad_norm": 0.7946999669075012, | |
| "learning_rate": 5.208303295824368e-06, | |
| "loss": 0.4901, | |
| "num_input_tokens_seen": 55489480, | |
| "step": 4945, | |
| "train_runtime": 8525.7706, | |
| "train_tokens_per_second": 6508.442 | |
| }, | |
| { | |
| "epoch": 2.375352492950141, | |
| "grad_norm": 0.8008665442466736, | |
| "learning_rate": 5.16999085595527e-06, | |
| "loss": 0.4489, | |
| "num_input_tokens_seen": 55548432, | |
| "step": 4950, | |
| "train_runtime": 8534.5861, | |
| "train_tokens_per_second": 6508.626 | |
| }, | |
| { | |
| "epoch": 2.377752444951101, | |
| "grad_norm": 0.6131346225738525, | |
| "learning_rate": 5.1318035930753295e-06, | |
| "loss": 0.4751, | |
| "num_input_tokens_seen": 55606952, | |
| "step": 4955, | |
| "train_runtime": 8544.472, | |
| "train_tokens_per_second": 6507.945 | |
| }, | |
| { | |
| "epoch": 2.380152396952061, | |
| "grad_norm": 0.6987022757530212, | |
| "learning_rate": 5.09374174824174e-06, | |
| "loss": 0.4716, | |
| "num_input_tokens_seen": 55665912, | |
| "step": 4960, | |
| "train_runtime": 8553.5875, | |
| "train_tokens_per_second": 6507.902 | |
| }, | |
| { | |
| "epoch": 2.3825523489530207, | |
| "grad_norm": 0.9554920792579651, | |
| "learning_rate": 5.0558055617200205e-06, | |
| "loss": 0.4208, | |
| "num_input_tokens_seen": 55719624, | |
| "step": 4965, | |
| "train_runtime": 8561.4317, | |
| "train_tokens_per_second": 6508.213 | |
| }, | |
| { | |
| "epoch": 2.384952300953981, | |
| "grad_norm": 0.7300603985786438, | |
| "learning_rate": 5.0179952729824395e-06, | |
| "loss": 0.4832, | |
| "num_input_tokens_seen": 55774472, | |
| "step": 4970, | |
| "train_runtime": 8570.1123, | |
| "train_tokens_per_second": 6508.021 | |
| }, | |
| { | |
| "epoch": 2.387352252954941, | |
| "grad_norm": 0.8243890404701233, | |
| "learning_rate": 4.980311120706569e-06, | |
| "loss": 0.5135, | |
| "num_input_tokens_seen": 55826392, | |
| "step": 4975, | |
| "train_runtime": 8578.1037, | |
| "train_tokens_per_second": 6508.011 | |
| }, | |
| { | |
| "epoch": 2.389752204955901, | |
| "grad_norm": 0.7249002456665039, | |
| "learning_rate": 4.942753342773718e-06, | |
| "loss": 0.5443, | |
| "num_input_tokens_seen": 55880968, | |
| "step": 4980, | |
| "train_runtime": 8586.3873, | |
| "train_tokens_per_second": 6508.088 | |
| }, | |
| { | |
| "epoch": 2.3921521569568607, | |
| "grad_norm": 0.883586585521698, | |
| "learning_rate": 4.90532217626746e-06, | |
| "loss": 0.4719, | |
| "num_input_tokens_seen": 55933504, | |
| "step": 4985, | |
| "train_runtime": 8594.9139, | |
| "train_tokens_per_second": 6507.745 | |
| }, | |
| { | |
| "epoch": 2.394552108957821, | |
| "grad_norm": 0.9183365702629089, | |
| "learning_rate": 4.868017857472157e-06, | |
| "loss": 0.4971, | |
| "num_input_tokens_seen": 55986736, | |
| "step": 4990, | |
| "train_runtime": 8603.0537, | |
| "train_tokens_per_second": 6507.775 | |
| }, | |
| { | |
| "epoch": 2.396952060958781, | |
| "grad_norm": 0.9093974232673645, | |
| "learning_rate": 4.830840621871416e-06, | |
| "loss": 0.471, | |
| "num_input_tokens_seen": 56042472, | |
| "step": 4995, | |
| "train_runtime": 8612.3964, | |
| "train_tokens_per_second": 6507.187 | |
| }, | |
| { | |
| "epoch": 2.3993520129597408, | |
| "grad_norm": 0.8658146858215332, | |
| "learning_rate": 4.793790704146639e-06, | |
| "loss": 0.5096, | |
| "num_input_tokens_seen": 56094608, | |
| "step": 5000, | |
| "train_runtime": 8620.6801, | |
| "train_tokens_per_second": 6506.982 | |
| }, | |
| { | |
| "epoch": 2.4017519649607006, | |
| "grad_norm": 0.881760835647583, | |
| "learning_rate": 4.756868338175552e-06, | |
| "loss": 0.4545, | |
| "num_input_tokens_seen": 56152192, | |
| "step": 5005, | |
| "train_runtime": 8628.9033, | |
| "train_tokens_per_second": 6507.454 | |
| }, | |
| { | |
| "epoch": 2.404151916961661, | |
| "grad_norm": 0.6396927833557129, | |
| "learning_rate": 4.7200737570306765e-06, | |
| "loss": 0.482, | |
| "num_input_tokens_seen": 56209072, | |
| "step": 5010, | |
| "train_runtime": 8637.3318, | |
| "train_tokens_per_second": 6507.689 | |
| }, | |
| { | |
| "epoch": 2.406551868962621, | |
| "grad_norm": 0.7207968831062317, | |
| "learning_rate": 4.683407192977923e-06, | |
| "loss": 0.4701, | |
| "num_input_tokens_seen": 56265496, | |
| "step": 5015, | |
| "train_runtime": 8645.9013, | |
| "train_tokens_per_second": 6507.765 | |
| }, | |
| { | |
| "epoch": 2.4089518209635807, | |
| "grad_norm": 0.6970353126525879, | |
| "learning_rate": 4.646868877475083e-06, | |
| "loss": 0.4906, | |
| "num_input_tokens_seen": 56324336, | |
| "step": 5020, | |
| "train_runtime": 8654.8609, | |
| "train_tokens_per_second": 6507.827 | |
| }, | |
| { | |
| "epoch": 2.4113517729645406, | |
| "grad_norm": 0.6664267182350159, | |
| "learning_rate": 4.610459041170376e-06, | |
| "loss": 0.4497, | |
| "num_input_tokens_seen": 56387160, | |
| "step": 5025, | |
| "train_runtime": 8664.2456, | |
| "train_tokens_per_second": 6508.029 | |
| }, | |
| { | |
| "epoch": 2.4137517249655005, | |
| "grad_norm": 0.6361657977104187, | |
| "learning_rate": 4.574177913900992e-06, | |
| "loss": 0.4473, | |
| "num_input_tokens_seen": 56450040, | |
| "step": 5030, | |
| "train_runtime": 8672.7824, | |
| "train_tokens_per_second": 6508.873 | |
| }, | |
| { | |
| "epoch": 2.416151676966461, | |
| "grad_norm": 0.9782693386077881, | |
| "learning_rate": 4.538025724691647e-06, | |
| "loss": 0.5403, | |
| "num_input_tokens_seen": 56509192, | |
| "step": 5035, | |
| "train_runtime": 8680.8979, | |
| "train_tokens_per_second": 6509.602 | |
| }, | |
| { | |
| "epoch": 2.4185516289674207, | |
| "grad_norm": 1.0109143257141113, | |
| "learning_rate": 4.502002701753149e-06, | |
| "loss": 0.4535, | |
| "num_input_tokens_seen": 56564168, | |
| "step": 5040, | |
| "train_runtime": 8689.3056, | |
| "train_tokens_per_second": 6509.63 | |
| }, | |
| { | |
| "epoch": 2.4209515809683806, | |
| "grad_norm": 0.8760951161384583, | |
| "learning_rate": 4.4661090724809286e-06, | |
| "loss": 0.4666, | |
| "num_input_tokens_seen": 56619720, | |
| "step": 5045, | |
| "train_runtime": 8698.0152, | |
| "train_tokens_per_second": 6509.499 | |
| }, | |
| { | |
| "epoch": 2.4233515329693405, | |
| "grad_norm": 0.879936933517456, | |
| "learning_rate": 4.430345063453614e-06, | |
| "loss": 0.4685, | |
| "num_input_tokens_seen": 56674064, | |
| "step": 5050, | |
| "train_runtime": 8707.0335, | |
| "train_tokens_per_second": 6508.998 | |
| }, | |
| { | |
| "epoch": 2.4257514849703004, | |
| "grad_norm": 0.5749469995498657, | |
| "learning_rate": 4.394710900431628e-06, | |
| "loss": 0.5077, | |
| "num_input_tokens_seen": 56730176, | |
| "step": 5055, | |
| "train_runtime": 8715.7157, | |
| "train_tokens_per_second": 6508.952 | |
| }, | |
| { | |
| "epoch": 2.4281514369712607, | |
| "grad_norm": 0.670002818107605, | |
| "learning_rate": 4.359206808355715e-06, | |
| "loss": 0.4711, | |
| "num_input_tokens_seen": 56786912, | |
| "step": 5060, | |
| "train_runtime": 8724.2214, | |
| "train_tokens_per_second": 6509.109 | |
| }, | |
| { | |
| "epoch": 2.4305513889722206, | |
| "grad_norm": 0.8267392516136169, | |
| "learning_rate": 4.32383301134556e-06, | |
| "loss": 0.468, | |
| "num_input_tokens_seen": 56846864, | |
| "step": 5065, | |
| "train_runtime": 8733.1875, | |
| "train_tokens_per_second": 6509.292 | |
| }, | |
| { | |
| "epoch": 2.4329513409731804, | |
| "grad_norm": 0.9042259454727173, | |
| "learning_rate": 4.288589732698365e-06, | |
| "loss": 0.4722, | |
| "num_input_tokens_seen": 56903624, | |
| "step": 5070, | |
| "train_runtime": 8741.802, | |
| "train_tokens_per_second": 6509.37 | |
| }, | |
| { | |
| "epoch": 2.4353512929741403, | |
| "grad_norm": 0.9303114414215088, | |
| "learning_rate": 4.253477194887423e-06, | |
| "loss": 0.4879, | |
| "num_input_tokens_seen": 56961168, | |
| "step": 5075, | |
| "train_runtime": 8750.9039, | |
| "train_tokens_per_second": 6509.175 | |
| }, | |
| { | |
| "epoch": 2.4377512449751007, | |
| "grad_norm": 0.8733497858047485, | |
| "learning_rate": 4.218495619560725e-06, | |
| "loss": 0.4762, | |
| "num_input_tokens_seen": 57017760, | |
| "step": 5080, | |
| "train_runtime": 8759.4851, | |
| "train_tokens_per_second": 6509.259 | |
| }, | |
| { | |
| "epoch": 2.4401511969760605, | |
| "grad_norm": 0.8203326463699341, | |
| "learning_rate": 4.1836452275395624e-06, | |
| "loss": 0.4934, | |
| "num_input_tokens_seen": 57072760, | |
| "step": 5085, | |
| "train_runtime": 8768.1106, | |
| "train_tokens_per_second": 6509.129 | |
| }, | |
| { | |
| "epoch": 2.4425511489770204, | |
| "grad_norm": 1.0363794565200806, | |
| "learning_rate": 4.148926238817141e-06, | |
| "loss": 0.4518, | |
| "num_input_tokens_seen": 57128592, | |
| "step": 5090, | |
| "train_runtime": 8776.3031, | |
| "train_tokens_per_second": 6509.414 | |
| }, | |
| { | |
| "epoch": 2.4449511009779803, | |
| "grad_norm": 0.9167368412017822, | |
| "learning_rate": 4.114338872557175e-06, | |
| "loss": 0.4542, | |
| "num_input_tokens_seen": 57184720, | |
| "step": 5095, | |
| "train_runtime": 8784.8429, | |
| "train_tokens_per_second": 6509.476 | |
| }, | |
| { | |
| "epoch": 2.4473510529789406, | |
| "grad_norm": 0.662429928779602, | |
| "learning_rate": 4.079883347092506e-06, | |
| "loss": 0.4811, | |
| "num_input_tokens_seen": 57248888, | |
| "step": 5100, | |
| "train_runtime": 8794.2311, | |
| "train_tokens_per_second": 6509.823 | |
| }, | |
| { | |
| "epoch": 2.4497510049799005, | |
| "grad_norm": 0.6756502389907837, | |
| "learning_rate": 4.045559879923747e-06, | |
| "loss": 0.454, | |
| "num_input_tokens_seen": 57307744, | |
| "step": 5105, | |
| "train_runtime": 8803.0414, | |
| "train_tokens_per_second": 6509.994 | |
| }, | |
| { | |
| "epoch": 2.4521509569808604, | |
| "grad_norm": 0.7121127843856812, | |
| "learning_rate": 4.011368687717867e-06, | |
| "loss": 0.4506, | |
| "num_input_tokens_seen": 57363824, | |
| "step": 5110, | |
| "train_runtime": 8811.1922, | |
| "train_tokens_per_second": 6510.336 | |
| }, | |
| { | |
| "epoch": 2.4545509089818203, | |
| "grad_norm": 0.764569878578186, | |
| "learning_rate": 3.977309986306874e-06, | |
| "loss": 0.4614, | |
| "num_input_tokens_seen": 57422952, | |
| "step": 5115, | |
| "train_runtime": 8819.6634, | |
| "train_tokens_per_second": 6510.787 | |
| }, | |
| { | |
| "epoch": 2.45695086098278, | |
| "grad_norm": 0.9439240097999573, | |
| "learning_rate": 3.943383990686425e-06, | |
| "loss": 0.5036, | |
| "num_input_tokens_seen": 57475568, | |
| "step": 5120, | |
| "train_runtime": 8827.7896, | |
| "train_tokens_per_second": 6510.754 | |
| }, | |
| { | |
| "epoch": 2.4593508129837405, | |
| "grad_norm": 0.7676842212677002, | |
| "learning_rate": 3.909590915014455e-06, | |
| "loss": 0.4741, | |
| "num_input_tokens_seen": 57533000, | |
| "step": 5125, | |
| "train_runtime": 8836.2004, | |
| "train_tokens_per_second": 6511.056 | |
| }, | |
| { | |
| "epoch": 2.4617507649847004, | |
| "grad_norm": 0.7224127054214478, | |
| "learning_rate": 3.875930972609851e-06, | |
| "loss": 0.4555, | |
| "num_input_tokens_seen": 57591416, | |
| "step": 5130, | |
| "train_runtime": 8844.7508, | |
| "train_tokens_per_second": 6511.367 | |
| }, | |
| { | |
| "epoch": 2.4641507169856602, | |
| "grad_norm": 0.8699045777320862, | |
| "learning_rate": 3.842404375951089e-06, | |
| "loss": 0.4948, | |
| "num_input_tokens_seen": 57648120, | |
| "step": 5135, | |
| "train_runtime": 8853.2169, | |
| "train_tokens_per_second": 6511.545 | |
| }, | |
| { | |
| "epoch": 2.46655066898662, | |
| "grad_norm": 0.8307254910469055, | |
| "learning_rate": 3.809011336674917e-06, | |
| "loss": 0.4747, | |
| "num_input_tokens_seen": 57705096, | |
| "step": 5140, | |
| "train_runtime": 8861.9212, | |
| "train_tokens_per_second": 6511.579 | |
| }, | |
| { | |
| "epoch": 2.46895062098758, | |
| "grad_norm": 1.0947297811508179, | |
| "learning_rate": 3.7757520655749863e-06, | |
| "loss": 0.4711, | |
| "num_input_tokens_seen": 57760000, | |
| "step": 5145, | |
| "train_runtime": 8870.5168, | |
| "train_tokens_per_second": 6511.458 | |
| }, | |
| { | |
| "epoch": 2.4713505729885403, | |
| "grad_norm": 0.6444729566574097, | |
| "learning_rate": 3.7426267726005354e-06, | |
| "loss": 0.4566, | |
| "num_input_tokens_seen": 57814992, | |
| "step": 5150, | |
| "train_runtime": 8879.2323, | |
| "train_tokens_per_second": 6511.26 | |
| }, | |
| { | |
| "epoch": 2.4737505249895, | |
| "grad_norm": 0.7921139001846313, | |
| "learning_rate": 3.709635666855077e-06, | |
| "loss": 0.4552, | |
| "num_input_tokens_seen": 57870400, | |
| "step": 5155, | |
| "train_runtime": 8888.1359, | |
| "train_tokens_per_second": 6510.972 | |
| }, | |
| { | |
| "epoch": 2.47615047699046, | |
| "grad_norm": 0.6223105192184448, | |
| "learning_rate": 3.6767789565950563e-06, | |
| "loss": 0.425, | |
| "num_input_tokens_seen": 57932208, | |
| "step": 5160, | |
| "train_runtime": 8896.7689, | |
| "train_tokens_per_second": 6511.601 | |
| }, | |
| { | |
| "epoch": 2.4785504289914204, | |
| "grad_norm": 0.7725955843925476, | |
| "learning_rate": 3.64405684922855e-06, | |
| "loss": 0.4413, | |
| "num_input_tokens_seen": 57989280, | |
| "step": 5165, | |
| "train_runtime": 8905.0042, | |
| "train_tokens_per_second": 6511.988 | |
| }, | |
| { | |
| "epoch": 2.4809503809923803, | |
| "grad_norm": 0.7563416361808777, | |
| "learning_rate": 3.611469551313959e-06, | |
| "loss": 0.521, | |
| "num_input_tokens_seen": 58045968, | |
| "step": 5170, | |
| "train_runtime": 8913.261, | |
| "train_tokens_per_second": 6512.316 | |
| }, | |
| { | |
| "epoch": 2.48335033299334, | |
| "grad_norm": 0.7822843790054321, | |
| "learning_rate": 3.579017268558693e-06, | |
| "loss": 0.4989, | |
| "num_input_tokens_seen": 58098536, | |
| "step": 5175, | |
| "train_runtime": 8920.913, | |
| "train_tokens_per_second": 6512.622 | |
| }, | |
| { | |
| "epoch": 2.4857502849943, | |
| "grad_norm": 0.80488520860672, | |
| "learning_rate": 3.5467002058178764e-06, | |
| "loss": 0.498, | |
| "num_input_tokens_seen": 58153656, | |
| "step": 5180, | |
| "train_runtime": 8929.6199, | |
| "train_tokens_per_second": 6512.445 | |
| }, | |
| { | |
| "epoch": 2.48815023699526, | |
| "grad_norm": 0.7986950278282166, | |
| "learning_rate": 3.514518567093056e-06, | |
| "loss": 0.4513, | |
| "num_input_tokens_seen": 58208960, | |
| "step": 5185, | |
| "train_runtime": 8938.3362, | |
| "train_tokens_per_second": 6512.281 | |
| }, | |
| { | |
| "epoch": 2.4905501889962203, | |
| "grad_norm": 0.7876197695732117, | |
| "learning_rate": 3.4824725555309272e-06, | |
| "loss": 0.4757, | |
| "num_input_tokens_seen": 58268880, | |
| "step": 5190, | |
| "train_runtime": 8946.6352, | |
| "train_tokens_per_second": 6512.938 | |
| }, | |
| { | |
| "epoch": 2.49295014099718, | |
| "grad_norm": 0.8735581040382385, | |
| "learning_rate": 3.4505623734220226e-06, | |
| "loss": 0.4926, | |
| "num_input_tokens_seen": 58323184, | |
| "step": 5195, | |
| "train_runtime": 8954.4183, | |
| "train_tokens_per_second": 6513.341 | |
| }, | |
| { | |
| "epoch": 2.49535009299814, | |
| "grad_norm": 0.8230021595954895, | |
| "learning_rate": 3.4187882221994564e-06, | |
| "loss": 0.5169, | |
| "num_input_tokens_seen": 58379592, | |
| "step": 5200, | |
| "train_runtime": 8962.9041, | |
| "train_tokens_per_second": 6513.468 | |
| }, | |
| { | |
| "epoch": 2.4977500449991, | |
| "grad_norm": 0.9317114353179932, | |
| "learning_rate": 3.3871503024376554e-06, | |
| "loss": 0.4625, | |
| "num_input_tokens_seen": 58439472, | |
| "step": 5205, | |
| "train_runtime": 8971.3456, | |
| "train_tokens_per_second": 6514.014 | |
| }, | |
| { | |
| "epoch": 2.50014999700006, | |
| "grad_norm": 0.889101505279541, | |
| "learning_rate": 3.3556488138510674e-06, | |
| "loss": 0.4478, | |
| "num_input_tokens_seen": 58498776, | |
| "step": 5210, | |
| "train_runtime": 8980.13, | |
| "train_tokens_per_second": 6514.246 | |
| }, | |
| { | |
| "epoch": 2.50254994900102, | |
| "grad_norm": 0.5332804322242737, | |
| "learning_rate": 3.3242839552929366e-06, | |
| "loss": 0.4552, | |
| "num_input_tokens_seen": 58559344, | |
| "step": 5215, | |
| "train_runtime": 8988.8739, | |
| "train_tokens_per_second": 6514.647 | |
| }, | |
| { | |
| "epoch": 2.50494990100198, | |
| "grad_norm": 0.9555898308753967, | |
| "learning_rate": 3.2930559247540267e-06, | |
| "loss": 0.4537, | |
| "num_input_tokens_seen": 58614416, | |
| "step": 5220, | |
| "train_runtime": 8997.7825, | |
| "train_tokens_per_second": 6514.318 | |
| }, | |
| { | |
| "epoch": 2.50734985300294, | |
| "grad_norm": 1.1382311582565308, | |
| "learning_rate": 3.2619649193613626e-06, | |
| "loss": 0.5041, | |
| "num_input_tokens_seen": 58667216, | |
| "step": 5225, | |
| "train_runtime": 9006.2657, | |
| "train_tokens_per_second": 6514.045 | |
| }, | |
| { | |
| "epoch": 2.5097498050038998, | |
| "grad_norm": 1.1261781454086304, | |
| "learning_rate": 3.2310111353770045e-06, | |
| "loss": 0.5123, | |
| "num_input_tokens_seen": 58722648, | |
| "step": 5230, | |
| "train_runtime": 9014.5273, | |
| "train_tokens_per_second": 6514.224 | |
| }, | |
| { | |
| "epoch": 2.5121497570048597, | |
| "grad_norm": 0.6339508295059204, | |
| "learning_rate": 3.2001947681967987e-06, | |
| "loss": 0.466, | |
| "num_input_tokens_seen": 58780640, | |
| "step": 5235, | |
| "train_runtime": 9023.7118, | |
| "train_tokens_per_second": 6514.02 | |
| }, | |
| { | |
| "epoch": 2.51454970900582, | |
| "grad_norm": 0.8819341659545898, | |
| "learning_rate": 3.169516012349161e-06, | |
| "loss": 0.4855, | |
| "num_input_tokens_seen": 58839080, | |
| "step": 5240, | |
| "train_runtime": 9032.9027, | |
| "train_tokens_per_second": 6513.862 | |
| }, | |
| { | |
| "epoch": 2.51694966100678, | |
| "grad_norm": 0.8198482394218445, | |
| "learning_rate": 3.138975061493815e-06, | |
| "loss": 0.5462, | |
| "num_input_tokens_seen": 58888056, | |
| "step": 5245, | |
| "train_runtime": 9041.1086, | |
| "train_tokens_per_second": 6513.367 | |
| }, | |
| { | |
| "epoch": 2.5193496130077397, | |
| "grad_norm": 0.7308799028396606, | |
| "learning_rate": 3.1085721084205987e-06, | |
| "loss": 0.4879, | |
| "num_input_tokens_seen": 58948912, | |
| "step": 5250, | |
| "train_runtime": 9049.9278, | |
| "train_tokens_per_second": 6513.744 | |
| }, | |
| { | |
| "epoch": 2.5217495650087, | |
| "grad_norm": 0.7503857612609863, | |
| "learning_rate": 3.078307345048251e-06, | |
| "loss": 0.434, | |
| "num_input_tokens_seen": 59005656, | |
| "step": 5255, | |
| "train_runtime": 9058.4522, | |
| "train_tokens_per_second": 6513.878 | |
| }, | |
| { | |
| "epoch": 2.52414951700966, | |
| "grad_norm": 0.7755120992660522, | |
| "learning_rate": 3.0481809624231667e-06, | |
| "loss": 0.4226, | |
| "num_input_tokens_seen": 59064880, | |
| "step": 5260, | |
| "train_runtime": 9067.2632, | |
| "train_tokens_per_second": 6514.08 | |
| }, | |
| { | |
| "epoch": 2.52654946901062, | |
| "grad_norm": 0.7984574437141418, | |
| "learning_rate": 3.018193150718224e-06, | |
| "loss": 0.4881, | |
| "num_input_tokens_seen": 59122920, | |
| "step": 5265, | |
| "train_runtime": 9075.8636, | |
| "train_tokens_per_second": 6514.302 | |
| }, | |
| { | |
| "epoch": 2.5289494210115797, | |
| "grad_norm": 0.7857392430305481, | |
| "learning_rate": 2.9883440992315744e-06, | |
| "loss": 0.4949, | |
| "num_input_tokens_seen": 59180768, | |
| "step": 5270, | |
| "train_runtime": 9084.2259, | |
| "train_tokens_per_second": 6514.674 | |
| }, | |
| { | |
| "epoch": 2.5313493730125396, | |
| "grad_norm": 0.7636000514030457, | |
| "learning_rate": 2.9586339963854402e-06, | |
| "loss": 0.4584, | |
| "num_input_tokens_seen": 59236392, | |
| "step": 5275, | |
| "train_runtime": 9093.425, | |
| "train_tokens_per_second": 6514.2 | |
| }, | |
| { | |
| "epoch": 2.5337493250135, | |
| "grad_norm": 0.7404913306236267, | |
| "learning_rate": 2.929063029724924e-06, | |
| "loss": 0.5001, | |
| "num_input_tokens_seen": 59288152, | |
| "step": 5280, | |
| "train_runtime": 9101.2939, | |
| "train_tokens_per_second": 6514.255 | |
| }, | |
| { | |
| "epoch": 2.53614927701446, | |
| "grad_norm": 0.8310667872428894, | |
| "learning_rate": 2.8996313859168373e-06, | |
| "loss": 0.4752, | |
| "num_input_tokens_seen": 59350448, | |
| "step": 5285, | |
| "train_runtime": 9109.697, | |
| "train_tokens_per_second": 6515.085 | |
| }, | |
| { | |
| "epoch": 2.5385492290154197, | |
| "grad_norm": 0.7058178782463074, | |
| "learning_rate": 2.8703392507485244e-06, | |
| "loss": 0.5058, | |
| "num_input_tokens_seen": 59405224, | |
| "step": 5290, | |
| "train_runtime": 9118.1859, | |
| "train_tokens_per_second": 6515.027 | |
| }, | |
| { | |
| "epoch": 2.5409491810163796, | |
| "grad_norm": 0.9837594628334045, | |
| "learning_rate": 2.8411868091266614e-06, | |
| "loss": 0.5101, | |
| "num_input_tokens_seen": 59459408, | |
| "step": 5295, | |
| "train_runtime": 9125.7939, | |
| "train_tokens_per_second": 6515.533 | |
| }, | |
| { | |
| "epoch": 2.5433491330173394, | |
| "grad_norm": 0.749136745929718, | |
| "learning_rate": 2.812174245076121e-06, | |
| "loss": 0.4509, | |
| "num_input_tokens_seen": 59519864, | |
| "step": 5300, | |
| "train_runtime": 9134.5564, | |
| "train_tokens_per_second": 6515.901 | |
| }, | |
| { | |
| "epoch": 2.5457490850182998, | |
| "grad_norm": 0.8679369688034058, | |
| "learning_rate": 2.783301741738803e-06, | |
| "loss": 0.5337, | |
| "num_input_tokens_seen": 59575648, | |
| "step": 5305, | |
| "train_runtime": 9142.5914, | |
| "train_tokens_per_second": 6516.276 | |
| }, | |
| { | |
| "epoch": 2.5481490370192597, | |
| "grad_norm": 0.7311270833015442, | |
| "learning_rate": 2.75456948137246e-06, | |
| "loss": 0.4446, | |
| "num_input_tokens_seen": 59631568, | |
| "step": 5310, | |
| "train_runtime": 9150.8949, | |
| "train_tokens_per_second": 6516.474 | |
| }, | |
| { | |
| "epoch": 2.5505489890202195, | |
| "grad_norm": 0.9072261452674866, | |
| "learning_rate": 2.725977645349567e-06, | |
| "loss": 0.4515, | |
| "num_input_tokens_seen": 59688168, | |
| "step": 5315, | |
| "train_runtime": 9158.8503, | |
| "train_tokens_per_second": 6516.993 | |
| }, | |
| { | |
| "epoch": 2.5529489410211794, | |
| "grad_norm": 0.7925878763198853, | |
| "learning_rate": 2.6975264141561792e-06, | |
| "loss": 0.4743, | |
| "num_input_tokens_seen": 59750784, | |
| "step": 5320, | |
| "train_runtime": 9167.7914, | |
| "train_tokens_per_second": 6517.468 | |
| }, | |
| { | |
| "epoch": 2.5553488930221393, | |
| "grad_norm": 0.7712064981460571, | |
| "learning_rate": 2.6692159673907674e-06, | |
| "loss": 0.4835, | |
| "num_input_tokens_seen": 59804776, | |
| "step": 5325, | |
| "train_runtime": 9176.5665, | |
| "train_tokens_per_second": 6517.119 | |
| }, | |
| { | |
| "epoch": 2.5577488450230996, | |
| "grad_norm": 0.9932171106338501, | |
| "learning_rate": 2.641046483763107e-06, | |
| "loss": 0.4954, | |
| "num_input_tokens_seen": 59862336, | |
| "step": 5330, | |
| "train_runtime": 9184.9522, | |
| "train_tokens_per_second": 6517.436 | |
| }, | |
| { | |
| "epoch": 2.5601487970240595, | |
| "grad_norm": 0.8807353377342224, | |
| "learning_rate": 2.613018141093143e-06, | |
| "loss": 0.5017, | |
| "num_input_tokens_seen": 59920072, | |
| "step": 5335, | |
| "train_runtime": 9193.9014, | |
| "train_tokens_per_second": 6517.372 | |
| }, | |
| { | |
| "epoch": 2.5625487490250194, | |
| "grad_norm": 0.7849051356315613, | |
| "learning_rate": 2.585131116309872e-06, | |
| "loss": 0.4951, | |
| "num_input_tokens_seen": 59975568, | |
| "step": 5340, | |
| "train_runtime": 9202.2095, | |
| "train_tokens_per_second": 6517.518 | |
| }, | |
| { | |
| "epoch": 2.5649487010259797, | |
| "grad_norm": 0.5779772400856018, | |
| "learning_rate": 2.557385585450217e-06, | |
| "loss": 0.4706, | |
| "num_input_tokens_seen": 60036392, | |
| "step": 5345, | |
| "train_runtime": 9211.2288, | |
| "train_tokens_per_second": 6517.74 | |
| }, | |
| { | |
| "epoch": 2.5673486530269396, | |
| "grad_norm": 0.9567521810531616, | |
| "learning_rate": 2.529781723657915e-06, | |
| "loss": 0.4893, | |
| "num_input_tokens_seen": 60093024, | |
| "step": 5350, | |
| "train_runtime": 9220.1795, | |
| "train_tokens_per_second": 6517.555 | |
| }, | |
| { | |
| "epoch": 2.5697486050278995, | |
| "grad_norm": 0.7940301299095154, | |
| "learning_rate": 2.5023197051824267e-06, | |
| "loss": 0.5055, | |
| "num_input_tokens_seen": 60144920, | |
| "step": 5355, | |
| "train_runtime": 9228.2311, | |
| "train_tokens_per_second": 6517.492 | |
| }, | |
| { | |
| "epoch": 2.5721485570288594, | |
| "grad_norm": 0.9344842433929443, | |
| "learning_rate": 2.4749997033778228e-06, | |
| "loss": 0.5167, | |
| "num_input_tokens_seen": 60203224, | |
| "step": 5360, | |
| "train_runtime": 9236.6101, | |
| "train_tokens_per_second": 6517.892 | |
| }, | |
| { | |
| "epoch": 2.5745485090298192, | |
| "grad_norm": 0.9174864888191223, | |
| "learning_rate": 2.4478218907016877e-06, | |
| "loss": 0.4896, | |
| "num_input_tokens_seen": 60259032, | |
| "step": 5365, | |
| "train_runtime": 9245.2879, | |
| "train_tokens_per_second": 6517.81 | |
| }, | |
| { | |
| "epoch": 2.5769484610307796, | |
| "grad_norm": 0.9624903798103333, | |
| "learning_rate": 2.4207864387140512e-06, | |
| "loss": 0.5132, | |
| "num_input_tokens_seen": 60308024, | |
| "step": 5370, | |
| "train_runtime": 9253.8315, | |
| "train_tokens_per_second": 6517.087 | |
| }, | |
| { | |
| "epoch": 2.5793484130317395, | |
| "grad_norm": 0.6800229549407959, | |
| "learning_rate": 2.3938935180762707e-06, | |
| "loss": 0.5086, | |
| "num_input_tokens_seen": 60362552, | |
| "step": 5375, | |
| "train_runtime": 9261.5584, | |
| "train_tokens_per_second": 6517.537 | |
| }, | |
| { | |
| "epoch": 2.5817483650326993, | |
| "grad_norm": 0.9939396977424622, | |
| "learning_rate": 2.36714329854999e-06, | |
| "loss": 0.5001, | |
| "num_input_tokens_seen": 60415520, | |
| "step": 5380, | |
| "train_runtime": 9269.7261, | |
| "train_tokens_per_second": 6517.509 | |
| }, | |
| { | |
| "epoch": 2.584148317033659, | |
| "grad_norm": 0.7869457602500916, | |
| "learning_rate": 2.3405359489960365e-06, | |
| "loss": 0.493, | |
| "num_input_tokens_seen": 60469016, | |
| "step": 5385, | |
| "train_runtime": 9277.4328, | |
| "train_tokens_per_second": 6517.861 | |
| }, | |
| { | |
| "epoch": 2.586548269034619, | |
| "grad_norm": 0.8779625296592712, | |
| "learning_rate": 2.314071637373394e-06, | |
| "loss": 0.537, | |
| "num_input_tokens_seen": 60528736, | |
| "step": 5390, | |
| "train_runtime": 9286.7608, | |
| "train_tokens_per_second": 6517.745 | |
| }, | |
| { | |
| "epoch": 2.5889482210355794, | |
| "grad_norm": 0.9168468713760376, | |
| "learning_rate": 2.2877505307380976e-06, | |
| "loss": 0.5101, | |
| "num_input_tokens_seen": 60585352, | |
| "step": 5395, | |
| "train_runtime": 9294.8068, | |
| "train_tokens_per_second": 6518.194 | |
| }, | |
| { | |
| "epoch": 2.5913481730365393, | |
| "grad_norm": 0.7564955353736877, | |
| "learning_rate": 2.2615727952422033e-06, | |
| "loss": 0.4426, | |
| "num_input_tokens_seen": 60645192, | |
| "step": 5400, | |
| "train_runtime": 9303.554, | |
| "train_tokens_per_second": 6518.497 | |
| }, | |
| { | |
| "epoch": 2.593748125037499, | |
| "grad_norm": 0.823637843132019, | |
| "learning_rate": 2.235538596132747e-06, | |
| "loss": 0.4401, | |
| "num_input_tokens_seen": 60705872, | |
| "step": 5405, | |
| "train_runtime": 9314.3874, | |
| "train_tokens_per_second": 6517.43 | |
| }, | |
| { | |
| "epoch": 2.596148077038459, | |
| "grad_norm": 0.5428220629692078, | |
| "learning_rate": 2.2096480977506883e-06, | |
| "loss": 0.466, | |
| "num_input_tokens_seen": 60766448, | |
| "step": 5410, | |
| "train_runtime": 9324.731, | |
| "train_tokens_per_second": 6516.697 | |
| }, | |
| { | |
| "epoch": 2.598548029039419, | |
| "grad_norm": 1.0644038915634155, | |
| "learning_rate": 2.183901463529861e-06, | |
| "loss": 0.4647, | |
| "num_input_tokens_seen": 60820832, | |
| "step": 5415, | |
| "train_runtime": 9335.3113, | |
| "train_tokens_per_second": 6515.137 | |
| }, | |
| { | |
| "epoch": 2.6009479810403793, | |
| "grad_norm": 0.7919825315475464, | |
| "learning_rate": 2.1582988559959773e-06, | |
| "loss": 0.4435, | |
| "num_input_tokens_seen": 60879048, | |
| "step": 5420, | |
| "train_runtime": 9346.1879, | |
| "train_tokens_per_second": 6513.784 | |
| }, | |
| { | |
| "epoch": 2.603347933041339, | |
| "grad_norm": 1.047285556793213, | |
| "learning_rate": 2.132840436765568e-06, | |
| "loss": 0.4641, | |
| "num_input_tokens_seen": 60927720, | |
| "step": 5425, | |
| "train_runtime": 9355.613, | |
| "train_tokens_per_second": 6512.424 | |
| }, | |
| { | |
| "epoch": 2.605747885042299, | |
| "grad_norm": 0.9616097211837769, | |
| "learning_rate": 2.1075263665449737e-06, | |
| "loss": 0.4677, | |
| "num_input_tokens_seen": 60981576, | |
| "step": 5430, | |
| "train_runtime": 9365.6809, | |
| "train_tokens_per_second": 6511.174 | |
| }, | |
| { | |
| "epoch": 2.6081478370432594, | |
| "grad_norm": 0.9964049458503723, | |
| "learning_rate": 2.082356805129332e-06, | |
| "loss": 0.4929, | |
| "num_input_tokens_seen": 61039448, | |
| "step": 5435, | |
| "train_runtime": 9376.1343, | |
| "train_tokens_per_second": 6510.087 | |
| }, | |
| { | |
| "epoch": 2.6105477890442192, | |
| "grad_norm": 0.8985645174980164, | |
| "learning_rate": 2.0573319114015775e-06, | |
| "loss": 0.4886, | |
| "num_input_tokens_seen": 61093640, | |
| "step": 5440, | |
| "train_runtime": 9386.3154, | |
| "train_tokens_per_second": 6508.799 | |
| }, | |
| { | |
| "epoch": 2.612947741045179, | |
| "grad_norm": 0.7488046884536743, | |
| "learning_rate": 2.0324518433314206e-06, | |
| "loss": 0.4697, | |
| "num_input_tokens_seen": 61149808, | |
| "step": 5445, | |
| "train_runtime": 9396.5128, | |
| "train_tokens_per_second": 6507.713 | |
| }, | |
| { | |
| "epoch": 2.615347693046139, | |
| "grad_norm": 0.7769824862480164, | |
| "learning_rate": 2.0077167579743593e-06, | |
| "loss": 0.4645, | |
| "num_input_tokens_seen": 61206176, | |
| "step": 5450, | |
| "train_runtime": 9406.5758, | |
| "train_tokens_per_second": 6506.743 | |
| }, | |
| { | |
| "epoch": 2.617747645047099, | |
| "grad_norm": 0.7720673084259033, | |
| "learning_rate": 1.9831268114706925e-06, | |
| "loss": 0.4667, | |
| "num_input_tokens_seen": 61266712, | |
| "step": 5455, | |
| "train_runtime": 9417.2442, | |
| "train_tokens_per_second": 6505.8 | |
| }, | |
| { | |
| "epoch": 2.620147597048059, | |
| "grad_norm": 0.7182523012161255, | |
| "learning_rate": 1.958682159044531e-06, | |
| "loss": 0.4644, | |
| "num_input_tokens_seen": 61319856, | |
| "step": 5460, | |
| "train_runtime": 9426.6437, | |
| "train_tokens_per_second": 6504.951 | |
| }, | |
| { | |
| "epoch": 2.622547549049019, | |
| "grad_norm": 0.8977944850921631, | |
| "learning_rate": 1.934382955002803e-06, | |
| "loss": 0.5007, | |
| "num_input_tokens_seen": 61377048, | |
| "step": 5465, | |
| "train_runtime": 9437.2729, | |
| "train_tokens_per_second": 6503.685 | |
| }, | |
| { | |
| "epoch": 2.624947501049979, | |
| "grad_norm": 0.7803311347961426, | |
| "learning_rate": 1.9102293527343163e-06, | |
| "loss": 0.4658, | |
| "num_input_tokens_seen": 61434248, | |
| "step": 5470, | |
| "train_runtime": 9448.0138, | |
| "train_tokens_per_second": 6502.345 | |
| }, | |
| { | |
| "epoch": 2.627347453050939, | |
| "grad_norm": 0.72231125831604, | |
| "learning_rate": 1.886221504708746e-06, | |
| "loss": 0.4968, | |
| "num_input_tokens_seen": 61494600, | |
| "step": 5475, | |
| "train_runtime": 9459.0534, | |
| "train_tokens_per_second": 6501.137 | |
| }, | |
| { | |
| "epoch": 2.6297474050518987, | |
| "grad_norm": 0.5621334314346313, | |
| "learning_rate": 1.8623595624757045e-06, | |
| "loss": 0.4606, | |
| "num_input_tokens_seen": 61555232, | |
| "step": 5480, | |
| "train_runtime": 9469.3682, | |
| "train_tokens_per_second": 6500.458 | |
| }, | |
| { | |
| "epoch": 2.632147357052859, | |
| "grad_norm": 0.6386857628822327, | |
| "learning_rate": 1.8386436766637593e-06, | |
| "loss": 0.4647, | |
| "num_input_tokens_seen": 61610480, | |
| "step": 5485, | |
| "train_runtime": 9479.3329, | |
| "train_tokens_per_second": 6499.453 | |
| }, | |
| { | |
| "epoch": 2.634547309053819, | |
| "grad_norm": 0.6079943776130676, | |
| "learning_rate": 1.8150739969795245e-06, | |
| "loss": 0.4742, | |
| "num_input_tokens_seen": 61666936, | |
| "step": 5490, | |
| "train_runtime": 9489.1199, | |
| "train_tokens_per_second": 6498.699 | |
| }, | |
| { | |
| "epoch": 2.636947261054779, | |
| "grad_norm": 0.6471970677375793, | |
| "learning_rate": 1.7916506722066573e-06, | |
| "loss": 0.5121, | |
| "num_input_tokens_seen": 61723152, | |
| "step": 5495, | |
| "train_runtime": 9498.3327, | |
| "train_tokens_per_second": 6498.314 | |
| }, | |
| { | |
| "epoch": 2.639347213055739, | |
| "grad_norm": 0.8927129507064819, | |
| "learning_rate": 1.7683738502049658e-06, | |
| "loss": 0.5282, | |
| "num_input_tokens_seen": 61779792, | |
| "step": 5500, | |
| "train_runtime": 9508.4194, | |
| "train_tokens_per_second": 6497.378 | |
| }, | |
| { | |
| "epoch": 2.6417471650566986, | |
| "grad_norm": 0.9175587296485901, | |
| "learning_rate": 1.7452436779094527e-06, | |
| "loss": 0.5226, | |
| "num_input_tokens_seen": 61837696, | |
| "step": 5505, | |
| "train_runtime": 9518.2144, | |
| "train_tokens_per_second": 6496.775 | |
| }, | |
| { | |
| "epoch": 2.644147117057659, | |
| "grad_norm": 0.6489665508270264, | |
| "learning_rate": 1.7222603013294036e-06, | |
| "loss": 0.4645, | |
| "num_input_tokens_seen": 61896032, | |
| "step": 5510, | |
| "train_runtime": 9528.6748, | |
| "train_tokens_per_second": 6495.765 | |
| }, | |
| { | |
| "epoch": 2.646547069058619, | |
| "grad_norm": 0.8270627856254578, | |
| "learning_rate": 1.6994238655474394e-06, | |
| "loss": 0.4943, | |
| "num_input_tokens_seen": 61949384, | |
| "step": 5515, | |
| "train_runtime": 9538.4414, | |
| "train_tokens_per_second": 6494.707 | |
| }, | |
| { | |
| "epoch": 2.6489470210595787, | |
| "grad_norm": 0.7798356413841248, | |
| "learning_rate": 1.6767345147186336e-06, | |
| "loss": 0.5109, | |
| "num_input_tokens_seen": 62002592, | |
| "step": 5520, | |
| "train_runtime": 9548.0079, | |
| "train_tokens_per_second": 6493.773 | |
| }, | |
| { | |
| "epoch": 2.651346973060539, | |
| "grad_norm": 0.8514456748962402, | |
| "learning_rate": 1.6541923920695756e-06, | |
| "loss": 0.4477, | |
| "num_input_tokens_seen": 62055040, | |
| "step": 5525, | |
| "train_runtime": 9558.322, | |
| "train_tokens_per_second": 6492.253 | |
| }, | |
| { | |
| "epoch": 2.653746925061499, | |
| "grad_norm": 1.0111453533172607, | |
| "learning_rate": 1.6317976398974782e-06, | |
| "loss": 0.5174, | |
| "num_input_tokens_seen": 62109976, | |
| "step": 5530, | |
| "train_runtime": 9567.8838, | |
| "train_tokens_per_second": 6491.506 | |
| }, | |
| { | |
| "epoch": 2.6561468770624588, | |
| "grad_norm": 0.702575147151947, | |
| "learning_rate": 1.6095503995692762e-06, | |
| "loss": 0.4668, | |
| "num_input_tokens_seen": 62167376, | |
| "step": 5535, | |
| "train_runtime": 9577.5036, | |
| "train_tokens_per_second": 6490.979 | |
| }, | |
| { | |
| "epoch": 2.6585468290634187, | |
| "grad_norm": 0.8962842226028442, | |
| "learning_rate": 1.5874508115207408e-06, | |
| "loss": 0.4676, | |
| "num_input_tokens_seen": 62221488, | |
| "step": 5540, | |
| "train_runtime": 9587.9271, | |
| "train_tokens_per_second": 6489.566 | |
| }, | |
| { | |
| "epoch": 2.6609467810643785, | |
| "grad_norm": 0.7158124446868896, | |
| "learning_rate": 1.5654990152555837e-06, | |
| "loss": 0.4947, | |
| "num_input_tokens_seen": 62277176, | |
| "step": 5545, | |
| "train_runtime": 9597.969, | |
| "train_tokens_per_second": 6488.579 | |
| }, | |
| { | |
| "epoch": 2.663346733065339, | |
| "grad_norm": 1.1132010221481323, | |
| "learning_rate": 1.5436951493445762e-06, | |
| "loss": 0.4875, | |
| "num_input_tokens_seen": 62330544, | |
| "step": 5550, | |
| "train_runtime": 9607.4993, | |
| "train_tokens_per_second": 6487.697 | |
| }, | |
| { | |
| "epoch": 2.6657466850662987, | |
| "grad_norm": 0.8258331418037415, | |
| "learning_rate": 1.5220393514246895e-06, | |
| "loss": 0.5035, | |
| "num_input_tokens_seen": 62381768, | |
| "step": 5555, | |
| "train_runtime": 9616.8354, | |
| "train_tokens_per_second": 6486.725 | |
| }, | |
| { | |
| "epoch": 2.6681466370672586, | |
| "grad_norm": 0.8152797818183899, | |
| "learning_rate": 1.5005317581982092e-06, | |
| "loss": 0.4839, | |
| "num_input_tokens_seen": 62436944, | |
| "step": 5560, | |
| "train_runtime": 9626.6187, | |
| "train_tokens_per_second": 6485.864 | |
| }, | |
| { | |
| "epoch": 2.6705465890682185, | |
| "grad_norm": 0.8248258233070374, | |
| "learning_rate": 1.479172505431875e-06, | |
| "loss": 0.4973, | |
| "num_input_tokens_seen": 62491352, | |
| "step": 5565, | |
| "train_runtime": 9636.4281, | |
| "train_tokens_per_second": 6484.908 | |
| }, | |
| { | |
| "epoch": 2.6729465410691784, | |
| "grad_norm": 1.0632202625274658, | |
| "learning_rate": 1.4579617279560393e-06, | |
| "loss": 0.486, | |
| "num_input_tokens_seen": 62546464, | |
| "step": 5570, | |
| "train_runtime": 9646.0848, | |
| "train_tokens_per_second": 6484.13 | |
| }, | |
| { | |
| "epoch": 2.6753464930701387, | |
| "grad_norm": 1.1524382829666138, | |
| "learning_rate": 1.4368995596637902e-06, | |
| "loss": 0.4729, | |
| "num_input_tokens_seen": 62602496, | |
| "step": 5575, | |
| "train_runtime": 9656.9657, | |
| "train_tokens_per_second": 6482.626 | |
| }, | |
| { | |
| "epoch": 2.6777464450710986, | |
| "grad_norm": 0.66849684715271, | |
| "learning_rate": 1.415986133510122e-06, | |
| "loss": 0.4894, | |
| "num_input_tokens_seen": 62664360, | |
| "step": 5580, | |
| "train_runtime": 9668.3929, | |
| "train_tokens_per_second": 6481.363 | |
| }, | |
| { | |
| "epoch": 2.6801463970720585, | |
| "grad_norm": 0.7072093486785889, | |
| "learning_rate": 1.395221581511097e-06, | |
| "loss": 0.4524, | |
| "num_input_tokens_seen": 62721848, | |
| "step": 5585, | |
| "train_runtime": 9678.2677, | |
| "train_tokens_per_second": 6480.69 | |
| }, | |
| { | |
| "epoch": 2.682546349073019, | |
| "grad_norm": 0.8476486802101135, | |
| "learning_rate": 1.3746060347430118e-06, | |
| "loss": 0.4765, | |
| "num_input_tokens_seen": 62776544, | |
| "step": 5590, | |
| "train_runtime": 9687.8596, | |
| "train_tokens_per_second": 6479.919 | |
| }, | |
| { | |
| "epoch": 2.6849463010739782, | |
| "grad_norm": 0.807366132736206, | |
| "learning_rate": 1.354139623341566e-06, | |
| "loss": 0.4656, | |
| "num_input_tokens_seen": 62834048, | |
| "step": 5595, | |
| "train_runtime": 9698.4717, | |
| "train_tokens_per_second": 6478.758 | |
| }, | |
| { | |
| "epoch": 2.6873462530749386, | |
| "grad_norm": 0.6468657851219177, | |
| "learning_rate": 1.3338224765010315e-06, | |
| "loss": 0.4573, | |
| "num_input_tokens_seen": 62894360, | |
| "step": 5600, | |
| "train_runtime": 9709.7847, | |
| "train_tokens_per_second": 6477.421 | |
| }, | |
| { | |
| "epoch": 2.6897462050758985, | |
| "grad_norm": 0.9837515354156494, | |
| "learning_rate": 1.3136547224734646e-06, | |
| "loss": 0.4944, | |
| "num_input_tokens_seen": 62952560, | |
| "step": 5605, | |
| "train_runtime": 9720.79, | |
| "train_tokens_per_second": 6476.074 | |
| }, | |
| { | |
| "epoch": 2.6921461570768583, | |
| "grad_norm": 0.7956768274307251, | |
| "learning_rate": 1.2936364885678676e-06, | |
| "loss": 0.4829, | |
| "num_input_tokens_seen": 63006360, | |
| "step": 5610, | |
| "train_runtime": 9729.8891, | |
| "train_tokens_per_second": 6475.548 | |
| }, | |
| { | |
| "epoch": 2.6945461090778187, | |
| "grad_norm": 0.7825217247009277, | |
| "learning_rate": 1.2737679011493947e-06, | |
| "loss": 0.4819, | |
| "num_input_tokens_seen": 63065920, | |
| "step": 5615, | |
| "train_runtime": 9740.3812, | |
| "train_tokens_per_second": 6474.687 | |
| }, | |
| { | |
| "epoch": 2.6969460610787785, | |
| "grad_norm": 0.8457074761390686, | |
| "learning_rate": 1.2540490856385672e-06, | |
| "loss": 0.4717, | |
| "num_input_tokens_seen": 63121320, | |
| "step": 5620, | |
| "train_runtime": 9751.1742, | |
| "train_tokens_per_second": 6473.202 | |
| }, | |
| { | |
| "epoch": 2.6993460130797384, | |
| "grad_norm": 0.8086642026901245, | |
| "learning_rate": 1.23448016651046e-06, | |
| "loss": 0.462, | |
| "num_input_tokens_seen": 63176440, | |
| "step": 5625, | |
| "train_runtime": 9760.6545, | |
| "train_tokens_per_second": 6472.562 | |
| }, | |
| { | |
| "epoch": 2.7017459650806983, | |
| "grad_norm": 0.6313350796699524, | |
| "learning_rate": 1.215061267293932e-06, | |
| "loss": 0.4332, | |
| "num_input_tokens_seen": 63242712, | |
| "step": 5630, | |
| "train_runtime": 9772.2646, | |
| "train_tokens_per_second": 6471.654 | |
| }, | |
| { | |
| "epoch": 2.704145917081658, | |
| "grad_norm": 1.2930268049240112, | |
| "learning_rate": 1.195792510570834e-06, | |
| "loss": 0.4613, | |
| "num_input_tokens_seen": 63294640, | |
| "step": 5635, | |
| "train_runtime": 9782.3592, | |
| "train_tokens_per_second": 6470.284 | |
| }, | |
| { | |
| "epoch": 2.7065458690826185, | |
| "grad_norm": 0.6524819731712341, | |
| "learning_rate": 1.1766740179752572e-06, | |
| "loss": 0.4588, | |
| "num_input_tokens_seen": 63353040, | |
| "step": 5640, | |
| "train_runtime": 9793.201, | |
| "train_tokens_per_second": 6469.084 | |
| }, | |
| { | |
| "epoch": 2.7089458210835784, | |
| "grad_norm": 0.9691641330718994, | |
| "learning_rate": 1.1577059101927385e-06, | |
| "loss": 0.5275, | |
| "num_input_tokens_seen": 63408480, | |
| "step": 5645, | |
| "train_runtime": 9803.1346, | |
| "train_tokens_per_second": 6468.184 | |
| }, | |
| { | |
| "epoch": 2.7113457730845383, | |
| "grad_norm": 0.7839572429656982, | |
| "learning_rate": 1.138888306959504e-06, | |
| "loss": 0.4728, | |
| "num_input_tokens_seen": 63465824, | |
| "step": 5650, | |
| "train_runtime": 9814.8407, | |
| "train_tokens_per_second": 6466.312 | |
| }, | |
| { | |
| "epoch": 2.713745725085498, | |
| "grad_norm": 0.9171317219734192, | |
| "learning_rate": 1.1202213270617322e-06, | |
| "loss": 0.4897, | |
| "num_input_tokens_seen": 63518744, | |
| "step": 5655, | |
| "train_runtime": 9824.5678, | |
| "train_tokens_per_second": 6465.297 | |
| }, | |
| { | |
| "epoch": 2.716145677086458, | |
| "grad_norm": 1.0188878774642944, | |
| "learning_rate": 1.101705088334795e-06, | |
| "loss": 0.4849, | |
| "num_input_tokens_seen": 63573232, | |
| "step": 5660, | |
| "train_runtime": 9833.9406, | |
| "train_tokens_per_second": 6464.675 | |
| }, | |
| { | |
| "epoch": 2.7185456290874184, | |
| "grad_norm": 0.811906099319458, | |
| "learning_rate": 1.0833397076624897e-06, | |
| "loss": 0.4778, | |
| "num_input_tokens_seen": 63626872, | |
| "step": 5665, | |
| "train_runtime": 9843.8939, | |
| "train_tokens_per_second": 6463.588 | |
| }, | |
| { | |
| "epoch": 2.7209455810883783, | |
| "grad_norm": 0.9648638367652893, | |
| "learning_rate": 1.065125300976344e-06, | |
| "loss": 0.5255, | |
| "num_input_tokens_seen": 63680184, | |
| "step": 5670, | |
| "train_runtime": 9852.7656, | |
| "train_tokens_per_second": 6463.179 | |
| }, | |
| { | |
| "epoch": 2.723345533089338, | |
| "grad_norm": 0.8658723831176758, | |
| "learning_rate": 1.0470619832548461e-06, | |
| "loss": 0.5119, | |
| "num_input_tokens_seen": 63732752, | |
| "step": 5675, | |
| "train_runtime": 9861.8742, | |
| "train_tokens_per_second": 6462.54 | |
| }, | |
| { | |
| "epoch": 2.7257454850902985, | |
| "grad_norm": 0.6413763761520386, | |
| "learning_rate": 1.0291498685227441e-06, | |
| "loss": 0.4683, | |
| "num_input_tokens_seen": 63790384, | |
| "step": 5680, | |
| "train_runtime": 9873.128, | |
| "train_tokens_per_second": 6461.011 | |
| }, | |
| { | |
| "epoch": 2.7281454370912583, | |
| "grad_norm": 0.9176835417747498, | |
| "learning_rate": 1.0113890698503076e-06, | |
| "loss": 0.4943, | |
| "num_input_tokens_seen": 63845528, | |
| "step": 5685, | |
| "train_runtime": 9883.5777, | |
| "train_tokens_per_second": 6459.759 | |
| }, | |
| { | |
| "epoch": 2.7305453890922182, | |
| "grad_norm": 0.8102623224258423, | |
| "learning_rate": 9.937796993526343e-07, | |
| "loss": 0.4989, | |
| "num_input_tokens_seen": 63898616, | |
| "step": 5690, | |
| "train_runtime": 9893.716, | |
| "train_tokens_per_second": 6458.505 | |
| }, | |
| { | |
| "epoch": 2.732945341093178, | |
| "grad_norm": 0.7839487195014954, | |
| "learning_rate": 9.763218681889203e-07, | |
| "loss": 0.4506, | |
| "num_input_tokens_seen": 63953600, | |
| "step": 5695, | |
| "train_runtime": 9903.0294, | |
| "train_tokens_per_second": 6457.983 | |
| }, | |
| { | |
| "epoch": 2.735345293094138, | |
| "grad_norm": 0.8236997723579407, | |
| "learning_rate": 9.59015686561779e-07, | |
| "loss": 0.4606, | |
| "num_input_tokens_seen": 64012184, | |
| "step": 5700, | |
| "train_runtime": 9913.4852, | |
| "train_tokens_per_second": 6457.082 | |
| }, | |
| { | |
| "epoch": 2.7377452450950983, | |
| "grad_norm": 0.7789479494094849, | |
| "learning_rate": 9.418612637165286e-07, | |
| "loss": 0.4545, | |
| "num_input_tokens_seen": 64065248, | |
| "step": 5705, | |
| "train_runtime": 9924.2434, | |
| "train_tokens_per_second": 6455.429 | |
| }, | |
| { | |
| "epoch": 2.740145197096058, | |
| "grad_norm": 0.890102744102478, | |
| "learning_rate": 9.24858707940518e-07, | |
| "loss": 0.5299, | |
| "num_input_tokens_seen": 64120216, | |
| "step": 5710, | |
| "train_runtime": 9934.9595, | |
| "train_tokens_per_second": 6453.999 | |
| }, | |
| { | |
| "epoch": 2.742545149097018, | |
| "grad_norm": 0.9005339741706848, | |
| "learning_rate": 9.08008126562418e-07, | |
| "loss": 0.4609, | |
| "num_input_tokens_seen": 64181128, | |
| "step": 5715, | |
| "train_runtime": 9946.018, | |
| "train_tokens_per_second": 6452.947 | |
| }, | |
| { | |
| "epoch": 2.744945101097978, | |
| "grad_norm": 0.9289687275886536, | |
| "learning_rate": 8.913096259515835e-07, | |
| "loss": 0.464, | |
| "num_input_tokens_seen": 64234984, | |
| "step": 5720, | |
| "train_runtime": 9954.6483, | |
| "train_tokens_per_second": 6452.763 | |
| }, | |
| { | |
| "epoch": 2.747345053098938, | |
| "grad_norm": 1.0818783044815063, | |
| "learning_rate": 8.747633115173404e-07, | |
| "loss": 0.4932, | |
| "num_input_tokens_seen": 64290040, | |
| "step": 5725, | |
| "train_runtime": 9963.0154, | |
| "train_tokens_per_second": 6452.87 | |
| }, | |
| { | |
| "epoch": 2.749745005099898, | |
| "grad_norm": 0.7084750533103943, | |
| "learning_rate": 8.583692877083465e-07, | |
| "loss": 0.4344, | |
| "num_input_tokens_seen": 64347256, | |
| "step": 5730, | |
| "train_runtime": 9971.5711, | |
| "train_tokens_per_second": 6453.071 | |
| }, | |
| { | |
| "epoch": 2.752144957100858, | |
| "grad_norm": 0.8155821561813354, | |
| "learning_rate": 8.421276580119236e-07, | |
| "loss": 0.4921, | |
| "num_input_tokens_seen": 64401448, | |
| "step": 5735, | |
| "train_runtime": 9980.1585, | |
| "train_tokens_per_second": 6452.948 | |
| }, | |
| { | |
| "epoch": 2.754544909101818, | |
| "grad_norm": 0.7858007550239563, | |
| "learning_rate": 8.260385249534042e-07, | |
| "loss": 0.4953, | |
| "num_input_tokens_seen": 64457576, | |
| "step": 5740, | |
| "train_runtime": 9988.8703, | |
| "train_tokens_per_second": 6452.94 | |
| }, | |
| { | |
| "epoch": 2.756944861102778, | |
| "grad_norm": 0.8042717576026917, | |
| "learning_rate": 8.101019900954881e-07, | |
| "loss": 0.4595, | |
| "num_input_tokens_seen": 64515152, | |
| "step": 5745, | |
| "train_runtime": 9998.2113, | |
| "train_tokens_per_second": 6452.669 | |
| }, | |
| { | |
| "epoch": 2.7593448131037377, | |
| "grad_norm": 0.61765056848526, | |
| "learning_rate": 7.943181540375988e-07, | |
| "loss": 0.4843, | |
| "num_input_tokens_seen": 64573768, | |
| "step": 5750, | |
| "train_runtime": 10006.8604, | |
| "train_tokens_per_second": 6452.95 | |
| }, | |
| { | |
| "epoch": 2.761744765104698, | |
| "grad_norm": 0.8006062507629395, | |
| "learning_rate": 7.786871164152415e-07, | |
| "loss": 0.4595, | |
| "num_input_tokens_seen": 64626520, | |
| "step": 5755, | |
| "train_runtime": 10014.3267, | |
| "train_tokens_per_second": 6453.406 | |
| }, | |
| { | |
| "epoch": 2.764144717105658, | |
| "grad_norm": 0.7694302797317505, | |
| "learning_rate": 7.632089758993932e-07, | |
| "loss": 0.4565, | |
| "num_input_tokens_seen": 64683224, | |
| "step": 5760, | |
| "train_runtime": 10022.5457, | |
| "train_tokens_per_second": 6453.772 | |
| }, | |
| { | |
| "epoch": 2.766544669106618, | |
| "grad_norm": 0.7269204258918762, | |
| "learning_rate": 7.478838301958502e-07, | |
| "loss": 0.4728, | |
| "num_input_tokens_seen": 64738056, | |
| "step": 5765, | |
| "train_runtime": 10030.9759, | |
| "train_tokens_per_second": 6453.814 | |
| }, | |
| { | |
| "epoch": 2.768944621107578, | |
| "grad_norm": 0.8213253021240234, | |
| "learning_rate": 7.327117760446478e-07, | |
| "loss": 0.4835, | |
| "num_input_tokens_seen": 64790592, | |
| "step": 5770, | |
| "train_runtime": 10039.0056, | |
| "train_tokens_per_second": 6453.885 | |
| }, | |
| { | |
| "epoch": 2.771344573108538, | |
| "grad_norm": 0.6208813190460205, | |
| "learning_rate": 7.17692909219414e-07, | |
| "loss": 0.4922, | |
| "num_input_tokens_seen": 64844640, | |
| "step": 5775, | |
| "train_runtime": 10047.9962, | |
| "train_tokens_per_second": 6453.49 | |
| }, | |
| { | |
| "epoch": 2.773744525109498, | |
| "grad_norm": 0.7945714592933655, | |
| "learning_rate": 7.028273245267947e-07, | |
| "loss": 0.4473, | |
| "num_input_tokens_seen": 64903320, | |
| "step": 5780, | |
| "train_runtime": 10056.9037, | |
| "train_tokens_per_second": 6453.609 | |
| }, | |
| { | |
| "epoch": 2.7761444771104578, | |
| "grad_norm": 0.6964590549468994, | |
| "learning_rate": 6.881151158058263e-07, | |
| "loss": 0.5196, | |
| "num_input_tokens_seen": 64963432, | |
| "step": 5785, | |
| "train_runtime": 10066.3751, | |
| "train_tokens_per_second": 6453.508 | |
| }, | |
| { | |
| "epoch": 2.7785444291114176, | |
| "grad_norm": 0.7940050959587097, | |
| "learning_rate": 6.735563759273783e-07, | |
| "loss": 0.4862, | |
| "num_input_tokens_seen": 65020920, | |
| "step": 5790, | |
| "train_runtime": 10074.8972, | |
| "train_tokens_per_second": 6453.755 | |
| }, | |
| { | |
| "epoch": 2.780944381112378, | |
| "grad_norm": 0.7207697033882141, | |
| "learning_rate": 6.591511967935282e-07, | |
| "loss": 0.4557, | |
| "num_input_tokens_seen": 65077720, | |
| "step": 5795, | |
| "train_runtime": 10083.2022, | |
| "train_tokens_per_second": 6454.073 | |
| }, | |
| { | |
| "epoch": 2.783344333113338, | |
| "grad_norm": 0.9495781064033508, | |
| "learning_rate": 6.448996693370179e-07, | |
| "loss": 0.4682, | |
| "num_input_tokens_seen": 65133616, | |
| "step": 5800, | |
| "train_runtime": 10092.261, | |
| "train_tokens_per_second": 6453.818 | |
| }, | |
| { | |
| "epoch": 2.7857442851142977, | |
| "grad_norm": 0.8136801719665527, | |
| "learning_rate": 6.308018835206541e-07, | |
| "loss": 0.4646, | |
| "num_input_tokens_seen": 65187840, | |
| "step": 5805, | |
| "train_runtime": 10100.5435, | |
| "train_tokens_per_second": 6453.894 | |
| }, | |
| { | |
| "epoch": 2.7881442371152576, | |
| "grad_norm": 0.6333021521568298, | |
| "learning_rate": 6.168579283367476e-07, | |
| "loss": 0.472, | |
| "num_input_tokens_seen": 65240368, | |
| "step": 5810, | |
| "train_runtime": 10108.7592, | |
| "train_tokens_per_second": 6453.845 | |
| }, | |
| { | |
| "epoch": 2.7905441891162175, | |
| "grad_norm": 1.0317847728729248, | |
| "learning_rate": 6.030678918065552e-07, | |
| "loss": 0.4831, | |
| "num_input_tokens_seen": 65295184, | |
| "step": 5815, | |
| "train_runtime": 10117.6223, | |
| "train_tokens_per_second": 6453.61 | |
| }, | |
| { | |
| "epoch": 2.792944141117178, | |
| "grad_norm": 1.2926782369613647, | |
| "learning_rate": 5.894318609797222e-07, | |
| "loss": 0.4951, | |
| "num_input_tokens_seen": 65351248, | |
| "step": 5820, | |
| "train_runtime": 10125.5866, | |
| "train_tokens_per_second": 6454.07 | |
| }, | |
| { | |
| "epoch": 2.7953440931181377, | |
| "grad_norm": 0.8632203936576843, | |
| "learning_rate": 5.759499219337328e-07, | |
| "loss": 0.4852, | |
| "num_input_tokens_seen": 65405976, | |
| "step": 5825, | |
| "train_runtime": 10133.4185, | |
| "train_tokens_per_second": 6454.483 | |
| }, | |
| { | |
| "epoch": 2.7977440451190976, | |
| "grad_norm": 0.8666356801986694, | |
| "learning_rate": 5.626221597733655e-07, | |
| "loss": 0.4505, | |
| "num_input_tokens_seen": 65466136, | |
| "step": 5830, | |
| "train_runtime": 10141.883, | |
| "train_tokens_per_second": 6455.028 | |
| }, | |
| { | |
| "epoch": 2.8001439971200575, | |
| "grad_norm": 0.894623875617981, | |
| "learning_rate": 5.494486586301528e-07, | |
| "loss": 0.5448, | |
| "num_input_tokens_seen": 65518496, | |
| "step": 5835, | |
| "train_runtime": 10149.8014, | |
| "train_tokens_per_second": 6455.151 | |
| }, | |
| { | |
| "epoch": 2.8025439491210173, | |
| "grad_norm": 0.8759870529174805, | |
| "learning_rate": 5.364295016618643e-07, | |
| "loss": 0.4865, | |
| "num_input_tokens_seen": 65577616, | |
| "step": 5840, | |
| "train_runtime": 10157.9244, | |
| "train_tokens_per_second": 6455.809 | |
| }, | |
| { | |
| "epoch": 2.8049439011219777, | |
| "grad_norm": 0.7551533579826355, | |
| "learning_rate": 5.235647710519626e-07, | |
| "loss": 0.4664, | |
| "num_input_tokens_seen": 65634592, | |
| "step": 5845, | |
| "train_runtime": 10166.5957, | |
| "train_tokens_per_second": 6455.907 | |
| }, | |
| { | |
| "epoch": 2.8073438531229375, | |
| "grad_norm": 0.7756850719451904, | |
| "learning_rate": 5.108545480090931e-07, | |
| "loss": 0.4649, | |
| "num_input_tokens_seen": 65691480, | |
| "step": 5850, | |
| "train_runtime": 10174.9677, | |
| "train_tokens_per_second": 6456.186 | |
| }, | |
| { | |
| "epoch": 2.8097438051238974, | |
| "grad_norm": 0.6903165578842163, | |
| "learning_rate": 4.982989127665816e-07, | |
| "loss": 0.4969, | |
| "num_input_tokens_seen": 65745568, | |
| "step": 5855, | |
| "train_runtime": 10183.3283, | |
| "train_tokens_per_second": 6456.196 | |
| }, | |
| { | |
| "epoch": 2.8121437571248578, | |
| "grad_norm": 0.7350341081619263, | |
| "learning_rate": 4.858979445819089e-07, | |
| "loss": 0.4742, | |
| "num_input_tokens_seen": 65799784, | |
| "step": 5860, | |
| "train_runtime": 10190.9666, | |
| "train_tokens_per_second": 6456.677 | |
| }, | |
| { | |
| "epoch": 2.8145437091258176, | |
| "grad_norm": 0.7910242676734924, | |
| "learning_rate": 4.7365172173621796e-07, | |
| "loss": 0.4561, | |
| "num_input_tokens_seen": 65856528, | |
| "step": 5865, | |
| "train_runtime": 10199.5186, | |
| "train_tokens_per_second": 6456.827 | |
| }, | |
| { | |
| "epoch": 2.8169436611267775, | |
| "grad_norm": 0.8002808094024658, | |
| "learning_rate": 4.615603215338299e-07, | |
| "loss": 0.4425, | |
| "num_input_tokens_seen": 65911144, | |
| "step": 5870, | |
| "train_runtime": 10208.0985, | |
| "train_tokens_per_second": 6456.75 | |
| }, | |
| { | |
| "epoch": 2.8193436131277374, | |
| "grad_norm": 0.6876586079597473, | |
| "learning_rate": 4.496238203017422e-07, | |
| "loss": 0.4873, | |
| "num_input_tokens_seen": 65971080, | |
| "step": 5875, | |
| "train_runtime": 10216.3273, | |
| "train_tokens_per_second": 6457.416 | |
| }, | |
| { | |
| "epoch": 2.8217435651286973, | |
| "grad_norm": 0.65282142162323, | |
| "learning_rate": 4.3784229338915406e-07, | |
| "loss": 0.4867, | |
| "num_input_tokens_seen": 66026344, | |
| "step": 5880, | |
| "train_runtime": 10224.7475, | |
| "train_tokens_per_second": 6457.504 | |
| }, | |
| { | |
| "epoch": 2.8241435171296576, | |
| "grad_norm": 0.6614166498184204, | |
| "learning_rate": 4.262158151669804e-07, | |
| "loss": 0.4813, | |
| "num_input_tokens_seen": 66082360, | |
| "step": 5885, | |
| "train_runtime": 10233.2091, | |
| "train_tokens_per_second": 6457.638 | |
| }, | |
| { | |
| "epoch": 2.8265434691306175, | |
| "grad_norm": 0.7193440794944763, | |
| "learning_rate": 4.147444590274052e-07, | |
| "loss": 0.4968, | |
| "num_input_tokens_seen": 66134928, | |
| "step": 5890, | |
| "train_runtime": 10241.3234, | |
| "train_tokens_per_second": 6457.654 | |
| }, | |
| { | |
| "epoch": 2.8289434211315774, | |
| "grad_norm": 0.7374788522720337, | |
| "learning_rate": 4.0342829738339583e-07, | |
| "loss": 0.4744, | |
| "num_input_tokens_seen": 66190032, | |
| "step": 5895, | |
| "train_runtime": 10249.2265, | |
| "train_tokens_per_second": 6458.051 | |
| }, | |
| { | |
| "epoch": 2.8313433731325373, | |
| "grad_norm": 0.9320788979530334, | |
| "learning_rate": 3.922674016682504e-07, | |
| "loss": 0.4819, | |
| "num_input_tokens_seen": 66244312, | |
| "step": 5900, | |
| "train_runtime": 10256.9977, | |
| "train_tokens_per_second": 6458.45 | |
| }, | |
| { | |
| "epoch": 2.833743325133497, | |
| "grad_norm": 0.526983916759491, | |
| "learning_rate": 3.812618423351622e-07, | |
| "loss": 0.4424, | |
| "num_input_tokens_seen": 66305552, | |
| "step": 5905, | |
| "train_runtime": 10265.6243, | |
| "train_tokens_per_second": 6458.989 | |
| }, | |
| { | |
| "epoch": 2.8361432771344575, | |
| "grad_norm": 0.9565876722335815, | |
| "learning_rate": 3.704116888567505e-07, | |
| "loss": 0.4926, | |
| "num_input_tokens_seen": 66358648, | |
| "step": 5910, | |
| "train_runtime": 10273.7771, | |
| "train_tokens_per_second": 6459.031 | |
| }, | |
| { | |
| "epoch": 2.8385432291354173, | |
| "grad_norm": 0.9867433905601501, | |
| "learning_rate": 3.597170097246416e-07, | |
| "loss": 0.4706, | |
| "num_input_tokens_seen": 66417384, | |
| "step": 5915, | |
| "train_runtime": 10283.2277, | |
| "train_tokens_per_second": 6458.807 | |
| }, | |
| { | |
| "epoch": 2.8409431811363772, | |
| "grad_norm": 0.6663256883621216, | |
| "learning_rate": 3.4917787244902743e-07, | |
| "loss": 0.4945, | |
| "num_input_tokens_seen": 66477648, | |
| "step": 5920, | |
| "train_runtime": 10293.4798, | |
| "train_tokens_per_second": 6458.229 | |
| }, | |
| { | |
| "epoch": 2.843343133137337, | |
| "grad_norm": 0.621631920337677, | |
| "learning_rate": 3.387943435582436e-07, | |
| "loss": 0.495, | |
| "num_input_tokens_seen": 66532464, | |
| "step": 5925, | |
| "train_runtime": 10302.8802, | |
| "train_tokens_per_second": 6457.657 | |
| }, | |
| { | |
| "epoch": 2.845743085138297, | |
| "grad_norm": 0.638155460357666, | |
| "learning_rate": 3.285664885983447e-07, | |
| "loss": 0.4263, | |
| "num_input_tokens_seen": 66589296, | |
| "step": 5930, | |
| "train_runtime": 10312.6945, | |
| "train_tokens_per_second": 6457.022 | |
| }, | |
| { | |
| "epoch": 2.8481430371392573, | |
| "grad_norm": 0.7790648341178894, | |
| "learning_rate": 3.184943721326938e-07, | |
| "loss": 0.4473, | |
| "num_input_tokens_seen": 66648144, | |
| "step": 5935, | |
| "train_runtime": 10322.4204, | |
| "train_tokens_per_second": 6456.639 | |
| }, | |
| { | |
| "epoch": 2.850542989140217, | |
| "grad_norm": 0.9435281753540039, | |
| "learning_rate": 3.0857805774155423e-07, | |
| "loss": 0.4773, | |
| "num_input_tokens_seen": 66702560, | |
| "step": 5940, | |
| "train_runtime": 10331.5732, | |
| "train_tokens_per_second": 6456.186 | |
| }, | |
| { | |
| "epoch": 2.852942941141177, | |
| "grad_norm": 0.7527910470962524, | |
| "learning_rate": 2.988176080216898e-07, | |
| "loss": 0.5113, | |
| "num_input_tokens_seen": 66757360, | |
| "step": 5945, | |
| "train_runtime": 10341.338, | |
| "train_tokens_per_second": 6455.389 | |
| }, | |
| { | |
| "epoch": 2.8553428931421374, | |
| "grad_norm": 0.949381411075592, | |
| "learning_rate": 2.892130845859653e-07, | |
| "loss": 0.5225, | |
| "num_input_tokens_seen": 66813080, | |
| "step": 5950, | |
| "train_runtime": 10351.5482, | |
| "train_tokens_per_second": 6454.405 | |
| }, | |
| { | |
| "epoch": 2.8577428451430973, | |
| "grad_norm": 0.682515561580658, | |
| "learning_rate": 2.7976454806296906e-07, | |
| "loss": 0.4474, | |
| "num_input_tokens_seen": 66870744, | |
| "step": 5955, | |
| "train_runtime": 10361.7884, | |
| "train_tokens_per_second": 6453.591 | |
| }, | |
| { | |
| "epoch": 2.860142797144057, | |
| "grad_norm": 0.8949669599533081, | |
| "learning_rate": 2.7047205809660746e-07, | |
| "loss": 0.4552, | |
| "num_input_tokens_seen": 66926176, | |
| "step": 5960, | |
| "train_runtime": 10372.0384, | |
| "train_tokens_per_second": 6452.558 | |
| }, | |
| { | |
| "epoch": 2.862542749145017, | |
| "grad_norm": 0.672732949256897, | |
| "learning_rate": 2.6133567334575e-07, | |
| "loss": 0.461, | |
| "num_input_tokens_seen": 66982736, | |
| "step": 5965, | |
| "train_runtime": 10381.6755, | |
| "train_tokens_per_second": 6452.016 | |
| }, | |
| { | |
| "epoch": 2.864942701145977, | |
| "grad_norm": 0.7349382638931274, | |
| "learning_rate": 2.523554514838544e-07, | |
| "loss": 0.4649, | |
| "num_input_tokens_seen": 67040256, | |
| "step": 5970, | |
| "train_runtime": 10391.7883, | |
| "train_tokens_per_second": 6451.272 | |
| }, | |
| { | |
| "epoch": 2.8673426531469373, | |
| "grad_norm": 0.7584925293922424, | |
| "learning_rate": 2.435314491985974e-07, | |
| "loss": 0.5227, | |
| "num_input_tokens_seen": 67098776, | |
| "step": 5975, | |
| "train_runtime": 10401.6032, | |
| "train_tokens_per_second": 6450.811 | |
| }, | |
| { | |
| "epoch": 2.869742605147897, | |
| "grad_norm": 0.8414415717124939, | |
| "learning_rate": 2.3486372219151675e-07, | |
| "loss": 0.4989, | |
| "num_input_tokens_seen": 67151768, | |
| "step": 5980, | |
| "train_runtime": 10411.5952, | |
| "train_tokens_per_second": 6449.71 | |
| }, | |
| { | |
| "epoch": 2.872142557148857, | |
| "grad_norm": 0.6477630734443665, | |
| "learning_rate": 2.263523251776617e-07, | |
| "loss": 0.4962, | |
| "num_input_tokens_seen": 67210600, | |
| "step": 5985, | |
| "train_runtime": 10422.1011, | |
| "train_tokens_per_second": 6448.853 | |
| }, | |
| { | |
| "epoch": 2.874542509149817, | |
| "grad_norm": 1.1014198064804077, | |
| "learning_rate": 2.1799731188525407e-07, | |
| "loss": 0.5162, | |
| "num_input_tokens_seen": 67263744, | |
| "step": 5990, | |
| "train_runtime": 10431.8385, | |
| "train_tokens_per_second": 6447.928 | |
| }, | |
| { | |
| "epoch": 2.876942461150777, | |
| "grad_norm": 0.9391694664955139, | |
| "learning_rate": 2.0979873505533876e-07, | |
| "loss": 0.449, | |
| "num_input_tokens_seen": 67316560, | |
| "step": 5995, | |
| "train_runtime": 10441.9194, | |
| "train_tokens_per_second": 6446.761 | |
| }, | |
| { | |
| "epoch": 2.879342413151737, | |
| "grad_norm": 0.8007956147193909, | |
| "learning_rate": 2.0175664644145053e-07, | |
| "loss": 0.4849, | |
| "num_input_tokens_seen": 67373408, | |
| "step": 6000, | |
| "train_runtime": 10452.6728, | |
| "train_tokens_per_second": 6445.568 | |
| }, | |
| { | |
| "epoch": 2.881742365152697, | |
| "grad_norm": 0.7711721658706665, | |
| "learning_rate": 1.9387109680930327e-07, | |
| "loss": 0.4332, | |
| "num_input_tokens_seen": 67428800, | |
| "step": 6005, | |
| "train_runtime": 10463.242, | |
| "train_tokens_per_second": 6444.351 | |
| }, | |
| { | |
| "epoch": 2.884142317153657, | |
| "grad_norm": 0.8150792121887207, | |
| "learning_rate": 1.8614213593644846e-07, | |
| "loss": 0.4459, | |
| "num_input_tokens_seen": 67490440, | |
| "step": 6010, | |
| "train_runtime": 10473.7424, | |
| "train_tokens_per_second": 6443.775 | |
| }, | |
| { | |
| "epoch": 2.8865422691546168, | |
| "grad_norm": 0.7124377489089966, | |
| "learning_rate": 1.7856981261197002e-07, | |
| "loss": 0.4779, | |
| "num_input_tokens_seen": 67545608, | |
| "step": 6015, | |
| "train_runtime": 10483.2085, | |
| "train_tokens_per_second": 6443.219 | |
| }, | |
| { | |
| "epoch": 2.8889422211555766, | |
| "grad_norm": 0.8673171997070312, | |
| "learning_rate": 1.7115417463618722e-07, | |
| "loss": 0.4598, | |
| "num_input_tokens_seen": 67595400, | |
| "step": 6020, | |
| "train_runtime": 10492.2481, | |
| "train_tokens_per_second": 6442.413 | |
| }, | |
| { | |
| "epoch": 2.891342173156537, | |
| "grad_norm": 0.7837307453155518, | |
| "learning_rate": 1.638952688203327e-07, | |
| "loss": 0.4797, | |
| "num_input_tokens_seen": 67646720, | |
| "step": 6025, | |
| "train_runtime": 10501.2034, | |
| "train_tokens_per_second": 6441.806 | |
| }, | |
| { | |
| "epoch": 2.893742125157497, | |
| "grad_norm": 0.6940703392028809, | |
| "learning_rate": 1.567931409862694e-07, | |
| "loss": 0.4915, | |
| "num_input_tokens_seen": 67700752, | |
| "step": 6030, | |
| "train_runtime": 10511.0778, | |
| "train_tokens_per_second": 6440.895 | |
| }, | |
| { | |
| "epoch": 2.8961420771584567, | |
| "grad_norm": 0.8700549602508545, | |
| "learning_rate": 1.4984783596619922e-07, | |
| "loss": 0.4946, | |
| "num_input_tokens_seen": 67755144, | |
| "step": 6035, | |
| "train_runtime": 10520.7321, | |
| "train_tokens_per_second": 6440.155 | |
| }, | |
| { | |
| "epoch": 2.898542029159417, | |
| "grad_norm": 0.7011561989784241, | |
| "learning_rate": 1.430593976023825e-07, | |
| "loss": 0.4919, | |
| "num_input_tokens_seen": 67814680, | |
| "step": 6040, | |
| "train_runtime": 10531.5769, | |
| "train_tokens_per_second": 6439.176 | |
| }, | |
| { | |
| "epoch": 2.900941981160377, | |
| "grad_norm": 0.893417477607727, | |
| "learning_rate": 1.3642786874685233e-07, | |
| "loss": 0.5055, | |
| "num_input_tokens_seen": 67867648, | |
| "step": 6045, | |
| "train_runtime": 10541.6146, | |
| "train_tokens_per_second": 6438.07 | |
| }, | |
| { | |
| "epoch": 2.903341933161337, | |
| "grad_norm": 0.7926166653633118, | |
| "learning_rate": 1.299532912611534e-07, | |
| "loss": 0.459, | |
| "num_input_tokens_seen": 67922728, | |
| "step": 6050, | |
| "train_runtime": 10550.8628, | |
| "train_tokens_per_second": 6437.647 | |
| }, | |
| { | |
| "epoch": 2.9057418851622967, | |
| "grad_norm": 0.7883651852607727, | |
| "learning_rate": 1.2363570601608143e-07, | |
| "loss": 0.4636, | |
| "num_input_tokens_seen": 67975200, | |
| "step": 6055, | |
| "train_runtime": 10560.1447, | |
| "train_tokens_per_second": 6436.957 | |
| }, | |
| { | |
| "epoch": 2.9081418371632566, | |
| "grad_norm": 0.9356446266174316, | |
| "learning_rate": 1.1747515289140254e-07, | |
| "loss": 0.4612, | |
| "num_input_tokens_seen": 68029864, | |
| "step": 6060, | |
| "train_runtime": 10570.9284, | |
| "train_tokens_per_second": 6435.562 | |
| }, | |
| { | |
| "epoch": 2.910541789164217, | |
| "grad_norm": 1.2164058685302734, | |
| "learning_rate": 1.1147167077562859e-07, | |
| "loss": 0.5042, | |
| "num_input_tokens_seen": 68079824, | |
| "step": 6065, | |
| "train_runtime": 10580.6679, | |
| "train_tokens_per_second": 6434.36 | |
| }, | |
| { | |
| "epoch": 2.912941741165177, | |
| "grad_norm": 0.9457964301109314, | |
| "learning_rate": 1.0562529756576179e-07, | |
| "loss": 0.4287, | |
| "num_input_tokens_seen": 68136632, | |
| "step": 6070, | |
| "train_runtime": 10591.0019, | |
| "train_tokens_per_second": 6433.445 | |
| }, | |
| { | |
| "epoch": 2.9153416931661367, | |
| "grad_norm": 0.7782816290855408, | |
| "learning_rate": 9.993607016704209e-08, | |
| "loss": 0.4994, | |
| "num_input_tokens_seen": 68192816, | |
| "step": 6075, | |
| "train_runtime": 10601.2725, | |
| "train_tokens_per_second": 6432.512 | |
| }, | |
| { | |
| "epoch": 2.9177416451670966, | |
| "grad_norm": 0.7655016183853149, | |
| "learning_rate": 9.440402449274188e-08, | |
| "loss": 0.5164, | |
| "num_input_tokens_seen": 68244208, | |
| "step": 6080, | |
| "train_runtime": 10610.674, | |
| "train_tokens_per_second": 6431.656 | |
| }, | |
| { | |
| "epoch": 2.9201415971680564, | |
| "grad_norm": 0.8917096257209778, | |
| "learning_rate": 8.902919546390776e-08, | |
| "loss": 0.4609, | |
| "num_input_tokens_seen": 68300352, | |
| "step": 6085, | |
| "train_runtime": 10620.9066, | |
| "train_tokens_per_second": 6430.746 | |
| }, | |
| { | |
| "epoch": 2.9225415491690168, | |
| "grad_norm": 0.940250039100647, | |
| "learning_rate": 8.381161700916906e-08, | |
| "loss": 0.5296, | |
| "num_input_tokens_seen": 68350392, | |
| "step": 6090, | |
| "train_runtime": 10630.1557, | |
| "train_tokens_per_second": 6429.858 | |
| }, | |
| { | |
| "epoch": 2.9249415011699766, | |
| "grad_norm": 0.8829488158226013, | |
| "learning_rate": 7.87513220644992e-08, | |
| "loss": 0.5012, | |
| "num_input_tokens_seen": 68405152, | |
| "step": 6095, | |
| "train_runtime": 10639.9288, | |
| "train_tokens_per_second": 6429.099 | |
| }, | |
| { | |
| "epoch": 2.9273414531709365, | |
| "grad_norm": 0.9745586514472961, | |
| "learning_rate": 7.384834257302687e-08, | |
| "loss": 0.5022, | |
| "num_input_tokens_seen": 68461336, | |
| "step": 6100, | |
| "train_runtime": 10650.3632, | |
| "train_tokens_per_second": 6428.075 | |
| }, | |
| { | |
| "epoch": 2.9297414051718964, | |
| "grad_norm": 0.9082819819450378, | |
| "learning_rate": 6.910270948482789e-08, | |
| "loss": 0.477, | |
| "num_input_tokens_seen": 68512936, | |
| "step": 6105, | |
| "train_runtime": 10660.0699, | |
| "train_tokens_per_second": 6427.063 | |
| }, | |
| { | |
| "epoch": 2.9321413571728563, | |
| "grad_norm": 0.831038773059845, | |
| "learning_rate": 6.451445275671986e-08, | |
| "loss": 0.4894, | |
| "num_input_tokens_seen": 68569728, | |
| "step": 6110, | |
| "train_runtime": 10670.2152, | |
| "train_tokens_per_second": 6426.274 | |
| }, | |
| { | |
| "epoch": 2.9345413091738166, | |
| "grad_norm": 0.7757657170295715, | |
| "learning_rate": 6.008360135208724e-08, | |
| "loss": 0.4685, | |
| "num_input_tokens_seen": 68623976, | |
| "step": 6115, | |
| "train_runtime": 10680.1954, | |
| "train_tokens_per_second": 6425.348 | |
| }, | |
| { | |
| "epoch": 2.9369412611747765, | |
| "grad_norm": 0.8630353212356567, | |
| "learning_rate": 5.581018324069543e-08, | |
| "loss": 0.4904, | |
| "num_input_tokens_seen": 68679096, | |
| "step": 6120, | |
| "train_runtime": 10691.1399, | |
| "train_tokens_per_second": 6423.926 | |
| }, | |
| { | |
| "epoch": 2.9393412131757364, | |
| "grad_norm": 0.881776750087738, | |
| "learning_rate": 5.169422539850477e-08, | |
| "loss": 0.4671, | |
| "num_input_tokens_seen": 68734576, | |
| "step": 6125, | |
| "train_runtime": 10700.7437, | |
| "train_tokens_per_second": 6423.346 | |
| }, | |
| { | |
| "epoch": 2.9417411651766967, | |
| "grad_norm": 0.8964380025863647, | |
| "learning_rate": 4.773575380750961e-08, | |
| "loss": 0.469, | |
| "num_input_tokens_seen": 68793128, | |
| "step": 6130, | |
| "train_runtime": 10711.0036, | |
| "train_tokens_per_second": 6422.659 | |
| }, | |
| { | |
| "epoch": 2.9441411171776566, | |
| "grad_norm": 0.8133379220962524, | |
| "learning_rate": 4.393479345557727e-08, | |
| "loss": 0.5031, | |
| "num_input_tokens_seen": 68847592, | |
| "step": 6135, | |
| "train_runtime": 10721.1224, | |
| "train_tokens_per_second": 6421.678 | |
| }, | |
| { | |
| "epoch": 2.9465410691786165, | |
| "grad_norm": 0.6794693470001221, | |
| "learning_rate": 4.0291368336276e-08, | |
| "loss": 0.4709, | |
| "num_input_tokens_seen": 68905096, | |
| "step": 6140, | |
| "train_runtime": 10731.8838, | |
| "train_tokens_per_second": 6420.597 | |
| }, | |
| { | |
| "epoch": 2.9489410211795763, | |
| "grad_norm": 0.8234326839447021, | |
| "learning_rate": 3.6805501448744505e-08, | |
| "loss": 0.4638, | |
| "num_input_tokens_seen": 68960224, | |
| "step": 6145, | |
| "train_runtime": 10741.5942, | |
| "train_tokens_per_second": 6419.924 | |
| }, | |
| { | |
| "epoch": 2.9513409731805362, | |
| "grad_norm": 0.8420405387878418, | |
| "learning_rate": 3.347721479751986e-08, | |
| "loss": 0.5143, | |
| "num_input_tokens_seen": 69014200, | |
| "step": 6150, | |
| "train_runtime": 10751.3552, | |
| "train_tokens_per_second": 6419.116 | |
| }, | |
| { | |
| "epoch": 2.9537409251814966, | |
| "grad_norm": 0.876466691493988, | |
| "learning_rate": 3.0306529392426507e-08, | |
| "loss": 0.4258, | |
| "num_input_tokens_seen": 69071584, | |
| "step": 6155, | |
| "train_runtime": 10761.6029, | |
| "train_tokens_per_second": 6418.336 | |
| }, | |
| { | |
| "epoch": 2.9561408771824564, | |
| "grad_norm": 0.8103510737419128, | |
| "learning_rate": 2.72934652484208e-08, | |
| "loss": 0.4785, | |
| "num_input_tokens_seen": 69125824, | |
| "step": 6160, | |
| "train_runtime": 10771.3537, | |
| "train_tokens_per_second": 6417.561 | |
| }, | |
| { | |
| "epoch": 2.9585408291834163, | |
| "grad_norm": 0.9023430347442627, | |
| "learning_rate": 2.4438041385480003e-08, | |
| "loss": 0.5019, | |
| "num_input_tokens_seen": 69183992, | |
| "step": 6165, | |
| "train_runtime": 10782.2651, | |
| "train_tokens_per_second": 6416.462 | |
| }, | |
| { | |
| "epoch": 2.960940781184376, | |
| "grad_norm": 0.9007648825645447, | |
| "learning_rate": 2.174027582848015e-08, | |
| "loss": 0.4764, | |
| "num_input_tokens_seen": 69243264, | |
| "step": 6170, | |
| "train_runtime": 10792.8565, | |
| "train_tokens_per_second": 6415.657 | |
| }, | |
| { | |
| "epoch": 2.963340733185336, | |
| "grad_norm": 0.9024353623390198, | |
| "learning_rate": 1.92001856070656e-08, | |
| "loss": 0.499, | |
| "num_input_tokens_seen": 69299200, | |
| "step": 6175, | |
| "train_runtime": 10803.5555, | |
| "train_tokens_per_second": 6414.481 | |
| }, | |
| { | |
| "epoch": 2.9657406851862964, | |
| "grad_norm": 0.7554855942726135, | |
| "learning_rate": 1.6817786755568553e-08, | |
| "loss": 0.4397, | |
| "num_input_tokens_seen": 69352824, | |
| "step": 6180, | |
| "train_runtime": 10812.9366, | |
| "train_tokens_per_second": 6413.875 | |
| }, | |
| { | |
| "epoch": 2.9681406371872563, | |
| "grad_norm": 0.7788093686103821, | |
| "learning_rate": 1.4593094312889688e-08, | |
| "loss": 0.452, | |
| "num_input_tokens_seen": 69415024, | |
| "step": 6185, | |
| "train_runtime": 10823.0536, | |
| "train_tokens_per_second": 6413.627 | |
| }, | |
| { | |
| "epoch": 2.970540589188216, | |
| "grad_norm": 0.7968340516090393, | |
| "learning_rate": 1.2526122322401024e-08, | |
| "loss": 0.4915, | |
| "num_input_tokens_seen": 69471512, | |
| "step": 6190, | |
| "train_runtime": 10832.747, | |
| "train_tokens_per_second": 6413.102 | |
| }, | |
| { | |
| "epoch": 2.972940541189176, | |
| "grad_norm": 0.7601198554039001, | |
| "learning_rate": 1.0616883831873758e-08, | |
| "loss": 0.4443, | |
| "num_input_tokens_seen": 69527768, | |
| "step": 6195, | |
| "train_runtime": 10842.6627, | |
| "train_tokens_per_second": 6412.426 | |
| }, | |
| { | |
| "epoch": 2.975340493190136, | |
| "grad_norm": 0.8078719973564148, | |
| "learning_rate": 8.86539089338112e-09, | |
| "loss": 0.4387, | |
| "num_input_tokens_seen": 69583024, | |
| "step": 6200, | |
| "train_runtime": 10852.4744, | |
| "train_tokens_per_second": 6411.72 | |
| }, | |
| { | |
| "epoch": 2.9777404451910963, | |
| "grad_norm": 1.0166022777557373, | |
| "learning_rate": 7.271654563223429e-09, | |
| "loss": 0.4519, | |
| "num_input_tokens_seen": 69639080, | |
| "step": 6205, | |
| "train_runtime": 10863.2159, | |
| "train_tokens_per_second": 6410.54 | |
| }, | |
| { | |
| "epoch": 2.980140397192056, | |
| "grad_norm": 1.051282286643982, | |
| "learning_rate": 5.835684901869809e-09, | |
| "loss": 0.5355, | |
| "num_input_tokens_seen": 69695440, | |
| "step": 6210, | |
| "train_runtime": 10873.6609, | |
| "train_tokens_per_second": 6409.565 | |
| }, | |
| { | |
| "epoch": 2.982540349193016, | |
| "grad_norm": 0.9155645966529846, | |
| "learning_rate": 4.5574909738804735e-09, | |
| "loss": 0.4775, | |
| "num_input_tokens_seen": 69752488, | |
| "step": 6215, | |
| "train_runtime": 10884.1415, | |
| "train_tokens_per_second": 6408.635 | |
| }, | |
| { | |
| "epoch": 2.9849403011939764, | |
| "grad_norm": 0.8648121356964111, | |
| "learning_rate": 3.4370808478595417e-09, | |
| "loss": 0.4861, | |
| "num_input_tokens_seen": 69804712, | |
| "step": 6220, | |
| "train_runtime": 10894.0291, | |
| "train_tokens_per_second": 6407.612 | |
| }, | |
| { | |
| "epoch": 2.9873402531949362, | |
| "grad_norm": 0.9490159153938293, | |
| "learning_rate": 2.474461596396749e-09, | |
| "loss": 0.4641, | |
| "num_input_tokens_seen": 69863384, | |
| "step": 6225, | |
| "train_runtime": 10903.095, | |
| "train_tokens_per_second": 6407.665 | |
| }, | |
| { | |
| "epoch": 2.989740205195896, | |
| "grad_norm": 0.823014497756958, | |
| "learning_rate": 1.6696392960341423e-09, | |
| "loss": 0.4785, | |
| "num_input_tokens_seen": 69920712, | |
| "step": 6230, | |
| "train_runtime": 10911.9258, | |
| "train_tokens_per_second": 6407.733 | |
| }, | |
| { | |
| "epoch": 2.992140157196856, | |
| "grad_norm": 0.9870671629905701, | |
| "learning_rate": 1.022619027207794e-09, | |
| "loss": 0.4529, | |
| "num_input_tokens_seen": 69978976, | |
| "step": 6235, | |
| "train_runtime": 10920.4005, | |
| "train_tokens_per_second": 6408.096 | |
| }, | |
| { | |
| "epoch": 2.994540109197816, | |
| "grad_norm": 0.8132453560829163, | |
| "learning_rate": 5.334048742394737e-10, | |
| "loss": 0.4621, | |
| "num_input_tokens_seen": 70037816, | |
| "step": 6240, | |
| "train_runtime": 10929.3119, | |
| "train_tokens_per_second": 6408.255 | |
| }, | |
| { | |
| "epoch": 2.996940061198776, | |
| "grad_norm": 0.9090087413787842, | |
| "learning_rate": 2.0199992529501554e-10, | |
| "loss": 0.4757, | |
| "num_input_tokens_seen": 70098000, | |
| "step": 6245, | |
| "train_runtime": 10938.0514, | |
| "train_tokens_per_second": 6408.637 | |
| }, | |
| { | |
| "epoch": 2.999340013199736, | |
| "grad_norm": 0.8769118189811707, | |
| "learning_rate": 2.8406272370440357e-11, | |
| "loss": 0.463, | |
| "num_input_tokens_seen": 70153968, | |
| "step": 6250, | |
| "train_runtime": 10946.6798, | |
| "train_tokens_per_second": 6408.698 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "num_input_tokens_seen": 70167528, | |
| "step": 6252, | |
| "total_flos": 3.161046812140241e+18, | |
| "train_loss": 0.5038315440246255, | |
| "train_runtime": 10949.1572, | |
| "train_samples_per_second": 27.399, | |
| "train_steps_per_second": 0.571 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 6252, | |
| "num_input_tokens_seen": 70167528, | |
| "num_train_epochs": 3, | |
| "save_steps": 1500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.161046812140241e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |