| { |
| "best_metric": 0.35549462, |
| "best_model_checkpoint": "/home/ubuntu/output/v3-20250315-011617/checkpoint-1400", |
| "epoch": 0.9408602150537635, |
| "eval_steps": 100, |
| "global_step": 1400, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0006720430107526882, |
| "grad_norm": 6.624470388540664, |
| "learning_rate": 4.4642857142857145e-08, |
| "loss": 0.8636100888252258, |
| "memory(GiB)": 30.88, |
| "step": 1, |
| "token_acc": 0.7959770114942529, |
| "train_speed(iter/s)": 0.075305 |
| }, |
| { |
| "epoch": 0.003360215053763441, |
| "grad_norm": 5.470006090474637, |
| "learning_rate": 2.2321428571428574e-07, |
| "loss": 0.8933483362197876, |
| "memory(GiB)": 34.75, |
| "step": 5, |
| "token_acc": 0.8101460415065335, |
| "train_speed(iter/s)": 0.130654 |
| }, |
| { |
| "epoch": 0.006720430107526882, |
| "grad_norm": 7.203568128298078, |
| "learning_rate": 4.4642857142857147e-07, |
| "loss": 0.8624147415161133, |
| "memory(GiB)": 36.25, |
| "step": 10, |
| "token_acc": 0.8137791870856155, |
| "train_speed(iter/s)": 0.135384 |
| }, |
| { |
| "epoch": 0.010080645161290322, |
| "grad_norm": 6.286014373217196, |
| "learning_rate": 6.696428571428571e-07, |
| "loss": 0.8743539810180664, |
| "memory(GiB)": 36.41, |
| "step": 15, |
| "token_acc": 0.7762515262515263, |
| "train_speed(iter/s)": 0.140929 |
| }, |
| { |
| "epoch": 0.013440860215053764, |
| "grad_norm": 6.507242651795415, |
| "learning_rate": 8.928571428571429e-07, |
| "loss": 0.7428807258605957, |
| "memory(GiB)": 36.41, |
| "step": 20, |
| "token_acc": 0.8280786849905686, |
| "train_speed(iter/s)": 0.148933 |
| }, |
| { |
| "epoch": 0.016801075268817203, |
| "grad_norm": 4.90720329584475, |
| "learning_rate": 1.1160714285714287e-06, |
| "loss": 0.7442412853240967, |
| "memory(GiB)": 36.98, |
| "step": 25, |
| "token_acc": 0.847274158630191, |
| "train_speed(iter/s)": 0.148937 |
| }, |
| { |
| "epoch": 0.020161290322580645, |
| "grad_norm": 2.96004802436193, |
| "learning_rate": 1.3392857142857143e-06, |
| "loss": 0.6135346412658691, |
| "memory(GiB)": 36.98, |
| "step": 30, |
| "token_acc": 0.8366533864541833, |
| "train_speed(iter/s)": 0.151917 |
| }, |
| { |
| "epoch": 0.023521505376344086, |
| "grad_norm": 2.5106205597924416, |
| "learning_rate": 1.5625e-06, |
| "loss": 0.5674124717712402, |
| "memory(GiB)": 36.98, |
| "step": 35, |
| "token_acc": 0.8213552361396304, |
| "train_speed(iter/s)": 0.153037 |
| }, |
| { |
| "epoch": 0.026881720430107527, |
| "grad_norm": 2.1951186534941787, |
| "learning_rate": 1.7857142857142859e-06, |
| "loss": 0.5761374473571778, |
| "memory(GiB)": 36.98, |
| "step": 40, |
| "token_acc": 0.8286891204264599, |
| "train_speed(iter/s)": 0.154552 |
| }, |
| { |
| "epoch": 0.03024193548387097, |
| "grad_norm": 2.133434174676425, |
| "learning_rate": 2.0089285714285715e-06, |
| "loss": 0.5460317611694336, |
| "memory(GiB)": 36.98, |
| "step": 45, |
| "token_acc": 0.7985267034990792, |
| "train_speed(iter/s)": 0.153801 |
| }, |
| { |
| "epoch": 0.033602150537634407, |
| "grad_norm": 1.984334647250237, |
| "learning_rate": 2.2321428571428573e-06, |
| "loss": 0.5067720413208008, |
| "memory(GiB)": 36.98, |
| "step": 50, |
| "token_acc": 0.8156975549619889, |
| "train_speed(iter/s)": 0.154771 |
| }, |
| { |
| "epoch": 0.03696236559139785, |
| "grad_norm": 1.9203518870194811, |
| "learning_rate": 2.455357142857143e-06, |
| "loss": 0.4443554401397705, |
| "memory(GiB)": 36.98, |
| "step": 55, |
| "token_acc": 0.8660345246778507, |
| "train_speed(iter/s)": 0.156385 |
| }, |
| { |
| "epoch": 0.04032258064516129, |
| "grad_norm": 1.8799641787273145, |
| "learning_rate": 2.6785714285714285e-06, |
| "loss": 0.4721069812774658, |
| "memory(GiB)": 36.98, |
| "step": 60, |
| "token_acc": 0.8127896200185357, |
| "train_speed(iter/s)": 0.158928 |
| }, |
| { |
| "epoch": 0.043682795698924734, |
| "grad_norm": 2.013534899867525, |
| "learning_rate": 2.9017857142857148e-06, |
| "loss": 0.45479617118835447, |
| "memory(GiB)": 36.98, |
| "step": 65, |
| "token_acc": 0.8543247344461306, |
| "train_speed(iter/s)": 0.159897 |
| }, |
| { |
| "epoch": 0.04704301075268817, |
| "grad_norm": 1.9289442808657369, |
| "learning_rate": 3.125e-06, |
| "loss": 0.4564558982849121, |
| "memory(GiB)": 36.98, |
| "step": 70, |
| "token_acc": 0.8599615014436959, |
| "train_speed(iter/s)": 0.160729 |
| }, |
| { |
| "epoch": 0.05040322580645161, |
| "grad_norm": 1.647464466961927, |
| "learning_rate": 3.3482142857142855e-06, |
| "loss": 0.434113073348999, |
| "memory(GiB)": 36.98, |
| "step": 75, |
| "token_acc": 0.8652792990142387, |
| "train_speed(iter/s)": 0.161226 |
| }, |
| { |
| "epoch": 0.053763440860215055, |
| "grad_norm": 1.715327717999067, |
| "learning_rate": 3.5714285714285718e-06, |
| "loss": 0.44471006393432616, |
| "memory(GiB)": 36.98, |
| "step": 80, |
| "token_acc": 0.8362068965517241, |
| "train_speed(iter/s)": 0.16071 |
| }, |
| { |
| "epoch": 0.05712365591397849, |
| "grad_norm": 2.014035980278717, |
| "learning_rate": 3.794642857142857e-06, |
| "loss": 0.4432626247406006, |
| "memory(GiB)": 36.98, |
| "step": 85, |
| "token_acc": 0.8698553948832035, |
| "train_speed(iter/s)": 0.161156 |
| }, |
| { |
| "epoch": 0.06048387096774194, |
| "grad_norm": 1.5564040192565838, |
| "learning_rate": 4.017857142857143e-06, |
| "loss": 0.43194828033447263, |
| "memory(GiB)": 36.98, |
| "step": 90, |
| "token_acc": 0.8666666666666667, |
| "train_speed(iter/s)": 0.161467 |
| }, |
| { |
| "epoch": 0.06384408602150538, |
| "grad_norm": 1.61832820761015, |
| "learning_rate": 4.241071428571429e-06, |
| "loss": 0.4064196586608887, |
| "memory(GiB)": 36.98, |
| "step": 95, |
| "token_acc": 0.8643278484942565, |
| "train_speed(iter/s)": 0.161074 |
| }, |
| { |
| "epoch": 0.06720430107526881, |
| "grad_norm": 1.5539812424163364, |
| "learning_rate": 4.464285714285715e-06, |
| "loss": 0.426595401763916, |
| "memory(GiB)": 36.98, |
| "step": 100, |
| "token_acc": 0.8269726663228468, |
| "train_speed(iter/s)": 0.162108 |
| }, |
| { |
| "epoch": 0.06720430107526881, |
| "eval_loss": 0.4180561602115631, |
| "eval_runtime": 22.2875, |
| "eval_samples_per_second": 21.133, |
| "eval_steps_per_second": 2.647, |
| "eval_token_acc": 0.8593293797924896, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.07056451612903226, |
| "grad_norm": 1.70607185424576, |
| "learning_rate": 4.6875000000000004e-06, |
| "loss": 0.37628803253173826, |
| "memory(GiB)": 36.98, |
| "step": 105, |
| "token_acc": 0.8750300264232524, |
| "train_speed(iter/s)": 0.145711 |
| }, |
| { |
| "epoch": 0.0739247311827957, |
| "grad_norm": 1.8191153432807592, |
| "learning_rate": 4.910714285714286e-06, |
| "loss": 0.4305882453918457, |
| "memory(GiB)": 36.98, |
| "step": 110, |
| "token_acc": 0.83150800336984, |
| "train_speed(iter/s)": 0.146623 |
| }, |
| { |
| "epoch": 0.07728494623655914, |
| "grad_norm": 1.516959275850232, |
| "learning_rate": 5.133928571428571e-06, |
| "loss": 0.3796091079711914, |
| "memory(GiB)": 36.98, |
| "step": 115, |
| "token_acc": 0.8756284153005465, |
| "train_speed(iter/s)": 0.147653 |
| }, |
| { |
| "epoch": 0.08064516129032258, |
| "grad_norm": 1.695400164810879, |
| "learning_rate": 5.357142857142857e-06, |
| "loss": 0.40662312507629395, |
| "memory(GiB)": 36.98, |
| "step": 120, |
| "token_acc": 0.8457357859531772, |
| "train_speed(iter/s)": 0.148073 |
| }, |
| { |
| "epoch": 0.08400537634408602, |
| "grad_norm": 1.8736717783155716, |
| "learning_rate": 5.580357142857144e-06, |
| "loss": 0.4302112102508545, |
| "memory(GiB)": 36.98, |
| "step": 125, |
| "token_acc": 0.8765765765765766, |
| "train_speed(iter/s)": 0.148479 |
| }, |
| { |
| "epoch": 0.08736559139784947, |
| "grad_norm": 1.7896300849207585, |
| "learning_rate": 5.8035714285714295e-06, |
| "loss": 0.40099687576293946, |
| "memory(GiB)": 36.98, |
| "step": 130, |
| "token_acc": 0.8720675633406318, |
| "train_speed(iter/s)": 0.14916 |
| }, |
| { |
| "epoch": 0.0907258064516129, |
| "grad_norm": 1.3724965970536878, |
| "learning_rate": 6.0267857142857145e-06, |
| "loss": 0.3895423889160156, |
| "memory(GiB)": 36.98, |
| "step": 135, |
| "token_acc": 0.8717873831775701, |
| "train_speed(iter/s)": 0.149285 |
| }, |
| { |
| "epoch": 0.09408602150537634, |
| "grad_norm": 1.5030342275560018, |
| "learning_rate": 6.25e-06, |
| "loss": 0.36336331367492675, |
| "memory(GiB)": 36.98, |
| "step": 140, |
| "token_acc": 0.8672401767030923, |
| "train_speed(iter/s)": 0.149903 |
| }, |
| { |
| "epoch": 0.09744623655913978, |
| "grad_norm": 2.1533049159645983, |
| "learning_rate": 6.473214285714287e-06, |
| "loss": 0.4152632713317871, |
| "memory(GiB)": 36.98, |
| "step": 145, |
| "token_acc": 0.850445481544336, |
| "train_speed(iter/s)": 0.149889 |
| }, |
| { |
| "epoch": 0.10080645161290322, |
| "grad_norm": 1.618692560488842, |
| "learning_rate": 6.696428571428571e-06, |
| "loss": 0.3959961891174316, |
| "memory(GiB)": 36.98, |
| "step": 150, |
| "token_acc": 0.8774609640190089, |
| "train_speed(iter/s)": 0.150655 |
| }, |
| { |
| "epoch": 0.10416666666666667, |
| "grad_norm": 1.6433698117562718, |
| "learning_rate": 6.919642857142858e-06, |
| "loss": 0.3835208654403687, |
| "memory(GiB)": 36.98, |
| "step": 155, |
| "token_acc": 0.8653895935801191, |
| "train_speed(iter/s)": 0.151188 |
| }, |
| { |
| "epoch": 0.10752688172043011, |
| "grad_norm": 1.482923442532512, |
| "learning_rate": 7.1428571428571436e-06, |
| "loss": 0.374590539932251, |
| "memory(GiB)": 36.98, |
| "step": 160, |
| "token_acc": 0.8786279683377308, |
| "train_speed(iter/s)": 0.151943 |
| }, |
| { |
| "epoch": 0.11088709677419355, |
| "grad_norm": 1.4090449360939217, |
| "learning_rate": 7.366071428571429e-06, |
| "loss": 0.36885733604431153, |
| "memory(GiB)": 36.98, |
| "step": 165, |
| "token_acc": 0.85635602555629, |
| "train_speed(iter/s)": 0.152229 |
| }, |
| { |
| "epoch": 0.11424731182795698, |
| "grad_norm": 1.9132620037814079, |
| "learning_rate": 7.589285714285714e-06, |
| "loss": 0.4086275577545166, |
| "memory(GiB)": 36.98, |
| "step": 170, |
| "token_acc": 0.8498422712933754, |
| "train_speed(iter/s)": 0.152742 |
| }, |
| { |
| "epoch": 0.11760752688172044, |
| "grad_norm": 1.92320190057799, |
| "learning_rate": 7.8125e-06, |
| "loss": 0.3889497756958008, |
| "memory(GiB)": 36.98, |
| "step": 175, |
| "token_acc": 0.9108433734939759, |
| "train_speed(iter/s)": 0.152976 |
| }, |
| { |
| "epoch": 0.12096774193548387, |
| "grad_norm": 1.6746635838154391, |
| "learning_rate": 8.035714285714286e-06, |
| "loss": 0.4047250270843506, |
| "memory(GiB)": 36.98, |
| "step": 180, |
| "token_acc": 0.8924731182795699, |
| "train_speed(iter/s)": 0.153229 |
| }, |
| { |
| "epoch": 0.12432795698924731, |
| "grad_norm": 1.6017724626754042, |
| "learning_rate": 8.258928571428572e-06, |
| "loss": 0.3953080654144287, |
| "memory(GiB)": 36.98, |
| "step": 185, |
| "token_acc": 0.8757406188281764, |
| "train_speed(iter/s)": 0.153785 |
| }, |
| { |
| "epoch": 0.12768817204301075, |
| "grad_norm": 1.416612700811783, |
| "learning_rate": 8.482142857142858e-06, |
| "loss": 0.38227014541625975, |
| "memory(GiB)": 36.98, |
| "step": 190, |
| "token_acc": 0.8174425126191811, |
| "train_speed(iter/s)": 0.153823 |
| }, |
| { |
| "epoch": 0.1310483870967742, |
| "grad_norm": 1.607816186735269, |
| "learning_rate": 8.705357142857143e-06, |
| "loss": 0.3862867832183838, |
| "memory(GiB)": 36.98, |
| "step": 195, |
| "token_acc": 0.8581641406920217, |
| "train_speed(iter/s)": 0.154023 |
| }, |
| { |
| "epoch": 0.13440860215053763, |
| "grad_norm": 1.4260899797117483, |
| "learning_rate": 8.92857142857143e-06, |
| "loss": 0.3993819713592529, |
| "memory(GiB)": 36.98, |
| "step": 200, |
| "token_acc": 0.8897238399398836, |
| "train_speed(iter/s)": 0.154175 |
| }, |
| { |
| "epoch": 0.13440860215053763, |
| "eval_loss": 0.3838532567024231, |
| "eval_runtime": 22.251, |
| "eval_samples_per_second": 21.168, |
| "eval_steps_per_second": 2.652, |
| "eval_token_acc": 0.8661689381004819, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.13776881720430106, |
| "grad_norm": 1.6111939807098428, |
| "learning_rate": 9.151785714285715e-06, |
| "loss": 0.39362945556640627, |
| "memory(GiB)": 36.98, |
| "step": 205, |
| "token_acc": 0.870816083081889, |
| "train_speed(iter/s)": 0.146456 |
| }, |
| { |
| "epoch": 0.14112903225806453, |
| "grad_norm": 1.45575497509253, |
| "learning_rate": 9.375000000000001e-06, |
| "loss": 0.37442193031311033, |
| "memory(GiB)": 36.98, |
| "step": 210, |
| "token_acc": 0.83461210571185, |
| "train_speed(iter/s)": 0.146913 |
| }, |
| { |
| "epoch": 0.14448924731182797, |
| "grad_norm": 1.5444085134752388, |
| "learning_rate": 9.598214285714287e-06, |
| "loss": 0.38316497802734373, |
| "memory(GiB)": 36.98, |
| "step": 215, |
| "token_acc": 0.8777614138438881, |
| "train_speed(iter/s)": 0.147416 |
| }, |
| { |
| "epoch": 0.1478494623655914, |
| "grad_norm": 1.3520701605942982, |
| "learning_rate": 9.821428571428573e-06, |
| "loss": 0.3548079013824463, |
| "memory(GiB)": 36.98, |
| "step": 220, |
| "token_acc": 0.8824049513704686, |
| "train_speed(iter/s)": 0.147708 |
| }, |
| { |
| "epoch": 0.15120967741935484, |
| "grad_norm": 1.6545075322794685, |
| "learning_rate": 9.999998627513692e-06, |
| "loss": 0.38989009857177737, |
| "memory(GiB)": 36.98, |
| "step": 225, |
| "token_acc": 0.8820522529344945, |
| "train_speed(iter/s)": 0.147715 |
| }, |
| { |
| "epoch": 0.15456989247311828, |
| "grad_norm": 1.7618846612441343, |
| "learning_rate": 9.999950590571983e-06, |
| "loss": 0.37784652709960936, |
| "memory(GiB)": 36.98, |
| "step": 230, |
| "token_acc": 0.8712837837837838, |
| "train_speed(iter/s)": 0.148241 |
| }, |
| { |
| "epoch": 0.15793010752688172, |
| "grad_norm": 1.639163396629249, |
| "learning_rate": 9.999833930068294e-06, |
| "loss": 0.4089940547943115, |
| "memory(GiB)": 36.98, |
| "step": 235, |
| "token_acc": 0.8786279683377308, |
| "train_speed(iter/s)": 0.148602 |
| }, |
| { |
| "epoch": 0.16129032258064516, |
| "grad_norm": 1.6330896141172941, |
| "learning_rate": 9.999648647603774e-06, |
| "loss": 0.3805581569671631, |
| "memory(GiB)": 36.98, |
| "step": 240, |
| "token_acc": 0.8617309697601668, |
| "train_speed(iter/s)": 0.149149 |
| }, |
| { |
| "epoch": 0.1646505376344086, |
| "grad_norm": 1.3404402756331484, |
| "learning_rate": 9.999394745721398e-06, |
| "loss": 0.3552532196044922, |
| "memory(GiB)": 36.98, |
| "step": 245, |
| "token_acc": 0.8820645161290323, |
| "train_speed(iter/s)": 0.149569 |
| }, |
| { |
| "epoch": 0.16801075268817203, |
| "grad_norm": 1.6681551792745373, |
| "learning_rate": 9.99907222790593e-06, |
| "loss": 0.39254608154296877, |
| "memory(GiB)": 36.98, |
| "step": 250, |
| "token_acc": 0.8685992955838526, |
| "train_speed(iter/s)": 0.149851 |
| }, |
| { |
| "epoch": 0.17137096774193547, |
| "grad_norm": 1.5895776155869723, |
| "learning_rate": 9.998681098583875e-06, |
| "loss": 0.3646648645401001, |
| "memory(GiB)": 36.98, |
| "step": 255, |
| "token_acc": 0.8523304107060452, |
| "train_speed(iter/s)": 0.150089 |
| }, |
| { |
| "epoch": 0.17473118279569894, |
| "grad_norm": 1.6243707032073051, |
| "learning_rate": 9.998221363123425e-06, |
| "loss": 0.37679805755615237, |
| "memory(GiB)": 36.98, |
| "step": 260, |
| "token_acc": 0.875, |
| "train_speed(iter/s)": 0.150347 |
| }, |
| { |
| "epoch": 0.17809139784946237, |
| "grad_norm": 1.355667950869519, |
| "learning_rate": 9.997693027834384e-06, |
| "loss": 0.4236710548400879, |
| "memory(GiB)": 36.98, |
| "step": 265, |
| "token_acc": 0.8643803585346843, |
| "train_speed(iter/s)": 0.150219 |
| }, |
| { |
| "epoch": 0.1814516129032258, |
| "grad_norm": 1.6123089970883007, |
| "learning_rate": 9.997096099968065e-06, |
| "loss": 0.3853868246078491, |
| "memory(GiB)": 36.98, |
| "step": 270, |
| "token_acc": 0.8670948616600791, |
| "train_speed(iter/s)": 0.150287 |
| }, |
| { |
| "epoch": 0.18481182795698925, |
| "grad_norm": 1.5728048169270692, |
| "learning_rate": 9.996430587717219e-06, |
| "loss": 0.39868209362030027, |
| "memory(GiB)": 36.98, |
| "step": 275, |
| "token_acc": 0.8995027479717351, |
| "train_speed(iter/s)": 0.150527 |
| }, |
| { |
| "epoch": 0.1881720430107527, |
| "grad_norm": 1.6115770374072542, |
| "learning_rate": 9.995696500215899e-06, |
| "loss": 0.38219125270843507, |
| "memory(GiB)": 36.98, |
| "step": 280, |
| "token_acc": 0.8556105610561056, |
| "train_speed(iter/s)": 0.150658 |
| }, |
| { |
| "epoch": 0.19153225806451613, |
| "grad_norm": 1.7888055041840647, |
| "learning_rate": 9.994893847539341e-06, |
| "loss": 0.39760608673095704, |
| "memory(GiB)": 36.98, |
| "step": 285, |
| "token_acc": 0.8500110448420588, |
| "train_speed(iter/s)": 0.150648 |
| }, |
| { |
| "epoch": 0.19489247311827956, |
| "grad_norm": 1.7509518458716071, |
| "learning_rate": 9.994022640703837e-06, |
| "loss": 0.3869561910629272, |
| "memory(GiB)": 36.98, |
| "step": 290, |
| "token_acc": 0.8347354138398915, |
| "train_speed(iter/s)": 0.150683 |
| }, |
| { |
| "epoch": 0.198252688172043, |
| "grad_norm": 1.548716222125624, |
| "learning_rate": 9.993082891666564e-06, |
| "loss": 0.38693482875823976, |
| "memory(GiB)": 36.98, |
| "step": 295, |
| "token_acc": 0.882627817482133, |
| "train_speed(iter/s)": 0.150731 |
| }, |
| { |
| "epoch": 0.20161290322580644, |
| "grad_norm": 1.4513252709739255, |
| "learning_rate": 9.992074613325435e-06, |
| "loss": 0.3878211498260498, |
| "memory(GiB)": 36.98, |
| "step": 300, |
| "token_acc": 0.8645980253878702, |
| "train_speed(iter/s)": 0.150735 |
| }, |
| { |
| "epoch": 0.20161290322580644, |
| "eval_loss": 0.3748473823070526, |
| "eval_runtime": 22.1986, |
| "eval_samples_per_second": 21.218, |
| "eval_steps_per_second": 2.658, |
| "eval_token_acc": 0.8676136306234404, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.2049731182795699, |
| "grad_norm": 1.553685980316146, |
| "learning_rate": 9.990997819518916e-06, |
| "loss": 0.3740631103515625, |
| "memory(GiB)": 36.98, |
| "step": 305, |
| "token_acc": 0.8838088271153377, |
| "train_speed(iter/s)": 0.145632 |
| }, |
| { |
| "epoch": 0.20833333333333334, |
| "grad_norm": 1.4595129136479905, |
| "learning_rate": 9.989852525025845e-06, |
| "loss": 0.39213879108428956, |
| "memory(GiB)": 36.98, |
| "step": 310, |
| "token_acc": 0.8785211267605634, |
| "train_speed(iter/s)": 0.146129 |
| }, |
| { |
| "epoch": 0.21169354838709678, |
| "grad_norm": 1.6424371283556445, |
| "learning_rate": 9.988638745565207e-06, |
| "loss": 0.3821288585662842, |
| "memory(GiB)": 36.98, |
| "step": 315, |
| "token_acc": 0.855286827903258, |
| "train_speed(iter/s)": 0.146459 |
| }, |
| { |
| "epoch": 0.21505376344086022, |
| "grad_norm": 1.4969668062939072, |
| "learning_rate": 9.987356497795944e-06, |
| "loss": 0.3804674863815308, |
| "memory(GiB)": 36.98, |
| "step": 320, |
| "token_acc": 0.8544133476856836, |
| "train_speed(iter/s)": 0.146629 |
| }, |
| { |
| "epoch": 0.21841397849462366, |
| "grad_norm": 1.7200733273955053, |
| "learning_rate": 9.986005799316711e-06, |
| "loss": 0.38283023834228513, |
| "memory(GiB)": 36.98, |
| "step": 325, |
| "token_acc": 0.8894173602853745, |
| "train_speed(iter/s)": 0.146936 |
| }, |
| { |
| "epoch": 0.2217741935483871, |
| "grad_norm": 1.3976699612621017, |
| "learning_rate": 9.984586668665641e-06, |
| "loss": 0.3722895622253418, |
| "memory(GiB)": 36.98, |
| "step": 330, |
| "token_acc": 0.8618727366787378, |
| "train_speed(iter/s)": 0.147204 |
| }, |
| { |
| "epoch": 0.22513440860215053, |
| "grad_norm": 1.81408532653209, |
| "learning_rate": 9.983099125320083e-06, |
| "loss": 0.36228926181793214, |
| "memory(GiB)": 36.98, |
| "step": 335, |
| "token_acc": 0.8845631507469444, |
| "train_speed(iter/s)": 0.147325 |
| }, |
| { |
| "epoch": 0.22849462365591397, |
| "grad_norm": 1.6305131775402135, |
| "learning_rate": 9.981543189696349e-06, |
| "loss": 0.3938841104507446, |
| "memory(GiB)": 36.98, |
| "step": 340, |
| "token_acc": 0.86302780638517, |
| "train_speed(iter/s)": 0.147585 |
| }, |
| { |
| "epoch": 0.2318548387096774, |
| "grad_norm": 1.3804298809031184, |
| "learning_rate": 9.979918883149412e-06, |
| "loss": 0.3789535999298096, |
| "memory(GiB)": 36.98, |
| "step": 345, |
| "token_acc": 0.8524590163934426, |
| "train_speed(iter/s)": 0.148072 |
| }, |
| { |
| "epoch": 0.23521505376344087, |
| "grad_norm": 1.566906969179067, |
| "learning_rate": 9.97822622797264e-06, |
| "loss": 0.35543198585510255, |
| "memory(GiB)": 36.98, |
| "step": 350, |
| "token_acc": 0.8679446219382322, |
| "train_speed(iter/s)": 0.148335 |
| }, |
| { |
| "epoch": 0.2385752688172043, |
| "grad_norm": 1.4075206244947833, |
| "learning_rate": 9.976465247397463e-06, |
| "loss": 0.3800350666046143, |
| "memory(GiB)": 36.98, |
| "step": 355, |
| "token_acc": 0.8800525796910943, |
| "train_speed(iter/s)": 0.148683 |
| }, |
| { |
| "epoch": 0.24193548387096775, |
| "grad_norm": 1.515030938288751, |
| "learning_rate": 9.97463596559307e-06, |
| "loss": 0.3830380439758301, |
| "memory(GiB)": 36.98, |
| "step": 360, |
| "token_acc": 0.8495917490330898, |
| "train_speed(iter/s)": 0.148913 |
| }, |
| { |
| "epoch": 0.2452956989247312, |
| "grad_norm": 1.4289246071399373, |
| "learning_rate": 9.97273840766608e-06, |
| "loss": 0.3855442047119141, |
| "memory(GiB)": 36.98, |
| "step": 365, |
| "token_acc": 0.8720626631853786, |
| "train_speed(iter/s)": 0.149089 |
| }, |
| { |
| "epoch": 0.24865591397849462, |
| "grad_norm": 1.2845230370748955, |
| "learning_rate": 9.970772599660188e-06, |
| "loss": 0.3582731246948242, |
| "memory(GiB)": 36.98, |
| "step": 370, |
| "token_acc": 0.8702564102564102, |
| "train_speed(iter/s)": 0.149282 |
| }, |
| { |
| "epoch": 0.25201612903225806, |
| "grad_norm": 1.3569524106626425, |
| "learning_rate": 9.968738568555806e-06, |
| "loss": 0.37152538299560545, |
| "memory(GiB)": 36.98, |
| "step": 375, |
| "token_acc": 0.8608198284080076, |
| "train_speed(iter/s)": 0.149486 |
| }, |
| { |
| "epoch": 0.2553763440860215, |
| "grad_norm": 1.3704888639008825, |
| "learning_rate": 9.966636342269706e-06, |
| "loss": 0.37766404151916505, |
| "memory(GiB)": 36.98, |
| "step": 380, |
| "token_acc": 0.8613061797752809, |
| "train_speed(iter/s)": 0.14955 |
| }, |
| { |
| "epoch": 0.25873655913978494, |
| "grad_norm": 1.4312626718198491, |
| "learning_rate": 9.964465949654621e-06, |
| "loss": 0.3510261058807373, |
| "memory(GiB)": 36.98, |
| "step": 385, |
| "token_acc": 0.8919360104472739, |
| "train_speed(iter/s)": 0.149815 |
| }, |
| { |
| "epoch": 0.2620967741935484, |
| "grad_norm": 1.5012892071738868, |
| "learning_rate": 9.96222742049886e-06, |
| "loss": 0.394317102432251, |
| "memory(GiB)": 36.98, |
| "step": 390, |
| "token_acc": 0.8867279894875164, |
| "train_speed(iter/s)": 0.150039 |
| }, |
| { |
| "epoch": 0.2654569892473118, |
| "grad_norm": 1.2294914710999452, |
| "learning_rate": 9.959920785525896e-06, |
| "loss": 0.37460813522338865, |
| "memory(GiB)": 36.98, |
| "step": 395, |
| "token_acc": 0.8766404199475065, |
| "train_speed(iter/s)": 0.150189 |
| }, |
| { |
| "epoch": 0.26881720430107525, |
| "grad_norm": 1.312969727779233, |
| "learning_rate": 9.957546076393944e-06, |
| "loss": 0.3609046220779419, |
| "memory(GiB)": 36.98, |
| "step": 400, |
| "token_acc": 0.8716331401295602, |
| "train_speed(iter/s)": 0.150494 |
| }, |
| { |
| "epoch": 0.26881720430107525, |
| "eval_loss": 0.37320902943611145, |
| "eval_runtime": 22.1942, |
| "eval_samples_per_second": 21.222, |
| "eval_steps_per_second": 2.658, |
| "eval_token_acc": 0.8664013012335452, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.2721774193548387, |
| "grad_norm": 1.4201872276695326, |
| "learning_rate": 9.955103325695526e-06, |
| "loss": 0.3812046766281128, |
| "memory(GiB)": 36.98, |
| "step": 405, |
| "token_acc": 0.8722784634586049, |
| "train_speed(iter/s)": 0.146726 |
| }, |
| { |
| "epoch": 0.27553763440860213, |
| "grad_norm": 1.68708660262874, |
| "learning_rate": 9.952592566957024e-06, |
| "loss": 0.3877915382385254, |
| "memory(GiB)": 36.98, |
| "step": 410, |
| "token_acc": 0.8642187798814305, |
| "train_speed(iter/s)": 0.146883 |
| }, |
| { |
| "epoch": 0.27889784946236557, |
| "grad_norm": 1.1600519908390228, |
| "learning_rate": 9.95001383463822e-06, |
| "loss": 0.3642563343048096, |
| "memory(GiB)": 36.98, |
| "step": 415, |
| "token_acc": 0.8828267082359055, |
| "train_speed(iter/s)": 0.146962 |
| }, |
| { |
| "epoch": 0.28225806451612906, |
| "grad_norm": 1.2285637627216721, |
| "learning_rate": 9.947367164131823e-06, |
| "loss": 0.37247591018676757, |
| "memory(GiB)": 36.98, |
| "step": 420, |
| "token_acc": 0.8826968755709849, |
| "train_speed(iter/s)": 0.147109 |
| }, |
| { |
| "epoch": 0.2856182795698925, |
| "grad_norm": 1.259887568682347, |
| "learning_rate": 9.944652591762982e-06, |
| "loss": 0.36926772594451907, |
| "memory(GiB)": 36.98, |
| "step": 425, |
| "token_acc": 0.8709743171626579, |
| "train_speed(iter/s)": 0.147357 |
| }, |
| { |
| "epoch": 0.28897849462365593, |
| "grad_norm": 1.3616426835834692, |
| "learning_rate": 9.941870154788793e-06, |
| "loss": 0.3907612323760986, |
| "memory(GiB)": 36.98, |
| "step": 430, |
| "token_acc": 0.8674786845310597, |
| "train_speed(iter/s)": 0.147389 |
| }, |
| { |
| "epoch": 0.2923387096774194, |
| "grad_norm": 1.4245606118453644, |
| "learning_rate": 9.939019891397778e-06, |
| "loss": 0.3910489320755005, |
| "memory(GiB)": 36.98, |
| "step": 435, |
| "token_acc": 0.8651339608979001, |
| "train_speed(iter/s)": 0.14749 |
| }, |
| { |
| "epoch": 0.2956989247311828, |
| "grad_norm": 1.2523176077069897, |
| "learning_rate": 9.936101840709373e-06, |
| "loss": 0.3995755672454834, |
| "memory(GiB)": 36.98, |
| "step": 440, |
| "token_acc": 0.8203216947822676, |
| "train_speed(iter/s)": 0.147702 |
| }, |
| { |
| "epoch": 0.29905913978494625, |
| "grad_norm": 1.3331291522700441, |
| "learning_rate": 9.933116042773375e-06, |
| "loss": 0.3527373313903809, |
| "memory(GiB)": 36.98, |
| "step": 445, |
| "token_acc": 0.8523316062176166, |
| "train_speed(iter/s)": 0.147868 |
| }, |
| { |
| "epoch": 0.3024193548387097, |
| "grad_norm": 1.408211021179238, |
| "learning_rate": 9.93006253856941e-06, |
| "loss": 0.37166690826416016, |
| "memory(GiB)": 36.98, |
| "step": 450, |
| "token_acc": 0.8806776331942058, |
| "train_speed(iter/s)": 0.148099 |
| }, |
| { |
| "epoch": 0.3057795698924731, |
| "grad_norm": 1.665951694594511, |
| "learning_rate": 9.92694137000636e-06, |
| "loss": 0.39462215900421144, |
| "memory(GiB)": 36.98, |
| "step": 455, |
| "token_acc": 0.8611589213998853, |
| "train_speed(iter/s)": 0.148142 |
| }, |
| { |
| "epoch": 0.30913978494623656, |
| "grad_norm": 1.3584787225981534, |
| "learning_rate": 9.923752579921787e-06, |
| "loss": 0.3762697696685791, |
| "memory(GiB)": 36.98, |
| "step": 460, |
| "token_acc": 0.8588405797101449, |
| "train_speed(iter/s)": 0.148298 |
| }, |
| { |
| "epoch": 0.3125, |
| "grad_norm": 1.3251616436516978, |
| "learning_rate": 9.92049621208135e-06, |
| "loss": 0.3711697101593018, |
| "memory(GiB)": 36.98, |
| "step": 465, |
| "token_acc": 0.8663532572196104, |
| "train_speed(iter/s)": 0.148468 |
| }, |
| { |
| "epoch": 0.31586021505376344, |
| "grad_norm": 1.2515144595137409, |
| "learning_rate": 9.917172311178203e-06, |
| "loss": 0.360841178894043, |
| "memory(GiB)": 36.98, |
| "step": 470, |
| "token_acc": 0.8898142216527867, |
| "train_speed(iter/s)": 0.148629 |
| }, |
| { |
| "epoch": 0.3192204301075269, |
| "grad_norm": 1.1833628858332188, |
| "learning_rate": 9.913780922832383e-06, |
| "loss": 0.3361430883407593, |
| "memory(GiB)": 36.98, |
| "step": 475, |
| "token_acc": 0.8739758095981272, |
| "train_speed(iter/s)": 0.148749 |
| }, |
| { |
| "epoch": 0.3225806451612903, |
| "grad_norm": 1.5056179038816182, |
| "learning_rate": 9.910322093590177e-06, |
| "loss": 0.39411134719848634, |
| "memory(GiB)": 36.98, |
| "step": 480, |
| "token_acc": 0.8696789536266349, |
| "train_speed(iter/s)": 0.148862 |
| }, |
| { |
| "epoch": 0.32594086021505375, |
| "grad_norm": 1.3378379217665801, |
| "learning_rate": 9.90679587092349e-06, |
| "loss": 0.3966991662979126, |
| "memory(GiB)": 36.98, |
| "step": 485, |
| "token_acc": 0.8551621688536767, |
| "train_speed(iter/s)": 0.148869 |
| }, |
| { |
| "epoch": 0.3293010752688172, |
| "grad_norm": 1.0480697431497084, |
| "learning_rate": 9.903202303229199e-06, |
| "loss": 0.3763158321380615, |
| "memory(GiB)": 36.98, |
| "step": 490, |
| "token_acc": 0.8711036225779275, |
| "train_speed(iter/s)": 0.148966 |
| }, |
| { |
| "epoch": 0.3326612903225806, |
| "grad_norm": 1.3180202797254805, |
| "learning_rate": 9.899541439828464e-06, |
| "loss": 0.3638334274291992, |
| "memory(GiB)": 36.98, |
| "step": 495, |
| "token_acc": 0.881887165951831, |
| "train_speed(iter/s)": 0.149291 |
| }, |
| { |
| "epoch": 0.33602150537634407, |
| "grad_norm": 1.3597315112923483, |
| "learning_rate": 9.895813330966086e-06, |
| "loss": 0.3699483394622803, |
| "memory(GiB)": 36.98, |
| "step": 500, |
| "token_acc": 0.8665288306914771, |
| "train_speed(iter/s)": 0.149428 |
| }, |
| { |
| "epoch": 0.33602150537634407, |
| "eval_loss": 0.36967214941978455, |
| "eval_runtime": 22.2237, |
| "eval_samples_per_second": 21.194, |
| "eval_steps_per_second": 2.655, |
| "eval_token_acc": 0.8679470212056616, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.3393817204301075, |
| "grad_norm": 1.5431659281776462, |
| "learning_rate": 9.892018027809793e-06, |
| "loss": 0.37493338584899905, |
| "memory(GiB)": 36.98, |
| "step": 505, |
| "token_acc": 0.8687758281959217, |
| "train_speed(iter/s)": 0.146352 |
| }, |
| { |
| "epoch": 0.34274193548387094, |
| "grad_norm": 1.3449879580767712, |
| "learning_rate": 9.88815558244954e-06, |
| "loss": 0.39549927711486815, |
| "memory(GiB)": 36.98, |
| "step": 510, |
| "token_acc": 0.8586065573770492, |
| "train_speed(iter/s)": 0.146469 |
| }, |
| { |
| "epoch": 0.34610215053763443, |
| "grad_norm": 1.573469085654153, |
| "learning_rate": 9.884226047896803e-06, |
| "loss": 0.35512893199920653, |
| "memory(GiB)": 36.98, |
| "step": 515, |
| "token_acc": 0.8800461361014994, |
| "train_speed(iter/s)": 0.146625 |
| }, |
| { |
| "epoch": 0.34946236559139787, |
| "grad_norm": 1.306949068152781, |
| "learning_rate": 9.880229478083849e-06, |
| "loss": 0.35407171249389646, |
| "memory(GiB)": 36.98, |
| "step": 520, |
| "token_acc": 0.8720520466251016, |
| "train_speed(iter/s)": 0.146718 |
| }, |
| { |
| "epoch": 0.3528225806451613, |
| "grad_norm": 1.377421152139763, |
| "learning_rate": 9.87616592786299e-06, |
| "loss": 0.39537363052368163, |
| "memory(GiB)": 36.98, |
| "step": 525, |
| "token_acc": 0.8787520680690144, |
| "train_speed(iter/s)": 0.146839 |
| }, |
| { |
| "epoch": 0.35618279569892475, |
| "grad_norm": 1.4366698685832746, |
| "learning_rate": 9.872035453005836e-06, |
| "loss": 0.37119576930999754, |
| "memory(GiB)": 36.98, |
| "step": 530, |
| "token_acc": 0.8672721945502491, |
| "train_speed(iter/s)": 0.147073 |
| }, |
| { |
| "epoch": 0.3595430107526882, |
| "grad_norm": 1.2355637712340461, |
| "learning_rate": 9.867838110202525e-06, |
| "loss": 0.3660418510437012, |
| "memory(GiB)": 36.98, |
| "step": 535, |
| "token_acc": 0.8535449735449735, |
| "train_speed(iter/s)": 0.147263 |
| }, |
| { |
| "epoch": 0.3629032258064516, |
| "grad_norm": 1.2495337501258665, |
| "learning_rate": 9.863573957060953e-06, |
| "loss": 0.36377530097961425, |
| "memory(GiB)": 36.98, |
| "step": 540, |
| "token_acc": 0.8842736561528209, |
| "train_speed(iter/s)": 0.14738 |
| }, |
| { |
| "epoch": 0.36626344086021506, |
| "grad_norm": 1.305490979194108, |
| "learning_rate": 9.859243052105967e-06, |
| "loss": 0.38242766857147215, |
| "memory(GiB)": 36.98, |
| "step": 545, |
| "token_acc": 0.858521717043434, |
| "train_speed(iter/s)": 0.14751 |
| }, |
| { |
| "epoch": 0.3696236559139785, |
| "grad_norm": 1.3434721486823473, |
| "learning_rate": 9.854845454778585e-06, |
| "loss": 0.37967212200164796, |
| "memory(GiB)": 36.98, |
| "step": 550, |
| "token_acc": 0.8517232829159435, |
| "train_speed(iter/s)": 0.147798 |
| }, |
| { |
| "epoch": 0.37298387096774194, |
| "grad_norm": 1.5896521776380288, |
| "learning_rate": 9.85038122543516e-06, |
| "loss": 0.40446958541870115, |
| "memory(GiB)": 36.98, |
| "step": 555, |
| "token_acc": 0.8549883990719258, |
| "train_speed(iter/s)": 0.147841 |
| }, |
| { |
| "epoch": 0.3763440860215054, |
| "grad_norm": 1.5541190326682846, |
| "learning_rate": 9.845850425346563e-06, |
| "loss": 0.3877220869064331, |
| "memory(GiB)": 36.98, |
| "step": 560, |
| "token_acc": 0.8556073818698033, |
| "train_speed(iter/s)": 0.148023 |
| }, |
| { |
| "epoch": 0.3797043010752688, |
| "grad_norm": 1.2488745915944466, |
| "learning_rate": 9.841253116697333e-06, |
| "loss": 0.36992673873901366, |
| "memory(GiB)": 36.98, |
| "step": 565, |
| "token_acc": 0.8745377707342842, |
| "train_speed(iter/s)": 0.148144 |
| }, |
| { |
| "epoch": 0.38306451612903225, |
| "grad_norm": 1.4733618409553315, |
| "learning_rate": 9.836589362584837e-06, |
| "loss": 0.37825746536254884, |
| "memory(GiB)": 36.98, |
| "step": 570, |
| "token_acc": 0.8752980448259419, |
| "train_speed(iter/s)": 0.148207 |
| }, |
| { |
| "epoch": 0.3864247311827957, |
| "grad_norm": 1.2525075479382355, |
| "learning_rate": 9.831859227018387e-06, |
| "loss": 0.39865090847015383, |
| "memory(GiB)": 36.98, |
| "step": 575, |
| "token_acc": 0.8661578555472822, |
| "train_speed(iter/s)": 0.148276 |
| }, |
| { |
| "epoch": 0.3897849462365591, |
| "grad_norm": 1.449301845425235, |
| "learning_rate": 9.827062774918377e-06, |
| "loss": 0.3785409927368164, |
| "memory(GiB)": 36.98, |
| "step": 580, |
| "token_acc": 0.8519668737060041, |
| "train_speed(iter/s)": 0.148282 |
| }, |
| { |
| "epoch": 0.39314516129032256, |
| "grad_norm": 1.2168801605641335, |
| "learning_rate": 9.822200072115385e-06, |
| "loss": 0.3902196645736694, |
| "memory(GiB)": 36.98, |
| "step": 585, |
| "token_acc": 0.8548924568312632, |
| "train_speed(iter/s)": 0.148421 |
| }, |
| { |
| "epoch": 0.396505376344086, |
| "grad_norm": 1.3081294024585297, |
| "learning_rate": 9.817271185349263e-06, |
| "loss": 0.3606643438339233, |
| "memory(GiB)": 36.98, |
| "step": 590, |
| "token_acc": 0.8854581673306773, |
| "train_speed(iter/s)": 0.148553 |
| }, |
| { |
| "epoch": 0.39986559139784944, |
| "grad_norm": 1.1879061305415564, |
| "learning_rate": 9.812276182268236e-06, |
| "loss": 0.36301345825195314, |
| "memory(GiB)": 36.98, |
| "step": 595, |
| "token_acc": 0.8823529411764706, |
| "train_speed(iter/s)": 0.148614 |
| }, |
| { |
| "epoch": 0.4032258064516129, |
| "grad_norm": 1.3201174180172996, |
| "learning_rate": 9.807215131427966e-06, |
| "loss": 0.35914902687072753, |
| "memory(GiB)": 36.98, |
| "step": 600, |
| "token_acc": 0.8685085699667434, |
| "train_speed(iter/s)": 0.14863 |
| }, |
| { |
| "epoch": 0.4032258064516129, |
| "eval_loss": 0.3670656979084015, |
| "eval_runtime": 22.3702, |
| "eval_samples_per_second": 21.055, |
| "eval_steps_per_second": 2.637, |
| "eval_token_acc": 0.8686845215845145, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.40658602150537637, |
| "grad_norm": 1.0706504342347392, |
| "learning_rate": 9.802088102290598e-06, |
| "loss": 0.36201565265655516, |
| "memory(GiB)": 36.98, |
| "step": 605, |
| "token_acc": 0.8745827060286705, |
| "train_speed(iter/s)": 0.145719 |
| }, |
| { |
| "epoch": 0.4099462365591398, |
| "grad_norm": 1.3426300690915438, |
| "learning_rate": 9.796895165223835e-06, |
| "loss": 0.3961642265319824, |
| "memory(GiB)": 36.98, |
| "step": 610, |
| "token_acc": 0.8796818031156778, |
| "train_speed(iter/s)": 0.145743 |
| }, |
| { |
| "epoch": 0.41330645161290325, |
| "grad_norm": 1.2789042611843648, |
| "learning_rate": 9.791636391499944e-06, |
| "loss": 0.38369901180267335, |
| "memory(GiB)": 36.98, |
| "step": 615, |
| "token_acc": 0.8649926144756278, |
| "train_speed(iter/s)": 0.145891 |
| }, |
| { |
| "epoch": 0.4166666666666667, |
| "grad_norm": 1.713547331206706, |
| "learning_rate": 9.786311853294799e-06, |
| "loss": 0.37184116840362547, |
| "memory(GiB)": 36.98, |
| "step": 620, |
| "token_acc": 0.8365116279069768, |
| "train_speed(iter/s)": 0.14596 |
| }, |
| { |
| "epoch": 0.4200268817204301, |
| "grad_norm": 1.4766281192277437, |
| "learning_rate": 9.780921623686873e-06, |
| "loss": 0.3832437515258789, |
| "memory(GiB)": 36.98, |
| "step": 625, |
| "token_acc": 0.8809590973201693, |
| "train_speed(iter/s)": 0.146172 |
| }, |
| { |
| "epoch": 0.42338709677419356, |
| "grad_norm": 1.2467031836603142, |
| "learning_rate": 9.775465776656257e-06, |
| "loss": 0.3634549856185913, |
| "memory(GiB)": 36.98, |
| "step": 630, |
| "token_acc": 0.8876265744628303, |
| "train_speed(iter/s)": 0.14642 |
| }, |
| { |
| "epoch": 0.426747311827957, |
| "grad_norm": 1.2190081005378173, |
| "learning_rate": 9.769944387083613e-06, |
| "loss": 0.35646920204162597, |
| "memory(GiB)": 36.98, |
| "step": 635, |
| "token_acc": 0.9050476526650194, |
| "train_speed(iter/s)": 0.146633 |
| }, |
| { |
| "epoch": 0.43010752688172044, |
| "grad_norm": 1.4111684796366029, |
| "learning_rate": 9.764357530749178e-06, |
| "loss": 0.368475079536438, |
| "memory(GiB)": 36.98, |
| "step": 640, |
| "token_acc": 0.8574933551420977, |
| "train_speed(iter/s)": 0.146775 |
| }, |
| { |
| "epoch": 0.4334677419354839, |
| "grad_norm": 1.2502663586719176, |
| "learning_rate": 9.758705284331704e-06, |
| "loss": 0.36438665390014646, |
| "memory(GiB)": 36.98, |
| "step": 645, |
| "token_acc": 0.8530818278427205, |
| "train_speed(iter/s)": 0.146859 |
| }, |
| { |
| "epoch": 0.4368279569892473, |
| "grad_norm": 1.2436432744079327, |
| "learning_rate": 9.752987725407416e-06, |
| "loss": 0.35638880729675293, |
| "memory(GiB)": 36.98, |
| "step": 650, |
| "token_acc": 0.8638570167696381, |
| "train_speed(iter/s)": 0.146953 |
| }, |
| { |
| "epoch": 0.44018817204301075, |
| "grad_norm": 1.3569390750268104, |
| "learning_rate": 9.747204932448942e-06, |
| "loss": 0.37936763763427733, |
| "memory(GiB)": 36.98, |
| "step": 655, |
| "token_acc": 0.8516341406066306, |
| "train_speed(iter/s)": 0.147038 |
| }, |
| { |
| "epoch": 0.4435483870967742, |
| "grad_norm": 1.5388545697441205, |
| "learning_rate": 9.741356984824234e-06, |
| "loss": 0.3363116502761841, |
| "memory(GiB)": 36.98, |
| "step": 660, |
| "token_acc": 0.8699498672957829, |
| "train_speed(iter/s)": 0.147318 |
| }, |
| { |
| "epoch": 0.4469086021505376, |
| "grad_norm": 1.3962981286379317, |
| "learning_rate": 9.73544396279549e-06, |
| "loss": 0.34099550247192384, |
| "memory(GiB)": 36.98, |
| "step": 665, |
| "token_acc": 0.8706088992974239, |
| "train_speed(iter/s)": 0.147444 |
| }, |
| { |
| "epoch": 0.45026881720430106, |
| "grad_norm": 1.4093270418166992, |
| "learning_rate": 9.72946594751803e-06, |
| "loss": 0.37921915054321287, |
| "memory(GiB)": 36.98, |
| "step": 670, |
| "token_acc": 0.8673262873847538, |
| "train_speed(iter/s)": 0.147527 |
| }, |
| { |
| "epoch": 0.4536290322580645, |
| "grad_norm": 1.1728008554779097, |
| "learning_rate": 9.723423021039211e-06, |
| "loss": 0.3977491855621338, |
| "memory(GiB)": 36.98, |
| "step": 675, |
| "token_acc": 0.8671311271460562, |
| "train_speed(iter/s)": 0.147683 |
| }, |
| { |
| "epoch": 0.45698924731182794, |
| "grad_norm": 1.1840245257190896, |
| "learning_rate": 9.717315266297277e-06, |
| "loss": 0.3696840763092041, |
| "memory(GiB)": 36.98, |
| "step": 680, |
| "token_acc": 0.8644768856447689, |
| "train_speed(iter/s)": 0.147724 |
| }, |
| { |
| "epoch": 0.4603494623655914, |
| "grad_norm": 1.2426389912018956, |
| "learning_rate": 9.711142767120238e-06, |
| "loss": 0.3768123149871826, |
| "memory(GiB)": 36.98, |
| "step": 685, |
| "token_acc": 0.885691231845078, |
| "train_speed(iter/s)": 0.1478 |
| }, |
| { |
| "epoch": 0.4637096774193548, |
| "grad_norm": 1.2302759474202611, |
| "learning_rate": 9.704905608224706e-06, |
| "loss": 0.3753058433532715, |
| "memory(GiB)": 36.98, |
| "step": 690, |
| "token_acc": 0.8787061994609164, |
| "train_speed(iter/s)": 0.147897 |
| }, |
| { |
| "epoch": 0.46706989247311825, |
| "grad_norm": 1.2541456992823259, |
| "learning_rate": 9.698603875214737e-06, |
| "loss": 0.38683831691741943, |
| "memory(GiB)": 36.98, |
| "step": 695, |
| "token_acc": 0.8655941878567722, |
| "train_speed(iter/s)": 0.147816 |
| }, |
| { |
| "epoch": 0.47043010752688175, |
| "grad_norm": 1.169233647864971, |
| "learning_rate": 9.692237654580658e-06, |
| "loss": 0.379689884185791, |
| "memory(GiB)": 36.98, |
| "step": 700, |
| "token_acc": 0.8643190056965303, |
| "train_speed(iter/s)": 0.147882 |
| }, |
| { |
| "epoch": 0.47043010752688175, |
| "eval_loss": 0.36553341150283813, |
| "eval_runtime": 22.2168, |
| "eval_samples_per_second": 21.2, |
| "eval_steps_per_second": 2.656, |
| "eval_token_acc": 0.8678156855217563, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.4737903225806452, |
| "grad_norm": 1.2629929939158209, |
| "learning_rate": 9.685807033697883e-06, |
| "loss": 0.37116250991821287, |
| "memory(GiB)": 36.98, |
| "step": 705, |
| "token_acc": 0.8659203980099502, |
| "train_speed(iter/s)": 0.143693 |
| }, |
| { |
| "epoch": 0.4771505376344086, |
| "grad_norm": 1.2566121281394267, |
| "learning_rate": 9.679312100825703e-06, |
| "loss": 0.3787511348724365, |
| "memory(GiB)": 36.98, |
| "step": 710, |
| "token_acc": 0.8787093735280264, |
| "train_speed(iter/s)": 0.14381 |
| }, |
| { |
| "epoch": 0.48051075268817206, |
| "grad_norm": 1.4778551662573782, |
| "learning_rate": 9.672752945106088e-06, |
| "loss": 0.39328994750976565, |
| "memory(GiB)": 36.98, |
| "step": 715, |
| "token_acc": 0.885866802236909, |
| "train_speed(iter/s)": 0.144022 |
| }, |
| { |
| "epoch": 0.4838709677419355, |
| "grad_norm": 1.2521632439474264, |
| "learning_rate": 9.66612965656245e-06, |
| "loss": 0.3512038469314575, |
| "memory(GiB)": 36.98, |
| "step": 720, |
| "token_acc": 0.8695306284805091, |
| "train_speed(iter/s)": 0.144128 |
| }, |
| { |
| "epoch": 0.48723118279569894, |
| "grad_norm": 1.5687563338452333, |
| "learning_rate": 9.65944232609842e-06, |
| "loss": 0.38547964096069337, |
| "memory(GiB)": 36.98, |
| "step": 725, |
| "token_acc": 0.8300395256916996, |
| "train_speed(iter/s)": 0.14415 |
| }, |
| { |
| "epoch": 0.4905913978494624, |
| "grad_norm": 1.5100891343298852, |
| "learning_rate": 9.652691045496591e-06, |
| "loss": 0.38148338794708253, |
| "memory(GiB)": 36.98, |
| "step": 730, |
| "token_acc": 0.8454720616570327, |
| "train_speed(iter/s)": 0.144282 |
| }, |
| { |
| "epoch": 0.4939516129032258, |
| "grad_norm": 1.2871381068183412, |
| "learning_rate": 9.645875907417266e-06, |
| "loss": 0.3543083667755127, |
| "memory(GiB)": 36.98, |
| "step": 735, |
| "token_acc": 0.877696190913263, |
| "train_speed(iter/s)": 0.144319 |
| }, |
| { |
| "epoch": 0.49731182795698925, |
| "grad_norm": 1.5160670685722115, |
| "learning_rate": 9.638997005397174e-06, |
| "loss": 0.3613048791885376, |
| "memory(GiB)": 36.98, |
| "step": 740, |
| "token_acc": 0.8726815466834329, |
| "train_speed(iter/s)": 0.144365 |
| }, |
| { |
| "epoch": 0.5006720430107527, |
| "grad_norm": 1.135167693746094, |
| "learning_rate": 9.632054433848205e-06, |
| "loss": 0.3598623514175415, |
| "memory(GiB)": 36.98, |
| "step": 745, |
| "token_acc": 0.8829736211031175, |
| "train_speed(iter/s)": 0.144478 |
| }, |
| { |
| "epoch": 0.5040322580645161, |
| "grad_norm": 1.187612792246231, |
| "learning_rate": 9.625048288056098e-06, |
| "loss": 0.3665189266204834, |
| "memory(GiB)": 36.98, |
| "step": 750, |
| "token_acc": 0.8640932850824287, |
| "train_speed(iter/s)": 0.144599 |
| }, |
| { |
| "epoch": 0.5073924731182796, |
| "grad_norm": 1.1980021625067683, |
| "learning_rate": 9.617978664179135e-06, |
| "loss": 0.3561270236968994, |
| "memory(GiB)": 36.98, |
| "step": 755, |
| "token_acc": 0.8723776223776224, |
| "train_speed(iter/s)": 0.144765 |
| }, |
| { |
| "epoch": 0.510752688172043, |
| "grad_norm": 1.232472450356955, |
| "learning_rate": 9.610845659246833e-06, |
| "loss": 0.3396260976791382, |
| "memory(GiB)": 36.98, |
| "step": 760, |
| "token_acc": 0.8916548797736916, |
| "train_speed(iter/s)": 0.144853 |
| }, |
| { |
| "epoch": 0.5141129032258065, |
| "grad_norm": 1.3225586989381914, |
| "learning_rate": 9.6036493711586e-06, |
| "loss": 0.38932969570159914, |
| "memory(GiB)": 36.98, |
| "step": 765, |
| "token_acc": 0.874160048869884, |
| "train_speed(iter/s)": 0.145003 |
| }, |
| { |
| "epoch": 0.5174731182795699, |
| "grad_norm": 1.4950056939211132, |
| "learning_rate": 9.596389898682396e-06, |
| "loss": 0.3547043323516846, |
| "memory(GiB)": 36.98, |
| "step": 770, |
| "token_acc": 0.8626444159178434, |
| "train_speed(iter/s)": 0.145082 |
| }, |
| { |
| "epoch": 0.5208333333333334, |
| "grad_norm": 1.1802965236685845, |
| "learning_rate": 9.589067341453375e-06, |
| "loss": 0.34569859504699707, |
| "memory(GiB)": 36.98, |
| "step": 775, |
| "token_acc": 0.8858312858312858, |
| "train_speed(iter/s)": 0.145186 |
| }, |
| { |
| "epoch": 0.5241935483870968, |
| "grad_norm": 1.115277934031332, |
| "learning_rate": 9.581681799972528e-06, |
| "loss": 0.36436331272125244, |
| "memory(GiB)": 36.98, |
| "step": 780, |
| "token_acc": 0.8587719298245614, |
| "train_speed(iter/s)": 0.145311 |
| }, |
| { |
| "epoch": 0.5275537634408602, |
| "grad_norm": 1.2521450260761797, |
| "learning_rate": 9.574233375605284e-06, |
| "loss": 0.368036675453186, |
| "memory(GiB)": 36.98, |
| "step": 785, |
| "token_acc": 0.8453327276856689, |
| "train_speed(iter/s)": 0.145343 |
| }, |
| { |
| "epoch": 0.5309139784946236, |
| "grad_norm": 1.3001720572785291, |
| "learning_rate": 9.566722170580138e-06, |
| "loss": 0.37065854072570803, |
| "memory(GiB)": 36.98, |
| "step": 790, |
| "token_acc": 0.8767784552845529, |
| "train_speed(iter/s)": 0.145526 |
| }, |
| { |
| "epoch": 0.5342741935483871, |
| "grad_norm": 1.2400854128619574, |
| "learning_rate": 9.559148287987236e-06, |
| "loss": 0.36444659233093263, |
| "memory(GiB)": 36.98, |
| "step": 795, |
| "token_acc": 0.8533872598584429, |
| "train_speed(iter/s)": 0.145651 |
| }, |
| { |
| "epoch": 0.5376344086021505, |
| "grad_norm": 1.3328229333855304, |
| "learning_rate": 9.551511831776966e-06, |
| "loss": 0.3739351749420166, |
| "memory(GiB)": 36.98, |
| "step": 800, |
| "token_acc": 0.8744081172491545, |
| "train_speed(iter/s)": 0.145742 |
| }, |
| { |
| "epoch": 0.5376344086021505, |
| "eval_loss": 0.3648330569267273, |
| "eval_runtime": 22.3843, |
| "eval_samples_per_second": 21.042, |
| "eval_steps_per_second": 2.636, |
| "eval_token_acc": 0.8687956517785882, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.540994623655914, |
| "grad_norm": 1.1730739182895107, |
| "learning_rate": 9.543812906758529e-06, |
| "loss": 0.3657143831253052, |
| "memory(GiB)": 36.98, |
| "step": 805, |
| "token_acc": 0.8716752216518898, |
| "train_speed(iter/s)": 0.140525 |
| }, |
| { |
| "epoch": 0.5443548387096774, |
| "grad_norm": 1.4833023676939197, |
| "learning_rate": 9.536051618598503e-06, |
| "loss": 0.3627838373184204, |
| "memory(GiB)": 36.98, |
| "step": 810, |
| "token_acc": 0.873323994396638, |
| "train_speed(iter/s)": 0.140671 |
| }, |
| { |
| "epoch": 0.5477150537634409, |
| "grad_norm": 1.127923630638402, |
| "learning_rate": 9.528228073819385e-06, |
| "loss": 0.3465721845626831, |
| "memory(GiB)": 36.98, |
| "step": 815, |
| "token_acc": 0.8859081097748998, |
| "train_speed(iter/s)": 0.140707 |
| }, |
| { |
| "epoch": 0.5510752688172043, |
| "grad_norm": 1.3463540160197853, |
| "learning_rate": 9.520342379798141e-06, |
| "loss": 0.3924789190292358, |
| "memory(GiB)": 36.98, |
| "step": 820, |
| "token_acc": 0.8654133094598627, |
| "train_speed(iter/s)": 0.140898 |
| }, |
| { |
| "epoch": 0.5544354838709677, |
| "grad_norm": 1.4061087313801883, |
| "learning_rate": 9.51239464476472e-06, |
| "loss": 0.3804301738739014, |
| "memory(GiB)": 36.98, |
| "step": 825, |
| "token_acc": 0.8837538316434803, |
| "train_speed(iter/s)": 0.140894 |
| }, |
| { |
| "epoch": 0.5577956989247311, |
| "grad_norm": 1.1715838130476242, |
| "learning_rate": 9.50438497780058e-06, |
| "loss": 0.34037508964538576, |
| "memory(GiB)": 36.98, |
| "step": 830, |
| "token_acc": 0.8872146118721461, |
| "train_speed(iter/s)": 0.140962 |
| }, |
| { |
| "epoch": 0.5611559139784946, |
| "grad_norm": 1.315222425795007, |
| "learning_rate": 9.496313488837183e-06, |
| "loss": 0.36748337745666504, |
| "memory(GiB)": 36.98, |
| "step": 835, |
| "token_acc": 0.8590819348469891, |
| "train_speed(iter/s)": 0.141068 |
| }, |
| { |
| "epoch": 0.5645161290322581, |
| "grad_norm": 1.2776429071415132, |
| "learning_rate": 9.488180288654485e-06, |
| "loss": 0.37864868640899657, |
| "memory(GiB)": 36.98, |
| "step": 840, |
| "token_acc": 0.8681633627895868, |
| "train_speed(iter/s)": 0.141264 |
| }, |
| { |
| "epoch": 0.5678763440860215, |
| "grad_norm": 1.396507233078662, |
| "learning_rate": 9.479985488879426e-06, |
| "loss": 0.3637056589126587, |
| "memory(GiB)": 36.98, |
| "step": 845, |
| "token_acc": 0.8704206241519674, |
| "train_speed(iter/s)": 0.141334 |
| }, |
| { |
| "epoch": 0.571236559139785, |
| "grad_norm": 1.1602905232651854, |
| "learning_rate": 9.471729201984385e-06, |
| "loss": 0.35836124420166016, |
| "memory(GiB)": 36.98, |
| "step": 850, |
| "token_acc": 0.8477945744460551, |
| "train_speed(iter/s)": 0.141429 |
| }, |
| { |
| "epoch": 0.5745967741935484, |
| "grad_norm": 1.2802615859096695, |
| "learning_rate": 9.463411541285648e-06, |
| "loss": 0.3767895221710205, |
| "memory(GiB)": 36.98, |
| "step": 855, |
| "token_acc": 0.842360942433811, |
| "train_speed(iter/s)": 0.141522 |
| }, |
| { |
| "epoch": 0.5779569892473119, |
| "grad_norm": 1.152936856157292, |
| "learning_rate": 9.45503262094184e-06, |
| "loss": 0.34860782623291015, |
| "memory(GiB)": 36.98, |
| "step": 860, |
| "token_acc": 0.890283046396265, |
| "train_speed(iter/s)": 0.141556 |
| }, |
| { |
| "epoch": 0.5813172043010753, |
| "grad_norm": 1.3209240471576482, |
| "learning_rate": 9.446592555952372e-06, |
| "loss": 0.3571927547454834, |
| "memory(GiB)": 36.98, |
| "step": 865, |
| "token_acc": 0.864054003375211, |
| "train_speed(iter/s)": 0.141632 |
| }, |
| { |
| "epoch": 0.5846774193548387, |
| "grad_norm": 1.3540565356974807, |
| "learning_rate": 9.438091462155854e-06, |
| "loss": 0.39406092166900636, |
| "memory(GiB)": 36.98, |
| "step": 870, |
| "token_acc": 0.8713701871370187, |
| "train_speed(iter/s)": 0.141743 |
| }, |
| { |
| "epoch": 0.5880376344086021, |
| "grad_norm": 1.1536152243383244, |
| "learning_rate": 9.429529456228503e-06, |
| "loss": 0.3671466827392578, |
| "memory(GiB)": 36.98, |
| "step": 875, |
| "token_acc": 0.8574139976275208, |
| "train_speed(iter/s)": 0.141896 |
| }, |
| { |
| "epoch": 0.5913978494623656, |
| "grad_norm": 1.111417750503976, |
| "learning_rate": 9.420906655682553e-06, |
| "loss": 0.4034374713897705, |
| "memory(GiB)": 36.98, |
| "step": 880, |
| "token_acc": 0.8493527953731754, |
| "train_speed(iter/s)": 0.141998 |
| }, |
| { |
| "epoch": 0.594758064516129, |
| "grad_norm": 1.1371173509292516, |
| "learning_rate": 9.412223178864628e-06, |
| "loss": 0.3718876838684082, |
| "memory(GiB)": 36.98, |
| "step": 885, |
| "token_acc": 0.865809922295278, |
| "train_speed(iter/s)": 0.141971 |
| }, |
| { |
| "epoch": 0.5981182795698925, |
| "grad_norm": 1.4351101955941779, |
| "learning_rate": 9.403479144954129e-06, |
| "loss": 0.38196277618408203, |
| "memory(GiB)": 36.98, |
| "step": 890, |
| "token_acc": 0.8545710267229255, |
| "train_speed(iter/s)": 0.142118 |
| }, |
| { |
| "epoch": 0.6014784946236559, |
| "grad_norm": 1.406812942939455, |
| "learning_rate": 9.394674673961592e-06, |
| "loss": 0.4024368762969971, |
| "memory(GiB)": 36.98, |
| "step": 895, |
| "token_acc": 0.8243031016882607, |
| "train_speed(iter/s)": 0.142274 |
| }, |
| { |
| "epoch": 0.6048387096774194, |
| "grad_norm": 1.2905388696557576, |
| "learning_rate": 9.385809886727044e-06, |
| "loss": 0.36548285484313964, |
| "memory(GiB)": 36.98, |
| "step": 900, |
| "token_acc": 0.8452690972222222, |
| "train_speed(iter/s)": 0.142383 |
| }, |
| { |
| "epoch": 0.6048387096774194, |
| "eval_loss": 0.3623374104499817, |
| "eval_runtime": 22.2664, |
| "eval_samples_per_second": 21.153, |
| "eval_steps_per_second": 2.65, |
| "eval_token_acc": 0.8692300698099674, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.6081989247311828, |
| "grad_norm": 1.1835524274220626, |
| "learning_rate": 9.376884904918342e-06, |
| "loss": 0.39243431091308595, |
| "memory(GiB)": 36.98, |
| "step": 905, |
| "token_acc": 0.8737482819556254, |
| "train_speed(iter/s)": 0.138228 |
| }, |
| { |
| "epoch": 0.6115591397849462, |
| "grad_norm": 1.2772878229089082, |
| "learning_rate": 9.367899851029506e-06, |
| "loss": 0.3636767387390137, |
| "memory(GiB)": 36.98, |
| "step": 910, |
| "token_acc": 0.8840648854961832, |
| "train_speed(iter/s)": 0.138378 |
| }, |
| { |
| "epoch": 0.6149193548387096, |
| "grad_norm": 1.3603322316139836, |
| "learning_rate": 9.358854848379034e-06, |
| "loss": 0.3802945613861084, |
| "memory(GiB)": 36.98, |
| "step": 915, |
| "token_acc": 0.8738487301144292, |
| "train_speed(iter/s)": 0.138443 |
| }, |
| { |
| "epoch": 0.6182795698924731, |
| "grad_norm": 1.352541823908552, |
| "learning_rate": 9.349750021108212e-06, |
| "loss": 0.3642538785934448, |
| "memory(GiB)": 36.98, |
| "step": 920, |
| "token_acc": 0.854089709762533, |
| "train_speed(iter/s)": 0.138538 |
| }, |
| { |
| "epoch": 0.6216397849462365, |
| "grad_norm": 0.9306176011999149, |
| "learning_rate": 9.340585494179412e-06, |
| "loss": 0.35754919052124023, |
| "memory(GiB)": 36.98, |
| "step": 925, |
| "token_acc": 0.9050704225352113, |
| "train_speed(iter/s)": 0.13871 |
| }, |
| { |
| "epoch": 0.625, |
| "grad_norm": 1.1564276522228065, |
| "learning_rate": 9.331361393374373e-06, |
| "loss": 0.3721582889556885, |
| "memory(GiB)": 36.98, |
| "step": 930, |
| "token_acc": 0.8556311413454271, |
| "train_speed(iter/s)": 0.138847 |
| }, |
| { |
| "epoch": 0.6283602150537635, |
| "grad_norm": 1.198966805981438, |
| "learning_rate": 9.322077845292476e-06, |
| "loss": 0.3688213348388672, |
| "memory(GiB)": 36.98, |
| "step": 935, |
| "token_acc": 0.859504132231405, |
| "train_speed(iter/s)": 0.139024 |
| }, |
| { |
| "epoch": 0.6317204301075269, |
| "grad_norm": 1.0471663636235522, |
| "learning_rate": 9.31273497734901e-06, |
| "loss": 0.3661798477172852, |
| "memory(GiB)": 36.98, |
| "step": 940, |
| "token_acc": 0.87492762015055, |
| "train_speed(iter/s)": 0.139147 |
| }, |
| { |
| "epoch": 0.6350806451612904, |
| "grad_norm": 1.1434543165368571, |
| "learning_rate": 9.303332917773412e-06, |
| "loss": 0.378633451461792, |
| "memory(GiB)": 36.98, |
| "step": 945, |
| "token_acc": 0.8751429224788475, |
| "train_speed(iter/s)": 0.139348 |
| }, |
| { |
| "epoch": 0.6384408602150538, |
| "grad_norm": 1.4342447965175822, |
| "learning_rate": 9.293871795607527e-06, |
| "loss": 0.3892825603485107, |
| "memory(GiB)": 36.98, |
| "step": 950, |
| "token_acc": 0.8776422764227643, |
| "train_speed(iter/s)": 0.13948 |
| }, |
| { |
| "epoch": 0.6418010752688172, |
| "grad_norm": 1.2379295477470091, |
| "learning_rate": 9.284351740703817e-06, |
| "loss": 0.3719215154647827, |
| "memory(GiB)": 36.98, |
| "step": 955, |
| "token_acc": 0.8418898174995736, |
| "train_speed(iter/s)": 0.139663 |
| }, |
| { |
| "epoch": 0.6451612903225806, |
| "grad_norm": 1.1402428987562707, |
| "learning_rate": 9.274772883723587e-06, |
| "loss": 0.3425445079803467, |
| "memory(GiB)": 36.98, |
| "step": 960, |
| "token_acc": 0.8607703281027104, |
| "train_speed(iter/s)": 0.139765 |
| }, |
| { |
| "epoch": 0.6485215053763441, |
| "grad_norm": 1.2256536961952824, |
| "learning_rate": 9.265135356135195e-06, |
| "loss": 0.35796480178833007, |
| "memory(GiB)": 36.98, |
| "step": 965, |
| "token_acc": 0.8771626297577855, |
| "train_speed(iter/s)": 0.139872 |
| }, |
| { |
| "epoch": 0.6518817204301075, |
| "grad_norm": 1.1798596505310708, |
| "learning_rate": 9.25543929021224e-06, |
| "loss": 0.3797889232635498, |
| "memory(GiB)": 36.98, |
| "step": 970, |
| "token_acc": 0.86508155930329, |
| "train_speed(iter/s)": 0.13994 |
| }, |
| { |
| "epoch": 0.655241935483871, |
| "grad_norm": 1.3153118732048075, |
| "learning_rate": 9.245684819031757e-06, |
| "loss": 0.3661204814910889, |
| "memory(GiB)": 36.98, |
| "step": 975, |
| "token_acc": 0.8765514184397163, |
| "train_speed(iter/s)": 0.140006 |
| }, |
| { |
| "epoch": 0.6586021505376344, |
| "grad_norm": 0.9790199677836383, |
| "learning_rate": 9.235872076472378e-06, |
| "loss": 0.35305283069610593, |
| "memory(GiB)": 36.98, |
| "step": 980, |
| "token_acc": 0.8554982135768162, |
| "train_speed(iter/s)": 0.140074 |
| }, |
| { |
| "epoch": 0.6619623655913979, |
| "grad_norm": 1.342098542147494, |
| "learning_rate": 9.226001197212505e-06, |
| "loss": 0.35271439552307127, |
| "memory(GiB)": 36.98, |
| "step": 985, |
| "token_acc": 0.8696958317686819, |
| "train_speed(iter/s)": 0.140211 |
| }, |
| { |
| "epoch": 0.6653225806451613, |
| "grad_norm": 1.168743571907197, |
| "learning_rate": 9.216072316728453e-06, |
| "loss": 0.3402960538864136, |
| "memory(GiB)": 36.98, |
| "step": 990, |
| "token_acc": 0.8855403348554034, |
| "train_speed(iter/s)": 0.140343 |
| }, |
| { |
| "epoch": 0.6686827956989247, |
| "grad_norm": 1.3186904670113182, |
| "learning_rate": 9.2060855712926e-06, |
| "loss": 0.3960063934326172, |
| "memory(GiB)": 36.98, |
| "step": 995, |
| "token_acc": 0.860874062580812, |
| "train_speed(iter/s)": 0.140371 |
| }, |
| { |
| "epoch": 0.6720430107526881, |
| "grad_norm": 1.3153391708535236, |
| "learning_rate": 9.196041097971509e-06, |
| "loss": 0.3522847890853882, |
| "memory(GiB)": 36.98, |
| "step": 1000, |
| "token_acc": 0.8593790764414296, |
| "train_speed(iter/s)": 0.140526 |
| }, |
| { |
| "epoch": 0.6720430107526881, |
| "eval_loss": 0.3627634346485138, |
| "eval_runtime": 22.2222, |
| "eval_samples_per_second": 21.195, |
| "eval_steps_per_second": 2.655, |
| "eval_token_acc": 0.8697251043108413, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.6754032258064516, |
| "grad_norm": 1.6648875853015621, |
| "learning_rate": 9.185939034624048e-06, |
| "loss": 0.3742126226425171, |
| "memory(GiB)": 36.98, |
| "step": 1005, |
| "token_acc": 0.8709945104046981, |
| "train_speed(iter/s)": 0.136625 |
| }, |
| { |
| "epoch": 0.678763440860215, |
| "grad_norm": 0.9496792330923782, |
| "learning_rate": 9.175779519899504e-06, |
| "loss": 0.36241965293884276, |
| "memory(GiB)": 36.98, |
| "step": 1010, |
| "token_acc": 0.878876404494382, |
| "train_speed(iter/s)": 0.136756 |
| }, |
| { |
| "epoch": 0.6821236559139785, |
| "grad_norm": 1.159148138819774, |
| "learning_rate": 9.165562693235671e-06, |
| "loss": 0.34667220115661623, |
| "memory(GiB)": 36.98, |
| "step": 1015, |
| "token_acc": 0.8697904933543591, |
| "train_speed(iter/s)": 0.136863 |
| }, |
| { |
| "epoch": 0.6854838709677419, |
| "grad_norm": 1.5210774238674614, |
| "learning_rate": 9.155288694856942e-06, |
| "loss": 0.3921034574508667, |
| "memory(GiB)": 36.98, |
| "step": 1020, |
| "token_acc": 0.8514568158168574, |
| "train_speed(iter/s)": 0.136995 |
| }, |
| { |
| "epoch": 0.6888440860215054, |
| "grad_norm": 1.4036032954503623, |
| "learning_rate": 9.144957665772383e-06, |
| "loss": 0.3707906246185303, |
| "memory(GiB)": 36.98, |
| "step": 1025, |
| "token_acc": 0.8549953022236142, |
| "train_speed(iter/s)": 0.137102 |
| }, |
| { |
| "epoch": 0.6922043010752689, |
| "grad_norm": 1.2363135604055617, |
| "learning_rate": 9.134569747773799e-06, |
| "loss": 0.3501997709274292, |
| "memory(GiB)": 36.98, |
| "step": 1030, |
| "token_acc": 0.8955597248280175, |
| "train_speed(iter/s)": 0.137218 |
| }, |
| { |
| "epoch": 0.6955645161290323, |
| "grad_norm": 1.2031587722298478, |
| "learning_rate": 9.124125083433785e-06, |
| "loss": 0.35233426094055176, |
| "memory(GiB)": 36.98, |
| "step": 1035, |
| "token_acc": 0.8718586713112715, |
| "train_speed(iter/s)": 0.137257 |
| }, |
| { |
| "epoch": 0.6989247311827957, |
| "grad_norm": 1.194003310180007, |
| "learning_rate": 9.113623816103775e-06, |
| "loss": 0.38490521907806396, |
| "memory(GiB)": 36.98, |
| "step": 1040, |
| "token_acc": 0.8675508720930233, |
| "train_speed(iter/s)": 0.137309 |
| }, |
| { |
| "epoch": 0.7022849462365591, |
| "grad_norm": 1.1443958062714952, |
| "learning_rate": 9.103066089912062e-06, |
| "loss": 0.37537660598754885, |
| "memory(GiB)": 36.98, |
| "step": 1045, |
| "token_acc": 0.8820490744726647, |
| "train_speed(iter/s)": 0.137442 |
| }, |
| { |
| "epoch": 0.7056451612903226, |
| "grad_norm": 1.4564894479548467, |
| "learning_rate": 9.092452049761837e-06, |
| "loss": 0.36408531665802, |
| "memory(GiB)": 36.98, |
| "step": 1050, |
| "token_acc": 0.8774496995035276, |
| "train_speed(iter/s)": 0.137483 |
| }, |
| { |
| "epoch": 0.709005376344086, |
| "grad_norm": 1.2480131437713693, |
| "learning_rate": 9.081781841329186e-06, |
| "loss": 0.3492724895477295, |
| "memory(GiB)": 36.98, |
| "step": 1055, |
| "token_acc": 0.8737158606865447, |
| "train_speed(iter/s)": 0.137587 |
| }, |
| { |
| "epoch": 0.7123655913978495, |
| "grad_norm": 1.1879780995611289, |
| "learning_rate": 9.071055611061102e-06, |
| "loss": 0.3583995819091797, |
| "memory(GiB)": 36.98, |
| "step": 1060, |
| "token_acc": 0.8709024686361797, |
| "train_speed(iter/s)": 0.137746 |
| }, |
| { |
| "epoch": 0.7157258064516129, |
| "grad_norm": 1.167689141740901, |
| "learning_rate": 9.06027350617346e-06, |
| "loss": 0.3558779716491699, |
| "memory(GiB)": 36.98, |
| "step": 1065, |
| "token_acc": 0.8789541639767592, |
| "train_speed(iter/s)": 0.137819 |
| }, |
| { |
| "epoch": 0.7190860215053764, |
| "grad_norm": 1.3103844035853496, |
| "learning_rate": 9.049435674649012e-06, |
| "loss": 0.3494231700897217, |
| "memory(GiB)": 36.98, |
| "step": 1070, |
| "token_acc": 0.875264910687254, |
| "train_speed(iter/s)": 0.13791 |
| }, |
| { |
| "epoch": 0.7224462365591398, |
| "grad_norm": 1.447881789596697, |
| "learning_rate": 9.038542265235353e-06, |
| "loss": 0.37793238162994386, |
| "memory(GiB)": 36.98, |
| "step": 1075, |
| "token_acc": 0.8793576184880533, |
| "train_speed(iter/s)": 0.137992 |
| }, |
| { |
| "epoch": 0.7258064516129032, |
| "grad_norm": 1.1420570268637051, |
| "learning_rate": 9.027593427442867e-06, |
| "loss": 0.3852741241455078, |
| "memory(GiB)": 36.98, |
| "step": 1080, |
| "token_acc": 0.8560224089635854, |
| "train_speed(iter/s)": 0.138133 |
| }, |
| { |
| "epoch": 0.7291666666666666, |
| "grad_norm": 0.9932383409822715, |
| "learning_rate": 9.01658931154269e-06, |
| "loss": 0.37255063056945803, |
| "memory(GiB)": 36.98, |
| "step": 1085, |
| "token_acc": 0.8839645447219984, |
| "train_speed(iter/s)": 0.138268 |
| }, |
| { |
| "epoch": 0.7325268817204301, |
| "grad_norm": 1.189078778252946, |
| "learning_rate": 9.005530068564641e-06, |
| "loss": 0.365185284614563, |
| "memory(GiB)": 36.98, |
| "step": 1090, |
| "token_acc": 0.8813782219884272, |
| "train_speed(iter/s)": 0.138348 |
| }, |
| { |
| "epoch": 0.7358870967741935, |
| "grad_norm": 1.157742839318529, |
| "learning_rate": 8.994415850295148e-06, |
| "loss": 0.35784361362457273, |
| "memory(GiB)": 36.98, |
| "step": 1095, |
| "token_acc": 0.8647392875580795, |
| "train_speed(iter/s)": 0.138449 |
| }, |
| { |
| "epoch": 0.739247311827957, |
| "grad_norm": 1.220404878981132, |
| "learning_rate": 8.98324680927517e-06, |
| "loss": 0.373700475692749, |
| "memory(GiB)": 36.98, |
| "step": 1100, |
| "token_acc": 0.8641396933560477, |
| "train_speed(iter/s)": 0.138546 |
| }, |
| { |
| "epoch": 0.739247311827957, |
| "eval_loss": 0.35993626713752747, |
| "eval_runtime": 22.2705, |
| "eval_samples_per_second": 21.149, |
| "eval_steps_per_second": 2.649, |
| "eval_token_acc": 0.8690179121667357, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.7426075268817204, |
| "grad_norm": 1.027962609432086, |
| "learning_rate": 8.972023098798095e-06, |
| "loss": 0.40328526496887207, |
| "memory(GiB)": 36.98, |
| "step": 1105, |
| "token_acc": 0.8727120885239056, |
| "train_speed(iter/s)": 0.135213 |
| }, |
| { |
| "epoch": 0.7459677419354839, |
| "grad_norm": 1.1113471488531306, |
| "learning_rate": 8.960744872907645e-06, |
| "loss": 0.38501567840576173, |
| "memory(GiB)": 36.98, |
| "step": 1110, |
| "token_acc": 0.8484187568157033, |
| "train_speed(iter/s)": 0.135318 |
| }, |
| { |
| "epoch": 0.7493279569892473, |
| "grad_norm": 1.1306811125471197, |
| "learning_rate": 8.949412286395755e-06, |
| "loss": 0.36622910499572753, |
| "memory(GiB)": 36.98, |
| "step": 1115, |
| "token_acc": 0.8693091732729332, |
| "train_speed(iter/s)": 0.135432 |
| }, |
| { |
| "epoch": 0.7526881720430108, |
| "grad_norm": 1.129836101156622, |
| "learning_rate": 8.938025494800454e-06, |
| "loss": 0.39587087631225587, |
| "memory(GiB)": 36.98, |
| "step": 1120, |
| "token_acc": 0.8733031674208145, |
| "train_speed(iter/s)": 0.1355 |
| }, |
| { |
| "epoch": 0.7560483870967742, |
| "grad_norm": 1.13107139794073, |
| "learning_rate": 8.926584654403725e-06, |
| "loss": 0.3494365692138672, |
| "memory(GiB)": 36.98, |
| "step": 1125, |
| "token_acc": 0.8773690078037905, |
| "train_speed(iter/s)": 0.13563 |
| }, |
| { |
| "epoch": 0.7594086021505376, |
| "grad_norm": 1.2782802692395279, |
| "learning_rate": 8.915089922229365e-06, |
| "loss": 0.3868433952331543, |
| "memory(GiB)": 36.98, |
| "step": 1130, |
| "token_acc": 0.8728874077600571, |
| "train_speed(iter/s)": 0.135686 |
| }, |
| { |
| "epoch": 0.7627688172043011, |
| "grad_norm": 1.1154758878343582, |
| "learning_rate": 8.903541456040825e-06, |
| "loss": 0.3632142782211304, |
| "memory(GiB)": 36.98, |
| "step": 1135, |
| "token_acc": 0.8736906962415281, |
| "train_speed(iter/s)": 0.13579 |
| }, |
| { |
| "epoch": 0.7661290322580645, |
| "grad_norm": 1.2723668159978256, |
| "learning_rate": 8.891939414339048e-06, |
| "loss": 0.36965441703796387, |
| "memory(GiB)": 36.98, |
| "step": 1140, |
| "token_acc": 0.8772372069573985, |
| "train_speed(iter/s)": 0.135898 |
| }, |
| { |
| "epoch": 0.769489247311828, |
| "grad_norm": 1.154588616850164, |
| "learning_rate": 8.880283956360297e-06, |
| "loss": 0.37631180286407473, |
| "memory(GiB)": 36.98, |
| "step": 1145, |
| "token_acc": 0.8772019402604033, |
| "train_speed(iter/s)": 0.135982 |
| }, |
| { |
| "epoch": 0.7728494623655914, |
| "grad_norm": 1.170600321406014, |
| "learning_rate": 8.868575242073954e-06, |
| "loss": 0.37340292930603025, |
| "memory(GiB)": 36.98, |
| "step": 1150, |
| "token_acc": 0.8703907539900936, |
| "train_speed(iter/s)": 0.136084 |
| }, |
| { |
| "epoch": 0.7762096774193549, |
| "grad_norm": 1.231802273714498, |
| "learning_rate": 8.856813432180349e-06, |
| "loss": 0.35198609828948973, |
| "memory(GiB)": 36.98, |
| "step": 1155, |
| "token_acc": 0.887806411062225, |
| "train_speed(iter/s)": 0.136204 |
| }, |
| { |
| "epoch": 0.7795698924731183, |
| "grad_norm": 1.1688665900475064, |
| "learning_rate": 8.844998688108535e-06, |
| "loss": 0.3763306140899658, |
| "memory(GiB)": 36.98, |
| "step": 1160, |
| "token_acc": 0.873224618621778, |
| "train_speed(iter/s)": 0.136285 |
| }, |
| { |
| "epoch": 0.7829301075268817, |
| "grad_norm": 1.2175946582909498, |
| "learning_rate": 8.833131172014075e-06, |
| "loss": 0.3766140937805176, |
| "memory(GiB)": 36.98, |
| "step": 1165, |
| "token_acc": 0.8513879485443466, |
| "train_speed(iter/s)": 0.136437 |
| }, |
| { |
| "epoch": 0.7862903225806451, |
| "grad_norm": 1.0227148148425746, |
| "learning_rate": 8.821211046776828e-06, |
| "loss": 0.3676277160644531, |
| "memory(GiB)": 36.98, |
| "step": 1170, |
| "token_acc": 0.8948380010982976, |
| "train_speed(iter/s)": 0.136557 |
| }, |
| { |
| "epoch": 0.7896505376344086, |
| "grad_norm": 1.1429501203864452, |
| "learning_rate": 8.809238475998699e-06, |
| "loss": 0.36184470653533934, |
| "memory(GiB)": 36.98, |
| "step": 1175, |
| "token_acc": 0.8679896462467644, |
| "train_speed(iter/s)": 0.136615 |
| }, |
| { |
| "epoch": 0.793010752688172, |
| "grad_norm": 1.2033359899326155, |
| "learning_rate": 8.797213624001403e-06, |
| "loss": 0.3503121852874756, |
| "memory(GiB)": 36.98, |
| "step": 1180, |
| "token_acc": 0.8806179775280899, |
| "train_speed(iter/s)": 0.136717 |
| }, |
| { |
| "epoch": 0.7963709677419355, |
| "grad_norm": 1.0758929439541713, |
| "learning_rate": 8.785136655824208e-06, |
| "loss": 0.3926861763000488, |
| "memory(GiB)": 36.98, |
| "step": 1185, |
| "token_acc": 0.8738359867828177, |
| "train_speed(iter/s)": 0.136848 |
| }, |
| { |
| "epoch": 0.7997311827956989, |
| "grad_norm": 1.3930864885101153, |
| "learning_rate": 8.773007737221661e-06, |
| "loss": 0.36632988452911375, |
| "memory(GiB)": 36.98, |
| "step": 1190, |
| "token_acc": 0.8565597667638484, |
| "train_speed(iter/s)": 0.13695 |
| }, |
| { |
| "epoch": 0.8030913978494624, |
| "grad_norm": 1.2851603723997174, |
| "learning_rate": 8.760827034661333e-06, |
| "loss": 0.37504141330718993, |
| "memory(GiB)": 36.98, |
| "step": 1195, |
| "token_acc": 0.8499542543458372, |
| "train_speed(iter/s)": 0.137062 |
| }, |
| { |
| "epoch": 0.8064516129032258, |
| "grad_norm": 1.1404438471085583, |
| "learning_rate": 8.748594715321512e-06, |
| "loss": 0.37414093017578126, |
| "memory(GiB)": 36.98, |
| "step": 1200, |
| "token_acc": 0.8452418096723869, |
| "train_speed(iter/s)": 0.137153 |
| }, |
| { |
| "epoch": 0.8064516129032258, |
| "eval_loss": 0.3591736853122711, |
| "eval_runtime": 22.3034, |
| "eval_samples_per_second": 21.118, |
| "eval_steps_per_second": 2.645, |
| "eval_token_acc": 0.8695634603921886, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.8098118279569892, |
| "grad_norm": 1.120175429790956, |
| "learning_rate": 8.736310947088925e-06, |
| "loss": 0.364569354057312, |
| "memory(GiB)": 36.98, |
| "step": 1205, |
| "token_acc": 0.8810635780559188, |
| "train_speed(iter/s)": 0.134072 |
| }, |
| { |
| "epoch": 0.8131720430107527, |
| "grad_norm": 1.1584760304462782, |
| "learning_rate": 8.723975898556418e-06, |
| "loss": 0.3694889545440674, |
| "memory(GiB)": 36.98, |
| "step": 1210, |
| "token_acc": 0.8554807103490508, |
| "train_speed(iter/s)": 0.134198 |
| }, |
| { |
| "epoch": 0.8165322580645161, |
| "grad_norm": 1.422435654534571, |
| "learning_rate": 8.711589739020666e-06, |
| "loss": 0.4119666576385498, |
| "memory(GiB)": 36.98, |
| "step": 1215, |
| "token_acc": 0.8824604141291108, |
| "train_speed(iter/s)": 0.134323 |
| }, |
| { |
| "epoch": 0.8198924731182796, |
| "grad_norm": 1.3248122307528065, |
| "learning_rate": 8.699152638479817e-06, |
| "loss": 0.35853800773620603, |
| "memory(GiB)": 36.98, |
| "step": 1220, |
| "token_acc": 0.8483437779767233, |
| "train_speed(iter/s)": 0.134403 |
| }, |
| { |
| "epoch": 0.823252688172043, |
| "grad_norm": 1.280124494652183, |
| "learning_rate": 8.686664767631194e-06, |
| "loss": 0.3697690486907959, |
| "memory(GiB)": 36.98, |
| "step": 1225, |
| "token_acc": 0.8864232398628202, |
| "train_speed(iter/s)": 0.134529 |
| }, |
| { |
| "epoch": 0.8266129032258065, |
| "grad_norm": 1.451060096605754, |
| "learning_rate": 8.67412629786892e-06, |
| "loss": 0.3914652347564697, |
| "memory(GiB)": 36.98, |
| "step": 1230, |
| "token_acc": 0.8521779425393883, |
| "train_speed(iter/s)": 0.134562 |
| }, |
| { |
| "epoch": 0.8299731182795699, |
| "grad_norm": 1.0228658030093372, |
| "learning_rate": 8.661537401281592e-06, |
| "loss": 0.3691814422607422, |
| "memory(GiB)": 36.98, |
| "step": 1235, |
| "token_acc": 0.8398544131028207, |
| "train_speed(iter/s)": 0.134623 |
| }, |
| { |
| "epoch": 0.8333333333333334, |
| "grad_norm": 1.2177785216419144, |
| "learning_rate": 8.6488982506499e-06, |
| "loss": 0.3742197275161743, |
| "memory(GiB)": 36.98, |
| "step": 1240, |
| "token_acc": 0.8786339754816113, |
| "train_speed(iter/s)": 0.134723 |
| }, |
| { |
| "epoch": 0.8366935483870968, |
| "grad_norm": 1.2600614569860304, |
| "learning_rate": 8.636209019444266e-06, |
| "loss": 0.3671316146850586, |
| "memory(GiB)": 36.98, |
| "step": 1245, |
| "token_acc": 0.8633489700183561, |
| "train_speed(iter/s)": 0.134806 |
| }, |
| { |
| "epoch": 0.8400537634408602, |
| "grad_norm": 1.2097597846514752, |
| "learning_rate": 8.623469881822459e-06, |
| "loss": 0.35306849479675295, |
| "memory(GiB)": 36.98, |
| "step": 1250, |
| "token_acc": 0.867431315017338, |
| "train_speed(iter/s)": 0.134879 |
| }, |
| { |
| "epoch": 0.8434139784946236, |
| "grad_norm": 1.3489602392669622, |
| "learning_rate": 8.610681012627206e-06, |
| "loss": 0.3992285966873169, |
| "memory(GiB)": 36.98, |
| "step": 1255, |
| "token_acc": 0.8585302686953706, |
| "train_speed(iter/s)": 0.135015 |
| }, |
| { |
| "epoch": 0.8467741935483871, |
| "grad_norm": 1.1284388647627097, |
| "learning_rate": 8.597842587383797e-06, |
| "loss": 0.35171847343444823, |
| "memory(GiB)": 36.98, |
| "step": 1260, |
| "token_acc": 0.883495145631068, |
| "train_speed(iter/s)": 0.135129 |
| }, |
| { |
| "epoch": 0.8501344086021505, |
| "grad_norm": 1.1397663299803389, |
| "learning_rate": 8.584954782297664e-06, |
| "loss": 0.3620689153671265, |
| "memory(GiB)": 36.98, |
| "step": 1265, |
| "token_acc": 0.8496460824993898, |
| "train_speed(iter/s)": 0.135164 |
| }, |
| { |
| "epoch": 0.853494623655914, |
| "grad_norm": 1.567282299034232, |
| "learning_rate": 8.572017774251975e-06, |
| "loss": 0.3426519870758057, |
| "memory(GiB)": 36.98, |
| "step": 1270, |
| "token_acc": 0.8790885703785373, |
| "train_speed(iter/s)": 0.13528 |
| }, |
| { |
| "epoch": 0.8568548387096774, |
| "grad_norm": 1.024146079054877, |
| "learning_rate": 8.559031740805197e-06, |
| "loss": 0.3614107608795166, |
| "memory(GiB)": 36.98, |
| "step": 1275, |
| "token_acc": 0.8775425487754255, |
| "train_speed(iter/s)": 0.135367 |
| }, |
| { |
| "epoch": 0.8602150537634409, |
| "grad_norm": 1.2877357462438697, |
| "learning_rate": 8.545996860188668e-06, |
| "loss": 0.36385910511016845, |
| "memory(GiB)": 36.98, |
| "step": 1280, |
| "token_acc": 0.8840579710144928, |
| "train_speed(iter/s)": 0.135448 |
| }, |
| { |
| "epoch": 0.8635752688172043, |
| "grad_norm": 0.9919794513523525, |
| "learning_rate": 8.53291331130414e-06, |
| "loss": 0.3696147441864014, |
| "memory(GiB)": 36.98, |
| "step": 1285, |
| "token_acc": 0.8672872340425531, |
| "train_speed(iter/s)": 0.135528 |
| }, |
| { |
| "epoch": 0.8669354838709677, |
| "grad_norm": 1.2702352256267997, |
| "learning_rate": 8.519781273721337e-06, |
| "loss": 0.37546935081481936, |
| "memory(GiB)": 36.98, |
| "step": 1290, |
| "token_acc": 0.8561872909698997, |
| "train_speed(iter/s)": 0.135644 |
| }, |
| { |
| "epoch": 0.8702956989247311, |
| "grad_norm": 1.1454414254082632, |
| "learning_rate": 8.506600927675479e-06, |
| "loss": 0.3705191373825073, |
| "memory(GiB)": 36.98, |
| "step": 1295, |
| "token_acc": 0.8777910685805422, |
| "train_speed(iter/s)": 0.135768 |
| }, |
| { |
| "epoch": 0.8736559139784946, |
| "grad_norm": 1.0281894853631854, |
| "learning_rate": 8.493372454064809e-06, |
| "loss": 0.3680076599121094, |
| "memory(GiB)": 36.98, |
| "step": 1300, |
| "token_acc": 0.8766331658291457, |
| "train_speed(iter/s)": 0.135882 |
| }, |
| { |
| "epoch": 0.8736559139784946, |
| "eval_loss": 0.35564836859703064, |
| "eval_runtime": 22.2867, |
| "eval_samples_per_second": 21.134, |
| "eval_steps_per_second": 2.647, |
| "eval_token_acc": 0.8703110635159573, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.8770161290322581, |
| "grad_norm": 1.1119653997133574, |
| "learning_rate": 8.480096034448118e-06, |
| "loss": 0.38275277614593506, |
| "memory(GiB)": 36.98, |
| "step": 1305, |
| "token_acc": 0.879605039155601, |
| "train_speed(iter/s)": 0.133094 |
| }, |
| { |
| "epoch": 0.8803763440860215, |
| "grad_norm": 1.3654028882674394, |
| "learning_rate": 8.46677185104225e-06, |
| "loss": 0.3956867218017578, |
| "memory(GiB)": 36.98, |
| "step": 1310, |
| "token_acc": 0.8529342997428104, |
| "train_speed(iter/s)": 0.133217 |
| }, |
| { |
| "epoch": 0.883736559139785, |
| "grad_norm": 1.446764466047648, |
| "learning_rate": 8.453400086719595e-06, |
| "loss": 0.3735336780548096, |
| "memory(GiB)": 36.98, |
| "step": 1315, |
| "token_acc": 0.8798710024187046, |
| "train_speed(iter/s)": 0.13335 |
| }, |
| { |
| "epoch": 0.8870967741935484, |
| "grad_norm": 1.4121926531149445, |
| "learning_rate": 8.439980925005587e-06, |
| "loss": 0.3659780502319336, |
| "memory(GiB)": 36.98, |
| "step": 1320, |
| "token_acc": 0.883887957864496, |
| "train_speed(iter/s)": 0.133469 |
| }, |
| { |
| "epoch": 0.8904569892473119, |
| "grad_norm": 1.164980050744471, |
| "learning_rate": 8.426514550076179e-06, |
| "loss": 0.3616935729980469, |
| "memory(GiB)": 36.98, |
| "step": 1325, |
| "token_acc": 0.8850448872345085, |
| "train_speed(iter/s)": 0.133589 |
| }, |
| { |
| "epoch": 0.8938172043010753, |
| "grad_norm": 1.0691472658135925, |
| "learning_rate": 8.413001146755322e-06, |
| "loss": 0.35316460132598876, |
| "memory(GiB)": 36.98, |
| "step": 1330, |
| "token_acc": 0.8866596268919394, |
| "train_speed(iter/s)": 0.133666 |
| }, |
| { |
| "epoch": 0.8971774193548387, |
| "grad_norm": 1.0994724914400242, |
| "learning_rate": 8.399440900512426e-06, |
| "loss": 0.3949526309967041, |
| "memory(GiB)": 36.98, |
| "step": 1335, |
| "token_acc": 0.8785682916117699, |
| "train_speed(iter/s)": 0.133743 |
| }, |
| { |
| "epoch": 0.9005376344086021, |
| "grad_norm": 1.1861769349247893, |
| "learning_rate": 8.385833997459804e-06, |
| "loss": 0.3643625259399414, |
| "memory(GiB)": 36.98, |
| "step": 1340, |
| "token_acc": 0.9060324825986079, |
| "train_speed(iter/s)": 0.133823 |
| }, |
| { |
| "epoch": 0.9038978494623656, |
| "grad_norm": 1.1612362502540743, |
| "learning_rate": 8.372180624350135e-06, |
| "loss": 0.3484476089477539, |
| "memory(GiB)": 36.98, |
| "step": 1345, |
| "token_acc": 0.891963426371511, |
| "train_speed(iter/s)": 0.133922 |
| }, |
| { |
| "epoch": 0.907258064516129, |
| "grad_norm": 1.3879852587828685, |
| "learning_rate": 8.358480968573891e-06, |
| "loss": 0.3419426441192627, |
| "memory(GiB)": 36.98, |
| "step": 1350, |
| "token_acc": 0.884161752316765, |
| "train_speed(iter/s)": 0.133959 |
| }, |
| { |
| "epoch": 0.9106182795698925, |
| "grad_norm": 1.2332875378045212, |
| "learning_rate": 8.344735218156765e-06, |
| "loss": 0.3734895706176758, |
| "memory(GiB)": 36.98, |
| "step": 1355, |
| "token_acc": 0.8606686111734351, |
| "train_speed(iter/s)": 0.134034 |
| }, |
| { |
| "epoch": 0.9139784946236559, |
| "grad_norm": 1.1771957299877707, |
| "learning_rate": 8.330943561757092e-06, |
| "loss": 0.3600280284881592, |
| "memory(GiB)": 36.98, |
| "step": 1360, |
| "token_acc": 0.8553880513679508, |
| "train_speed(iter/s)": 0.134132 |
| }, |
| { |
| "epoch": 0.9173387096774194, |
| "grad_norm": 1.0255370794800907, |
| "learning_rate": 8.31710618866326e-06, |
| "loss": 0.35778398513793946, |
| "memory(GiB)": 36.98, |
| "step": 1365, |
| "token_acc": 0.9062277580071174, |
| "train_speed(iter/s)": 0.134223 |
| }, |
| { |
| "epoch": 0.9206989247311828, |
| "grad_norm": 1.1122296528253035, |
| "learning_rate": 8.303223288791111e-06, |
| "loss": 0.3720524311065674, |
| "memory(GiB)": 36.98, |
| "step": 1370, |
| "token_acc": 0.8743216223936018, |
| "train_speed(iter/s)": 0.134348 |
| }, |
| { |
| "epoch": 0.9240591397849462, |
| "grad_norm": 1.3439572837247773, |
| "learning_rate": 8.289295052681338e-06, |
| "loss": 0.3542968273162842, |
| "memory(GiB)": 36.98, |
| "step": 1375, |
| "token_acc": 0.8697758933979406, |
| "train_speed(iter/s)": 0.134431 |
| }, |
| { |
| "epoch": 0.9274193548387096, |
| "grad_norm": 1.3514644861757483, |
| "learning_rate": 8.275321671496862e-06, |
| "loss": 0.35950050354003904, |
| "memory(GiB)": 36.98, |
| "step": 1380, |
| "token_acc": 0.8709907341411262, |
| "train_speed(iter/s)": 0.134551 |
| }, |
| { |
| "epoch": 0.9307795698924731, |
| "grad_norm": 1.0141397599722326, |
| "learning_rate": 8.26130333702022e-06, |
| "loss": 0.3641530990600586, |
| "memory(GiB)": 36.98, |
| "step": 1385, |
| "token_acc": 0.8553926118433265, |
| "train_speed(iter/s)": 0.134571 |
| }, |
| { |
| "epoch": 0.9341397849462365, |
| "grad_norm": 1.2175959834253216, |
| "learning_rate": 8.247240241650918e-06, |
| "loss": 0.3760999202728271, |
| "memory(GiB)": 36.98, |
| "step": 1390, |
| "token_acc": 0.8769605191995673, |
| "train_speed(iter/s)": 0.134653 |
| }, |
| { |
| "epoch": 0.9375, |
| "grad_norm": 1.294717847990162, |
| "learning_rate": 8.233132578402808e-06, |
| "loss": 0.3714743614196777, |
| "memory(GiB)": 36.98, |
| "step": 1395, |
| "token_acc": 0.8819672131147541, |
| "train_speed(iter/s)": 0.134765 |
| }, |
| { |
| "epoch": 0.9408602150537635, |
| "grad_norm": 1.2980876103734693, |
| "learning_rate": 8.218980540901417e-06, |
| "loss": 0.3365382194519043, |
| "memory(GiB)": 36.98, |
| "step": 1400, |
| "token_acc": 0.8853006681514477, |
| "train_speed(iter/s)": 0.13485 |
| }, |
| { |
| "epoch": 0.9408602150537635, |
| "eval_loss": 0.3554946184158325, |
| "eval_runtime": 22.3352, |
| "eval_samples_per_second": 21.088, |
| "eval_steps_per_second": 2.642, |
| "eval_token_acc": 0.8695735631371043, |
| "step": 1400 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 4464, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 171818621501440.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|