| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.809877820156442, |
| "eval_steps": -11601, |
| "global_step": 7000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00025858167948800825, |
| "grad_norm": 242.40017700195312, |
| "learning_rate": 8.605851979345955e-09, |
| "loss": 6.130181312561035, |
| "memory(GiB)": 34.54, |
| "step": 1, |
| "token_acc": 0.22661183727419593, |
| "train_speed(iter/s)": 0.013364 |
| }, |
| { |
| "epoch": 0.0012929083974400413, |
| "grad_norm": 655.3204345703125, |
| "learning_rate": 4.3029259896729774e-08, |
| "loss": 6.322210311889648, |
| "memory(GiB)": 56.04, |
| "step": 5, |
| "token_acc": 0.1488586387434555, |
| "train_speed(iter/s)": 0.023012 |
| }, |
| { |
| "epoch": 0.0025858167948800827, |
| "grad_norm": 484.6578369140625, |
| "learning_rate": 8.605851979345955e-08, |
| "loss": 6.274671173095703, |
| "memory(GiB)": 56.04, |
| "step": 10, |
| "token_acc": 0.1413762226201956, |
| "train_speed(iter/s)": 0.025227 |
| }, |
| { |
| "epoch": 0.003878725192320124, |
| "grad_norm": 247.675048828125, |
| "learning_rate": 1.2908777969018933e-07, |
| "loss": 5.9146270751953125, |
| "memory(GiB)": 56.04, |
| "step": 15, |
| "token_acc": 0.2279082774049217, |
| "train_speed(iter/s)": 0.026453 |
| }, |
| { |
| "epoch": 0.005171633589760165, |
| "grad_norm": 334.42694091796875, |
| "learning_rate": 1.721170395869191e-07, |
| "loss": 6.069369506835938, |
| "memory(GiB)": 56.04, |
| "step": 20, |
| "token_acc": 0.2284955683104916, |
| "train_speed(iter/s)": 0.02697 |
| }, |
| { |
| "epoch": 0.006464541987200207, |
| "grad_norm": 492.56463623046875, |
| "learning_rate": 2.151462994836489e-07, |
| "loss": 6.095654296875, |
| "memory(GiB)": 56.04, |
| "step": 25, |
| "token_acc": 0.24055135615829257, |
| "train_speed(iter/s)": 0.027272 |
| }, |
| { |
| "epoch": 0.007757450384640248, |
| "grad_norm": 182.06201171875, |
| "learning_rate": 2.5817555938037866e-07, |
| "loss": 6.0770011901855465, |
| "memory(GiB)": 76.04, |
| "step": 30, |
| "token_acc": 0.1521892967713401, |
| "train_speed(iter/s)": 0.027249 |
| }, |
| { |
| "epoch": 0.009050358782080289, |
| "grad_norm": 519.2252197265625, |
| "learning_rate": 3.0120481927710845e-07, |
| "loss": 6.1493080139160154, |
| "memory(GiB)": 76.04, |
| "step": 35, |
| "token_acc": 0.15634508680866296, |
| "train_speed(iter/s)": 0.027269 |
| }, |
| { |
| "epoch": 0.01034326717952033, |
| "grad_norm": 714.878173828125, |
| "learning_rate": 3.442340791738382e-07, |
| "loss": 6.561576843261719, |
| "memory(GiB)": 76.04, |
| "step": 40, |
| "token_acc": 0.16204781045060293, |
| "train_speed(iter/s)": 0.027175 |
| }, |
| { |
| "epoch": 0.011636175576960372, |
| "grad_norm": 666.3334350585938, |
| "learning_rate": 3.8726333907056804e-07, |
| "loss": 5.985930252075195, |
| "memory(GiB)": 76.04, |
| "step": 45, |
| "token_acc": 0.1617008883063929, |
| "train_speed(iter/s)": 0.027249 |
| }, |
| { |
| "epoch": 0.012929083974400414, |
| "grad_norm": 841.8603515625, |
| "learning_rate": 4.302925989672978e-07, |
| "loss": 6.3445274353027346, |
| "memory(GiB)": 76.04, |
| "step": 50, |
| "token_acc": 0.2033275151335091, |
| "train_speed(iter/s)": 0.027146 |
| }, |
| { |
| "epoch": 0.014221992371840455, |
| "grad_norm": 956.3386840820312, |
| "learning_rate": 4.733218588640276e-07, |
| "loss": 6.072727966308594, |
| "memory(GiB)": 76.04, |
| "step": 55, |
| "token_acc": 0.18730415801147765, |
| "train_speed(iter/s)": 0.027337 |
| }, |
| { |
| "epoch": 0.015514900769280497, |
| "grad_norm": 145.8020782470703, |
| "learning_rate": 5.163511187607573e-07, |
| "loss": 5.959784317016601, |
| "memory(GiB)": 76.04, |
| "step": 60, |
| "token_acc": 0.21014295439074201, |
| "train_speed(iter/s)": 0.027487 |
| }, |
| { |
| "epoch": 0.016807809166720537, |
| "grad_norm": 1471.006591796875, |
| "learning_rate": 5.593803786574872e-07, |
| "loss": 5.995822906494141, |
| "memory(GiB)": 76.04, |
| "step": 65, |
| "token_acc": 0.16151609777107784, |
| "train_speed(iter/s)": 0.027505 |
| }, |
| { |
| "epoch": 0.018100717564160578, |
| "grad_norm": 127.47267150878906, |
| "learning_rate": 6.024096385542169e-07, |
| "loss": 5.947823333740234, |
| "memory(GiB)": 76.04, |
| "step": 70, |
| "token_acc": 0.17944442339030583, |
| "train_speed(iter/s)": 0.027514 |
| }, |
| { |
| "epoch": 0.01939362596160062, |
| "grad_norm": 103.65353393554688, |
| "learning_rate": 6.454388984509467e-07, |
| "loss": 6.049673843383789, |
| "memory(GiB)": 76.04, |
| "step": 75, |
| "token_acc": 0.1430964467005076, |
| "train_speed(iter/s)": 0.027525 |
| }, |
| { |
| "epoch": 0.02068653435904066, |
| "grad_norm": 186.977294921875, |
| "learning_rate": 6.884681583476764e-07, |
| "loss": 5.822255706787109, |
| "memory(GiB)": 76.04, |
| "step": 80, |
| "token_acc": 0.20191989407480967, |
| "train_speed(iter/s)": 0.027535 |
| }, |
| { |
| "epoch": 0.021979442756480703, |
| "grad_norm": 487.37152099609375, |
| "learning_rate": 7.314974182444062e-07, |
| "loss": 5.861573791503906, |
| "memory(GiB)": 76.04, |
| "step": 85, |
| "token_acc": 0.25108269048585735, |
| "train_speed(iter/s)": 0.027566 |
| }, |
| { |
| "epoch": 0.023272351153920744, |
| "grad_norm": 536.355712890625, |
| "learning_rate": 7.745266781411361e-07, |
| "loss": 5.783546447753906, |
| "memory(GiB)": 76.04, |
| "step": 90, |
| "token_acc": 0.1870295076687734, |
| "train_speed(iter/s)": 0.027608 |
| }, |
| { |
| "epoch": 0.024565259551360786, |
| "grad_norm": 285.6499938964844, |
| "learning_rate": 8.175559380378658e-07, |
| "loss": 5.746212768554687, |
| "memory(GiB)": 76.04, |
| "step": 95, |
| "token_acc": 0.18162528216704288, |
| "train_speed(iter/s)": 0.02758 |
| }, |
| { |
| "epoch": 0.025858167948800827, |
| "grad_norm": 1952.98583984375, |
| "learning_rate": 8.605851979345956e-07, |
| "loss": 5.601922607421875, |
| "memory(GiB)": 76.04, |
| "step": 100, |
| "token_acc": 0.2339803356501102, |
| "train_speed(iter/s)": 0.027603 |
| }, |
| { |
| "epoch": 0.02715107634624087, |
| "grad_norm": 93.57474517822266, |
| "learning_rate": 9.036144578313254e-07, |
| "loss": 5.615843200683594, |
| "memory(GiB)": 76.04, |
| "step": 105, |
| "token_acc": 0.2615709535364429, |
| "train_speed(iter/s)": 0.027644 |
| }, |
| { |
| "epoch": 0.02844398474368091, |
| "grad_norm": 229.4741973876953, |
| "learning_rate": 9.466437177280551e-07, |
| "loss": 5.597761917114258, |
| "memory(GiB)": 76.04, |
| "step": 110, |
| "token_acc": 0.15636001564666974, |
| "train_speed(iter/s)": 0.027645 |
| }, |
| { |
| "epoch": 0.029736893141120952, |
| "grad_norm": 36.493988037109375, |
| "learning_rate": 9.896729776247848e-07, |
| "loss": 5.327813720703125, |
| "memory(GiB)": 76.04, |
| "step": 115, |
| "token_acc": 0.1776396590866798, |
| "train_speed(iter/s)": 0.027731 |
| }, |
| { |
| "epoch": 0.031029801538560994, |
| "grad_norm": 35.07899475097656, |
| "learning_rate": 1.0327022375215146e-06, |
| "loss": 5.352128982543945, |
| "memory(GiB)": 76.04, |
| "step": 120, |
| "token_acc": 0.24768539325842698, |
| "train_speed(iter/s)": 0.027714 |
| }, |
| { |
| "epoch": 0.03232270993600103, |
| "grad_norm": 144.920166015625, |
| "learning_rate": 1.0757314974182445e-06, |
| "loss": 5.0349891662597654, |
| "memory(GiB)": 76.04, |
| "step": 125, |
| "token_acc": 0.2503892788768347, |
| "train_speed(iter/s)": 0.027712 |
| }, |
| { |
| "epoch": 0.03361561833344107, |
| "grad_norm": 38.186100006103516, |
| "learning_rate": 1.1187607573149743e-06, |
| "loss": 5.087076950073242, |
| "memory(GiB)": 76.04, |
| "step": 130, |
| "token_acc": 0.24298036336942558, |
| "train_speed(iter/s)": 0.027764 |
| }, |
| { |
| "epoch": 0.034908526730881115, |
| "grad_norm": 130.58258056640625, |
| "learning_rate": 1.161790017211704e-06, |
| "loss": 4.898267364501953, |
| "memory(GiB)": 76.04, |
| "step": 135, |
| "token_acc": 0.27113337507827173, |
| "train_speed(iter/s)": 0.027766 |
| }, |
| { |
| "epoch": 0.036201435128321156, |
| "grad_norm": 26.92106056213379, |
| "learning_rate": 1.2048192771084338e-06, |
| "loss": 4.7400367736816404, |
| "memory(GiB)": 76.04, |
| "step": 140, |
| "token_acc": 0.20421513969901067, |
| "train_speed(iter/s)": 0.027799 |
| }, |
| { |
| "epoch": 0.0374943435257612, |
| "grad_norm": 208.29981994628906, |
| "learning_rate": 1.2478485370051637e-06, |
| "loss": 4.690925598144531, |
| "memory(GiB)": 76.04, |
| "step": 145, |
| "token_acc": 0.22307749241358893, |
| "train_speed(iter/s)": 0.027793 |
| }, |
| { |
| "epoch": 0.03878725192320124, |
| "grad_norm": 364.9195251464844, |
| "learning_rate": 1.2908777969018935e-06, |
| "loss": 4.929594421386719, |
| "memory(GiB)": 76.04, |
| "step": 150, |
| "token_acc": 0.23747012178927798, |
| "train_speed(iter/s)": 0.027767 |
| }, |
| { |
| "epoch": 0.04008016032064128, |
| "grad_norm": 361.99853515625, |
| "learning_rate": 1.3339070567986231e-06, |
| "loss": 4.708561706542969, |
| "memory(GiB)": 76.04, |
| "step": 155, |
| "token_acc": 0.2766303463977883, |
| "train_speed(iter/s)": 0.027779 |
| }, |
| { |
| "epoch": 0.04137306871808132, |
| "grad_norm": 714.6271362304688, |
| "learning_rate": 1.3769363166953528e-06, |
| "loss": 4.614714050292969, |
| "memory(GiB)": 76.04, |
| "step": 160, |
| "token_acc": 0.2798645816540384, |
| "train_speed(iter/s)": 0.027778 |
| }, |
| { |
| "epoch": 0.042665977115521364, |
| "grad_norm": 38.3203010559082, |
| "learning_rate": 1.4199655765920828e-06, |
| "loss": 4.407340621948242, |
| "memory(GiB)": 76.04, |
| "step": 165, |
| "token_acc": 0.26736621196222454, |
| "train_speed(iter/s)": 0.027815 |
| }, |
| { |
| "epoch": 0.043958885512961406, |
| "grad_norm": 142.34910583496094, |
| "learning_rate": 1.4629948364888125e-06, |
| "loss": 4.265171813964844, |
| "memory(GiB)": 76.04, |
| "step": 170, |
| "token_acc": 0.2892184533278596, |
| "train_speed(iter/s)": 0.027794 |
| }, |
| { |
| "epoch": 0.04525179391040145, |
| "grad_norm": 16.595224380493164, |
| "learning_rate": 1.5060240963855425e-06, |
| "loss": 4.142366409301758, |
| "memory(GiB)": 76.04, |
| "step": 175, |
| "token_acc": 0.28310791772330235, |
| "train_speed(iter/s)": 0.027783 |
| }, |
| { |
| "epoch": 0.04654470230784149, |
| "grad_norm": 24.273094177246094, |
| "learning_rate": 1.5490533562822722e-06, |
| "loss": 4.011473083496094, |
| "memory(GiB)": 76.04, |
| "step": 180, |
| "token_acc": 0.31321029626032054, |
| "train_speed(iter/s)": 0.027749 |
| }, |
| { |
| "epoch": 0.04783761070528153, |
| "grad_norm": 12.379109382629395, |
| "learning_rate": 1.5920826161790018e-06, |
| "loss": 3.945102310180664, |
| "memory(GiB)": 76.04, |
| "step": 185, |
| "token_acc": 0.3472663749960656, |
| "train_speed(iter/s)": 0.027756 |
| }, |
| { |
| "epoch": 0.04913051910272157, |
| "grad_norm": 11.814841270446777, |
| "learning_rate": 1.6351118760757316e-06, |
| "loss": 3.744676971435547, |
| "memory(GiB)": 76.04, |
| "step": 190, |
| "token_acc": 0.3549518681677272, |
| "train_speed(iter/s)": 0.027799 |
| }, |
| { |
| "epoch": 0.05042342750016161, |
| "grad_norm": 13.708587646484375, |
| "learning_rate": 1.6781411359724615e-06, |
| "loss": 3.6159019470214844, |
| "memory(GiB)": 76.04, |
| "step": 195, |
| "token_acc": 0.34429772852314705, |
| "train_speed(iter/s)": 0.027816 |
| }, |
| { |
| "epoch": 0.051716335897601655, |
| "grad_norm": 10.363809585571289, |
| "learning_rate": 1.7211703958691911e-06, |
| "loss": 3.508245086669922, |
| "memory(GiB)": 76.04, |
| "step": 200, |
| "token_acc": 0.34663152792923785, |
| "train_speed(iter/s)": 0.027818 |
| }, |
| { |
| "epoch": 0.053009244295041696, |
| "grad_norm": 10.083678245544434, |
| "learning_rate": 1.764199655765921e-06, |
| "loss": 3.42333869934082, |
| "memory(GiB)": 76.04, |
| "step": 205, |
| "token_acc": 0.4063367473915957, |
| "train_speed(iter/s)": 0.027805 |
| }, |
| { |
| "epoch": 0.05430215269248174, |
| "grad_norm": 8.798538208007812, |
| "learning_rate": 1.8072289156626508e-06, |
| "loss": 3.2132949829101562, |
| "memory(GiB)": 76.04, |
| "step": 210, |
| "token_acc": 0.40024092757114893, |
| "train_speed(iter/s)": 0.027761 |
| }, |
| { |
| "epoch": 0.05559506108992178, |
| "grad_norm": 9.79233455657959, |
| "learning_rate": 1.8502581755593804e-06, |
| "loss": 3.2455322265625, |
| "memory(GiB)": 76.04, |
| "step": 215, |
| "token_acc": 0.37759151099023586, |
| "train_speed(iter/s)": 0.027773 |
| }, |
| { |
| "epoch": 0.05688796948736182, |
| "grad_norm": 7.508085250854492, |
| "learning_rate": 1.8932874354561103e-06, |
| "loss": 3.0217824935913087, |
| "memory(GiB)": 76.04, |
| "step": 220, |
| "token_acc": 0.41623787623514413, |
| "train_speed(iter/s)": 0.027752 |
| }, |
| { |
| "epoch": 0.05818087788480186, |
| "grad_norm": 11.832063674926758, |
| "learning_rate": 1.93631669535284e-06, |
| "loss": 3.122202682495117, |
| "memory(GiB)": 76.04, |
| "step": 225, |
| "token_acc": 0.4242820412254325, |
| "train_speed(iter/s)": 0.027731 |
| }, |
| { |
| "epoch": 0.059473786282241904, |
| "grad_norm": 6.528841018676758, |
| "learning_rate": 1.9793459552495696e-06, |
| "loss": 2.9143745422363283, |
| "memory(GiB)": 76.04, |
| "step": 230, |
| "token_acc": 0.4459988808058198, |
| "train_speed(iter/s)": 0.027736 |
| }, |
| { |
| "epoch": 0.060766694679681946, |
| "grad_norm": 13.579035758972168, |
| "learning_rate": 2.0223752151463e-06, |
| "loss": 2.980012130737305, |
| "memory(GiB)": 76.04, |
| "step": 235, |
| "token_acc": 0.4274643521388717, |
| "train_speed(iter/s)": 0.027736 |
| }, |
| { |
| "epoch": 0.06205960307712199, |
| "grad_norm": 10.801968574523926, |
| "learning_rate": 2.0654044750430293e-06, |
| "loss": 2.9146419525146485, |
| "memory(GiB)": 76.04, |
| "step": 240, |
| "token_acc": 0.4537141861882783, |
| "train_speed(iter/s)": 0.027738 |
| }, |
| { |
| "epoch": 0.06335251147456203, |
| "grad_norm": 6.302434921264648, |
| "learning_rate": 2.1084337349397595e-06, |
| "loss": 2.8420055389404295, |
| "memory(GiB)": 76.04, |
| "step": 245, |
| "token_acc": 0.4608826083524118, |
| "train_speed(iter/s)": 0.027761 |
| }, |
| { |
| "epoch": 0.06464541987200206, |
| "grad_norm": 6.119657516479492, |
| "learning_rate": 2.151462994836489e-06, |
| "loss": 2.787134552001953, |
| "memory(GiB)": 76.04, |
| "step": 250, |
| "token_acc": 0.44650911754500944, |
| "train_speed(iter/s)": 0.027728 |
| }, |
| { |
| "epoch": 0.06593832826944211, |
| "grad_norm": 6.240930080413818, |
| "learning_rate": 2.194492254733219e-06, |
| "loss": 2.7430130004882813, |
| "memory(GiB)": 76.04, |
| "step": 255, |
| "token_acc": 0.4696727853152434, |
| "train_speed(iter/s)": 0.027754 |
| }, |
| { |
| "epoch": 0.06723123666688215, |
| "grad_norm": 6.333411693572998, |
| "learning_rate": 2.2375215146299486e-06, |
| "loss": 2.6061046600341795, |
| "memory(GiB)": 76.04, |
| "step": 260, |
| "token_acc": 0.48562300319488816, |
| "train_speed(iter/s)": 0.027762 |
| }, |
| { |
| "epoch": 0.0685241450643222, |
| "grad_norm": 7.289592742919922, |
| "learning_rate": 2.2805507745266785e-06, |
| "loss": 2.5609256744384767, |
| "memory(GiB)": 76.04, |
| "step": 265, |
| "token_acc": 0.4682853243301487, |
| "train_speed(iter/s)": 0.027763 |
| }, |
| { |
| "epoch": 0.06981705346176223, |
| "grad_norm": 5.113116264343262, |
| "learning_rate": 2.323580034423408e-06, |
| "loss": 2.5773744583129883, |
| "memory(GiB)": 76.04, |
| "step": 270, |
| "token_acc": 0.4764347547290798, |
| "train_speed(iter/s)": 0.02777 |
| }, |
| { |
| "epoch": 0.07110996185920228, |
| "grad_norm": 4.905300140380859, |
| "learning_rate": 2.3666092943201378e-06, |
| "loss": 2.493597221374512, |
| "memory(GiB)": 76.04, |
| "step": 275, |
| "token_acc": 0.5032743942370661, |
| "train_speed(iter/s)": 0.027774 |
| }, |
| { |
| "epoch": 0.07240287025664231, |
| "grad_norm": 5.233267307281494, |
| "learning_rate": 2.4096385542168676e-06, |
| "loss": 2.4493991851806642, |
| "memory(GiB)": 76.04, |
| "step": 280, |
| "token_acc": 0.5405880959631848, |
| "train_speed(iter/s)": 0.027795 |
| }, |
| { |
| "epoch": 0.07369577865408236, |
| "grad_norm": 8.618249893188477, |
| "learning_rate": 2.4526678141135975e-06, |
| "loss": 2.423325538635254, |
| "memory(GiB)": 76.04, |
| "step": 285, |
| "token_acc": 0.530728862973761, |
| "train_speed(iter/s)": 0.027795 |
| }, |
| { |
| "epoch": 0.0749886870515224, |
| "grad_norm": 5.426891803741455, |
| "learning_rate": 2.4956970740103273e-06, |
| "loss": 2.376586151123047, |
| "memory(GiB)": 76.04, |
| "step": 290, |
| "token_acc": 0.5145979170223663, |
| "train_speed(iter/s)": 0.027806 |
| }, |
| { |
| "epoch": 0.07628159544896244, |
| "grad_norm": 7.925861358642578, |
| "learning_rate": 2.538726333907057e-06, |
| "loss": 2.348302459716797, |
| "memory(GiB)": 76.04, |
| "step": 295, |
| "token_acc": 0.5290513911109377, |
| "train_speed(iter/s)": 0.027804 |
| }, |
| { |
| "epoch": 0.07757450384640248, |
| "grad_norm": 6.098197937011719, |
| "learning_rate": 2.581755593803787e-06, |
| "loss": 2.325778579711914, |
| "memory(GiB)": 76.04, |
| "step": 300, |
| "token_acc": 0.526869597895528, |
| "train_speed(iter/s)": 0.027824 |
| }, |
| { |
| "epoch": 0.07886741224384253, |
| "grad_norm": 4.564492225646973, |
| "learning_rate": 2.6247848537005164e-06, |
| "loss": 2.283577728271484, |
| "memory(GiB)": 76.04, |
| "step": 305, |
| "token_acc": 0.5214347367411528, |
| "train_speed(iter/s)": 0.02782 |
| }, |
| { |
| "epoch": 0.08016032064128256, |
| "grad_norm": 5.382349491119385, |
| "learning_rate": 2.6678141135972463e-06, |
| "loss": 2.260573959350586, |
| "memory(GiB)": 76.04, |
| "step": 310, |
| "token_acc": 0.5193088205746433, |
| "train_speed(iter/s)": 0.027827 |
| }, |
| { |
| "epoch": 0.08145322903872261, |
| "grad_norm": 4.09184455871582, |
| "learning_rate": 2.710843373493976e-06, |
| "loss": 2.202308464050293, |
| "memory(GiB)": 76.04, |
| "step": 315, |
| "token_acc": 0.5255345169316409, |
| "train_speed(iter/s)": 0.027838 |
| }, |
| { |
| "epoch": 0.08274613743616265, |
| "grad_norm": 6.044122695922852, |
| "learning_rate": 2.7538726333907055e-06, |
| "loss": 2.0868404388427733, |
| "memory(GiB)": 76.04, |
| "step": 320, |
| "token_acc": 0.5558229225038731, |
| "train_speed(iter/s)": 0.027868 |
| }, |
| { |
| "epoch": 0.0840390458336027, |
| "grad_norm": 6.152168273925781, |
| "learning_rate": 2.796901893287436e-06, |
| "loss": 2.019194412231445, |
| "memory(GiB)": 76.04, |
| "step": 325, |
| "token_acc": 0.5501780112094188, |
| "train_speed(iter/s)": 0.027887 |
| }, |
| { |
| "epoch": 0.08533195423104273, |
| "grad_norm": 7.0422773361206055, |
| "learning_rate": 2.8399311531841657e-06, |
| "loss": 2.0607986450195312, |
| "memory(GiB)": 76.04, |
| "step": 330, |
| "token_acc": 0.5108016425638279, |
| "train_speed(iter/s)": 0.027881 |
| }, |
| { |
| "epoch": 0.08662486262848278, |
| "grad_norm": 5.497431755065918, |
| "learning_rate": 2.882960413080895e-06, |
| "loss": 2.0316564559936525, |
| "memory(GiB)": 76.04, |
| "step": 335, |
| "token_acc": 0.5838930163447251, |
| "train_speed(iter/s)": 0.027889 |
| }, |
| { |
| "epoch": 0.08791777102592281, |
| "grad_norm": 5.31265115737915, |
| "learning_rate": 2.925989672977625e-06, |
| "loss": 2.0162227630615233, |
| "memory(GiB)": 76.04, |
| "step": 340, |
| "token_acc": 0.5688795253246195, |
| "train_speed(iter/s)": 0.027898 |
| }, |
| { |
| "epoch": 0.08921067942336286, |
| "grad_norm": 3.3592262268066406, |
| "learning_rate": 2.9690189328743548e-06, |
| "loss": 1.976116943359375, |
| "memory(GiB)": 76.04, |
| "step": 345, |
| "token_acc": 0.5811347794931926, |
| "train_speed(iter/s)": 0.027902 |
| }, |
| { |
| "epoch": 0.0905035878208029, |
| "grad_norm": 4.368274211883545, |
| "learning_rate": 3.012048192771085e-06, |
| "loss": 1.9872814178466798, |
| "memory(GiB)": 76.04, |
| "step": 350, |
| "token_acc": 0.5612575668814684, |
| "train_speed(iter/s)": 0.027903 |
| }, |
| { |
| "epoch": 0.09179649621824294, |
| "grad_norm": 4.863376140594482, |
| "learning_rate": 3.0550774526678145e-06, |
| "loss": 1.9638809204101562, |
| "memory(GiB)": 76.04, |
| "step": 355, |
| "token_acc": 0.611854751336805, |
| "train_speed(iter/s)": 0.027901 |
| }, |
| { |
| "epoch": 0.09308940461568298, |
| "grad_norm": 4.721250057220459, |
| "learning_rate": 3.0981067125645443e-06, |
| "loss": 1.9370807647705077, |
| "memory(GiB)": 76.04, |
| "step": 360, |
| "token_acc": 0.584777590187093, |
| "train_speed(iter/s)": 0.027911 |
| }, |
| { |
| "epoch": 0.09438231301312303, |
| "grad_norm": 3.189765691757202, |
| "learning_rate": 3.1411359724612737e-06, |
| "loss": 1.9602073669433593, |
| "memory(GiB)": 76.04, |
| "step": 365, |
| "token_acc": 0.6110412738319715, |
| "train_speed(iter/s)": 0.027898 |
| }, |
| { |
| "epoch": 0.09567522141056306, |
| "grad_norm": 4.200387001037598, |
| "learning_rate": 3.1841652323580036e-06, |
| "loss": 1.8618885040283204, |
| "memory(GiB)": 76.04, |
| "step": 370, |
| "token_acc": 0.6040174341481903, |
| "train_speed(iter/s)": 0.027895 |
| }, |
| { |
| "epoch": 0.09696812980800311, |
| "grad_norm": 3.2454254627227783, |
| "learning_rate": 3.2271944922547334e-06, |
| "loss": 1.821019744873047, |
| "memory(GiB)": 76.04, |
| "step": 375, |
| "token_acc": 0.6053048476893134, |
| "train_speed(iter/s)": 0.027882 |
| }, |
| { |
| "epoch": 0.09826103820544314, |
| "grad_norm": 3.34602427482605, |
| "learning_rate": 3.2702237521514633e-06, |
| "loss": 1.814716911315918, |
| "memory(GiB)": 76.04, |
| "step": 380, |
| "token_acc": 0.6051584430617856, |
| "train_speed(iter/s)": 0.027888 |
| }, |
| { |
| "epoch": 0.09955394660288319, |
| "grad_norm": 3.311286211013794, |
| "learning_rate": 3.313253012048193e-06, |
| "loss": 1.8220832824707032, |
| "memory(GiB)": 76.04, |
| "step": 385, |
| "token_acc": 0.5944087085601187, |
| "train_speed(iter/s)": 0.027892 |
| }, |
| { |
| "epoch": 0.10084685500032323, |
| "grad_norm": 5.586461544036865, |
| "learning_rate": 3.356282271944923e-06, |
| "loss": 1.7891130447387695, |
| "memory(GiB)": 76.04, |
| "step": 390, |
| "token_acc": 0.618925967321305, |
| "train_speed(iter/s)": 0.027898 |
| }, |
| { |
| "epoch": 0.10213976339776328, |
| "grad_norm": 6.616051197052002, |
| "learning_rate": 3.3993115318416524e-06, |
| "loss": 1.7373517990112304, |
| "memory(GiB)": 76.04, |
| "step": 395, |
| "token_acc": 0.6165356711003628, |
| "train_speed(iter/s)": 0.027877 |
| }, |
| { |
| "epoch": 0.10343267179520331, |
| "grad_norm": 2.835207223892212, |
| "learning_rate": 3.4423407917383822e-06, |
| "loss": 1.6772958755493164, |
| "memory(GiB)": 76.04, |
| "step": 400, |
| "token_acc": 0.6225554448697111, |
| "train_speed(iter/s)": 0.027881 |
| }, |
| { |
| "epoch": 0.10472558019264334, |
| "grad_norm": 2.6146092414855957, |
| "learning_rate": 3.485370051635112e-06, |
| "loss": 1.7310756683349608, |
| "memory(GiB)": 76.04, |
| "step": 405, |
| "token_acc": 0.615843204488778, |
| "train_speed(iter/s)": 0.027878 |
| }, |
| { |
| "epoch": 0.10601848859008339, |
| "grad_norm": 4.303338527679443, |
| "learning_rate": 3.528399311531842e-06, |
| "loss": 1.6970853805541992, |
| "memory(GiB)": 76.04, |
| "step": 410, |
| "token_acc": 0.6082253291152159, |
| "train_speed(iter/s)": 0.027889 |
| }, |
| { |
| "epoch": 0.10731139698752343, |
| "grad_norm": 3.152858257293701, |
| "learning_rate": 3.5714285714285718e-06, |
| "loss": 1.6324186325073242, |
| "memory(GiB)": 76.04, |
| "step": 415, |
| "token_acc": 0.6986581950424278, |
| "train_speed(iter/s)": 0.027905 |
| }, |
| { |
| "epoch": 0.10860430538496348, |
| "grad_norm": 2.5081074237823486, |
| "learning_rate": 3.6144578313253016e-06, |
| "loss": 1.6505191802978516, |
| "memory(GiB)": 76.04, |
| "step": 420, |
| "token_acc": 0.6553758610362383, |
| "train_speed(iter/s)": 0.027916 |
| }, |
| { |
| "epoch": 0.10989721378240351, |
| "grad_norm": 3.3457190990448, |
| "learning_rate": 3.657487091222031e-06, |
| "loss": 1.6839021682739257, |
| "memory(GiB)": 76.04, |
| "step": 425, |
| "token_acc": 0.6694164051234202, |
| "train_speed(iter/s)": 0.027902 |
| }, |
| { |
| "epoch": 0.11119012217984356, |
| "grad_norm": 2.359487295150757, |
| "learning_rate": 3.700516351118761e-06, |
| "loss": 1.6301973342895508, |
| "memory(GiB)": 76.04, |
| "step": 430, |
| "token_acc": 0.6513765837858436, |
| "train_speed(iter/s)": 0.027902 |
| }, |
| { |
| "epoch": 0.1124830305772836, |
| "grad_norm": 2.1152243614196777, |
| "learning_rate": 3.743545611015491e-06, |
| "loss": 1.5873135566711425, |
| "memory(GiB)": 76.04, |
| "step": 435, |
| "token_acc": 0.6180145649674205, |
| "train_speed(iter/s)": 0.027907 |
| }, |
| { |
| "epoch": 0.11377593897472364, |
| "grad_norm": 2.4160220623016357, |
| "learning_rate": 3.7865748709122206e-06, |
| "loss": 1.5810693740844726, |
| "memory(GiB)": 76.04, |
| "step": 440, |
| "token_acc": 0.6297903669547023, |
| "train_speed(iter/s)": 0.027905 |
| }, |
| { |
| "epoch": 0.11506884737216368, |
| "grad_norm": 2.329163074493408, |
| "learning_rate": 3.8296041308089504e-06, |
| "loss": 1.5028837203979493, |
| "memory(GiB)": 76.04, |
| "step": 445, |
| "token_acc": 0.6969645118236885, |
| "train_speed(iter/s)": 0.027912 |
| }, |
| { |
| "epoch": 0.11636175576960373, |
| "grad_norm": 2.004277467727661, |
| "learning_rate": 3.87263339070568e-06, |
| "loss": 1.5502431869506836, |
| "memory(GiB)": 76.04, |
| "step": 450, |
| "token_acc": 0.6654662441179295, |
| "train_speed(iter/s)": 0.027911 |
| }, |
| { |
| "epoch": 0.11765466416704376, |
| "grad_norm": 291.7929382324219, |
| "learning_rate": 3.91566265060241e-06, |
| "loss": 1.6129791259765625, |
| "memory(GiB)": 76.04, |
| "step": 455, |
| "token_acc": 0.623754295532646, |
| "train_speed(iter/s)": 0.027925 |
| }, |
| { |
| "epoch": 0.11894757256448381, |
| "grad_norm": 5.003607749938965, |
| "learning_rate": 3.958691910499139e-06, |
| "loss": 1.523465919494629, |
| "memory(GiB)": 76.04, |
| "step": 460, |
| "token_acc": 0.6120657218111125, |
| "train_speed(iter/s)": 0.027923 |
| }, |
| { |
| "epoch": 0.12024048096192384, |
| "grad_norm": 2.6622467041015625, |
| "learning_rate": 4.00172117039587e-06, |
| "loss": 1.5748241424560547, |
| "memory(GiB)": 76.04, |
| "step": 465, |
| "token_acc": 0.6213646902947996, |
| "train_speed(iter/s)": 0.027931 |
| }, |
| { |
| "epoch": 0.12153338935936389, |
| "grad_norm": 2.4443795680999756, |
| "learning_rate": 4.0447504302926e-06, |
| "loss": 1.526081657409668, |
| "memory(GiB)": 76.04, |
| "step": 470, |
| "token_acc": 0.6476268348713157, |
| "train_speed(iter/s)": 0.027937 |
| }, |
| { |
| "epoch": 0.12282629775680393, |
| "grad_norm": 2.6567704677581787, |
| "learning_rate": 4.087779690189329e-06, |
| "loss": 1.5151639938354493, |
| "memory(GiB)": 76.04, |
| "step": 475, |
| "token_acc": 0.6848871707273217, |
| "train_speed(iter/s)": 0.027943 |
| }, |
| { |
| "epoch": 0.12411920615424397, |
| "grad_norm": 2.540998935699463, |
| "learning_rate": 4.1308089500860585e-06, |
| "loss": 1.4633543014526367, |
| "memory(GiB)": 76.04, |
| "step": 480, |
| "token_acc": 0.6596202575584019, |
| "train_speed(iter/s)": 0.027936 |
| }, |
| { |
| "epoch": 0.12541211455168402, |
| "grad_norm": 2.337562084197998, |
| "learning_rate": 4.173838209982788e-06, |
| "loss": 1.4880233764648438, |
| "memory(GiB)": 76.04, |
| "step": 485, |
| "token_acc": 0.6452993555369705, |
| "train_speed(iter/s)": 0.027931 |
| }, |
| { |
| "epoch": 0.12670502294912406, |
| "grad_norm": 2.5540599822998047, |
| "learning_rate": 4.216867469879519e-06, |
| "loss": 1.4692201614379883, |
| "memory(GiB)": 76.04, |
| "step": 490, |
| "token_acc": 0.6396389676264359, |
| "train_speed(iter/s)": 0.027945 |
| }, |
| { |
| "epoch": 0.1279979313465641, |
| "grad_norm": 2.3116421699523926, |
| "learning_rate": 4.259896729776248e-06, |
| "loss": 1.4556035995483398, |
| "memory(GiB)": 76.04, |
| "step": 495, |
| "token_acc": 0.6138832517607735, |
| "train_speed(iter/s)": 0.02794 |
| }, |
| { |
| "epoch": 0.12929083974400413, |
| "grad_norm": 1.984755039215088, |
| "learning_rate": 4.302925989672978e-06, |
| "loss": 1.4775214195251465, |
| "memory(GiB)": 76.04, |
| "step": 500, |
| "token_acc": 0.6642163033079905, |
| "train_speed(iter/s)": 0.027946 |
| }, |
| { |
| "epoch": 0.1305837481414442, |
| "grad_norm": 2.973404884338379, |
| "learning_rate": 4.345955249569708e-06, |
| "loss": 1.421070671081543, |
| "memory(GiB)": 76.04, |
| "step": 505, |
| "token_acc": 0.6482611781405252, |
| "train_speed(iter/s)": 0.027935 |
| }, |
| { |
| "epoch": 0.13187665653888422, |
| "grad_norm": 2.4486446380615234, |
| "learning_rate": 4.388984509466438e-06, |
| "loss": 1.4313584327697755, |
| "memory(GiB)": 76.04, |
| "step": 510, |
| "token_acc": 0.6798098365476511, |
| "train_speed(iter/s)": 0.027937 |
| }, |
| { |
| "epoch": 0.13316956493632426, |
| "grad_norm": 2.326204776763916, |
| "learning_rate": 4.4320137693631674e-06, |
| "loss": 1.4402247428894044, |
| "memory(GiB)": 76.04, |
| "step": 515, |
| "token_acc": 0.7008269899445541, |
| "train_speed(iter/s)": 0.02794 |
| }, |
| { |
| "epoch": 0.1344624733337643, |
| "grad_norm": 1.8890414237976074, |
| "learning_rate": 4.475043029259897e-06, |
| "loss": 1.3767863273620606, |
| "memory(GiB)": 76.04, |
| "step": 520, |
| "token_acc": 0.6819113223176527, |
| "train_speed(iter/s)": 0.027946 |
| }, |
| { |
| "epoch": 0.13575538173120436, |
| "grad_norm": 2.8504834175109863, |
| "learning_rate": 4.518072289156627e-06, |
| "loss": 1.407078170776367, |
| "memory(GiB)": 76.04, |
| "step": 525, |
| "token_acc": 0.683960224816256, |
| "train_speed(iter/s)": 0.027961 |
| }, |
| { |
| "epoch": 0.1370482901286444, |
| "grad_norm": 2.0273118019104004, |
| "learning_rate": 4.561101549053357e-06, |
| "loss": 1.336355972290039, |
| "memory(GiB)": 76.04, |
| "step": 530, |
| "token_acc": 0.6482288828337874, |
| "train_speed(iter/s)": 0.027971 |
| }, |
| { |
| "epoch": 0.13834119852608442, |
| "grad_norm": 1.9206314086914062, |
| "learning_rate": 4.604130808950086e-06, |
| "loss": 1.3975639343261719, |
| "memory(GiB)": 76.04, |
| "step": 535, |
| "token_acc": 0.7144246703653001, |
| "train_speed(iter/s)": 0.027969 |
| }, |
| { |
| "epoch": 0.13963410692352446, |
| "grad_norm": 2.952894687652588, |
| "learning_rate": 4.647160068846816e-06, |
| "loss": 1.372206974029541, |
| "memory(GiB)": 76.04, |
| "step": 540, |
| "token_acc": 0.6865656633371321, |
| "train_speed(iter/s)": 0.027968 |
| }, |
| { |
| "epoch": 0.14092701532096452, |
| "grad_norm": 1.6340878009796143, |
| "learning_rate": 4.6901893287435465e-06, |
| "loss": 1.373509120941162, |
| "memory(GiB)": 76.04, |
| "step": 545, |
| "token_acc": 0.6471132494448557, |
| "train_speed(iter/s)": 0.027965 |
| }, |
| { |
| "epoch": 0.14221992371840456, |
| "grad_norm": 1.9484697580337524, |
| "learning_rate": 4.7332185886402755e-06, |
| "loss": 1.3788504600524902, |
| "memory(GiB)": 76.04, |
| "step": 550, |
| "token_acc": 0.6455056445137216, |
| "train_speed(iter/s)": 0.02796 |
| }, |
| { |
| "epoch": 0.1435128321158446, |
| "grad_norm": 2.1868441104888916, |
| "learning_rate": 4.776247848537005e-06, |
| "loss": 1.3279705047607422, |
| "memory(GiB)": 76.04, |
| "step": 555, |
| "token_acc": 0.6940514224859273, |
| "train_speed(iter/s)": 0.027951 |
| }, |
| { |
| "epoch": 0.14480574051328463, |
| "grad_norm": 1.7936123609542847, |
| "learning_rate": 4.819277108433735e-06, |
| "loss": 1.2733698844909669, |
| "memory(GiB)": 76.04, |
| "step": 560, |
| "token_acc": 0.6824586324720697, |
| "train_speed(iter/s)": 0.027957 |
| }, |
| { |
| "epoch": 0.1460986489107247, |
| "grad_norm": 2.035456418991089, |
| "learning_rate": 4.862306368330465e-06, |
| "loss": 1.3023791313171387, |
| "memory(GiB)": 76.04, |
| "step": 565, |
| "token_acc": 0.732261012611954, |
| "train_speed(iter/s)": 0.027962 |
| }, |
| { |
| "epoch": 0.14739155730816472, |
| "grad_norm": 1.8656188249588013, |
| "learning_rate": 4.905335628227195e-06, |
| "loss": 1.2920080184936524, |
| "memory(GiB)": 76.04, |
| "step": 570, |
| "token_acc": 0.7507948232120492, |
| "train_speed(iter/s)": 0.027955 |
| }, |
| { |
| "epoch": 0.14868446570560476, |
| "grad_norm": 2.3736183643341064, |
| "learning_rate": 4.948364888123925e-06, |
| "loss": 1.3393060684204101, |
| "memory(GiB)": 76.04, |
| "step": 575, |
| "token_acc": 0.6658280922431866, |
| "train_speed(iter/s)": 0.027956 |
| }, |
| { |
| "epoch": 0.1499773741030448, |
| "grad_norm": 1.932016372680664, |
| "learning_rate": 4.991394148020655e-06, |
| "loss": 1.2938769340515137, |
| "memory(GiB)": 76.04, |
| "step": 580, |
| "token_acc": 0.6553120323915148, |
| "train_speed(iter/s)": 0.027949 |
| }, |
| { |
| "epoch": 0.15127028250048485, |
| "grad_norm": 2.1315174102783203, |
| "learning_rate": 4.999998374576611e-06, |
| "loss": 1.3226532936096191, |
| "memory(GiB)": 76.04, |
| "step": 585, |
| "token_acc": 0.6744726857329363, |
| "train_speed(iter/s)": 0.027952 |
| }, |
| { |
| "epoch": 0.1525631908979249, |
| "grad_norm": 1.8392802476882935, |
| "learning_rate": 4.999991771297712e-06, |
| "loss": 1.298147964477539, |
| "memory(GiB)": 76.04, |
| "step": 590, |
| "token_acc": 0.6859605911330049, |
| "train_speed(iter/s)": 0.027946 |
| }, |
| { |
| "epoch": 0.15385609929536492, |
| "grad_norm": 1.761626124382019, |
| "learning_rate": 4.999980088587748e-06, |
| "loss": 1.3261050224304198, |
| "memory(GiB)": 76.04, |
| "step": 595, |
| "token_acc": 0.7036491873658387, |
| "train_speed(iter/s)": 0.027948 |
| }, |
| { |
| "epoch": 0.15514900769280496, |
| "grad_norm": 1.6543495655059814, |
| "learning_rate": 4.999963326470457e-06, |
| "loss": 1.2572940826416015, |
| "memory(GiB)": 76.04, |
| "step": 600, |
| "token_acc": 0.6750695088044486, |
| "train_speed(iter/s)": 0.027957 |
| }, |
| { |
| "epoch": 0.156441916090245, |
| "grad_norm": 1.7115892171859741, |
| "learning_rate": 4.999941484979894e-06, |
| "loss": 1.279404354095459, |
| "memory(GiB)": 76.04, |
| "step": 605, |
| "token_acc": 0.6865163550249149, |
| "train_speed(iter/s)": 0.027961 |
| }, |
| { |
| "epoch": 0.15773482448768505, |
| "grad_norm": 1.595937967300415, |
| "learning_rate": 4.999914564160437e-06, |
| "loss": 1.239435577392578, |
| "memory(GiB)": 76.04, |
| "step": 610, |
| "token_acc": 0.6807362476469357, |
| "train_speed(iter/s)": 0.02795 |
| }, |
| { |
| "epoch": 0.1590277328851251, |
| "grad_norm": 1.494962215423584, |
| "learning_rate": 4.9998825640667835e-06, |
| "loss": 1.249193286895752, |
| "memory(GiB)": 76.04, |
| "step": 615, |
| "token_acc": 0.7230293299917766, |
| "train_speed(iter/s)": 0.027954 |
| }, |
| { |
| "epoch": 0.16032064128256512, |
| "grad_norm": 1.5208592414855957, |
| "learning_rate": 4.99984548476395e-06, |
| "loss": 1.259343719482422, |
| "memory(GiB)": 76.04, |
| "step": 620, |
| "token_acc": 0.7032384586844755, |
| "train_speed(iter/s)": 0.027964 |
| }, |
| { |
| "epoch": 0.16161354968000516, |
| "grad_norm": 1.6536712646484375, |
| "learning_rate": 4.999803326327274e-06, |
| "loss": 1.2767888069152833, |
| "memory(GiB)": 76.04, |
| "step": 625, |
| "token_acc": 0.6942289403874068, |
| "train_speed(iter/s)": 0.027964 |
| }, |
| { |
| "epoch": 0.16290645807744522, |
| "grad_norm": 1.6706756353378296, |
| "learning_rate": 4.9997560888424115e-06, |
| "loss": 1.2389703750610352, |
| "memory(GiB)": 76.04, |
| "step": 630, |
| "token_acc": 0.7009845373278569, |
| "train_speed(iter/s)": 0.027965 |
| }, |
| { |
| "epoch": 0.16419936647488526, |
| "grad_norm": 1.7092621326446533, |
| "learning_rate": 4.999703772405339e-06, |
| "loss": 1.1957599639892578, |
| "memory(GiB)": 76.04, |
| "step": 635, |
| "token_acc": 0.7072115946546149, |
| "train_speed(iter/s)": 0.027969 |
| }, |
| { |
| "epoch": 0.1654922748723253, |
| "grad_norm": 1.8869761228561401, |
| "learning_rate": 4.999646377122352e-06, |
| "loss": 1.1767961502075195, |
| "memory(GiB)": 76.04, |
| "step": 640, |
| "token_acc": 0.7185826888756692, |
| "train_speed(iter/s)": 0.027965 |
| }, |
| { |
| "epoch": 0.16678518326976532, |
| "grad_norm": 1.5505192279815674, |
| "learning_rate": 4.9995839031100636e-06, |
| "loss": 1.2072343826293945, |
| "memory(GiB)": 76.04, |
| "step": 645, |
| "token_acc": 0.6714076782449726, |
| "train_speed(iter/s)": 0.027966 |
| }, |
| { |
| "epoch": 0.1680780916672054, |
| "grad_norm": 1.6234943866729736, |
| "learning_rate": 4.9995163504954105e-06, |
| "loss": 1.1328813552856445, |
| "memory(GiB)": 76.04, |
| "step": 650, |
| "token_acc": 0.718151112416477, |
| "train_speed(iter/s)": 0.02796 |
| }, |
| { |
| "epoch": 0.16937100006464542, |
| "grad_norm": 1.6178089380264282, |
| "learning_rate": 4.999443719415641e-06, |
| "loss": 1.2163790702819823, |
| "memory(GiB)": 76.04, |
| "step": 655, |
| "token_acc": 0.7040156056713294, |
| "train_speed(iter/s)": 0.027964 |
| }, |
| { |
| "epoch": 0.17066390846208546, |
| "grad_norm": 1.6068527698516846, |
| "learning_rate": 4.999366010018328e-06, |
| "loss": 1.254256248474121, |
| "memory(GiB)": 76.04, |
| "step": 660, |
| "token_acc": 0.6559546915269338, |
| "train_speed(iter/s)": 0.02796 |
| }, |
| { |
| "epoch": 0.1719568168595255, |
| "grad_norm": 1.6556439399719238, |
| "learning_rate": 4.999283222461359e-06, |
| "loss": 1.1994304656982422, |
| "memory(GiB)": 76.04, |
| "step": 665, |
| "token_acc": 0.7349007266163743, |
| "train_speed(iter/s)": 0.027956 |
| }, |
| { |
| "epoch": 0.17324972525696555, |
| "grad_norm": 1.3648124933242798, |
| "learning_rate": 4.999195356912941e-06, |
| "loss": 1.1895877838134765, |
| "memory(GiB)": 76.04, |
| "step": 670, |
| "token_acc": 0.7510124364534566, |
| "train_speed(iter/s)": 0.027953 |
| }, |
| { |
| "epoch": 0.1745426336544056, |
| "grad_norm": 1.3924592733383179, |
| "learning_rate": 4.999102413551594e-06, |
| "loss": 1.1863578796386718, |
| "memory(GiB)": 76.04, |
| "step": 675, |
| "token_acc": 0.7059563448020718, |
| "train_speed(iter/s)": 0.02796 |
| }, |
| { |
| "epoch": 0.17583554205184562, |
| "grad_norm": 1.3376160860061646, |
| "learning_rate": 4.9990043925661625e-06, |
| "loss": 1.2073113441467285, |
| "memory(GiB)": 76.04, |
| "step": 680, |
| "token_acc": 0.7219528395881767, |
| "train_speed(iter/s)": 0.027965 |
| }, |
| { |
| "epoch": 0.17712845044928566, |
| "grad_norm": 1.3140792846679688, |
| "learning_rate": 4.998901294155801e-06, |
| "loss": 1.1344953536987306, |
| "memory(GiB)": 76.04, |
| "step": 685, |
| "token_acc": 0.7513243683781581, |
| "train_speed(iter/s)": 0.027967 |
| }, |
| { |
| "epoch": 0.17842135884672572, |
| "grad_norm": 1.3123114109039307, |
| "learning_rate": 4.9987931185299836e-06, |
| "loss": 1.1784892082214355, |
| "memory(GiB)": 76.04, |
| "step": 690, |
| "token_acc": 0.6674288089794221, |
| "train_speed(iter/s)": 0.027966 |
| }, |
| { |
| "epoch": 0.17971426724416575, |
| "grad_norm": 2.659935235977173, |
| "learning_rate": 4.998679865908499e-06, |
| "loss": 1.157151985168457, |
| "memory(GiB)": 76.04, |
| "step": 695, |
| "token_acc": 0.7257713001430225, |
| "train_speed(iter/s)": 0.027976 |
| }, |
| { |
| "epoch": 0.1810071756416058, |
| "grad_norm": 1.3218928575515747, |
| "learning_rate": 4.998561536521452e-06, |
| "loss": 1.1583141326904296, |
| "memory(GiB)": 76.04, |
| "step": 700, |
| "token_acc": 0.7759477598403773, |
| "train_speed(iter/s)": 0.027968 |
| }, |
| { |
| "epoch": 0.18230008403904582, |
| "grad_norm": 1.296757698059082, |
| "learning_rate": 4.998438130609261e-06, |
| "loss": 1.1259532928466798, |
| "memory(GiB)": 76.04, |
| "step": 705, |
| "token_acc": 0.727386377384361, |
| "train_speed(iter/s)": 0.02797 |
| }, |
| { |
| "epoch": 0.18359299243648589, |
| "grad_norm": 1.4752744436264038, |
| "learning_rate": 4.9983096484226605e-06, |
| "loss": 1.1163427352905273, |
| "memory(GiB)": 76.04, |
| "step": 710, |
| "token_acc": 0.7253694101134547, |
| "train_speed(iter/s)": 0.027972 |
| }, |
| { |
| "epoch": 0.18488590083392592, |
| "grad_norm": 1.3070727586746216, |
| "learning_rate": 4.998176090222697e-06, |
| "loss": 1.1096561431884766, |
| "memory(GiB)": 76.04, |
| "step": 715, |
| "token_acc": 0.7192952446117004, |
| "train_speed(iter/s)": 0.027971 |
| }, |
| { |
| "epoch": 0.18617880923136595, |
| "grad_norm": 1.4229007959365845, |
| "learning_rate": 4.998037456280732e-06, |
| "loss": 1.1451845169067383, |
| "memory(GiB)": 76.04, |
| "step": 720, |
| "token_acc": 0.7217227852239294, |
| "train_speed(iter/s)": 0.027973 |
| }, |
| { |
| "epoch": 0.187471717628806, |
| "grad_norm": 1.2608271837234497, |
| "learning_rate": 4.9978937468784376e-06, |
| "loss": 1.106486701965332, |
| "memory(GiB)": 76.04, |
| "step": 725, |
| "token_acc": 0.7184099215637961, |
| "train_speed(iter/s)": 0.027972 |
| }, |
| { |
| "epoch": 0.18876462602624605, |
| "grad_norm": 1.3104456663131714, |
| "learning_rate": 4.9977449623078015e-06, |
| "loss": 1.1219176292419433, |
| "memory(GiB)": 76.04, |
| "step": 730, |
| "token_acc": 0.7516463274234401, |
| "train_speed(iter/s)": 0.027972 |
| }, |
| { |
| "epoch": 0.1900575344236861, |
| "grad_norm": 1.353380799293518, |
| "learning_rate": 4.9975911028711195e-06, |
| "loss": 1.1417633056640626, |
| "memory(GiB)": 76.04, |
| "step": 735, |
| "token_acc": 0.739439049637699, |
| "train_speed(iter/s)": 0.027974 |
| }, |
| { |
| "epoch": 0.19135044282112612, |
| "grad_norm": 1.4334852695465088, |
| "learning_rate": 4.997432168881002e-06, |
| "loss": 1.1226820945739746, |
| "memory(GiB)": 76.04, |
| "step": 740, |
| "token_acc": 0.7192217376719222, |
| "train_speed(iter/s)": 0.027973 |
| }, |
| { |
| "epoch": 0.19264335121856616, |
| "grad_norm": 1.3817933797836304, |
| "learning_rate": 4.997268160660366e-06, |
| "loss": 1.1217589378356934, |
| "memory(GiB)": 76.04, |
| "step": 745, |
| "token_acc": 0.731275833562965, |
| "train_speed(iter/s)": 0.027968 |
| }, |
| { |
| "epoch": 0.19393625961600622, |
| "grad_norm": 1.2183865308761597, |
| "learning_rate": 4.99709907854244e-06, |
| "loss": 1.1149643898010253, |
| "memory(GiB)": 76.04, |
| "step": 750, |
| "token_acc": 0.7301776086267048, |
| "train_speed(iter/s)": 0.027963 |
| }, |
| { |
| "epoch": 0.19522916801344625, |
| "grad_norm": 1.2150975465774536, |
| "learning_rate": 4.9969249228707625e-06, |
| "loss": 1.0912652969360352, |
| "memory(GiB)": 76.04, |
| "step": 755, |
| "token_acc": 0.7373849358328294, |
| "train_speed(iter/s)": 0.027958 |
| }, |
| { |
| "epoch": 0.1965220764108863, |
| "grad_norm": 1.3223531246185303, |
| "learning_rate": 4.996745693999179e-06, |
| "loss": 1.1588199615478516, |
| "memory(GiB)": 76.04, |
| "step": 760, |
| "token_acc": 0.7059101248980643, |
| "train_speed(iter/s)": 0.027957 |
| }, |
| { |
| "epoch": 0.19781498480832632, |
| "grad_norm": 1.419268250465393, |
| "learning_rate": 4.996561392291842e-06, |
| "loss": 1.1392223358154296, |
| "memory(GiB)": 76.04, |
| "step": 765, |
| "token_acc": 0.6890176058642581, |
| "train_speed(iter/s)": 0.027957 |
| }, |
| { |
| "epoch": 0.19910789320576638, |
| "grad_norm": 1.4065704345703125, |
| "learning_rate": 4.996372018123213e-06, |
| "loss": 1.0843055725097657, |
| "memory(GiB)": 76.04, |
| "step": 770, |
| "token_acc": 0.7391510740140037, |
| "train_speed(iter/s)": 0.027954 |
| }, |
| { |
| "epoch": 0.20040080160320642, |
| "grad_norm": 1.3634155988693237, |
| "learning_rate": 4.996177571878058e-06, |
| "loss": 1.1053363800048828, |
| "memory(GiB)": 76.04, |
| "step": 775, |
| "token_acc": 0.7354631733725437, |
| "train_speed(iter/s)": 0.027955 |
| }, |
| { |
| "epoch": 0.20169371000064645, |
| "grad_norm": 1.2733179330825806, |
| "learning_rate": 4.995978053951449e-06, |
| "loss": 1.120106315612793, |
| "memory(GiB)": 76.04, |
| "step": 780, |
| "token_acc": 0.7424737177445047, |
| "train_speed(iter/s)": 0.027949 |
| }, |
| { |
| "epoch": 0.2029866183980865, |
| "grad_norm": 1.224840521812439, |
| "learning_rate": 4.995773464748763e-06, |
| "loss": 1.117567253112793, |
| "memory(GiB)": 76.04, |
| "step": 785, |
| "token_acc": 0.7321232876712329, |
| "train_speed(iter/s)": 0.027949 |
| }, |
| { |
| "epoch": 0.20427952679552655, |
| "grad_norm": 1.5229790210723877, |
| "learning_rate": 4.995563804685679e-06, |
| "loss": 1.0795328140258789, |
| "memory(GiB)": 76.04, |
| "step": 790, |
| "token_acc": 0.7580057607590647, |
| "train_speed(iter/s)": 0.027946 |
| }, |
| { |
| "epoch": 0.20557243519296658, |
| "grad_norm": 1.4150093793869019, |
| "learning_rate": 4.9953490741881796e-06, |
| "loss": 1.1043382644653321, |
| "memory(GiB)": 76.04, |
| "step": 795, |
| "token_acc": 0.700471466457126, |
| "train_speed(iter/s)": 0.027945 |
| }, |
| { |
| "epoch": 0.20686534359040662, |
| "grad_norm": 1.4656771421432495, |
| "learning_rate": 4.9951292736925515e-06, |
| "loss": 1.0530550956726075, |
| "memory(GiB)": 76.04, |
| "step": 800, |
| "token_acc": 0.7616183012073713, |
| "train_speed(iter/s)": 0.027949 |
| }, |
| { |
| "epoch": 0.20815825198784665, |
| "grad_norm": 1.2729520797729492, |
| "learning_rate": 4.994904403645378e-06, |
| "loss": 1.0495079040527344, |
| "memory(GiB)": 76.04, |
| "step": 805, |
| "token_acc": 0.7524990313831849, |
| "train_speed(iter/s)": 0.027946 |
| }, |
| { |
| "epoch": 0.2094511603852867, |
| "grad_norm": 1.4444711208343506, |
| "learning_rate": 4.9946744645035496e-06, |
| "loss": 1.0937026977539062, |
| "memory(GiB)": 76.04, |
| "step": 810, |
| "token_acc": 0.6928733031674208, |
| "train_speed(iter/s)": 0.027942 |
| }, |
| { |
| "epoch": 0.21074406878272675, |
| "grad_norm": 1.390000820159912, |
| "learning_rate": 4.994439456734248e-06, |
| "loss": 1.092994499206543, |
| "memory(GiB)": 76.04, |
| "step": 815, |
| "token_acc": 0.7247861835309672, |
| "train_speed(iter/s)": 0.027945 |
| }, |
| { |
| "epoch": 0.21203697718016679, |
| "grad_norm": 1.1274871826171875, |
| "learning_rate": 4.994199380814958e-06, |
| "loss": 1.064702320098877, |
| "memory(GiB)": 76.04, |
| "step": 820, |
| "token_acc": 0.7177709296353364, |
| "train_speed(iter/s)": 0.027947 |
| }, |
| { |
| "epoch": 0.21332988557760682, |
| "grad_norm": 1.1373642683029175, |
| "learning_rate": 4.9939542372334625e-06, |
| "loss": 1.0526296615600585, |
| "memory(GiB)": 76.04, |
| "step": 825, |
| "token_acc": 0.7360295948493579, |
| "train_speed(iter/s)": 0.027945 |
| }, |
| { |
| "epoch": 0.21462279397504685, |
| "grad_norm": 1.2715613842010498, |
| "learning_rate": 4.993704026487837e-06, |
| "loss": 1.0497617721557617, |
| "memory(GiB)": 76.04, |
| "step": 830, |
| "token_acc": 0.7389768524290404, |
| "train_speed(iter/s)": 0.027948 |
| }, |
| { |
| "epoch": 0.21591570237248692, |
| "grad_norm": 1.3336645364761353, |
| "learning_rate": 4.993448749086455e-06, |
| "loss": 1.0744933128356933, |
| "memory(GiB)": 76.04, |
| "step": 835, |
| "token_acc": 0.7657203842049093, |
| "train_speed(iter/s)": 0.027954 |
| }, |
| { |
| "epoch": 0.21720861076992695, |
| "grad_norm": 1.3586411476135254, |
| "learning_rate": 4.9931884055479855e-06, |
| "loss": 1.0742916107177733, |
| "memory(GiB)": 76.04, |
| "step": 840, |
| "token_acc": 0.7213896713615023, |
| "train_speed(iter/s)": 0.027951 |
| }, |
| { |
| "epoch": 0.218501519167367, |
| "grad_norm": 1.2536128759384155, |
| "learning_rate": 4.992922996401386e-06, |
| "loss": 1.0365938186645507, |
| "memory(GiB)": 76.04, |
| "step": 845, |
| "token_acc": 0.7336075791573173, |
| "train_speed(iter/s)": 0.027954 |
| }, |
| { |
| "epoch": 0.21979442756480702, |
| "grad_norm": 1.0951530933380127, |
| "learning_rate": 4.992652522185912e-06, |
| "loss": 1.0514394760131835, |
| "memory(GiB)": 76.04, |
| "step": 850, |
| "token_acc": 0.7264080844107594, |
| "train_speed(iter/s)": 0.027953 |
| }, |
| { |
| "epoch": 0.22108733596224708, |
| "grad_norm": 1.302139401435852, |
| "learning_rate": 4.992376983451106e-06, |
| "loss": 1.0614931106567382, |
| "memory(GiB)": 76.04, |
| "step": 855, |
| "token_acc": 0.7223553005403599, |
| "train_speed(iter/s)": 0.02795 |
| }, |
| { |
| "epoch": 0.22238024435968712, |
| "grad_norm": 1.2117213010787964, |
| "learning_rate": 4.992096380756802e-06, |
| "loss": 1.0211700439453124, |
| "memory(GiB)": 76.04, |
| "step": 860, |
| "token_acc": 0.7403405370006548, |
| "train_speed(iter/s)": 0.027946 |
| }, |
| { |
| "epoch": 0.22367315275712715, |
| "grad_norm": 1.1893075704574585, |
| "learning_rate": 4.9918107146731234e-06, |
| "loss": 1.02754545211792, |
| "memory(GiB)": 76.04, |
| "step": 865, |
| "token_acc": 0.7416530944625407, |
| "train_speed(iter/s)": 0.027949 |
| }, |
| { |
| "epoch": 0.2249660611545672, |
| "grad_norm": 1.3030766248703003, |
| "learning_rate": 4.991519985780479e-06, |
| "loss": 1.0705657958984376, |
| "memory(GiB)": 76.04, |
| "step": 870, |
| "token_acc": 0.7212401717504043, |
| "train_speed(iter/s)": 0.027952 |
| }, |
| { |
| "epoch": 0.22625896955200725, |
| "grad_norm": 1.3120644092559814, |
| "learning_rate": 4.991224194669567e-06, |
| "loss": 1.0438270568847656, |
| "memory(GiB)": 76.04, |
| "step": 875, |
| "token_acc": 0.7455082274151154, |
| "train_speed(iter/s)": 0.027949 |
| }, |
| { |
| "epoch": 0.22755187794944728, |
| "grad_norm": 1.2804142236709595, |
| "learning_rate": 4.99092334194137e-06, |
| "loss": 1.0537703514099122, |
| "memory(GiB)": 76.04, |
| "step": 880, |
| "token_acc": 0.7769719655320311, |
| "train_speed(iter/s)": 0.027949 |
| }, |
| { |
| "epoch": 0.22884478634688732, |
| "grad_norm": 1.141849160194397, |
| "learning_rate": 4.990617428207153e-06, |
| "loss": 1.0109355926513672, |
| "memory(GiB)": 76.04, |
| "step": 885, |
| "token_acc": 0.7504947627552252, |
| "train_speed(iter/s)": 0.027943 |
| }, |
| { |
| "epoch": 0.23013769474432735, |
| "grad_norm": 1.3178867101669312, |
| "learning_rate": 4.990306454088467e-06, |
| "loss": 1.0286626815795898, |
| "memory(GiB)": 76.04, |
| "step": 890, |
| "token_acc": 0.7264516717602518, |
| "train_speed(iter/s)": 0.027938 |
| }, |
| { |
| "epoch": 0.23143060314176742, |
| "grad_norm": 1.5300428867340088, |
| "learning_rate": 4.98999042021714e-06, |
| "loss": 1.0479674339294434, |
| "memory(GiB)": 76.04, |
| "step": 895, |
| "token_acc": 0.7131689263189698, |
| "train_speed(iter/s)": 0.027942 |
| }, |
| { |
| "epoch": 0.23272351153920745, |
| "grad_norm": 1.482164740562439, |
| "learning_rate": 4.989669327235285e-06, |
| "loss": 1.0493934631347657, |
| "memory(GiB)": 76.04, |
| "step": 900, |
| "token_acc": 0.7489839605373375, |
| "train_speed(iter/s)": 0.027946 |
| }, |
| { |
| "epoch": 0.23401641993664749, |
| "grad_norm": 1.259787678718567, |
| "learning_rate": 4.989343175795291e-06, |
| "loss": 1.0503623962402344, |
| "memory(GiB)": 76.04, |
| "step": 905, |
| "token_acc": 0.7800720499077409, |
| "train_speed(iter/s)": 0.027945 |
| }, |
| { |
| "epoch": 0.23530932833408752, |
| "grad_norm": 1.2325483560562134, |
| "learning_rate": 4.9890119665598265e-06, |
| "loss": 1.0217859268188476, |
| "memory(GiB)": 76.04, |
| "step": 910, |
| "token_acc": 0.7489180775947156, |
| "train_speed(iter/s)": 0.027956 |
| }, |
| { |
| "epoch": 0.23660223673152758, |
| "grad_norm": 1.2175496816635132, |
| "learning_rate": 4.988675700201836e-06, |
| "loss": 1.0210546493530273, |
| "memory(GiB)": 76.04, |
| "step": 915, |
| "token_acc": 0.7292449540928989, |
| "train_speed(iter/s)": 0.02796 |
| }, |
| { |
| "epoch": 0.23789514512896762, |
| "grad_norm": 1.282808542251587, |
| "learning_rate": 4.988334377404537e-06, |
| "loss": 1.0249996185302734, |
| "memory(GiB)": 76.04, |
| "step": 920, |
| "token_acc": 0.7644511799090712, |
| "train_speed(iter/s)": 0.027959 |
| }, |
| { |
| "epoch": 0.23918805352640765, |
| "grad_norm": 1.388667106628418, |
| "learning_rate": 4.9879879988614226e-06, |
| "loss": 1.0185768127441406, |
| "memory(GiB)": 76.04, |
| "step": 925, |
| "token_acc": 0.7619047619047619, |
| "train_speed(iter/s)": 0.027952 |
| }, |
| { |
| "epoch": 0.24048096192384769, |
| "grad_norm": 1.1671382188796997, |
| "learning_rate": 4.987636565276258e-06, |
| "loss": 0.9928812026977539, |
| "memory(GiB)": 76.04, |
| "step": 930, |
| "token_acc": 0.7683724235963042, |
| "train_speed(iter/s)": 0.027942 |
| }, |
| { |
| "epoch": 0.24177387032128775, |
| "grad_norm": 1.122075080871582, |
| "learning_rate": 4.987280077363077e-06, |
| "loss": 0.993900203704834, |
| "memory(GiB)": 76.04, |
| "step": 935, |
| "token_acc": 0.7723164046901659, |
| "train_speed(iter/s)": 0.027941 |
| }, |
| { |
| "epoch": 0.24306677871872778, |
| "grad_norm": 1.2567425966262817, |
| "learning_rate": 4.986918535846187e-06, |
| "loss": 1.0273015975952149, |
| "memory(GiB)": 76.04, |
| "step": 940, |
| "token_acc": 0.7539662184442898, |
| "train_speed(iter/s)": 0.027938 |
| }, |
| { |
| "epoch": 0.24435968711616782, |
| "grad_norm": 1.1676665544509888, |
| "learning_rate": 4.986551941460158e-06, |
| "loss": 0.9982240676879883, |
| "memory(GiB)": 76.04, |
| "step": 945, |
| "token_acc": 0.7614623913694936, |
| "train_speed(iter/s)": 0.027934 |
| }, |
| { |
| "epoch": 0.24565259551360785, |
| "grad_norm": 1.2946789264678955, |
| "learning_rate": 4.98618029494983e-06, |
| "loss": 1.0252174377441405, |
| "memory(GiB)": 76.04, |
| "step": 950, |
| "token_acc": 0.7381703470031545, |
| "train_speed(iter/s)": 0.027937 |
| }, |
| { |
| "epoch": 0.24694550391104791, |
| "grad_norm": 1.6065422296524048, |
| "learning_rate": 4.985803597070306e-06, |
| "loss": 1.0155667304992675, |
| "memory(GiB)": 76.04, |
| "step": 955, |
| "token_acc": 0.736665709087427, |
| "train_speed(iter/s)": 0.027941 |
| }, |
| { |
| "epoch": 0.24823841230848795, |
| "grad_norm": 1.2003506422042847, |
| "learning_rate": 4.985421848586954e-06, |
| "loss": 1.020925521850586, |
| "memory(GiB)": 76.04, |
| "step": 960, |
| "token_acc": 0.7488078360613482, |
| "train_speed(iter/s)": 0.027942 |
| }, |
| { |
| "epoch": 0.24953132070592798, |
| "grad_norm": 1.5374075174331665, |
| "learning_rate": 4.985035050275402e-06, |
| "loss": 1.013150405883789, |
| "memory(GiB)": 76.04, |
| "step": 965, |
| "token_acc": 0.7596559355296673, |
| "train_speed(iter/s)": 0.027937 |
| }, |
| { |
| "epoch": 0.25082422910336805, |
| "grad_norm": 1.2153679132461548, |
| "learning_rate": 4.984643202921538e-06, |
| "loss": 1.0238693237304688, |
| "memory(GiB)": 76.04, |
| "step": 970, |
| "token_acc": 0.7379732289822666, |
| "train_speed(iter/s)": 0.027935 |
| }, |
| { |
| "epoch": 0.25211713750080805, |
| "grad_norm": 1.153549075126648, |
| "learning_rate": 4.984246307321511e-06, |
| "loss": 0.9940820693969726, |
| "memory(GiB)": 76.04, |
| "step": 975, |
| "token_acc": 0.7361269479285443, |
| "train_speed(iter/s)": 0.02794 |
| }, |
| { |
| "epoch": 0.2534100458982481, |
| "grad_norm": 1.8333048820495605, |
| "learning_rate": 4.983844364281723e-06, |
| "loss": 0.9937544822692871, |
| "memory(GiB)": 76.04, |
| "step": 980, |
| "token_acc": 0.739548529052237, |
| "train_speed(iter/s)": 0.027942 |
| }, |
| { |
| "epoch": 0.2547029542956882, |
| "grad_norm": 2.401205062866211, |
| "learning_rate": 4.983437374618835e-06, |
| "loss": 1.039668083190918, |
| "memory(GiB)": 76.04, |
| "step": 985, |
| "token_acc": 0.7770313636558127, |
| "train_speed(iter/s)": 0.027946 |
| }, |
| { |
| "epoch": 0.2559958626931282, |
| "grad_norm": 4.683147430419922, |
| "learning_rate": 4.983025339159759e-06, |
| "loss": 1.0015725135803222, |
| "memory(GiB)": 76.04, |
| "step": 990, |
| "token_acc": 0.7626828398144845, |
| "train_speed(iter/s)": 0.027952 |
| }, |
| { |
| "epoch": 0.25728877109056825, |
| "grad_norm": 1.1371662616729736, |
| "learning_rate": 4.982608258741662e-06, |
| "loss": 1.0269445419311523, |
| "memory(GiB)": 76.04, |
| "step": 995, |
| "token_acc": 0.7389006342494715, |
| "train_speed(iter/s)": 0.027949 |
| }, |
| { |
| "epoch": 0.25858167948800825, |
| "grad_norm": 1.1645772457122803, |
| "learning_rate": 4.982186134211957e-06, |
| "loss": 1.0103599548339843, |
| "memory(GiB)": 76.04, |
| "step": 1000, |
| "token_acc": 0.7301087055814378, |
| "train_speed(iter/s)": 0.027949 |
| }, |
| { |
| "epoch": 0.2598745878854483, |
| "grad_norm": 1.1663211584091187, |
| "learning_rate": 4.98175896642831e-06, |
| "loss": 0.9422775268554687, |
| "memory(GiB)": 76.04, |
| "step": 1005, |
| "token_acc": 0.7944890929965557, |
| "train_speed(iter/s)": 0.027805 |
| }, |
| { |
| "epoch": 0.2611674962828884, |
| "grad_norm": 1.0399482250213623, |
| "learning_rate": 4.981326756258629e-06, |
| "loss": 0.9761096000671386, |
| "memory(GiB)": 76.04, |
| "step": 1010, |
| "token_acc": 0.7143371038011112, |
| "train_speed(iter/s)": 0.027808 |
| }, |
| { |
| "epoch": 0.2624604046803284, |
| "grad_norm": 1.1718828678131104, |
| "learning_rate": 4.9808895045810715e-06, |
| "loss": 0.9784045219421387, |
| "memory(GiB)": 76.04, |
| "step": 1015, |
| "token_acc": 0.7504920027194332, |
| "train_speed(iter/s)": 0.027808 |
| }, |
| { |
| "epoch": 0.26375331307776845, |
| "grad_norm": 1.5339823961257935, |
| "learning_rate": 4.980447212284035e-06, |
| "loss": 0.9676334381103515, |
| "memory(GiB)": 76.04, |
| "step": 1020, |
| "token_acc": 0.759060549655097, |
| "train_speed(iter/s)": 0.027814 |
| }, |
| { |
| "epoch": 0.26504622147520845, |
| "grad_norm": 1.1942572593688965, |
| "learning_rate": 4.979999880266162e-06, |
| "loss": 0.9654909133911133, |
| "memory(GiB)": 76.04, |
| "step": 1025, |
| "token_acc": 0.7506817855604995, |
| "train_speed(iter/s)": 0.027813 |
| }, |
| { |
| "epoch": 0.2663391298726485, |
| "grad_norm": 1.2203813791275024, |
| "learning_rate": 4.979547509436329e-06, |
| "loss": 1.0167608261108398, |
| "memory(GiB)": 76.04, |
| "step": 1030, |
| "token_acc": 0.7280177187153931, |
| "train_speed(iter/s)": 0.027814 |
| }, |
| { |
| "epoch": 0.2676320382700886, |
| "grad_norm": 1.1107381582260132, |
| "learning_rate": 4.979090100713657e-06, |
| "loss": 0.944733715057373, |
| "memory(GiB)": 76.04, |
| "step": 1035, |
| "token_acc": 0.8061226833245297, |
| "train_speed(iter/s)": 0.027814 |
| }, |
| { |
| "epoch": 0.2689249466675286, |
| "grad_norm": 1.2653183937072754, |
| "learning_rate": 4.978627655027497e-06, |
| "loss": 0.9633764266967774, |
| "memory(GiB)": 76.04, |
| "step": 1040, |
| "token_acc": 0.7571960586546668, |
| "train_speed(iter/s)": 0.027816 |
| }, |
| { |
| "epoch": 0.27021785506496865, |
| "grad_norm": 1.3168116807937622, |
| "learning_rate": 4.978160173317439e-06, |
| "loss": 1.0249298095703125, |
| "memory(GiB)": 76.04, |
| "step": 1045, |
| "token_acc": 0.7487644982349975, |
| "train_speed(iter/s)": 0.027818 |
| }, |
| { |
| "epoch": 0.2715107634624087, |
| "grad_norm": 1.2121893167495728, |
| "learning_rate": 4.9776876565332995e-06, |
| "loss": 0.9906470298767089, |
| "memory(GiB)": 76.04, |
| "step": 1050, |
| "token_acc": 0.7474919305591904, |
| "train_speed(iter/s)": 0.027814 |
| }, |
| { |
| "epoch": 0.2728036718598487, |
| "grad_norm": 1.1107871532440186, |
| "learning_rate": 4.97721010563513e-06, |
| "loss": 0.9752838134765625, |
| "memory(GiB)": 76.04, |
| "step": 1055, |
| "token_acc": 0.7437383839277812, |
| "train_speed(iter/s)": 0.027814 |
| }, |
| { |
| "epoch": 0.2740965802572888, |
| "grad_norm": 1.0446956157684326, |
| "learning_rate": 4.976727521593209e-06, |
| "loss": 0.9726054191589355, |
| "memory(GiB)": 76.04, |
| "step": 1060, |
| "token_acc": 0.7167705088265836, |
| "train_speed(iter/s)": 0.027817 |
| }, |
| { |
| "epoch": 0.2753894886547288, |
| "grad_norm": 1.2050210237503052, |
| "learning_rate": 4.9762399053880395e-06, |
| "loss": 1.0308534622192382, |
| "memory(GiB)": 76.04, |
| "step": 1065, |
| "token_acc": 0.7568904593639576, |
| "train_speed(iter/s)": 0.027811 |
| }, |
| { |
| "epoch": 0.27668239705216885, |
| "grad_norm": 1.5667829513549805, |
| "learning_rate": 4.97574725801035e-06, |
| "loss": 0.9559214591979981, |
| "memory(GiB)": 76.04, |
| "step": 1070, |
| "token_acc": 0.7598421892050701, |
| "train_speed(iter/s)": 0.027811 |
| }, |
| { |
| "epoch": 0.2779753054496089, |
| "grad_norm": 1.1865519285202026, |
| "learning_rate": 4.975249580461092e-06, |
| "loss": 1.010297679901123, |
| "memory(GiB)": 76.04, |
| "step": 1075, |
| "token_acc": 0.744272567064813, |
| "train_speed(iter/s)": 0.02781 |
| }, |
| { |
| "epoch": 0.2792682138470489, |
| "grad_norm": 1.058516263961792, |
| "learning_rate": 4.974746873751435e-06, |
| "loss": 1.015725040435791, |
| "memory(GiB)": 76.04, |
| "step": 1080, |
| "token_acc": 0.7451629446594247, |
| "train_speed(iter/s)": 0.027809 |
| }, |
| { |
| "epoch": 0.280561122244489, |
| "grad_norm": 1.0704551935195923, |
| "learning_rate": 4.9742391389027695e-06, |
| "loss": 0.9716552734375, |
| "memory(GiB)": 76.04, |
| "step": 1085, |
| "token_acc": 0.7283868278449354, |
| "train_speed(iter/s)": 0.027812 |
| }, |
| { |
| "epoch": 0.28185403064192904, |
| "grad_norm": 1.0352953672409058, |
| "learning_rate": 4.973726376946699e-06, |
| "loss": 0.9752028465270997, |
| "memory(GiB)": 76.04, |
| "step": 1090, |
| "token_acc": 0.7493878800244848, |
| "train_speed(iter/s)": 0.027812 |
| }, |
| { |
| "epoch": 0.28314693903936905, |
| "grad_norm": 1.1525194644927979, |
| "learning_rate": 4.973208588925045e-06, |
| "loss": 0.9712867736816406, |
| "memory(GiB)": 76.04, |
| "step": 1095, |
| "token_acc": 0.7517407605784682, |
| "train_speed(iter/s)": 0.027815 |
| }, |
| { |
| "epoch": 0.2844398474368091, |
| "grad_norm": 1.0644663572311401, |
| "learning_rate": 4.972685775889836e-06, |
| "loss": 0.9582048416137695, |
| "memory(GiB)": 76.04, |
| "step": 1100, |
| "token_acc": 0.7543896103896104, |
| "train_speed(iter/s)": 0.027814 |
| }, |
| { |
| "epoch": 0.2857327558342491, |
| "grad_norm": 1.143939733505249, |
| "learning_rate": 4.9721579389033125e-06, |
| "loss": 0.9977324485778809, |
| "memory(GiB)": 76.04, |
| "step": 1105, |
| "token_acc": 0.7675656607767053, |
| "train_speed(iter/s)": 0.027815 |
| }, |
| { |
| "epoch": 0.2870256642316892, |
| "grad_norm": 1.3790191411972046, |
| "learning_rate": 4.971625079037925e-06, |
| "loss": 0.9518113136291504, |
| "memory(GiB)": 76.04, |
| "step": 1110, |
| "token_acc": 0.7680853988179778, |
| "train_speed(iter/s)": 0.027815 |
| }, |
| { |
| "epoch": 0.28831857262912924, |
| "grad_norm": 1.1732274293899536, |
| "learning_rate": 4.971087197376325e-06, |
| "loss": 0.9738147735595704, |
| "memory(GiB)": 76.04, |
| "step": 1115, |
| "token_acc": 0.7463278436450451, |
| "train_speed(iter/s)": 0.027817 |
| }, |
| { |
| "epoch": 0.28961148102656925, |
| "grad_norm": 1.0615239143371582, |
| "learning_rate": 4.970544295011369e-06, |
| "loss": 0.9704501152038574, |
| "memory(GiB)": 76.04, |
| "step": 1120, |
| "token_acc": 0.72969752000454, |
| "train_speed(iter/s)": 0.027813 |
| }, |
| { |
| "epoch": 0.2909043894240093, |
| "grad_norm": 3877.243896484375, |
| "learning_rate": 4.969996373046117e-06, |
| "loss": 1.222921085357666, |
| "memory(GiB)": 76.04, |
| "step": 1125, |
| "token_acc": 0.7576140282702677, |
| "train_speed(iter/s)": 0.027814 |
| }, |
| { |
| "epoch": 0.2921972978214494, |
| "grad_norm": 358.6942138671875, |
| "learning_rate": 4.969443432593823e-06, |
| "loss": 4.470377349853516, |
| "memory(GiB)": 76.04, |
| "step": 1130, |
| "token_acc": 0.40514805901433504, |
| "train_speed(iter/s)": 0.027816 |
| }, |
| { |
| "epoch": 0.2934902062188894, |
| "grad_norm": 646.41748046875, |
| "learning_rate": 4.968885474777941e-06, |
| "loss": 4.353573226928711, |
| "memory(GiB)": 76.04, |
| "step": 1135, |
| "token_acc": 0.404987128989238, |
| "train_speed(iter/s)": 0.027819 |
| }, |
| { |
| "epoch": 0.29478311461632944, |
| "grad_norm": 133.66444396972656, |
| "learning_rate": 4.968322500732118e-06, |
| "loss": 3.6896575927734374, |
| "memory(GiB)": 76.04, |
| "step": 1140, |
| "token_acc": 0.4133568570076165, |
| "train_speed(iter/s)": 0.027822 |
| }, |
| { |
| "epoch": 0.29607602301376945, |
| "grad_norm": 371.82977294921875, |
| "learning_rate": 4.967754511600192e-06, |
| "loss": 3.5502925872802735, |
| "memory(GiB)": 76.04, |
| "step": 1145, |
| "token_acc": 0.5075539022168236, |
| "train_speed(iter/s)": 0.027827 |
| }, |
| { |
| "epoch": 0.2973689314112095, |
| "grad_norm": 716.2843627929688, |
| "learning_rate": 4.967181508536193e-06, |
| "loss": 2.8075958251953126, |
| "memory(GiB)": 76.04, |
| "step": 1150, |
| "token_acc": 0.47253147435230464, |
| "train_speed(iter/s)": 0.027825 |
| }, |
| { |
| "epoch": 0.2986618398086496, |
| "grad_norm": 235.48390197753906, |
| "learning_rate": 4.9666034927043346e-06, |
| "loss": 2.0499923706054686, |
| "memory(GiB)": 76.04, |
| "step": 1155, |
| "token_acc": 0.6685902289996606, |
| "train_speed(iter/s)": 0.027823 |
| }, |
| { |
| "epoch": 0.2999547482060896, |
| "grad_norm": 4.845156192779541, |
| "learning_rate": 4.96602046527902e-06, |
| "loss": 1.2279346466064454, |
| "memory(GiB)": 76.04, |
| "step": 1160, |
| "token_acc": 0.7027854164818593, |
| "train_speed(iter/s)": 0.027823 |
| }, |
| { |
| "epoch": 0.30124765660352965, |
| "grad_norm": 1.512497901916504, |
| "learning_rate": 4.96543242744483e-06, |
| "loss": 1.0421756744384765, |
| "memory(GiB)": 76.04, |
| "step": 1165, |
| "token_acc": 0.7120159370292989, |
| "train_speed(iter/s)": 0.027825 |
| }, |
| { |
| "epoch": 0.3025405650009697, |
| "grad_norm": 1.465827465057373, |
| "learning_rate": 4.964839380396529e-06, |
| "loss": 0.953398609161377, |
| "memory(GiB)": 76.04, |
| "step": 1170, |
| "token_acc": 0.7572973559518211, |
| "train_speed(iter/s)": 0.027828 |
| }, |
| { |
| "epoch": 0.3038334733984097, |
| "grad_norm": 1.2102559804916382, |
| "learning_rate": 4.964241325339056e-06, |
| "loss": 0.9315123558044434, |
| "memory(GiB)": 76.04, |
| "step": 1175, |
| "token_acc": 0.7300858454781394, |
| "train_speed(iter/s)": 0.027824 |
| }, |
| { |
| "epoch": 0.3051263817958498, |
| "grad_norm": 2.6368777751922607, |
| "learning_rate": 4.963638263487528e-06, |
| "loss": 0.9297597885131836, |
| "memory(GiB)": 76.04, |
| "step": 1180, |
| "token_acc": 0.7474219317356572, |
| "train_speed(iter/s)": 0.027825 |
| }, |
| { |
| "epoch": 0.3064192901932898, |
| "grad_norm": 353.7561950683594, |
| "learning_rate": 4.963030196067233e-06, |
| "loss": 0.942746639251709, |
| "memory(GiB)": 76.04, |
| "step": 1185, |
| "token_acc": 0.7225467822911913, |
| "train_speed(iter/s)": 0.027827 |
| }, |
| { |
| "epoch": 0.30771219859072985, |
| "grad_norm": 1.3002510070800781, |
| "learning_rate": 4.96241712431363e-06, |
| "loss": 0.9472110748291016, |
| "memory(GiB)": 76.04, |
| "step": 1190, |
| "token_acc": 0.7242681047765793, |
| "train_speed(iter/s)": 0.027824 |
| }, |
| { |
| "epoch": 0.3090051069881699, |
| "grad_norm": 1.1316903829574585, |
| "learning_rate": 4.9617990494723444e-06, |
| "loss": 0.9454745292663574, |
| "memory(GiB)": 76.04, |
| "step": 1195, |
| "token_acc": 0.7497220256747195, |
| "train_speed(iter/s)": 0.027828 |
| }, |
| { |
| "epoch": 0.3102980153856099, |
| "grad_norm": 1.108446478843689, |
| "learning_rate": 4.961175972799169e-06, |
| "loss": 0.9554048538208008, |
| "memory(GiB)": 76.04, |
| "step": 1200, |
| "token_acc": 0.7410781445883828, |
| "train_speed(iter/s)": 0.02783 |
| }, |
| { |
| "epoch": 0.31159092378305, |
| "grad_norm": 1.25348961353302, |
| "learning_rate": 4.960547895560058e-06, |
| "loss": 0.9723408699035645, |
| "memory(GiB)": 76.04, |
| "step": 1205, |
| "token_acc": 0.7924053665548635, |
| "train_speed(iter/s)": 0.027837 |
| }, |
| { |
| "epoch": 0.31288383218049, |
| "grad_norm": 2.172182559967041, |
| "learning_rate": 4.959914819031125e-06, |
| "loss": 0.9340225219726562, |
| "memory(GiB)": 76.04, |
| "step": 1210, |
| "token_acc": 0.7698709945900957, |
| "train_speed(iter/s)": 0.027838 |
| }, |
| { |
| "epoch": 0.31417674057793005, |
| "grad_norm": 9.525141716003418, |
| "learning_rate": 4.959276744498642e-06, |
| "loss": 0.9623056411743164, |
| "memory(GiB)": 76.04, |
| "step": 1215, |
| "token_acc": 0.7322046531438002, |
| "train_speed(iter/s)": 0.027841 |
| }, |
| { |
| "epoch": 0.3154696489753701, |
| "grad_norm": 1.1645443439483643, |
| "learning_rate": 4.9586336732590344e-06, |
| "loss": 0.9339606285095214, |
| "memory(GiB)": 76.04, |
| "step": 1220, |
| "token_acc": 0.7669276434655424, |
| "train_speed(iter/s)": 0.02784 |
| }, |
| { |
| "epoch": 0.3167625573728101, |
| "grad_norm": 1.1521093845367432, |
| "learning_rate": 4.957985606618882e-06, |
| "loss": 0.9695714950561524, |
| "memory(GiB)": 76.04, |
| "step": 1225, |
| "token_acc": 0.7714450456843785, |
| "train_speed(iter/s)": 0.027843 |
| }, |
| { |
| "epoch": 0.3180554657702502, |
| "grad_norm": 1.1521477699279785, |
| "learning_rate": 4.957332545894914e-06, |
| "loss": 0.9398648262023925, |
| "memory(GiB)": 76.04, |
| "step": 1230, |
| "token_acc": 0.8035982876316093, |
| "train_speed(iter/s)": 0.027843 |
| }, |
| { |
| "epoch": 0.31934837416769024, |
| "grad_norm": 1.1556190252304077, |
| "learning_rate": 4.956674492414003e-06, |
| "loss": 0.9573787689208985, |
| "memory(GiB)": 76.04, |
| "step": 1235, |
| "token_acc": 0.7518860016764459, |
| "train_speed(iter/s)": 0.027841 |
| }, |
| { |
| "epoch": 0.32064128256513025, |
| "grad_norm": 1.3817846775054932, |
| "learning_rate": 4.95601144751317e-06, |
| "loss": 0.9417957305908203, |
| "memory(GiB)": 76.04, |
| "step": 1240, |
| "token_acc": 0.7512520868113522, |
| "train_speed(iter/s)": 0.027839 |
| }, |
| { |
| "epoch": 0.3219341909625703, |
| "grad_norm": 1.157069206237793, |
| "learning_rate": 4.955343412539576e-06, |
| "loss": 0.9470592498779297, |
| "memory(GiB)": 76.04, |
| "step": 1245, |
| "token_acc": 0.7514258079578428, |
| "train_speed(iter/s)": 0.027833 |
| }, |
| { |
| "epoch": 0.3232270993600103, |
| "grad_norm": 1.0551875829696655, |
| "learning_rate": 4.954670388850521e-06, |
| "loss": 0.9208686828613282, |
| "memory(GiB)": 76.04, |
| "step": 1250, |
| "token_acc": 0.7600368787193027, |
| "train_speed(iter/s)": 0.027834 |
| }, |
| { |
| "epoch": 0.3245200077574504, |
| "grad_norm": 1.0360503196716309, |
| "learning_rate": 4.953992377813438e-06, |
| "loss": 0.9444967269897461, |
| "memory(GiB)": 76.04, |
| "step": 1255, |
| "token_acc": 0.8037869164814226, |
| "train_speed(iter/s)": 0.027836 |
| }, |
| { |
| "epoch": 0.32581291615489044, |
| "grad_norm": 0.9959269762039185, |
| "learning_rate": 4.953309380805897e-06, |
| "loss": 0.9297657012939453, |
| "memory(GiB)": 76.04, |
| "step": 1260, |
| "token_acc": 0.7694388999778221, |
| "train_speed(iter/s)": 0.027837 |
| }, |
| { |
| "epoch": 0.32710582455233045, |
| "grad_norm": 1.429611086845398, |
| "learning_rate": 4.952621399215598e-06, |
| "loss": 0.9631167411804199, |
| "memory(GiB)": 76.04, |
| "step": 1265, |
| "token_acc": 0.7283613171938045, |
| "train_speed(iter/s)": 0.027842 |
| }, |
| { |
| "epoch": 0.3283987329497705, |
| "grad_norm": 1.3571066856384277, |
| "learning_rate": 4.951928434440367e-06, |
| "loss": 0.9502096176147461, |
| "memory(GiB)": 76.04, |
| "step": 1270, |
| "token_acc": 0.7302693616497888, |
| "train_speed(iter/s)": 0.027844 |
| }, |
| { |
| "epoch": 0.3296916413472106, |
| "grad_norm": 3.4126250743865967, |
| "learning_rate": 4.951230487888154e-06, |
| "loss": 0.9481155395507812, |
| "memory(GiB)": 76.04, |
| "step": 1275, |
| "token_acc": 0.7534746180384426, |
| "train_speed(iter/s)": 0.027842 |
| }, |
| { |
| "epoch": 0.3309845497446506, |
| "grad_norm": 1.0404834747314453, |
| "learning_rate": 4.950527560977035e-06, |
| "loss": 0.945584487915039, |
| "memory(GiB)": 76.04, |
| "step": 1280, |
| "token_acc": 0.7738585496866607, |
| "train_speed(iter/s)": 0.027844 |
| }, |
| { |
| "epoch": 0.33227745814209064, |
| "grad_norm": 1.1452839374542236, |
| "learning_rate": 4.9498196551352e-06, |
| "loss": 0.9602731704711914, |
| "memory(GiB)": 76.04, |
| "step": 1285, |
| "token_acc": 0.722290316932236, |
| "train_speed(iter/s)": 0.027851 |
| }, |
| { |
| "epoch": 0.33357036653953065, |
| "grad_norm": 1.2483798265457153, |
| "learning_rate": 4.949106771800958e-06, |
| "loss": 0.9069469451904297, |
| "memory(GiB)": 76.04, |
| "step": 1290, |
| "token_acc": 0.7798567304608147, |
| "train_speed(iter/s)": 0.027851 |
| }, |
| { |
| "epoch": 0.3348632749369707, |
| "grad_norm": 1.1929678916931152, |
| "learning_rate": 4.94838891242273e-06, |
| "loss": 0.924495792388916, |
| "memory(GiB)": 76.04, |
| "step": 1295, |
| "token_acc": 0.7753519103705832, |
| "train_speed(iter/s)": 0.027853 |
| }, |
| { |
| "epoch": 0.3361561833344108, |
| "grad_norm": 1.1313072443008423, |
| "learning_rate": 4.947666078459049e-06, |
| "loss": 0.9245437622070313, |
| "memory(GiB)": 76.04, |
| "step": 1300, |
| "token_acc": 0.7716767637913902, |
| "train_speed(iter/s)": 0.027845 |
| }, |
| { |
| "epoch": 0.3374490917318508, |
| "grad_norm": 1.1400386095046997, |
| "learning_rate": 4.946938271378552e-06, |
| "loss": 0.9137565612792968, |
| "memory(GiB)": 76.04, |
| "step": 1305, |
| "token_acc": 0.7587449115602147, |
| "train_speed(iter/s)": 0.027844 |
| }, |
| { |
| "epoch": 0.33874200012929084, |
| "grad_norm": 1.0596169233322144, |
| "learning_rate": 4.946205492659984e-06, |
| "loss": 0.8961214065551758, |
| "memory(GiB)": 76.04, |
| "step": 1310, |
| "token_acc": 0.7542817732480172, |
| "train_speed(iter/s)": 0.027844 |
| }, |
| { |
| "epoch": 0.3400349085267309, |
| "grad_norm": 0.9843769073486328, |
| "learning_rate": 4.945467743792188e-06, |
| "loss": 0.9182037353515625, |
| "memory(GiB)": 76.04, |
| "step": 1315, |
| "token_acc": 0.766585993622988, |
| "train_speed(iter/s)": 0.027846 |
| }, |
| { |
| "epoch": 0.3413278169241709, |
| "grad_norm": 1.1926190853118896, |
| "learning_rate": 4.9447250262741085e-06, |
| "loss": 0.9283374786376953, |
| "memory(GiB)": 76.04, |
| "step": 1320, |
| "token_acc": 0.7409656847859095, |
| "train_speed(iter/s)": 0.027843 |
| }, |
| { |
| "epoch": 0.342620725321611, |
| "grad_norm": 1.0026227235794067, |
| "learning_rate": 4.943977341614782e-06, |
| "loss": 0.9378311157226562, |
| "memory(GiB)": 76.04, |
| "step": 1325, |
| "token_acc": 0.7585428321089169, |
| "train_speed(iter/s)": 0.027841 |
| }, |
| { |
| "epoch": 0.343913633719051, |
| "grad_norm": 1.1508121490478516, |
| "learning_rate": 4.943224691333339e-06, |
| "loss": 0.9445396423339844, |
| "memory(GiB)": 76.04, |
| "step": 1330, |
| "token_acc": 0.7383880704599721, |
| "train_speed(iter/s)": 0.027834 |
| }, |
| { |
| "epoch": 0.34520654211649104, |
| "grad_norm": 1.5133693218231201, |
| "learning_rate": 4.942467076958999e-06, |
| "loss": 0.8884575843811036, |
| "memory(GiB)": 76.04, |
| "step": 1335, |
| "token_acc": 0.7681140292991949, |
| "train_speed(iter/s)": 0.027831 |
| }, |
| { |
| "epoch": 0.3464994505139311, |
| "grad_norm": 1.0745313167572021, |
| "learning_rate": 4.941704500031066e-06, |
| "loss": 0.8931808471679688, |
| "memory(GiB)": 76.04, |
| "step": 1340, |
| "token_acc": 0.7731155696658266, |
| "train_speed(iter/s)": 0.027831 |
| }, |
| { |
| "epoch": 0.3477923589113711, |
| "grad_norm": 1.1880706548690796, |
| "learning_rate": 4.940936962098929e-06, |
| "loss": 0.9454404830932617, |
| "memory(GiB)": 76.04, |
| "step": 1345, |
| "token_acc": 0.7694478894923662, |
| "train_speed(iter/s)": 0.027836 |
| }, |
| { |
| "epoch": 0.3490852673088112, |
| "grad_norm": 1.056232213973999, |
| "learning_rate": 4.9401644647220545e-06, |
| "loss": 0.8956671714782715, |
| "memory(GiB)": 76.04, |
| "step": 1350, |
| "token_acc": 0.7636090870124304, |
| "train_speed(iter/s)": 0.027838 |
| }, |
| { |
| "epoch": 0.35037817570625124, |
| "grad_norm": 0.9796701073646545, |
| "learning_rate": 4.939387009469988e-06, |
| "loss": 0.9031806945800781, |
| "memory(GiB)": 76.04, |
| "step": 1355, |
| "token_acc": 0.7830841262649146, |
| "train_speed(iter/s)": 0.027838 |
| }, |
| { |
| "epoch": 0.35167108410369124, |
| "grad_norm": 1.0584688186645508, |
| "learning_rate": 4.938604597922346e-06, |
| "loss": 0.9216032981872558, |
| "memory(GiB)": 76.04, |
| "step": 1360, |
| "token_acc": 0.7680820851083322, |
| "train_speed(iter/s)": 0.027842 |
| }, |
| { |
| "epoch": 0.3529639925011313, |
| "grad_norm": 1.00162672996521, |
| "learning_rate": 4.937817231668815e-06, |
| "loss": 0.8896630287170411, |
| "memory(GiB)": 76.04, |
| "step": 1365, |
| "token_acc": 0.7780082987551867, |
| "train_speed(iter/s)": 0.027839 |
| }, |
| { |
| "epoch": 0.3542569008985713, |
| "grad_norm": 2.0998218059539795, |
| "learning_rate": 4.937024912309152e-06, |
| "loss": 0.9393485069274903, |
| "memory(GiB)": 76.04, |
| "step": 1370, |
| "token_acc": 0.7547226992625518, |
| "train_speed(iter/s)": 0.027835 |
| }, |
| { |
| "epoch": 0.3555498092960114, |
| "grad_norm": 1.2376868724822998, |
| "learning_rate": 4.936227641453172e-06, |
| "loss": 0.9312064170837402, |
| "memory(GiB)": 76.04, |
| "step": 1375, |
| "token_acc": 0.7210518525827618, |
| "train_speed(iter/s)": 0.027834 |
| }, |
| { |
| "epoch": 0.35684271769345144, |
| "grad_norm": 1.0578678846359253, |
| "learning_rate": 4.935425420720754e-06, |
| "loss": 0.9209253311157226, |
| "memory(GiB)": 76.04, |
| "step": 1380, |
| "token_acc": 0.7725167678058128, |
| "train_speed(iter/s)": 0.027834 |
| }, |
| { |
| "epoch": 0.35813562609089145, |
| "grad_norm": 1.360901951789856, |
| "learning_rate": 4.934618251741835e-06, |
| "loss": 0.9340425491333008, |
| "memory(GiB)": 76.04, |
| "step": 1385, |
| "token_acc": 0.779114302812687, |
| "train_speed(iter/s)": 0.027829 |
| }, |
| { |
| "epoch": 0.3594285344883315, |
| "grad_norm": 1.5933281183242798, |
| "learning_rate": 4.933806136156402e-06, |
| "loss": 0.8858348846435546, |
| "memory(GiB)": 76.04, |
| "step": 1390, |
| "token_acc": 0.7908785127852725, |
| "train_speed(iter/s)": 0.027822 |
| }, |
| { |
| "epoch": 0.36072144288577157, |
| "grad_norm": 0.9765493273735046, |
| "learning_rate": 4.932989075614496e-06, |
| "loss": 0.9056285858154297, |
| "memory(GiB)": 76.04, |
| "step": 1395, |
| "token_acc": 0.7752316896727017, |
| "train_speed(iter/s)": 0.027825 |
| }, |
| { |
| "epoch": 0.3620143512832116, |
| "grad_norm": 1.0190749168395996, |
| "learning_rate": 4.932167071776203e-06, |
| "loss": 0.8850472450256348, |
| "memory(GiB)": 76.04, |
| "step": 1400, |
| "token_acc": 0.7488913755232293, |
| "train_speed(iter/s)": 0.027823 |
| }, |
| { |
| "epoch": 0.36330725968065164, |
| "grad_norm": 0.9817783832550049, |
| "learning_rate": 4.931340126311652e-06, |
| "loss": 0.8637564659118653, |
| "memory(GiB)": 76.04, |
| "step": 1405, |
| "token_acc": 0.8092789765596534, |
| "train_speed(iter/s)": 0.02782 |
| }, |
| { |
| "epoch": 0.36460016807809165, |
| "grad_norm": 1.116895318031311, |
| "learning_rate": 4.930508240901015e-06, |
| "loss": 0.9004463195800781, |
| "memory(GiB)": 76.04, |
| "step": 1410, |
| "token_acc": 0.7735781849843669, |
| "train_speed(iter/s)": 0.027815 |
| }, |
| { |
| "epoch": 0.3658930764755317, |
| "grad_norm": 1.0126279592514038, |
| "learning_rate": 4.9296714172345e-06, |
| "loss": 0.9231013298034668, |
| "memory(GiB)": 76.04, |
| "step": 1415, |
| "token_acc": 0.7658435279228997, |
| "train_speed(iter/s)": 0.027811 |
| }, |
| { |
| "epoch": 0.36718598487297177, |
| "grad_norm": 0.9945583939552307, |
| "learning_rate": 4.928829657012346e-06, |
| "loss": 0.8893575668334961, |
| "memory(GiB)": 76.04, |
| "step": 1420, |
| "token_acc": 0.7532214137636681, |
| "train_speed(iter/s)": 0.027807 |
| }, |
| { |
| "epoch": 0.3684788932704118, |
| "grad_norm": 1.384425163269043, |
| "learning_rate": 4.927982961944825e-06, |
| "loss": 0.9314968109130859, |
| "memory(GiB)": 76.04, |
| "step": 1425, |
| "token_acc": 0.7419209649833468, |
| "train_speed(iter/s)": 0.027811 |
| }, |
| { |
| "epoch": 0.36977180166785184, |
| "grad_norm": 0.8520684242248535, |
| "learning_rate": 4.9271313337522346e-06, |
| "loss": 0.9494674682617188, |
| "memory(GiB)": 76.04, |
| "step": 1430, |
| "token_acc": 0.782346893817007, |
| "train_speed(iter/s)": 0.027809 |
| }, |
| { |
| "epoch": 0.37106471006529185, |
| "grad_norm": 0.9675486087799072, |
| "learning_rate": 4.926274774164893e-06, |
| "loss": 0.9049705505371094, |
| "memory(GiB)": 76.04, |
| "step": 1435, |
| "token_acc": 0.7790923317683881, |
| "train_speed(iter/s)": 0.02781 |
| }, |
| { |
| "epoch": 0.3723576184627319, |
| "grad_norm": 0.9582749009132385, |
| "learning_rate": 4.925413284923143e-06, |
| "loss": 0.8903584480285645, |
| "memory(GiB)": 76.04, |
| "step": 1440, |
| "token_acc": 0.7866145377848655, |
| "train_speed(iter/s)": 0.027812 |
| }, |
| { |
| "epoch": 0.37365052686017197, |
| "grad_norm": 1.1724884510040283, |
| "learning_rate": 4.924546867777339e-06, |
| "loss": 0.9676746368408203, |
| "memory(GiB)": 76.04, |
| "step": 1445, |
| "token_acc": 0.7439509954058193, |
| "train_speed(iter/s)": 0.027815 |
| }, |
| { |
| "epoch": 0.374943435257612, |
| "grad_norm": 1.0407586097717285, |
| "learning_rate": 4.92367552448785e-06, |
| "loss": 0.9126652717590332, |
| "memory(GiB)": 76.04, |
| "step": 1450, |
| "token_acc": 0.781545586561482, |
| "train_speed(iter/s)": 0.027815 |
| }, |
| { |
| "epoch": 0.37623634365505204, |
| "grad_norm": 1.0518836975097656, |
| "learning_rate": 4.922799256825052e-06, |
| "loss": 0.8564701080322266, |
| "memory(GiB)": 76.04, |
| "step": 1455, |
| "token_acc": 0.7408266266539354, |
| "train_speed(iter/s)": 0.027818 |
| }, |
| { |
| "epoch": 0.3775292520524921, |
| "grad_norm": 0.9088625907897949, |
| "learning_rate": 4.921918066569328e-06, |
| "loss": 0.8742757797241211, |
| "memory(GiB)": 76.04, |
| "step": 1460, |
| "token_acc": 0.7589349964020149, |
| "train_speed(iter/s)": 0.027815 |
| }, |
| { |
| "epoch": 0.3788221604499321, |
| "grad_norm": 1.0474976301193237, |
| "learning_rate": 4.921031955511061e-06, |
| "loss": 0.8954677581787109, |
| "memory(GiB)": 76.04, |
| "step": 1465, |
| "token_acc": 0.7686122547832405, |
| "train_speed(iter/s)": 0.027811 |
| }, |
| { |
| "epoch": 0.3801150688473722, |
| "grad_norm": 1.012302279472351, |
| "learning_rate": 4.920140925450634e-06, |
| "loss": 0.9504472732543945, |
| "memory(GiB)": 76.04, |
| "step": 1470, |
| "token_acc": 0.7751241428233625, |
| "train_speed(iter/s)": 0.027809 |
| }, |
| { |
| "epoch": 0.3814079772448122, |
| "grad_norm": 0.9360347390174866, |
| "learning_rate": 4.919244978198424e-06, |
| "loss": 0.8807231903076171, |
| "memory(GiB)": 76.04, |
| "step": 1475, |
| "token_acc": 0.7640212437379938, |
| "train_speed(iter/s)": 0.027808 |
| }, |
| { |
| "epoch": 0.38270088564225224, |
| "grad_norm": 1.006102442741394, |
| "learning_rate": 4.918344115574797e-06, |
| "loss": 0.9025184631347656, |
| "memory(GiB)": 76.04, |
| "step": 1480, |
| "token_acc": 0.7411331796417805, |
| "train_speed(iter/s)": 0.027809 |
| }, |
| { |
| "epoch": 0.3839937940396923, |
| "grad_norm": 0.8951787948608398, |
| "learning_rate": 4.917438339410105e-06, |
| "loss": 0.8877702713012695, |
| "memory(GiB)": 76.04, |
| "step": 1485, |
| "token_acc": 0.7563314788673918, |
| "train_speed(iter/s)": 0.027806 |
| }, |
| { |
| "epoch": 0.3852867024371323, |
| "grad_norm": 1.0641748905181885, |
| "learning_rate": 4.916527651544689e-06, |
| "loss": 0.8795459747314454, |
| "memory(GiB)": 76.04, |
| "step": 1490, |
| "token_acc": 0.8010365029292474, |
| "train_speed(iter/s)": 0.027805 |
| }, |
| { |
| "epoch": 0.3865796108345724, |
| "grad_norm": 1.0358333587646484, |
| "learning_rate": 4.915612053828862e-06, |
| "loss": 0.8692068099975586, |
| "memory(GiB)": 76.04, |
| "step": 1495, |
| "token_acc": 0.8206166847085937, |
| "train_speed(iter/s)": 0.027807 |
| }, |
| { |
| "epoch": 0.38787251923201244, |
| "grad_norm": 1.0726227760314941, |
| "learning_rate": 4.914691548122919e-06, |
| "loss": 0.8898172378540039, |
| "memory(GiB)": 76.04, |
| "step": 1500, |
| "token_acc": 0.7631806836126535, |
| "train_speed(iter/s)": 0.027806 |
| }, |
| { |
| "epoch": 0.38916542762945244, |
| "grad_norm": 1.0023351907730103, |
| "learning_rate": 4.9137661362971225e-06, |
| "loss": 0.9159588813781738, |
| "memory(GiB)": 76.04, |
| "step": 1505, |
| "token_acc": 0.7451686323194703, |
| "train_speed(iter/s)": 0.027808 |
| }, |
| { |
| "epoch": 0.3904583360268925, |
| "grad_norm": 1.1598299741744995, |
| "learning_rate": 4.912835820231705e-06, |
| "loss": 0.8731332778930664, |
| "memory(GiB)": 76.04, |
| "step": 1510, |
| "token_acc": 0.7731112837444164, |
| "train_speed(iter/s)": 0.027808 |
| }, |
| { |
| "epoch": 0.3917512444243325, |
| "grad_norm": 1.2678931951522827, |
| "learning_rate": 4.9119006018168645e-06, |
| "loss": 0.8393604278564453, |
| "memory(GiB)": 76.04, |
| "step": 1515, |
| "token_acc": 0.7939447383891828, |
| "train_speed(iter/s)": 0.027803 |
| }, |
| { |
| "epoch": 0.3930441528217726, |
| "grad_norm": 1.062391996383667, |
| "learning_rate": 4.910960482952757e-06, |
| "loss": 0.9229723930358886, |
| "memory(GiB)": 76.04, |
| "step": 1520, |
| "token_acc": 0.7733812949640287, |
| "train_speed(iter/s)": 0.027804 |
| }, |
| { |
| "epoch": 0.39433706121921264, |
| "grad_norm": 0.9789305925369263, |
| "learning_rate": 4.910015465549497e-06, |
| "loss": 0.9235004425048828, |
| "memory(GiB)": 76.04, |
| "step": 1525, |
| "token_acc": 0.7600789189591631, |
| "train_speed(iter/s)": 0.027803 |
| }, |
| { |
| "epoch": 0.39562996961665264, |
| "grad_norm": 1.0256842374801636, |
| "learning_rate": 4.909065551527151e-06, |
| "loss": 0.8544706344604492, |
| "memory(GiB)": 76.04, |
| "step": 1530, |
| "token_acc": 0.7887576797255246, |
| "train_speed(iter/s)": 0.027803 |
| }, |
| { |
| "epoch": 0.3969228780140927, |
| "grad_norm": 1.0858112573623657, |
| "learning_rate": 4.908110742815735e-06, |
| "loss": 0.8899390220642089, |
| "memory(GiB)": 76.04, |
| "step": 1535, |
| "token_acc": 0.7800430187973441, |
| "train_speed(iter/s)": 0.027803 |
| }, |
| { |
| "epoch": 0.39821578641153277, |
| "grad_norm": 0.9626816511154175, |
| "learning_rate": 4.907151041355208e-06, |
| "loss": 0.8749662399291992, |
| "memory(GiB)": 76.04, |
| "step": 1540, |
| "token_acc": 0.8031964754405699, |
| "train_speed(iter/s)": 0.027798 |
| }, |
| { |
| "epoch": 0.3995086948089728, |
| "grad_norm": 0.9466427564620972, |
| "learning_rate": 4.9061864490954725e-06, |
| "loss": 0.8291332244873046, |
| "memory(GiB)": 76.04, |
| "step": 1545, |
| "token_acc": 0.76341123125218, |
| "train_speed(iter/s)": 0.027798 |
| }, |
| { |
| "epoch": 0.40080160320641284, |
| "grad_norm": 1.047394037246704, |
| "learning_rate": 4.905216967996367e-06, |
| "loss": 0.8456403732299804, |
| "memory(GiB)": 76.04, |
| "step": 1550, |
| "token_acc": 0.7513071152534667, |
| "train_speed(iter/s)": 0.0278 |
| }, |
| { |
| "epoch": 0.40209451160385284, |
| "grad_norm": 0.9296531677246094, |
| "learning_rate": 4.904242600027662e-06, |
| "loss": 0.8476978302001953, |
| "memory(GiB)": 76.04, |
| "step": 1555, |
| "token_acc": 0.8000686931135154, |
| "train_speed(iter/s)": 0.027798 |
| }, |
| { |
| "epoch": 0.4033874200012929, |
| "grad_norm": 1.176629900932312, |
| "learning_rate": 4.903263347169058e-06, |
| "loss": 0.9175498962402344, |
| "memory(GiB)": 76.04, |
| "step": 1560, |
| "token_acc": 0.8039329091960671, |
| "train_speed(iter/s)": 0.027801 |
| }, |
| { |
| "epoch": 0.40468032839873297, |
| "grad_norm": 1.2354000806808472, |
| "learning_rate": 4.902279211410182e-06, |
| "loss": 0.9165899276733398, |
| "memory(GiB)": 76.04, |
| "step": 1565, |
| "token_acc": 0.7679446219382322, |
| "train_speed(iter/s)": 0.027801 |
| }, |
| { |
| "epoch": 0.405973236796173, |
| "grad_norm": 2.35729718208313, |
| "learning_rate": 4.901290194750579e-06, |
| "loss": 0.8489980697631836, |
| "memory(GiB)": 76.04, |
| "step": 1570, |
| "token_acc": 0.8143410397840629, |
| "train_speed(iter/s)": 0.0278 |
| }, |
| { |
| "epoch": 0.40726614519361304, |
| "grad_norm": 0.986346960067749, |
| "learning_rate": 4.900296299199714e-06, |
| "loss": 0.87310791015625, |
| "memory(GiB)": 76.04, |
| "step": 1575, |
| "token_acc": 0.7906714736367734, |
| "train_speed(iter/s)": 0.027801 |
| }, |
| { |
| "epoch": 0.4085590535910531, |
| "grad_norm": 0.9651816487312317, |
| "learning_rate": 4.899297526776962e-06, |
| "loss": 0.8573389053344727, |
| "memory(GiB)": 76.04, |
| "step": 1580, |
| "token_acc": 0.7923456022732318, |
| "train_speed(iter/s)": 0.027803 |
| }, |
| { |
| "epoch": 0.4098519619884931, |
| "grad_norm": 1.0119379758834839, |
| "learning_rate": 4.898293879511608e-06, |
| "loss": 0.8485713958740234, |
| "memory(GiB)": 76.04, |
| "step": 1585, |
| "token_acc": 0.7772183472677722, |
| "train_speed(iter/s)": 0.027803 |
| }, |
| { |
| "epoch": 0.41114487038593317, |
| "grad_norm": 0.925304651260376, |
| "learning_rate": 4.897285359442841e-06, |
| "loss": 0.891656494140625, |
| "memory(GiB)": 76.04, |
| "step": 1590, |
| "token_acc": 0.7572081654822794, |
| "train_speed(iter/s)": 0.027804 |
| }, |
| { |
| "epoch": 0.4124377787833732, |
| "grad_norm": 1.0190355777740479, |
| "learning_rate": 4.896271968619752e-06, |
| "loss": 0.8359519004821777, |
| "memory(GiB)": 76.04, |
| "step": 1595, |
| "token_acc": 0.7742064125059818, |
| "train_speed(iter/s)": 0.027803 |
| }, |
| { |
| "epoch": 0.41373068718081324, |
| "grad_norm": 4.286404132843018, |
| "learning_rate": 4.895253709101327e-06, |
| "loss": 0.865880012512207, |
| "memory(GiB)": 76.04, |
| "step": 1600, |
| "token_acc": 0.7423144213946513, |
| "train_speed(iter/s)": 0.027806 |
| }, |
| { |
| "epoch": 0.4150235955782533, |
| "grad_norm": 1.1072417497634888, |
| "learning_rate": 4.894230582956444e-06, |
| "loss": 0.8957183837890625, |
| "memory(GiB)": 76.04, |
| "step": 1605, |
| "token_acc": 0.7621177149451818, |
| "train_speed(iter/s)": 0.027807 |
| }, |
| { |
| "epoch": 0.4163165039756933, |
| "grad_norm": 1.0325218439102173, |
| "learning_rate": 4.89320259226387e-06, |
| "loss": 0.8576887130737305, |
| "memory(GiB)": 76.04, |
| "step": 1610, |
| "token_acc": 0.7561050328227571, |
| "train_speed(iter/s)": 0.027809 |
| }, |
| { |
| "epoch": 0.41760941237313337, |
| "grad_norm": 1.0209314823150635, |
| "learning_rate": 4.8921697391122555e-06, |
| "loss": 0.8784740447998047, |
| "memory(GiB)": 76.04, |
| "step": 1615, |
| "token_acc": 0.7576126674786845, |
| "train_speed(iter/s)": 0.02781 |
| }, |
| { |
| "epoch": 0.4189023207705734, |
| "grad_norm": 1.2361773252487183, |
| "learning_rate": 4.891132025600128e-06, |
| "loss": 0.8804727554321289, |
| "memory(GiB)": 76.04, |
| "step": 1620, |
| "token_acc": 0.7516129032258064, |
| "train_speed(iter/s)": 0.027805 |
| }, |
| { |
| "epoch": 0.42019522916801344, |
| "grad_norm": 1.0383259057998657, |
| "learning_rate": 4.890089453835894e-06, |
| "loss": 0.8933810234069824, |
| "memory(GiB)": 76.04, |
| "step": 1625, |
| "token_acc": 0.7678587433898001, |
| "train_speed(iter/s)": 0.027808 |
| }, |
| { |
| "epoch": 0.4214881375654535, |
| "grad_norm": 1.1485199928283691, |
| "learning_rate": 4.889042025937829e-06, |
| "loss": 0.8679392814636231, |
| "memory(GiB)": 76.04, |
| "step": 1630, |
| "token_acc": 0.7426187419768935, |
| "train_speed(iter/s)": 0.027808 |
| }, |
| { |
| "epoch": 0.4227810459628935, |
| "grad_norm": 0.9472554922103882, |
| "learning_rate": 4.887989744034074e-06, |
| "loss": 0.8719472885131836, |
| "memory(GiB)": 76.04, |
| "step": 1635, |
| "token_acc": 0.7547612635142934, |
| "train_speed(iter/s)": 0.027806 |
| }, |
| { |
| "epoch": 0.42407395436033357, |
| "grad_norm": 1.0658771991729736, |
| "learning_rate": 4.886932610262634e-06, |
| "loss": 0.8944738388061524, |
| "memory(GiB)": 76.04, |
| "step": 1640, |
| "token_acc": 0.8233047873087183, |
| "train_speed(iter/s)": 0.027805 |
| }, |
| { |
| "epoch": 0.42536686275777363, |
| "grad_norm": 0.9620645642280579, |
| "learning_rate": 4.885870626771371e-06, |
| "loss": 0.8650775909423828, |
| "memory(GiB)": 76.04, |
| "step": 1645, |
| "token_acc": 0.7568502864923129, |
| "train_speed(iter/s)": 0.027803 |
| }, |
| { |
| "epoch": 0.42665977115521364, |
| "grad_norm": 1.0661425590515137, |
| "learning_rate": 4.884803795718001e-06, |
| "loss": 0.9001960754394531, |
| "memory(GiB)": 76.04, |
| "step": 1650, |
| "token_acc": 0.7570867129358642, |
| "train_speed(iter/s)": 0.027807 |
| }, |
| { |
| "epoch": 0.4279526795526537, |
| "grad_norm": 1.1007188558578491, |
| "learning_rate": 4.88373211927009e-06, |
| "loss": 0.8699914932250976, |
| "memory(GiB)": 76.04, |
| "step": 1655, |
| "token_acc": 0.7583125246418715, |
| "train_speed(iter/s)": 0.027808 |
| }, |
| { |
| "epoch": 0.4292455879500937, |
| "grad_norm": 1.0380903482437134, |
| "learning_rate": 4.882655599605045e-06, |
| "loss": 0.8764565467834473, |
| "memory(GiB)": 76.04, |
| "step": 1660, |
| "token_acc": 0.7656633221850613, |
| "train_speed(iter/s)": 0.027808 |
| }, |
| { |
| "epoch": 0.4305384963475338, |
| "grad_norm": 1.033884882926941, |
| "learning_rate": 4.88157423891012e-06, |
| "loss": 0.8574346542358399, |
| "memory(GiB)": 76.04, |
| "step": 1665, |
| "token_acc": 0.7794178559325226, |
| "train_speed(iter/s)": 0.027804 |
| }, |
| { |
| "epoch": 0.43183140474497383, |
| "grad_norm": 0.911655068397522, |
| "learning_rate": 4.8804880393823986e-06, |
| "loss": 0.8541059494018555, |
| "memory(GiB)": 76.04, |
| "step": 1670, |
| "token_acc": 0.7727084040907204, |
| "train_speed(iter/s)": 0.027807 |
| }, |
| { |
| "epoch": 0.43312431314241384, |
| "grad_norm": 0.9688575863838196, |
| "learning_rate": 4.8793970032287985e-06, |
| "loss": 0.8185391426086426, |
| "memory(GiB)": 76.04, |
| "step": 1675, |
| "token_acc": 0.8178748580649017, |
| "train_speed(iter/s)": 0.027804 |
| }, |
| { |
| "epoch": 0.4344172215398539, |
| "grad_norm": 0.9555157423019409, |
| "learning_rate": 4.878301132666066e-06, |
| "loss": 0.8625661849975585, |
| "memory(GiB)": 76.04, |
| "step": 1680, |
| "token_acc": 0.7851330293761182, |
| "train_speed(iter/s)": 0.027804 |
| }, |
| { |
| "epoch": 0.43571012993729397, |
| "grad_norm": 1.0205975770950317, |
| "learning_rate": 4.877200429920765e-06, |
| "loss": 0.8751688003540039, |
| "memory(GiB)": 76.04, |
| "step": 1685, |
| "token_acc": 0.772077701884416, |
| "train_speed(iter/s)": 0.027807 |
| }, |
| { |
| "epoch": 0.437003038334734, |
| "grad_norm": 1.0219794511795044, |
| "learning_rate": 4.876094897229283e-06, |
| "loss": 0.810630989074707, |
| "memory(GiB)": 76.04, |
| "step": 1690, |
| "token_acc": 0.8176644891911913, |
| "train_speed(iter/s)": 0.027806 |
| }, |
| { |
| "epoch": 0.43829594673217404, |
| "grad_norm": 1.1282055377960205, |
| "learning_rate": 4.874984536837817e-06, |
| "loss": 0.8619385719299316, |
| "memory(GiB)": 76.04, |
| "step": 1695, |
| "token_acc": 0.7675262655205348, |
| "train_speed(iter/s)": 0.027807 |
| }, |
| { |
| "epoch": 0.43958885512961404, |
| "grad_norm": 0.9397704601287842, |
| "learning_rate": 4.873869351002374e-06, |
| "loss": 0.820007610321045, |
| "memory(GiB)": 76.04, |
| "step": 1700, |
| "token_acc": 0.7972864541542053, |
| "train_speed(iter/s)": 0.027803 |
| }, |
| { |
| "epoch": 0.4408817635270541, |
| "grad_norm": 1.0283387899398804, |
| "learning_rate": 4.872749341988765e-06, |
| "loss": 0.8253473281860352, |
| "memory(GiB)": 76.04, |
| "step": 1705, |
| "token_acc": 0.7752377949445584, |
| "train_speed(iter/s)": 0.027804 |
| }, |
| { |
| "epoch": 0.44217467192449417, |
| "grad_norm": 0.9191579818725586, |
| "learning_rate": 4.871624512072603e-06, |
| "loss": 0.8367796897888183, |
| "memory(GiB)": 76.04, |
| "step": 1710, |
| "token_acc": 0.7903411821239789, |
| "train_speed(iter/s)": 0.027801 |
| }, |
| { |
| "epoch": 0.4434675803219342, |
| "grad_norm": 1.2455042600631714, |
| "learning_rate": 4.870494863539291e-06, |
| "loss": 0.8392200469970703, |
| "memory(GiB)": 76.04, |
| "step": 1715, |
| "token_acc": 0.7550399545694236, |
| "train_speed(iter/s)": 0.027798 |
| }, |
| { |
| "epoch": 0.44476048871937424, |
| "grad_norm": 1.0765002965927124, |
| "learning_rate": 4.8693603986840274e-06, |
| "loss": 0.8334452629089355, |
| "memory(GiB)": 76.04, |
| "step": 1720, |
| "token_acc": 0.7612031220255092, |
| "train_speed(iter/s)": 0.027797 |
| }, |
| { |
| "epoch": 0.4460533971168143, |
| "grad_norm": 1.0502086877822876, |
| "learning_rate": 4.868221119811793e-06, |
| "loss": 0.8496732711791992, |
| "memory(GiB)": 76.04, |
| "step": 1725, |
| "token_acc": 0.7960901439044258, |
| "train_speed(iter/s)": 0.027801 |
| }, |
| { |
| "epoch": 0.4473463055142543, |
| "grad_norm": 0.883604884147644, |
| "learning_rate": 4.867077029237352e-06, |
| "loss": 0.817476749420166, |
| "memory(GiB)": 76.04, |
| "step": 1730, |
| "token_acc": 0.746853904492041, |
| "train_speed(iter/s)": 0.027799 |
| }, |
| { |
| "epoch": 0.44863921391169437, |
| "grad_norm": 1.0631402730941772, |
| "learning_rate": 4.865928129285242e-06, |
| "loss": 0.8631902694702148, |
| "memory(GiB)": 76.04, |
| "step": 1735, |
| "token_acc": 0.7656874459231181, |
| "train_speed(iter/s)": 0.0278 |
| }, |
| { |
| "epoch": 0.4499321223091344, |
| "grad_norm": 1.0118037462234497, |
| "learning_rate": 4.864774422289776e-06, |
| "loss": 0.8337348937988281, |
| "memory(GiB)": 76.04, |
| "step": 1740, |
| "token_acc": 0.7787095835959087, |
| "train_speed(iter/s)": 0.027798 |
| }, |
| { |
| "epoch": 0.45122503070657444, |
| "grad_norm": 1.099346399307251, |
| "learning_rate": 4.863615910595031e-06, |
| "loss": 0.8567562103271484, |
| "memory(GiB)": 76.04, |
| "step": 1745, |
| "token_acc": 0.8049837122611412, |
| "train_speed(iter/s)": 0.027797 |
| }, |
| { |
| "epoch": 0.4525179391040145, |
| "grad_norm": 0.9110936522483826, |
| "learning_rate": 4.8624525965548456e-06, |
| "loss": 0.858333683013916, |
| "memory(GiB)": 76.04, |
| "step": 1750, |
| "token_acc": 0.7628092095319663, |
| "train_speed(iter/s)": 0.0278 |
| }, |
| { |
| "epoch": 0.4538108475014545, |
| "grad_norm": 1.1097652912139893, |
| "learning_rate": 4.861284482532819e-06, |
| "loss": 0.8601787567138672, |
| "memory(GiB)": 76.04, |
| "step": 1755, |
| "token_acc": 0.7758728179551122, |
| "train_speed(iter/s)": 0.027803 |
| }, |
| { |
| "epoch": 0.45510375589889457, |
| "grad_norm": 0.9955366253852844, |
| "learning_rate": 4.860111570902298e-06, |
| "loss": 0.8417009353637696, |
| "memory(GiB)": 76.04, |
| "step": 1760, |
| "token_acc": 0.7899390978219729, |
| "train_speed(iter/s)": 0.027798 |
| }, |
| { |
| "epoch": 0.45639666429633463, |
| "grad_norm": 0.9379090666770935, |
| "learning_rate": 4.858933864046384e-06, |
| "loss": 0.8158811569213867, |
| "memory(GiB)": 76.04, |
| "step": 1765, |
| "token_acc": 0.7703970866307165, |
| "train_speed(iter/s)": 0.027797 |
| }, |
| { |
| "epoch": 0.45768957269377464, |
| "grad_norm": 0.9244673252105713, |
| "learning_rate": 4.857751364357913e-06, |
| "loss": 0.8572831153869629, |
| "memory(GiB)": 76.04, |
| "step": 1770, |
| "token_acc": 0.7679937895087058, |
| "train_speed(iter/s)": 0.027796 |
| }, |
| { |
| "epoch": 0.4589824810912147, |
| "grad_norm": 0.8768739104270935, |
| "learning_rate": 4.856564074239467e-06, |
| "loss": 0.8114492416381835, |
| "memory(GiB)": 76.04, |
| "step": 1775, |
| "token_acc": 0.7533039647577092, |
| "train_speed(iter/s)": 0.027797 |
| }, |
| { |
| "epoch": 0.4602753894886547, |
| "grad_norm": 1.0087144374847412, |
| "learning_rate": 4.855371996103354e-06, |
| "loss": 0.8448333740234375, |
| "memory(GiB)": 76.04, |
| "step": 1780, |
| "token_acc": 0.7925396227993142, |
| "train_speed(iter/s)": 0.027798 |
| }, |
| { |
| "epoch": 0.46156829788609477, |
| "grad_norm": 0.9475561380386353, |
| "learning_rate": 4.854175132371615e-06, |
| "loss": 0.8426584243774414, |
| "memory(GiB)": 76.04, |
| "step": 1785, |
| "token_acc": 0.7877760352646972, |
| "train_speed(iter/s)": 0.027799 |
| }, |
| { |
| "epoch": 0.46286120628353483, |
| "grad_norm": 0.8809593915939331, |
| "learning_rate": 4.852973485476014e-06, |
| "loss": 0.8447649002075195, |
| "memory(GiB)": 76.04, |
| "step": 1790, |
| "token_acc": 0.7644977511244377, |
| "train_speed(iter/s)": 0.027801 |
| }, |
| { |
| "epoch": 0.46415411468097484, |
| "grad_norm": 0.9489724636077881, |
| "learning_rate": 4.85176705785803e-06, |
| "loss": 0.8333120346069336, |
| "memory(GiB)": 76.04, |
| "step": 1795, |
| "token_acc": 0.8094422805290417, |
| "train_speed(iter/s)": 0.027801 |
| }, |
| { |
| "epoch": 0.4654470230784149, |
| "grad_norm": 1.0435246229171753, |
| "learning_rate": 4.850555851968858e-06, |
| "loss": 0.8334157943725586, |
| "memory(GiB)": 76.04, |
| "step": 1800, |
| "token_acc": 0.7686591887926546, |
| "train_speed(iter/s)": 0.027804 |
| }, |
| { |
| "epoch": 0.46673993147585496, |
| "grad_norm": 1.1650222539901733, |
| "learning_rate": 4.849339870269401e-06, |
| "loss": 0.9079343795776367, |
| "memory(GiB)": 76.04, |
| "step": 1805, |
| "token_acc": 0.7502884738664045, |
| "train_speed(iter/s)": 0.027805 |
| }, |
| { |
| "epoch": 0.46803283987329497, |
| "grad_norm": 0.9376285076141357, |
| "learning_rate": 4.848119115230264e-06, |
| "loss": 0.8245293617248535, |
| "memory(GiB)": 76.04, |
| "step": 1810, |
| "token_acc": 0.7958735551228404, |
| "train_speed(iter/s)": 0.027805 |
| }, |
| { |
| "epoch": 0.46932574827073503, |
| "grad_norm": 0.9769212603569031, |
| "learning_rate": 4.8468935893317545e-06, |
| "loss": 0.8638315200805664, |
| "memory(GiB)": 76.04, |
| "step": 1815, |
| "token_acc": 0.7886973180076629, |
| "train_speed(iter/s)": 0.027805 |
| }, |
| { |
| "epoch": 0.47061865666817504, |
| "grad_norm": 0.9659924507141113, |
| "learning_rate": 4.8456632950638675e-06, |
| "loss": 0.8185907363891601, |
| "memory(GiB)": 76.04, |
| "step": 1820, |
| "token_acc": 0.7737749169435216, |
| "train_speed(iter/s)": 0.027804 |
| }, |
| { |
| "epoch": 0.4719115650656151, |
| "grad_norm": 0.9423291683197021, |
| "learning_rate": 4.844428234926291e-06, |
| "loss": 0.8167947769165039, |
| "memory(GiB)": 76.04, |
| "step": 1825, |
| "token_acc": 0.7969283276450512, |
| "train_speed(iter/s)": 0.027801 |
| }, |
| { |
| "epoch": 0.47320447346305516, |
| "grad_norm": 0.9784870147705078, |
| "learning_rate": 4.843188411428394e-06, |
| "loss": 0.838237190246582, |
| "memory(GiB)": 76.04, |
| "step": 1830, |
| "token_acc": 0.8140107775211701, |
| "train_speed(iter/s)": 0.027801 |
| }, |
| { |
| "epoch": 0.47449738186049517, |
| "grad_norm": 2.1008307933807373, |
| "learning_rate": 4.841943827089223e-06, |
| "loss": 0.8713891983032227, |
| "memory(GiB)": 76.04, |
| "step": 1835, |
| "token_acc": 0.7783555923255723, |
| "train_speed(iter/s)": 0.027803 |
| }, |
| { |
| "epoch": 0.47579029025793523, |
| "grad_norm": 1.0217597484588623, |
| "learning_rate": 4.840694484437499e-06, |
| "loss": 0.8342850685119629, |
| "memory(GiB)": 76.04, |
| "step": 1840, |
| "token_acc": 0.7716500553709856, |
| "train_speed(iter/s)": 0.027805 |
| }, |
| { |
| "epoch": 0.47708319865537524, |
| "grad_norm": 0.935716986656189, |
| "learning_rate": 4.8394403860116115e-06, |
| "loss": 0.8083118438720703, |
| "memory(GiB)": 76.04, |
| "step": 1845, |
| "token_acc": 0.7871954487364472, |
| "train_speed(iter/s)": 0.027805 |
| }, |
| { |
| "epoch": 0.4783761070528153, |
| "grad_norm": 0.906399667263031, |
| "learning_rate": 4.83818153435961e-06, |
| "loss": 0.8438366889953614, |
| "memory(GiB)": 76.04, |
| "step": 1850, |
| "token_acc": 0.7733610953372453, |
| "train_speed(iter/s)": 0.027803 |
| }, |
| { |
| "epoch": 0.47966901545025536, |
| "grad_norm": 1.9505832195281982, |
| "learning_rate": 4.836917932039204e-06, |
| "loss": 0.8615127563476562, |
| "memory(GiB)": 76.04, |
| "step": 1855, |
| "token_acc": 0.7813404825737266, |
| "train_speed(iter/s)": 0.027804 |
| }, |
| { |
| "epoch": 0.48096192384769537, |
| "grad_norm": 1.1248425245285034, |
| "learning_rate": 4.835649581617753e-06, |
| "loss": 0.8535722732543946, |
| "memory(GiB)": 76.04, |
| "step": 1860, |
| "token_acc": 0.7716059271125351, |
| "train_speed(iter/s)": 0.027805 |
| }, |
| { |
| "epoch": 0.48225483224513543, |
| "grad_norm": 0.9488275647163391, |
| "learning_rate": 4.834376485672266e-06, |
| "loss": 0.8235734939575196, |
| "memory(GiB)": 76.04, |
| "step": 1865, |
| "token_acc": 0.772141609970498, |
| "train_speed(iter/s)": 0.027805 |
| }, |
| { |
| "epoch": 0.4835477406425755, |
| "grad_norm": 0.9314141273498535, |
| "learning_rate": 4.833098646789393e-06, |
| "loss": 0.825401496887207, |
| "memory(GiB)": 76.04, |
| "step": 1870, |
| "token_acc": 0.802578972013111, |
| "train_speed(iter/s)": 0.027804 |
| }, |
| { |
| "epoch": 0.4848406490400155, |
| "grad_norm": 1.1258958578109741, |
| "learning_rate": 4.831816067565419e-06, |
| "loss": 0.8634084701538086, |
| "memory(GiB)": 76.04, |
| "step": 1875, |
| "token_acc": 0.7933092156789617, |
| "train_speed(iter/s)": 0.027807 |
| }, |
| { |
| "epoch": 0.48613355743745557, |
| "grad_norm": 0.8910898566246033, |
| "learning_rate": 4.830528750606263e-06, |
| "loss": 0.8147882461547852, |
| "memory(GiB)": 76.04, |
| "step": 1880, |
| "token_acc": 0.812186275932105, |
| "train_speed(iter/s)": 0.027803 |
| }, |
| { |
| "epoch": 0.4874264658348956, |
| "grad_norm": 1.1478573083877563, |
| "learning_rate": 4.829236698527469e-06, |
| "loss": 0.8461570739746094, |
| "memory(GiB)": 76.04, |
| "step": 1885, |
| "token_acc": 0.7892949615858058, |
| "train_speed(iter/s)": 0.027802 |
| }, |
| { |
| "epoch": 0.48871937423233563, |
| "grad_norm": 0.9229076504707336, |
| "learning_rate": 4.827939913954199e-06, |
| "loss": 0.8387362480163574, |
| "memory(GiB)": 76.04, |
| "step": 1890, |
| "token_acc": 0.8044181034482759, |
| "train_speed(iter/s)": 0.027803 |
| }, |
| { |
| "epoch": 0.4900122826297757, |
| "grad_norm": 1.209421992301941, |
| "learning_rate": 4.826638399521235e-06, |
| "loss": 0.8628839492797852, |
| "memory(GiB)": 76.04, |
| "step": 1895, |
| "token_acc": 0.7861487236403996, |
| "train_speed(iter/s)": 0.027804 |
| }, |
| { |
| "epoch": 0.4913051910272157, |
| "grad_norm": 0.9494127631187439, |
| "learning_rate": 4.825332157872966e-06, |
| "loss": 0.8163295745849609, |
| "memory(GiB)": 76.04, |
| "step": 1900, |
| "token_acc": 0.7902574714203331, |
| "train_speed(iter/s)": 0.027806 |
| }, |
| { |
| "epoch": 0.49259809942465577, |
| "grad_norm": 1.0148690938949585, |
| "learning_rate": 4.824021191663387e-06, |
| "loss": 0.8092700004577636, |
| "memory(GiB)": 76.04, |
| "step": 1905, |
| "token_acc": 0.7959501969388564, |
| "train_speed(iter/s)": 0.027805 |
| }, |
| { |
| "epoch": 0.49389100782209583, |
| "grad_norm": 1.064133644104004, |
| "learning_rate": 4.822705503556092e-06, |
| "loss": 0.8303569793701172, |
| "memory(GiB)": 76.04, |
| "step": 1910, |
| "token_acc": 0.8055975400010423, |
| "train_speed(iter/s)": 0.027802 |
| }, |
| { |
| "epoch": 0.49518391621953584, |
| "grad_norm": 1.0159554481506348, |
| "learning_rate": 4.821385096224268e-06, |
| "loss": 0.8641040802001954, |
| "memory(GiB)": 76.04, |
| "step": 1915, |
| "token_acc": 0.7566073149698169, |
| "train_speed(iter/s)": 0.027801 |
| }, |
| { |
| "epoch": 0.4964768246169759, |
| "grad_norm": 1.0026555061340332, |
| "learning_rate": 4.820059972350691e-06, |
| "loss": 0.8560010910034179, |
| "memory(GiB)": 76.04, |
| "step": 1920, |
| "token_acc": 0.7960721112402084, |
| "train_speed(iter/s)": 0.027801 |
| }, |
| { |
| "epoch": 0.4977697330144159, |
| "grad_norm": 0.8519271016120911, |
| "learning_rate": 4.81873013462772e-06, |
| "loss": 0.8016116142272949, |
| "memory(GiB)": 76.04, |
| "step": 1925, |
| "token_acc": 0.7795004600129485, |
| "train_speed(iter/s)": 0.027796 |
| }, |
| { |
| "epoch": 0.49906264141185597, |
| "grad_norm": 0.9784479141235352, |
| "learning_rate": 4.8173955857572926e-06, |
| "loss": 0.8383674621582031, |
| "memory(GiB)": 76.04, |
| "step": 1930, |
| "token_acc": 0.7987399059366403, |
| "train_speed(iter/s)": 0.027796 |
| }, |
| { |
| "epoch": 0.500355549809296, |
| "grad_norm": 0.958125650882721, |
| "learning_rate": 4.816056328450916e-06, |
| "loss": 0.8211706161499024, |
| "memory(GiB)": 76.04, |
| "step": 1935, |
| "token_acc": 0.8177427201334353, |
| "train_speed(iter/s)": 0.02779 |
| }, |
| { |
| "epoch": 0.5016484582067361, |
| "grad_norm": 1.0574296712875366, |
| "learning_rate": 4.814712365429665e-06, |
| "loss": 0.8358111381530762, |
| "memory(GiB)": 76.04, |
| "step": 1940, |
| "token_acc": 0.773138165533256, |
| "train_speed(iter/s)": 0.02779 |
| }, |
| { |
| "epoch": 0.5029413666041761, |
| "grad_norm": 1.152383804321289, |
| "learning_rate": 4.813363699424176e-06, |
| "loss": 0.8466585159301758, |
| "memory(GiB)": 76.04, |
| "step": 1945, |
| "token_acc": 0.7601995890813033, |
| "train_speed(iter/s)": 0.027789 |
| }, |
| { |
| "epoch": 0.5042342750016161, |
| "grad_norm": 0.9282750487327576, |
| "learning_rate": 4.812010333174642e-06, |
| "loss": 0.821980094909668, |
| "memory(GiB)": 76.04, |
| "step": 1950, |
| "token_acc": 0.7666437886067261, |
| "train_speed(iter/s)": 0.027792 |
| }, |
| { |
| "epoch": 0.5055271833990562, |
| "grad_norm": 1.0676088333129883, |
| "learning_rate": 4.8106522694308026e-06, |
| "loss": 0.8337220191955567, |
| "memory(GiB)": 76.04, |
| "step": 1955, |
| "token_acc": 0.7935955447267664, |
| "train_speed(iter/s)": 0.027792 |
| }, |
| { |
| "epoch": 0.5068200917964962, |
| "grad_norm": 1.028906226158142, |
| "learning_rate": 4.809289510951943e-06, |
| "loss": 0.8194513320922852, |
| "memory(GiB)": 76.04, |
| "step": 1960, |
| "token_acc": 0.7874173098125689, |
| "train_speed(iter/s)": 0.027794 |
| }, |
| { |
| "epoch": 0.5081130001939362, |
| "grad_norm": 1.000504732131958, |
| "learning_rate": 4.807922060506889e-06, |
| "loss": 0.8190900802612304, |
| "memory(GiB)": 76.04, |
| "step": 1965, |
| "token_acc": 0.7874103263615237, |
| "train_speed(iter/s)": 0.027795 |
| }, |
| { |
| "epoch": 0.5094059085913764, |
| "grad_norm": 0.9075422883033752, |
| "learning_rate": 4.806549920873996e-06, |
| "loss": 0.797203254699707, |
| "memory(GiB)": 76.04, |
| "step": 1970, |
| "token_acc": 0.7732008028290165, |
| "train_speed(iter/s)": 0.027794 |
| }, |
| { |
| "epoch": 0.5106988169888164, |
| "grad_norm": 1.4181991815567017, |
| "learning_rate": 4.8051730948411505e-06, |
| "loss": 0.795828914642334, |
| "memory(GiB)": 76.04, |
| "step": 1975, |
| "token_acc": 0.8003244957409934, |
| "train_speed(iter/s)": 0.027794 |
| }, |
| { |
| "epoch": 0.5119917253862564, |
| "grad_norm": 1.0753087997436523, |
| "learning_rate": 4.803791585205757e-06, |
| "loss": 0.8330776214599609, |
| "memory(GiB)": 76.04, |
| "step": 1980, |
| "token_acc": 0.7414700390426073, |
| "train_speed(iter/s)": 0.027795 |
| }, |
| { |
| "epoch": 0.5132846337836964, |
| "grad_norm": 1.0679435729980469, |
| "learning_rate": 4.802405394774739e-06, |
| "loss": 0.8332581520080566, |
| "memory(GiB)": 76.04, |
| "step": 1985, |
| "token_acc": 0.7553387146214366, |
| "train_speed(iter/s)": 0.027794 |
| }, |
| { |
| "epoch": 0.5145775421811365, |
| "grad_norm": 0.8826218843460083, |
| "learning_rate": 4.801014526364531e-06, |
| "loss": 0.7712962627410889, |
| "memory(GiB)": 76.04, |
| "step": 1990, |
| "token_acc": 0.7524300269352383, |
| "train_speed(iter/s)": 0.027795 |
| }, |
| { |
| "epoch": 0.5158704505785765, |
| "grad_norm": 3.6322293281555176, |
| "learning_rate": 4.799618982801066e-06, |
| "loss": 0.8304604530334473, |
| "memory(GiB)": 76.04, |
| "step": 1995, |
| "token_acc": 0.8159029172079839, |
| "train_speed(iter/s)": 0.027797 |
| }, |
| { |
| "epoch": 0.5171633589760165, |
| "grad_norm": 1.0873634815216064, |
| "learning_rate": 4.798218766919784e-06, |
| "loss": 0.8011078834533691, |
| "memory(GiB)": 76.04, |
| "step": 2000, |
| "token_acc": 0.7605788670946689, |
| "train_speed(iter/s)": 0.027796 |
| }, |
| { |
| "epoch": 0.5184562673734566, |
| "grad_norm": 0.9646498560905457, |
| "learning_rate": 4.796813881565614e-06, |
| "loss": 0.7656961441040039, |
| "memory(GiB)": 76.04, |
| "step": 2005, |
| "token_acc": 0.8116391078933645, |
| "train_speed(iter/s)": 0.027724 |
| }, |
| { |
| "epoch": 0.5197491757708966, |
| "grad_norm": 0.9246786832809448, |
| "learning_rate": 4.795404329592971e-06, |
| "loss": 0.7999061107635498, |
| "memory(GiB)": 76.04, |
| "step": 2010, |
| "token_acc": 0.8214931011826544, |
| "train_speed(iter/s)": 0.027723 |
| }, |
| { |
| "epoch": 0.5210420841683366, |
| "grad_norm": 0.9651414155960083, |
| "learning_rate": 4.793990113865754e-06, |
| "loss": 0.8470598220825195, |
| "memory(GiB)": 76.04, |
| "step": 2015, |
| "token_acc": 0.7925045299862995, |
| "train_speed(iter/s)": 0.027723 |
| }, |
| { |
| "epoch": 0.5223349925657768, |
| "grad_norm": 0.9942532777786255, |
| "learning_rate": 4.792571237257338e-06, |
| "loss": 0.8307376861572265, |
| "memory(GiB)": 76.04, |
| "step": 2020, |
| "token_acc": 0.760911584985659, |
| "train_speed(iter/s)": 0.027724 |
| }, |
| { |
| "epoch": 0.5236279009632168, |
| "grad_norm": 0.9892558455467224, |
| "learning_rate": 4.7911477026505656e-06, |
| "loss": 0.8515020370483398, |
| "memory(GiB)": 76.04, |
| "step": 2025, |
| "token_acc": 0.7540479906359735, |
| "train_speed(iter/s)": 0.027722 |
| }, |
| { |
| "epoch": 0.5249208093606568, |
| "grad_norm": 1.0707569122314453, |
| "learning_rate": 4.789719512937745e-06, |
| "loss": 0.8141921997070313, |
| "memory(GiB)": 76.04, |
| "step": 2030, |
| "token_acc": 0.7763675366464069, |
| "train_speed(iter/s)": 0.027721 |
| }, |
| { |
| "epoch": 0.5262137177580969, |
| "grad_norm": 0.9917581677436829, |
| "learning_rate": 4.788286671020642e-06, |
| "loss": 0.8206811904907226, |
| "memory(GiB)": 76.04, |
| "step": 2035, |
| "token_acc": 0.7850752688172044, |
| "train_speed(iter/s)": 0.027723 |
| }, |
| { |
| "epoch": 0.5275066261555369, |
| "grad_norm": 0.9246799945831299, |
| "learning_rate": 4.786849179810475e-06, |
| "loss": 0.7965336799621582, |
| "memory(GiB)": 76.04, |
| "step": 2040, |
| "token_acc": 0.7653000594177065, |
| "train_speed(iter/s)": 0.027725 |
| }, |
| { |
| "epoch": 0.5287995345529769, |
| "grad_norm": 1.071942925453186, |
| "learning_rate": 4.78540704222791e-06, |
| "loss": 0.8213727951049805, |
| "memory(GiB)": 76.04, |
| "step": 2045, |
| "token_acc": 0.8214273371349576, |
| "train_speed(iter/s)": 0.027722 |
| }, |
| { |
| "epoch": 0.5300924429504169, |
| "grad_norm": 1.2201160192489624, |
| "learning_rate": 4.783960261203051e-06, |
| "loss": 0.8097395896911621, |
| "memory(GiB)": 76.04, |
| "step": 2050, |
| "token_acc": 0.7943978387601308, |
| "train_speed(iter/s)": 0.02772 |
| }, |
| { |
| "epoch": 0.531385351347857, |
| "grad_norm": 0.9751284122467041, |
| "learning_rate": 4.782508839675436e-06, |
| "loss": 0.8254419326782226, |
| "memory(GiB)": 76.04, |
| "step": 2055, |
| "token_acc": 0.7764489832482308, |
| "train_speed(iter/s)": 0.027722 |
| }, |
| { |
| "epoch": 0.532678259745297, |
| "grad_norm": 1.0070680379867554, |
| "learning_rate": 4.7810527805940344e-06, |
| "loss": 0.8492563247680665, |
| "memory(GiB)": 76.04, |
| "step": 2060, |
| "token_acc": 0.7705357535270074, |
| "train_speed(iter/s)": 0.027723 |
| }, |
| { |
| "epoch": 0.533971168142737, |
| "grad_norm": 0.8822097182273865, |
| "learning_rate": 4.779592086917238e-06, |
| "loss": 0.7865631580352783, |
| "memory(GiB)": 76.04, |
| "step": 2065, |
| "token_acc": 0.799327011318446, |
| "train_speed(iter/s)": 0.027724 |
| }, |
| { |
| "epoch": 0.5352640765401772, |
| "grad_norm": 1.0193886756896973, |
| "learning_rate": 4.77812676161285e-06, |
| "loss": 0.8170513153076172, |
| "memory(GiB)": 76.04, |
| "step": 2070, |
| "token_acc": 0.7710854546297584, |
| "train_speed(iter/s)": 0.027726 |
| }, |
| { |
| "epoch": 0.5365569849376172, |
| "grad_norm": 0.9742515683174133, |
| "learning_rate": 4.776656807658091e-06, |
| "loss": 0.844205379486084, |
| "memory(GiB)": 76.04, |
| "step": 2075, |
| "token_acc": 0.7571799189841154, |
| "train_speed(iter/s)": 0.027726 |
| }, |
| { |
| "epoch": 0.5378498933350572, |
| "grad_norm": 1.2338061332702637, |
| "learning_rate": 4.775182228039582e-06, |
| "loss": 0.8240803718566895, |
| "memory(GiB)": 76.04, |
| "step": 2080, |
| "token_acc": 0.7739548334963637, |
| "train_speed(iter/s)": 0.027728 |
| }, |
| { |
| "epoch": 0.5391428017324973, |
| "grad_norm": 1.135621428489685, |
| "learning_rate": 4.773703025753343e-06, |
| "loss": 0.7704273700714112, |
| "memory(GiB)": 76.04, |
| "step": 2085, |
| "token_acc": 0.8158826332629859, |
| "train_speed(iter/s)": 0.02773 |
| }, |
| { |
| "epoch": 0.5404357101299373, |
| "grad_norm": 0.9862043261528015, |
| "learning_rate": 4.772219203804785e-06, |
| "loss": 0.8293350219726563, |
| "memory(GiB)": 76.04, |
| "step": 2090, |
| "token_acc": 0.7778981581798483, |
| "train_speed(iter/s)": 0.027731 |
| }, |
| { |
| "epoch": 0.5417286185273773, |
| "grad_norm": 1.0542078018188477, |
| "learning_rate": 4.770730765208708e-06, |
| "loss": 0.8214458465576172, |
| "memory(GiB)": 76.04, |
| "step": 2095, |
| "token_acc": 0.8010532239909953, |
| "train_speed(iter/s)": 0.027732 |
| }, |
| { |
| "epoch": 0.5430215269248174, |
| "grad_norm": 1.3685965538024902, |
| "learning_rate": 4.76923771298929e-06, |
| "loss": 0.7963518142700196, |
| "memory(GiB)": 76.04, |
| "step": 2100, |
| "token_acc": 0.7876639186707104, |
| "train_speed(iter/s)": 0.027734 |
| }, |
| { |
| "epoch": 0.5443144353222574, |
| "grad_norm": 0.9173294901847839, |
| "learning_rate": 4.767740050180083e-06, |
| "loss": 0.797146987915039, |
| "memory(GiB)": 76.04, |
| "step": 2105, |
| "token_acc": 0.8026390843061946, |
| "train_speed(iter/s)": 0.027727 |
| }, |
| { |
| "epoch": 0.5456073437196974, |
| "grad_norm": 1.0344001054763794, |
| "learning_rate": 4.766237779824008e-06, |
| "loss": 0.8145599365234375, |
| "memory(GiB)": 76.04, |
| "step": 2110, |
| "token_acc": 0.8000528162372204, |
| "train_speed(iter/s)": 0.027726 |
| }, |
| { |
| "epoch": 0.5469002521171376, |
| "grad_norm": 0.9387233257293701, |
| "learning_rate": 4.764730904973345e-06, |
| "loss": 0.8474384307861328, |
| "memory(GiB)": 76.04, |
| "step": 2115, |
| "token_acc": 0.7702894841608372, |
| "train_speed(iter/s)": 0.027726 |
| }, |
| { |
| "epoch": 0.5481931605145776, |
| "grad_norm": 0.8692566156387329, |
| "learning_rate": 4.7632194286897315e-06, |
| "loss": 0.8177039146423339, |
| "memory(GiB)": 76.04, |
| "step": 2120, |
| "token_acc": 0.8068763457940626, |
| "train_speed(iter/s)": 0.027729 |
| }, |
| { |
| "epoch": 0.5494860689120176, |
| "grad_norm": 1.0659557580947876, |
| "learning_rate": 4.761703354044155e-06, |
| "loss": 0.7883958339691162, |
| "memory(GiB)": 76.04, |
| "step": 2125, |
| "token_acc": 0.800734618916437, |
| "train_speed(iter/s)": 0.027731 |
| }, |
| { |
| "epoch": 0.5507789773094576, |
| "grad_norm": 0.9900258779525757, |
| "learning_rate": 4.760182684116942e-06, |
| "loss": 0.8056777954101563, |
| "memory(GiB)": 76.04, |
| "step": 2130, |
| "token_acc": 0.7733108386141059, |
| "train_speed(iter/s)": 0.027731 |
| }, |
| { |
| "epoch": 0.5520718857068977, |
| "grad_norm": 1.03944993019104, |
| "learning_rate": 4.7586574219977585e-06, |
| "loss": 0.8212559700012207, |
| "memory(GiB)": 76.04, |
| "step": 2135, |
| "token_acc": 0.7548755884330868, |
| "train_speed(iter/s)": 0.02773 |
| }, |
| { |
| "epoch": 0.5533647941043377, |
| "grad_norm": 0.9362234473228455, |
| "learning_rate": 4.7571275707856e-06, |
| "loss": 0.798857307434082, |
| "memory(GiB)": 76.04, |
| "step": 2140, |
| "token_acc": 0.8130052348563085, |
| "train_speed(iter/s)": 0.027732 |
| }, |
| { |
| "epoch": 0.5546577025017777, |
| "grad_norm": 1.0358259677886963, |
| "learning_rate": 4.755593133588788e-06, |
| "loss": 0.8120311737060547, |
| "memory(GiB)": 76.04, |
| "step": 2145, |
| "token_acc": 0.8000494239026349, |
| "train_speed(iter/s)": 0.027731 |
| }, |
| { |
| "epoch": 0.5559506108992178, |
| "grad_norm": 1.1722190380096436, |
| "learning_rate": 4.754054113524959e-06, |
| "loss": 0.8086760520935059, |
| "memory(GiB)": 76.04, |
| "step": 2150, |
| "token_acc": 0.8190579981609508, |
| "train_speed(iter/s)": 0.027731 |
| }, |
| { |
| "epoch": 0.5572435192966578, |
| "grad_norm": 0.9975719451904297, |
| "learning_rate": 4.752510513721061e-06, |
| "loss": 0.8197290420532226, |
| "memory(GiB)": 76.04, |
| "step": 2155, |
| "token_acc": 0.7630993323892373, |
| "train_speed(iter/s)": 0.027732 |
| }, |
| { |
| "epoch": 0.5585364276940978, |
| "grad_norm": 1.0064895153045654, |
| "learning_rate": 4.750962337313347e-06, |
| "loss": 0.8426996231079101, |
| "memory(GiB)": 76.04, |
| "step": 2160, |
| "token_acc": 0.7553154809791978, |
| "train_speed(iter/s)": 0.02773 |
| }, |
| { |
| "epoch": 0.559829336091538, |
| "grad_norm": 1.056726336479187, |
| "learning_rate": 4.749409587447372e-06, |
| "loss": 0.8352632522583008, |
| "memory(GiB)": 76.04, |
| "step": 2165, |
| "token_acc": 0.8019056825243389, |
| "train_speed(iter/s)": 0.027734 |
| }, |
| { |
| "epoch": 0.561122244488978, |
| "grad_norm": 0.9361665844917297, |
| "learning_rate": 4.747852267277981e-06, |
| "loss": 0.765074634552002, |
| "memory(GiB)": 76.04, |
| "step": 2170, |
| "token_acc": 0.7859190721313611, |
| "train_speed(iter/s)": 0.027733 |
| }, |
| { |
| "epoch": 0.562415152886418, |
| "grad_norm": 1.1270101070404053, |
| "learning_rate": 4.746290379969301e-06, |
| "loss": 0.8160411834716796, |
| "memory(GiB)": 76.04, |
| "step": 2175, |
| "token_acc": 0.7946054543900145, |
| "train_speed(iter/s)": 0.027735 |
| }, |
| { |
| "epoch": 0.5637080612838581, |
| "grad_norm": 0.957750678062439, |
| "learning_rate": 4.744723928694745e-06, |
| "loss": 0.8085262298583984, |
| "memory(GiB)": 76.04, |
| "step": 2180, |
| "token_acc": 0.7642607683352736, |
| "train_speed(iter/s)": 0.027735 |
| }, |
| { |
| "epoch": 0.5650009696812981, |
| "grad_norm": 1.0245423316955566, |
| "learning_rate": 4.743152916636995e-06, |
| "loss": 0.793109130859375, |
| "memory(GiB)": 76.04, |
| "step": 2185, |
| "token_acc": 0.7618901098901099, |
| "train_speed(iter/s)": 0.027736 |
| }, |
| { |
| "epoch": 0.5662938780787381, |
| "grad_norm": 1.0268268585205078, |
| "learning_rate": 4.7415773469880015e-06, |
| "loss": 0.8279844284057617, |
| "memory(GiB)": 76.04, |
| "step": 2190, |
| "token_acc": 0.7590428234859334, |
| "train_speed(iter/s)": 0.027737 |
| }, |
| { |
| "epoch": 0.5675867864761782, |
| "grad_norm": 0.9654160737991333, |
| "learning_rate": 4.739997222948972e-06, |
| "loss": 0.8115758895874023, |
| "memory(GiB)": 76.04, |
| "step": 2195, |
| "token_acc": 0.824989417785816, |
| "train_speed(iter/s)": 0.027738 |
| }, |
| { |
| "epoch": 0.5688796948736182, |
| "grad_norm": 0.9180038571357727, |
| "learning_rate": 4.738412547730371e-06, |
| "loss": 0.7820042133331299, |
| "memory(GiB)": 76.04, |
| "step": 2200, |
| "token_acc": 0.7811721577290032, |
| "train_speed(iter/s)": 0.027738 |
| }, |
| { |
| "epoch": 0.5701726032710582, |
| "grad_norm": 0.9447706341743469, |
| "learning_rate": 4.736823324551909e-06, |
| "loss": 0.8502116203308105, |
| "memory(GiB)": 76.04, |
| "step": 2205, |
| "token_acc": 0.7345110180295028, |
| "train_speed(iter/s)": 0.027737 |
| }, |
| { |
| "epoch": 0.5714655116684982, |
| "grad_norm": 1.0418199300765991, |
| "learning_rate": 4.7352295566425355e-06, |
| "loss": 0.7954240322113038, |
| "memory(GiB)": 76.04, |
| "step": 2210, |
| "token_acc": 0.7976113712187053, |
| "train_speed(iter/s)": 0.027736 |
| }, |
| { |
| "epoch": 0.5727584200659384, |
| "grad_norm": 2.2441470623016357, |
| "learning_rate": 4.733631247240435e-06, |
| "loss": 0.8036426544189453, |
| "memory(GiB)": 76.04, |
| "step": 2215, |
| "token_acc": 0.7925195951601857, |
| "train_speed(iter/s)": 0.027736 |
| }, |
| { |
| "epoch": 0.5740513284633784, |
| "grad_norm": 0.8851604461669922, |
| "learning_rate": 4.732028399593018e-06, |
| "loss": 0.8041337013244629, |
| "memory(GiB)": 76.04, |
| "step": 2220, |
| "token_acc": 0.7804418779814211, |
| "train_speed(iter/s)": 0.027734 |
| }, |
| { |
| "epoch": 0.5753442368608184, |
| "grad_norm": 0.897997260093689, |
| "learning_rate": 4.730421016956919e-06, |
| "loss": 0.7801138877868652, |
| "memory(GiB)": 76.04, |
| "step": 2225, |
| "token_acc": 0.8051513959889894, |
| "train_speed(iter/s)": 0.027732 |
| }, |
| { |
| "epoch": 0.5766371452582585, |
| "grad_norm": 3.450253486633301, |
| "learning_rate": 4.728809102597984e-06, |
| "loss": 0.795560646057129, |
| "memory(GiB)": 76.04, |
| "step": 2230, |
| "token_acc": 0.777429320351994, |
| "train_speed(iter/s)": 0.027732 |
| }, |
| { |
| "epoch": 0.5779300536556985, |
| "grad_norm": 1.5096064805984497, |
| "learning_rate": 4.727192659791265e-06, |
| "loss": 0.800804615020752, |
| "memory(GiB)": 76.04, |
| "step": 2235, |
| "token_acc": 0.7972484309406044, |
| "train_speed(iter/s)": 0.027733 |
| }, |
| { |
| "epoch": 0.5792229620531385, |
| "grad_norm": 1.0118114948272705, |
| "learning_rate": 4.72557169182102e-06, |
| "loss": 0.7758650302886962, |
| "memory(GiB)": 76.04, |
| "step": 2240, |
| "token_acc": 0.7874528625299966, |
| "train_speed(iter/s)": 0.027734 |
| }, |
| { |
| "epoch": 0.5805158704505786, |
| "grad_norm": 1.16028892993927, |
| "learning_rate": 4.723946201980695e-06, |
| "loss": 0.8420794486999512, |
| "memory(GiB)": 76.04, |
| "step": 2245, |
| "token_acc": 0.7777456885881674, |
| "train_speed(iter/s)": 0.027735 |
| }, |
| { |
| "epoch": 0.5818087788480186, |
| "grad_norm": 1.1023540496826172, |
| "learning_rate": 4.7223161935729274e-06, |
| "loss": 0.801850700378418, |
| "memory(GiB)": 76.04, |
| "step": 2250, |
| "token_acc": 0.7952162077736624, |
| "train_speed(iter/s)": 0.027737 |
| }, |
| { |
| "epoch": 0.5831016872454586, |
| "grad_norm": 0.8935644626617432, |
| "learning_rate": 4.7206816699095345e-06, |
| "loss": 0.7811629295349121, |
| "memory(GiB)": 76.04, |
| "step": 2255, |
| "token_acc": 0.789712556732224, |
| "train_speed(iter/s)": 0.027737 |
| }, |
| { |
| "epoch": 0.5843945956428988, |
| "grad_norm": 1.0098074674606323, |
| "learning_rate": 4.719042634311507e-06, |
| "loss": 0.8304760932922364, |
| "memory(GiB)": 76.04, |
| "step": 2260, |
| "token_acc": 0.7755578712853498, |
| "train_speed(iter/s)": 0.027737 |
| }, |
| { |
| "epoch": 0.5856875040403388, |
| "grad_norm": 1.1288141012191772, |
| "learning_rate": 4.717399090109003e-06, |
| "loss": 0.8142587661743164, |
| "memory(GiB)": 76.04, |
| "step": 2265, |
| "token_acc": 0.7781233799896319, |
| "train_speed(iter/s)": 0.027739 |
| }, |
| { |
| "epoch": 0.5869804124377788, |
| "grad_norm": 1.0086054801940918, |
| "learning_rate": 4.715751040641341e-06, |
| "loss": 0.8228842735290527, |
| "memory(GiB)": 76.04, |
| "step": 2270, |
| "token_acc": 0.7793262574988463, |
| "train_speed(iter/s)": 0.027739 |
| }, |
| { |
| "epoch": 0.5882733208352188, |
| "grad_norm": 5.436489105224609, |
| "learning_rate": 4.714098489256994e-06, |
| "loss": 0.7786747932434082, |
| "memory(GiB)": 76.04, |
| "step": 2275, |
| "token_acc": 0.8480059038774945, |
| "train_speed(iter/s)": 0.027741 |
| }, |
| { |
| "epoch": 0.5895662292326589, |
| "grad_norm": 0.8497810363769531, |
| "learning_rate": 4.712441439313583e-06, |
| "loss": 0.7513184070587158, |
| "memory(GiB)": 76.04, |
| "step": 2280, |
| "token_acc": 0.804937625403472, |
| "train_speed(iter/s)": 0.027739 |
| }, |
| { |
| "epoch": 0.5908591376300989, |
| "grad_norm": 1.5754011869430542, |
| "learning_rate": 4.710779894177864e-06, |
| "loss": 0.8058387756347656, |
| "memory(GiB)": 76.04, |
| "step": 2285, |
| "token_acc": 0.7810834813499112, |
| "train_speed(iter/s)": 0.027738 |
| }, |
| { |
| "epoch": 0.5921520460275389, |
| "grad_norm": 1.010524868965149, |
| "learning_rate": 4.709113857225732e-06, |
| "loss": 0.8032638549804687, |
| "memory(GiB)": 76.04, |
| "step": 2290, |
| "token_acc": 0.8110142754505982, |
| "train_speed(iter/s)": 0.027739 |
| }, |
| { |
| "epoch": 0.593444954424979, |
| "grad_norm": 0.877875804901123, |
| "learning_rate": 4.707443331842206e-06, |
| "loss": 0.809267234802246, |
| "memory(GiB)": 76.04, |
| "step": 2295, |
| "token_acc": 0.7685890635548269, |
| "train_speed(iter/s)": 0.027741 |
| }, |
| { |
| "epoch": 0.594737862822419, |
| "grad_norm": 1.047855257987976, |
| "learning_rate": 4.705768321421425e-06, |
| "loss": 0.7906962394714355, |
| "memory(GiB)": 76.04, |
| "step": 2300, |
| "token_acc": 0.7821157343031341, |
| "train_speed(iter/s)": 0.027738 |
| }, |
| { |
| "epoch": 0.596030771219859, |
| "grad_norm": 1.188430905342102, |
| "learning_rate": 4.704088829366638e-06, |
| "loss": 0.8145524978637695, |
| "memory(GiB)": 76.04, |
| "step": 2305, |
| "token_acc": 0.7796888204006561, |
| "train_speed(iter/s)": 0.027737 |
| }, |
| { |
| "epoch": 0.5973236796172992, |
| "grad_norm": 1.0411370992660522, |
| "learning_rate": 4.702404859090204e-06, |
| "loss": 0.7802029609680176, |
| "memory(GiB)": 76.04, |
| "step": 2310, |
| "token_acc": 0.7938298768784233, |
| "train_speed(iter/s)": 0.027739 |
| }, |
| { |
| "epoch": 0.5986165880147392, |
| "grad_norm": 0.9956724643707275, |
| "learning_rate": 4.700716414013577e-06, |
| "loss": 0.7613677978515625, |
| "memory(GiB)": 76.04, |
| "step": 2315, |
| "token_acc": 0.8293824550807791, |
| "train_speed(iter/s)": 0.027739 |
| }, |
| { |
| "epoch": 0.5999094964121792, |
| "grad_norm": 1.021669626235962, |
| "learning_rate": 4.6990234975673065e-06, |
| "loss": 0.7912391662597656, |
| "memory(GiB)": 76.04, |
| "step": 2320, |
| "token_acc": 0.7770263788968825, |
| "train_speed(iter/s)": 0.027739 |
| }, |
| { |
| "epoch": 0.6012024048096193, |
| "grad_norm": 2.0476624965667725, |
| "learning_rate": 4.697326113191024e-06, |
| "loss": 0.8161981582641602, |
| "memory(GiB)": 76.04, |
| "step": 2325, |
| "token_acc": 0.7861008259755056, |
| "train_speed(iter/s)": 0.027737 |
| }, |
| { |
| "epoch": 0.6024953132070593, |
| "grad_norm": 2.5752296447753906, |
| "learning_rate": 4.695624264333438e-06, |
| "loss": 0.7860607624053955, |
| "memory(GiB)": 76.04, |
| "step": 2330, |
| "token_acc": 0.7906607543657962, |
| "train_speed(iter/s)": 0.027737 |
| }, |
| { |
| "epoch": 0.6037882216044993, |
| "grad_norm": 1.1529428958892822, |
| "learning_rate": 4.6939179544523315e-06, |
| "loss": 0.8076473236083984, |
| "memory(GiB)": 76.04, |
| "step": 2335, |
| "token_acc": 0.7956367704642924, |
| "train_speed(iter/s)": 0.027737 |
| }, |
| { |
| "epoch": 0.6050811300019394, |
| "grad_norm": 0.9944195747375488, |
| "learning_rate": 4.692207187014548e-06, |
| "loss": 0.8114787101745605, |
| "memory(GiB)": 76.04, |
| "step": 2340, |
| "token_acc": 0.8053776627151746, |
| "train_speed(iter/s)": 0.027736 |
| }, |
| { |
| "epoch": 0.6063740383993794, |
| "grad_norm": 0.9465590715408325, |
| "learning_rate": 4.690491965495989e-06, |
| "loss": 0.7890607357025147, |
| "memory(GiB)": 76.04, |
| "step": 2345, |
| "token_acc": 0.7868282075178626, |
| "train_speed(iter/s)": 0.027736 |
| }, |
| { |
| "epoch": 0.6076669467968194, |
| "grad_norm": 1.0112555027008057, |
| "learning_rate": 4.688772293381608e-06, |
| "loss": 0.7973843574523926, |
| "memory(GiB)": 76.04, |
| "step": 2350, |
| "token_acc": 0.7798850081524071, |
| "train_speed(iter/s)": 0.027737 |
| }, |
| { |
| "epoch": 0.6089598551942594, |
| "grad_norm": 1.1251353025436401, |
| "learning_rate": 4.6870481741653965e-06, |
| "loss": 0.8469139099121094, |
| "memory(GiB)": 76.04, |
| "step": 2355, |
| "token_acc": 0.7770078088638361, |
| "train_speed(iter/s)": 0.027739 |
| }, |
| { |
| "epoch": 0.6102527635916996, |
| "grad_norm": 0.9500820636749268, |
| "learning_rate": 4.685319611350384e-06, |
| "loss": 0.8143545150756836, |
| "memory(GiB)": 76.04, |
| "step": 2360, |
| "token_acc": 0.8021919497701536, |
| "train_speed(iter/s)": 0.027741 |
| }, |
| { |
| "epoch": 0.6115456719891396, |
| "grad_norm": 1.0462709665298462, |
| "learning_rate": 4.683586608448629e-06, |
| "loss": 0.7490966320037842, |
| "memory(GiB)": 76.04, |
| "step": 2365, |
| "token_acc": 0.8057272352698805, |
| "train_speed(iter/s)": 0.027738 |
| }, |
| { |
| "epoch": 0.6128385803865796, |
| "grad_norm": 0.982092022895813, |
| "learning_rate": 4.681849168981211e-06, |
| "loss": 0.8468921661376954, |
| "memory(GiB)": 76.04, |
| "step": 2370, |
| "token_acc": 0.7924534664148908, |
| "train_speed(iter/s)": 0.02774 |
| }, |
| { |
| "epoch": 0.6141314887840197, |
| "grad_norm": 1.270372748374939, |
| "learning_rate": 4.680107296478223e-06, |
| "loss": 0.799936580657959, |
| "memory(GiB)": 76.04, |
| "step": 2375, |
| "token_acc": 0.8000295322824763, |
| "train_speed(iter/s)": 0.027741 |
| }, |
| { |
| "epoch": 0.6154243971814597, |
| "grad_norm": 1.3359791040420532, |
| "learning_rate": 4.678360994478763e-06, |
| "loss": 0.8011417388916016, |
| "memory(GiB)": 76.04, |
| "step": 2380, |
| "token_acc": 0.7963584606708382, |
| "train_speed(iter/s)": 0.027739 |
| }, |
| { |
| "epoch": 0.6167173055788997, |
| "grad_norm": 1.0611239671707153, |
| "learning_rate": 4.676610266530935e-06, |
| "loss": 0.800925350189209, |
| "memory(GiB)": 76.04, |
| "step": 2385, |
| "token_acc": 0.7784312845148835, |
| "train_speed(iter/s)": 0.027735 |
| }, |
| { |
| "epoch": 0.6180102139763398, |
| "grad_norm": 0.9599133729934692, |
| "learning_rate": 4.6748551161918285e-06, |
| "loss": 0.7691280364990234, |
| "memory(GiB)": 76.04, |
| "step": 2390, |
| "token_acc": 0.8164638974875819, |
| "train_speed(iter/s)": 0.027734 |
| }, |
| { |
| "epoch": 0.6193031223737798, |
| "grad_norm": 1.0434238910675049, |
| "learning_rate": 4.673095547027522e-06, |
| "loss": 0.7575326442718506, |
| "memory(GiB)": 76.04, |
| "step": 2395, |
| "token_acc": 0.8145789878142496, |
| "train_speed(iter/s)": 0.027734 |
| }, |
| { |
| "epoch": 0.6205960307712198, |
| "grad_norm": 1.002805233001709, |
| "learning_rate": 4.671331562613072e-06, |
| "loss": 0.7855173110961914, |
| "memory(GiB)": 76.04, |
| "step": 2400, |
| "token_acc": 0.8110472959950661, |
| "train_speed(iter/s)": 0.027734 |
| }, |
| { |
| "epoch": 0.62188893916866, |
| "grad_norm": 0.8859378099441528, |
| "learning_rate": 4.669563166532504e-06, |
| "loss": 0.807244873046875, |
| "memory(GiB)": 76.04, |
| "step": 2405, |
| "token_acc": 0.7864419894252676, |
| "train_speed(iter/s)": 0.027733 |
| }, |
| { |
| "epoch": 0.6231818475661, |
| "grad_norm": 2.113131046295166, |
| "learning_rate": 4.667790362378809e-06, |
| "loss": 0.794129753112793, |
| "memory(GiB)": 76.04, |
| "step": 2410, |
| "token_acc": 0.7970005356186395, |
| "train_speed(iter/s)": 0.027733 |
| }, |
| { |
| "epoch": 0.62447475596354, |
| "grad_norm": 1.0956636667251587, |
| "learning_rate": 4.6660131537539335e-06, |
| "loss": 0.8120314598083496, |
| "memory(GiB)": 76.04, |
| "step": 2415, |
| "token_acc": 0.7850858214337227, |
| "train_speed(iter/s)": 0.027734 |
| }, |
| { |
| "epoch": 0.62576766436098, |
| "grad_norm": 2.5566296577453613, |
| "learning_rate": 4.664231544268774e-06, |
| "loss": 0.7688230037689209, |
| "memory(GiB)": 76.04, |
| "step": 2420, |
| "token_acc": 0.7974286336892569, |
| "train_speed(iter/s)": 0.027737 |
| }, |
| { |
| "epoch": 0.6270605727584201, |
| "grad_norm": 0.8976960182189941, |
| "learning_rate": 4.662445537543164e-06, |
| "loss": 0.8087752342224122, |
| "memory(GiB)": 76.04, |
| "step": 2425, |
| "token_acc": 0.7868685635201693, |
| "train_speed(iter/s)": 0.027735 |
| }, |
| { |
| "epoch": 0.6283534811558601, |
| "grad_norm": 1.0024232864379883, |
| "learning_rate": 4.660655137205878e-06, |
| "loss": 0.7957705020904541, |
| "memory(GiB)": 76.04, |
| "step": 2430, |
| "token_acc": 0.7706113070005151, |
| "train_speed(iter/s)": 0.027736 |
| }, |
| { |
| "epoch": 0.6296463895533001, |
| "grad_norm": 1.0616440773010254, |
| "learning_rate": 4.658860346894613e-06, |
| "loss": 0.7973846912384033, |
| "memory(GiB)": 76.04, |
| "step": 2435, |
| "token_acc": 0.8036959869553402, |
| "train_speed(iter/s)": 0.027737 |
| }, |
| { |
| "epoch": 0.6309392979507402, |
| "grad_norm": 1.0026406049728394, |
| "learning_rate": 4.6570611702559854e-06, |
| "loss": 0.8205162048339844, |
| "memory(GiB)": 76.04, |
| "step": 2440, |
| "token_acc": 0.7975911152823401, |
| "train_speed(iter/s)": 0.027737 |
| }, |
| { |
| "epoch": 0.6322322063481802, |
| "grad_norm": 0.9040783047676086, |
| "learning_rate": 4.655257610945526e-06, |
| "loss": 0.8040790557861328, |
| "memory(GiB)": 76.04, |
| "step": 2445, |
| "token_acc": 0.8114757319709177, |
| "train_speed(iter/s)": 0.027735 |
| }, |
| { |
| "epoch": 0.6335251147456202, |
| "grad_norm": 1.0662907361984253, |
| "learning_rate": 4.653449672627669e-06, |
| "loss": 0.7849656105041504, |
| "memory(GiB)": 76.04, |
| "step": 2450, |
| "token_acc": 0.8061563270726617, |
| "train_speed(iter/s)": 0.027735 |
| }, |
| { |
| "epoch": 0.6348180231430604, |
| "grad_norm": 1.0695264339447021, |
| "learning_rate": 4.6516373589757445e-06, |
| "loss": 0.7940691947937012, |
| "memory(GiB)": 76.04, |
| "step": 2455, |
| "token_acc": 0.7807667525773195, |
| "train_speed(iter/s)": 0.027737 |
| }, |
| { |
| "epoch": 0.6361109315405004, |
| "grad_norm": 1.1556239128112793, |
| "learning_rate": 4.649820673671976e-06, |
| "loss": 0.7685293197631836, |
| "memory(GiB)": 76.04, |
| "step": 2460, |
| "token_acc": 0.7840851495184997, |
| "train_speed(iter/s)": 0.027739 |
| }, |
| { |
| "epoch": 0.6374038399379404, |
| "grad_norm": 2.466895580291748, |
| "learning_rate": 4.647999620407463e-06, |
| "loss": 0.7619011878967286, |
| "memory(GiB)": 76.04, |
| "step": 2465, |
| "token_acc": 0.7804016362960208, |
| "train_speed(iter/s)": 0.02774 |
| }, |
| { |
| "epoch": 0.6386967483353805, |
| "grad_norm": 1.1291913986206055, |
| "learning_rate": 4.646174202882186e-06, |
| "loss": 0.8165172576904297, |
| "memory(GiB)": 76.04, |
| "step": 2470, |
| "token_acc": 0.7608570606844981, |
| "train_speed(iter/s)": 0.027742 |
| }, |
| { |
| "epoch": 0.6399896567328205, |
| "grad_norm": 1.1947365999221802, |
| "learning_rate": 4.64434442480499e-06, |
| "loss": 0.7749819755554199, |
| "memory(GiB)": 76.04, |
| "step": 2475, |
| "token_acc": 0.7708522212148685, |
| "train_speed(iter/s)": 0.027742 |
| }, |
| { |
| "epoch": 0.6412825651302605, |
| "grad_norm": 1.0024884939193726, |
| "learning_rate": 4.64251028989358e-06, |
| "loss": 0.766645097732544, |
| "memory(GiB)": 76.04, |
| "step": 2480, |
| "token_acc": 0.7914130613587761, |
| "train_speed(iter/s)": 0.027743 |
| }, |
| { |
| "epoch": 0.6425754735277006, |
| "grad_norm": 0.9784958362579346, |
| "learning_rate": 4.640671801874512e-06, |
| "loss": 0.8136966705322266, |
| "memory(GiB)": 76.04, |
| "step": 2485, |
| "token_acc": 0.7942760819377771, |
| "train_speed(iter/s)": 0.027746 |
| }, |
| { |
| "epoch": 0.6438683819251406, |
| "grad_norm": 0.8597215414047241, |
| "learning_rate": 4.638828964483188e-06, |
| "loss": 0.775879955291748, |
| "memory(GiB)": 76.04, |
| "step": 2490, |
| "token_acc": 0.7876452918897741, |
| "train_speed(iter/s)": 0.027745 |
| }, |
| { |
| "epoch": 0.6451612903225806, |
| "grad_norm": 1.1758781671524048, |
| "learning_rate": 4.636981781463848e-06, |
| "loss": 0.8091221809387207, |
| "memory(GiB)": 76.04, |
| "step": 2495, |
| "token_acc": 0.8069754035357417, |
| "train_speed(iter/s)": 0.027745 |
| }, |
| { |
| "epoch": 0.6464541987200206, |
| "grad_norm": 0.9592023491859436, |
| "learning_rate": 4.635130256569558e-06, |
| "loss": 0.7946199417114258, |
| "memory(GiB)": 76.04, |
| "step": 2500, |
| "token_acc": 0.7830649234049717, |
| "train_speed(iter/s)": 0.027746 |
| }, |
| { |
| "epoch": 0.6477471071174608, |
| "grad_norm": 1.495296835899353, |
| "learning_rate": 4.633274393562208e-06, |
| "loss": 0.7667324542999268, |
| "memory(GiB)": 76.04, |
| "step": 2505, |
| "token_acc": 0.8036371800628649, |
| "train_speed(iter/s)": 0.027748 |
| }, |
| { |
| "epoch": 0.6490400155149008, |
| "grad_norm": 1.0845485925674438, |
| "learning_rate": 4.631414196212502e-06, |
| "loss": 0.774350357055664, |
| "memory(GiB)": 76.04, |
| "step": 2510, |
| "token_acc": 0.7877581120943953, |
| "train_speed(iter/s)": 0.02775 |
| }, |
| { |
| "epoch": 0.6503329239123408, |
| "grad_norm": 0.9458225965499878, |
| "learning_rate": 4.629549668299949e-06, |
| "loss": 0.7802841186523437, |
| "memory(GiB)": 76.04, |
| "step": 2515, |
| "token_acc": 0.7762283711761699, |
| "train_speed(iter/s)": 0.027751 |
| }, |
| { |
| "epoch": 0.6516258323097809, |
| "grad_norm": 1.0014280080795288, |
| "learning_rate": 4.62768081361286e-06, |
| "loss": 0.7994625568389893, |
| "memory(GiB)": 76.04, |
| "step": 2520, |
| "token_acc": 0.8127975163849603, |
| "train_speed(iter/s)": 0.027749 |
| }, |
| { |
| "epoch": 0.6529187407072209, |
| "grad_norm": 1.5184024572372437, |
| "learning_rate": 4.6258076359483335e-06, |
| "loss": 0.7841564655303955, |
| "memory(GiB)": 76.04, |
| "step": 2525, |
| "token_acc": 0.8111151834205178, |
| "train_speed(iter/s)": 0.027751 |
| }, |
| { |
| "epoch": 0.6542116491046609, |
| "grad_norm": 1.1411337852478027, |
| "learning_rate": 4.623930139112252e-06, |
| "loss": 0.7719697952270508, |
| "memory(GiB)": 76.04, |
| "step": 2530, |
| "token_acc": 0.7725351785631357, |
| "train_speed(iter/s)": 0.027751 |
| }, |
| { |
| "epoch": 0.655504557502101, |
| "grad_norm": 1.3554903268814087, |
| "learning_rate": 4.622048326919277e-06, |
| "loss": 0.7868958950042725, |
| "memory(GiB)": 76.04, |
| "step": 2535, |
| "token_acc": 0.7877291008718654, |
| "train_speed(iter/s)": 0.027749 |
| }, |
| { |
| "epoch": 0.656797465899541, |
| "grad_norm": 1.3750821352005005, |
| "learning_rate": 4.620162203192833e-06, |
| "loss": 0.7791455268859864, |
| "memory(GiB)": 76.04, |
| "step": 2540, |
| "token_acc": 0.7791341738940311, |
| "train_speed(iter/s)": 0.02775 |
| }, |
| { |
| "epoch": 0.658090374296981, |
| "grad_norm": 1.1238117218017578, |
| "learning_rate": 4.618271771765108e-06, |
| "loss": 0.7734639644622803, |
| "memory(GiB)": 76.04, |
| "step": 2545, |
| "token_acc": 0.7830758898589657, |
| "train_speed(iter/s)": 0.027753 |
| }, |
| { |
| "epoch": 0.6593832826944211, |
| "grad_norm": 1.0922011137008667, |
| "learning_rate": 4.616377036477039e-06, |
| "loss": 0.769841194152832, |
| "memory(GiB)": 76.04, |
| "step": 2550, |
| "token_acc": 0.7772533671002647, |
| "train_speed(iter/s)": 0.027753 |
| }, |
| { |
| "epoch": 0.6606761910918612, |
| "grad_norm": 1.0475714206695557, |
| "learning_rate": 4.614478001178312e-06, |
| "loss": 0.7945080280303956, |
| "memory(GiB)": 76.04, |
| "step": 2555, |
| "token_acc": 0.7906106546310226, |
| "train_speed(iter/s)": 0.027752 |
| }, |
| { |
| "epoch": 0.6619690994893012, |
| "grad_norm": 1.1444096565246582, |
| "learning_rate": 4.612574669727346e-06, |
| "loss": 0.7711798667907714, |
| "memory(GiB)": 76.04, |
| "step": 2560, |
| "token_acc": 0.7995795091578054, |
| "train_speed(iter/s)": 0.027752 |
| }, |
| { |
| "epoch": 0.6632620078867413, |
| "grad_norm": 1.4287755489349365, |
| "learning_rate": 4.6106670459912915e-06, |
| "loss": 0.794065284729004, |
| "memory(GiB)": 76.04, |
| "step": 2565, |
| "token_acc": 0.7696101905947706, |
| "train_speed(iter/s)": 0.027751 |
| }, |
| { |
| "epoch": 0.6645549162841813, |
| "grad_norm": 1.3806992769241333, |
| "learning_rate": 4.608755133846017e-06, |
| "loss": 0.8211702346801758, |
| "memory(GiB)": 76.04, |
| "step": 2570, |
| "token_acc": 0.80044866626941, |
| "train_speed(iter/s)": 0.027752 |
| }, |
| { |
| "epoch": 0.6658478246816213, |
| "grad_norm": 0.9568463563919067, |
| "learning_rate": 4.6068389371761055e-06, |
| "loss": 0.7481316566467285, |
| "memory(GiB)": 76.04, |
| "step": 2575, |
| "token_acc": 0.8280512901693842, |
| "train_speed(iter/s)": 0.027751 |
| }, |
| { |
| "epoch": 0.6671407330790613, |
| "grad_norm": 1.2518895864486694, |
| "learning_rate": 4.604918459874846e-06, |
| "loss": 0.7877891540527344, |
| "memory(GiB)": 76.04, |
| "step": 2580, |
| "token_acc": 0.8081138790035587, |
| "train_speed(iter/s)": 0.027751 |
| }, |
| { |
| "epoch": 0.6684336414765014, |
| "grad_norm": 1.919282078742981, |
| "learning_rate": 4.602993705844225e-06, |
| "loss": 0.7748439311981201, |
| "memory(GiB)": 76.04, |
| "step": 2585, |
| "token_acc": 0.8042981252857796, |
| "train_speed(iter/s)": 0.02775 |
| }, |
| { |
| "epoch": 0.6697265498739414, |
| "grad_norm": 1.1794474124908447, |
| "learning_rate": 4.601064678994916e-06, |
| "loss": 0.7562169075012207, |
| "memory(GiB)": 76.04, |
| "step": 2590, |
| "token_acc": 0.7905587888470463, |
| "train_speed(iter/s)": 0.027747 |
| }, |
| { |
| "epoch": 0.6710194582713814, |
| "grad_norm": 0.9287105798721313, |
| "learning_rate": 4.599131383246277e-06, |
| "loss": 0.7767970085144043, |
| "memory(GiB)": 76.04, |
| "step": 2595, |
| "token_acc": 0.7871586083297619, |
| "train_speed(iter/s)": 0.027748 |
| }, |
| { |
| "epoch": 0.6723123666688215, |
| "grad_norm": 1.4129362106323242, |
| "learning_rate": 4.5971938225263366e-06, |
| "loss": 0.7788604736328125, |
| "memory(GiB)": 76.04, |
| "step": 2600, |
| "token_acc": 0.810065880876619, |
| "train_speed(iter/s)": 0.027748 |
| }, |
| { |
| "epoch": 0.6736052750662616, |
| "grad_norm": 1.1094108819961548, |
| "learning_rate": 4.59525200077179e-06, |
| "loss": 0.7465203285217286, |
| "memory(GiB)": 76.04, |
| "step": 2605, |
| "token_acc": 0.8041896446078431, |
| "train_speed(iter/s)": 0.027748 |
| }, |
| { |
| "epoch": 0.6748981834637016, |
| "grad_norm": 1.05765962600708, |
| "learning_rate": 4.593305921927992e-06, |
| "loss": 0.7598991394042969, |
| "memory(GiB)": 76.04, |
| "step": 2610, |
| "token_acc": 0.8296476919196166, |
| "train_speed(iter/s)": 0.027747 |
| }, |
| { |
| "epoch": 0.6761910918611417, |
| "grad_norm": 1.0570799112319946, |
| "learning_rate": 4.591355589948943e-06, |
| "loss": 0.7356798648834229, |
| "memory(GiB)": 76.04, |
| "step": 2615, |
| "token_acc": 0.7747376064426695, |
| "train_speed(iter/s)": 0.027745 |
| }, |
| { |
| "epoch": 0.6774840002585817, |
| "grad_norm": 1.726942777633667, |
| "learning_rate": 4.589401008797288e-06, |
| "loss": 0.7580029487609863, |
| "memory(GiB)": 76.04, |
| "step": 2620, |
| "token_acc": 0.7843151506341535, |
| "train_speed(iter/s)": 0.027745 |
| }, |
| { |
| "epoch": 0.6787769086560217, |
| "grad_norm": 1.046608805656433, |
| "learning_rate": 4.587442182444303e-06, |
| "loss": 0.7981472969055176, |
| "memory(GiB)": 76.04, |
| "step": 2625, |
| "token_acc": 0.8134403515732291, |
| "train_speed(iter/s)": 0.027744 |
| }, |
| { |
| "epoch": 0.6800698170534618, |
| "grad_norm": 1.075890302658081, |
| "learning_rate": 4.585479114869892e-06, |
| "loss": 0.7996755599975586, |
| "memory(GiB)": 76.04, |
| "step": 2630, |
| "token_acc": 0.7618249365712214, |
| "train_speed(iter/s)": 0.027747 |
| }, |
| { |
| "epoch": 0.6813627254509018, |
| "grad_norm": 1.182303786277771, |
| "learning_rate": 4.583511810062573e-06, |
| "loss": 0.7393967628479003, |
| "memory(GiB)": 76.04, |
| "step": 2635, |
| "token_acc": 0.7840963855421687, |
| "train_speed(iter/s)": 0.027749 |
| }, |
| { |
| "epoch": 0.6826556338483418, |
| "grad_norm": 0.9905603528022766, |
| "learning_rate": 4.581540272019476e-06, |
| "loss": 0.7551537036895752, |
| "memory(GiB)": 76.04, |
| "step": 2640, |
| "token_acc": 0.804885036888475, |
| "train_speed(iter/s)": 0.027751 |
| }, |
| { |
| "epoch": 0.6839485422457818, |
| "grad_norm": 0.9618648290634155, |
| "learning_rate": 4.579564504746331e-06, |
| "loss": 0.7748908996582031, |
| "memory(GiB)": 76.04, |
| "step": 2645, |
| "token_acc": 0.8088857158547971, |
| "train_speed(iter/s)": 0.02775 |
| }, |
| { |
| "epoch": 0.685241450643222, |
| "grad_norm": 1.2999211549758911, |
| "learning_rate": 4.577584512257459e-06, |
| "loss": 0.7771445274353027, |
| "memory(GiB)": 76.04, |
| "step": 2650, |
| "token_acc": 0.8316016931592813, |
| "train_speed(iter/s)": 0.027747 |
| }, |
| { |
| "epoch": 0.686534359040662, |
| "grad_norm": 0.9438580274581909, |
| "learning_rate": 4.57560029857577e-06, |
| "loss": 0.7551321983337402, |
| "memory(GiB)": 76.04, |
| "step": 2655, |
| "token_acc": 0.7968830005120328, |
| "train_speed(iter/s)": 0.027747 |
| }, |
| { |
| "epoch": 0.687827267438102, |
| "grad_norm": 1.2633525133132935, |
| "learning_rate": 4.573611867732746e-06, |
| "loss": 0.750664758682251, |
| "memory(GiB)": 76.04, |
| "step": 2660, |
| "token_acc": 0.7704320666319625, |
| "train_speed(iter/s)": 0.027748 |
| }, |
| { |
| "epoch": 0.6891201758355421, |
| "grad_norm": 1.7194573879241943, |
| "learning_rate": 4.571619223768439e-06, |
| "loss": 0.7772263526916504, |
| "memory(GiB)": 76.04, |
| "step": 2665, |
| "token_acc": 0.7634119583104773, |
| "train_speed(iter/s)": 0.02775 |
| }, |
| { |
| "epoch": 0.6904130842329821, |
| "grad_norm": 1.2169469594955444, |
| "learning_rate": 4.569622370731463e-06, |
| "loss": 0.7446264743804931, |
| "memory(GiB)": 76.04, |
| "step": 2670, |
| "token_acc": 0.7990216722278014, |
| "train_speed(iter/s)": 0.027749 |
| }, |
| { |
| "epoch": 0.6917059926304221, |
| "grad_norm": 1.146213173866272, |
| "learning_rate": 4.56762131267898e-06, |
| "loss": 0.7797055244445801, |
| "memory(GiB)": 76.04, |
| "step": 2675, |
| "token_acc": 0.7709560205488034, |
| "train_speed(iter/s)": 0.02775 |
| }, |
| { |
| "epoch": 0.6929989010278622, |
| "grad_norm": 6.729126930236816, |
| "learning_rate": 4.565616053676701e-06, |
| "loss": 0.7762058258056641, |
| "memory(GiB)": 76.04, |
| "step": 2680, |
| "token_acc": 0.8343838296022604, |
| "train_speed(iter/s)": 0.02775 |
| }, |
| { |
| "epoch": 0.6942918094253022, |
| "grad_norm": 1.7651880979537964, |
| "learning_rate": 4.563606597798866e-06, |
| "loss": 0.8064382553100586, |
| "memory(GiB)": 76.04, |
| "step": 2685, |
| "token_acc": 0.7710679099225898, |
| "train_speed(iter/s)": 0.027751 |
| }, |
| { |
| "epoch": 0.6955847178227422, |
| "grad_norm": 1.7482510805130005, |
| "learning_rate": 4.561592949128249e-06, |
| "loss": 0.7633975505828857, |
| "memory(GiB)": 76.04, |
| "step": 2690, |
| "token_acc": 0.7979380661789789, |
| "train_speed(iter/s)": 0.027752 |
| }, |
| { |
| "epoch": 0.6968776262201823, |
| "grad_norm": 1.2659438848495483, |
| "learning_rate": 4.5595751117561365e-06, |
| "loss": 0.7893208503723145, |
| "memory(GiB)": 76.04, |
| "step": 2695, |
| "token_acc": 0.8003590821509897, |
| "train_speed(iter/s)": 0.027751 |
| }, |
| { |
| "epoch": 0.6981705346176224, |
| "grad_norm": 1.2541935443878174, |
| "learning_rate": 4.5575530897823296e-06, |
| "loss": 0.7760859489440918, |
| "memory(GiB)": 76.04, |
| "step": 2700, |
| "token_acc": 0.7648711490021314, |
| "train_speed(iter/s)": 0.027751 |
| }, |
| { |
| "epoch": 0.6994634430150624, |
| "grad_norm": 1.4929347038269043, |
| "learning_rate": 4.55552688731513e-06, |
| "loss": 0.7721807479858398, |
| "memory(GiB)": 76.04, |
| "step": 2705, |
| "token_acc": 0.7744538013073435, |
| "train_speed(iter/s)": 0.027753 |
| }, |
| { |
| "epoch": 0.7007563514125025, |
| "grad_norm": 1.3372719287872314, |
| "learning_rate": 4.553496508471333e-06, |
| "loss": 0.7598706245422363, |
| "memory(GiB)": 76.04, |
| "step": 2710, |
| "token_acc": 0.7882575476596692, |
| "train_speed(iter/s)": 0.027753 |
| }, |
| { |
| "epoch": 0.7020492598099425, |
| "grad_norm": 1.0163182020187378, |
| "learning_rate": 4.551461957376221e-06, |
| "loss": 0.7641387939453125, |
| "memory(GiB)": 76.04, |
| "step": 2715, |
| "token_acc": 0.8151145642243085, |
| "train_speed(iter/s)": 0.027755 |
| }, |
| { |
| "epoch": 0.7033421682073825, |
| "grad_norm": 2.0491156578063965, |
| "learning_rate": 4.5494232381635526e-06, |
| "loss": 0.7833964347839355, |
| "memory(GiB)": 76.04, |
| "step": 2720, |
| "token_acc": 0.795193260654113, |
| "train_speed(iter/s)": 0.027756 |
| }, |
| { |
| "epoch": 0.7046350766048225, |
| "grad_norm": 1.0847963094711304, |
| "learning_rate": 4.547380354975554e-06, |
| "loss": 0.774288558959961, |
| "memory(GiB)": 76.04, |
| "step": 2725, |
| "token_acc": 0.7972633104565412, |
| "train_speed(iter/s)": 0.027757 |
| }, |
| { |
| "epoch": 0.7059279850022626, |
| "grad_norm": 0.9379494190216064, |
| "learning_rate": 4.545333311962912e-06, |
| "loss": 0.7845103740692139, |
| "memory(GiB)": 76.04, |
| "step": 2730, |
| "token_acc": 0.7804776566530748, |
| "train_speed(iter/s)": 0.027756 |
| }, |
| { |
| "epoch": 0.7072208933997026, |
| "grad_norm": 0.9910460114479065, |
| "learning_rate": 4.543282113284767e-06, |
| "loss": 0.7749279022216797, |
| "memory(GiB)": 76.04, |
| "step": 2735, |
| "token_acc": 0.7755603122639134, |
| "train_speed(iter/s)": 0.027758 |
| }, |
| { |
| "epoch": 0.7085138017971426, |
| "grad_norm": 0.8512127995491028, |
| "learning_rate": 4.541226763108702e-06, |
| "loss": 0.750948715209961, |
| "memory(GiB)": 76.04, |
| "step": 2740, |
| "token_acc": 0.804368820418487, |
| "train_speed(iter/s)": 0.027757 |
| }, |
| { |
| "epoch": 0.7098067101945827, |
| "grad_norm": 2.1939456462860107, |
| "learning_rate": 4.5391672656107335e-06, |
| "loss": 0.7639683723449707, |
| "memory(GiB)": 76.04, |
| "step": 2745, |
| "token_acc": 0.8181778169014085, |
| "train_speed(iter/s)": 0.027758 |
| }, |
| { |
| "epoch": 0.7110996185920228, |
| "grad_norm": 1.079122543334961, |
| "learning_rate": 4.537103624975306e-06, |
| "loss": 0.7661020278930664, |
| "memory(GiB)": 76.04, |
| "step": 2750, |
| "token_acc": 0.7944695989650712, |
| "train_speed(iter/s)": 0.027758 |
| }, |
| { |
| "epoch": 0.7123925269894628, |
| "grad_norm": 1.3096694946289062, |
| "learning_rate": 4.53503584539528e-06, |
| "loss": 0.7214805603027343, |
| "memory(GiB)": 76.04, |
| "step": 2755, |
| "token_acc": 0.7952853160179271, |
| "train_speed(iter/s)": 0.027758 |
| }, |
| { |
| "epoch": 0.7136854353869029, |
| "grad_norm": 1.1697825193405151, |
| "learning_rate": 4.532963931071929e-06, |
| "loss": 0.7563837051391602, |
| "memory(GiB)": 76.04, |
| "step": 2760, |
| "token_acc": 0.7784021071115013, |
| "train_speed(iter/s)": 0.027758 |
| }, |
| { |
| "epoch": 0.7149783437843429, |
| "grad_norm": 0.9587258100509644, |
| "learning_rate": 4.530887886214925e-06, |
| "loss": 0.7307098388671875, |
| "memory(GiB)": 76.04, |
| "step": 2765, |
| "token_acc": 0.8118209311876937, |
| "train_speed(iter/s)": 0.027757 |
| }, |
| { |
| "epoch": 0.7162712521817829, |
| "grad_norm": 1.2170313596725464, |
| "learning_rate": 4.528807715042333e-06, |
| "loss": 0.7652310371398926, |
| "memory(GiB)": 76.04, |
| "step": 2770, |
| "token_acc": 0.8206977655821247, |
| "train_speed(iter/s)": 0.027758 |
| }, |
| { |
| "epoch": 0.717564160579223, |
| "grad_norm": 1.1587222814559937, |
| "learning_rate": 4.526723421780598e-06, |
| "loss": 0.757373857498169, |
| "memory(GiB)": 76.04, |
| "step": 2775, |
| "token_acc": 0.8355521801286633, |
| "train_speed(iter/s)": 0.027758 |
| }, |
| { |
| "epoch": 0.718857068976663, |
| "grad_norm": 1.151134967803955, |
| "learning_rate": 4.524635010664547e-06, |
| "loss": 0.7718755722045898, |
| "memory(GiB)": 76.04, |
| "step": 2780, |
| "token_acc": 0.8152306441780126, |
| "train_speed(iter/s)": 0.027757 |
| }, |
| { |
| "epoch": 0.720149977374103, |
| "grad_norm": 1.1560102701187134, |
| "learning_rate": 4.522542485937369e-06, |
| "loss": 0.7426802635192871, |
| "memory(GiB)": 76.04, |
| "step": 2785, |
| "token_acc": 0.806016436656846, |
| "train_speed(iter/s)": 0.027754 |
| }, |
| { |
| "epoch": 0.7214428857715431, |
| "grad_norm": 0.993427038192749, |
| "learning_rate": 4.520445851850612e-06, |
| "loss": 0.7491902828216552, |
| "memory(GiB)": 76.04, |
| "step": 2790, |
| "token_acc": 0.8148384523334663, |
| "train_speed(iter/s)": 0.027754 |
| }, |
| { |
| "epoch": 0.7227357941689831, |
| "grad_norm": 0.9622454047203064, |
| "learning_rate": 4.518345112664173e-06, |
| "loss": 0.731049919128418, |
| "memory(GiB)": 76.04, |
| "step": 2795, |
| "token_acc": 0.8307215380677455, |
| "train_speed(iter/s)": 0.027753 |
| }, |
| { |
| "epoch": 0.7240287025664232, |
| "grad_norm": 1.0693981647491455, |
| "learning_rate": 4.516240272646291e-06, |
| "loss": 0.7997897148132325, |
| "memory(GiB)": 76.04, |
| "step": 2800, |
| "token_acc": 0.7474579404695877, |
| "train_speed(iter/s)": 0.027754 |
| }, |
| { |
| "epoch": 0.7253216109638632, |
| "grad_norm": 0.9485954642295837, |
| "learning_rate": 4.514131336073534e-06, |
| "loss": 0.76673583984375, |
| "memory(GiB)": 76.04, |
| "step": 2805, |
| "token_acc": 0.7821131082858396, |
| "train_speed(iter/s)": 0.027753 |
| }, |
| { |
| "epoch": 0.7266145193613033, |
| "grad_norm": 1.123063564300537, |
| "learning_rate": 4.512018307230798e-06, |
| "loss": 0.7704802036285401, |
| "memory(GiB)": 76.04, |
| "step": 2810, |
| "token_acc": 0.7895082445644244, |
| "train_speed(iter/s)": 0.027755 |
| }, |
| { |
| "epoch": 0.7279074277587433, |
| "grad_norm": 1.4126653671264648, |
| "learning_rate": 4.509901190411289e-06, |
| "loss": 0.7815113544464112, |
| "memory(GiB)": 76.04, |
| "step": 2815, |
| "token_acc": 0.8011522700531505, |
| "train_speed(iter/s)": 0.027754 |
| }, |
| { |
| "epoch": 0.7292003361561833, |
| "grad_norm": 1.1078338623046875, |
| "learning_rate": 4.5077799899165206e-06, |
| "loss": 0.7516324996948243, |
| "memory(GiB)": 76.04, |
| "step": 2820, |
| "token_acc": 0.7875029811590747, |
| "train_speed(iter/s)": 0.027753 |
| }, |
| { |
| "epoch": 0.7304932445536234, |
| "grad_norm": 1.1581236124038696, |
| "learning_rate": 4.505654710056305e-06, |
| "loss": 0.7554468154907227, |
| "memory(GiB)": 76.04, |
| "step": 2825, |
| "token_acc": 0.7982930298719773, |
| "train_speed(iter/s)": 0.027752 |
| }, |
| { |
| "epoch": 0.7317861529510634, |
| "grad_norm": 0.9877261519432068, |
| "learning_rate": 4.50352535514874e-06, |
| "loss": 0.7270550727844238, |
| "memory(GiB)": 76.04, |
| "step": 2830, |
| "token_acc": 0.8090806830964311, |
| "train_speed(iter/s)": 0.027753 |
| }, |
| { |
| "epoch": 0.7330790613485034, |
| "grad_norm": 1.0771080255508423, |
| "learning_rate": 4.501391929520206e-06, |
| "loss": 0.7520308494567871, |
| "memory(GiB)": 76.04, |
| "step": 2835, |
| "token_acc": 0.7689856611789697, |
| "train_speed(iter/s)": 0.027755 |
| }, |
| { |
| "epoch": 0.7343719697459435, |
| "grad_norm": 1.3513661623001099, |
| "learning_rate": 4.499254437505351e-06, |
| "loss": 0.7171365737915039, |
| "memory(GiB)": 76.04, |
| "step": 2840, |
| "token_acc": 0.813343427029162, |
| "train_speed(iter/s)": 0.027755 |
| }, |
| { |
| "epoch": 0.7356648781433835, |
| "grad_norm": 1.1246927976608276, |
| "learning_rate": 4.497112883447088e-06, |
| "loss": 0.7306987762451171, |
| "memory(GiB)": 76.04, |
| "step": 2845, |
| "token_acc": 0.8194618966664203, |
| "train_speed(iter/s)": 0.027755 |
| }, |
| { |
| "epoch": 0.7369577865408236, |
| "grad_norm": 1.2061104774475098, |
| "learning_rate": 4.494967271696581e-06, |
| "loss": 0.787189531326294, |
| "memory(GiB)": 76.04, |
| "step": 2850, |
| "token_acc": 0.7943105778422388, |
| "train_speed(iter/s)": 0.027755 |
| }, |
| { |
| "epoch": 0.7382506949382637, |
| "grad_norm": 1.228200078010559, |
| "learning_rate": 4.492817606613239e-06, |
| "loss": 0.736682653427124, |
| "memory(GiB)": 76.04, |
| "step": 2855, |
| "token_acc": 0.8220771643206185, |
| "train_speed(iter/s)": 0.027754 |
| }, |
| { |
| "epoch": 0.7395436033357037, |
| "grad_norm": 1.1733845472335815, |
| "learning_rate": 4.4906638925647075e-06, |
| "loss": 0.7503646850585938, |
| "memory(GiB)": 76.04, |
| "step": 2860, |
| "token_acc": 0.7979779479101798, |
| "train_speed(iter/s)": 0.027756 |
| }, |
| { |
| "epoch": 0.7408365117331437, |
| "grad_norm": 1.2325780391693115, |
| "learning_rate": 4.488506133926857e-06, |
| "loss": 0.7381996154785156, |
| "memory(GiB)": 76.04, |
| "step": 2865, |
| "token_acc": 0.7863309352517985, |
| "train_speed(iter/s)": 0.027757 |
| }, |
| { |
| "epoch": 0.7421294201305837, |
| "grad_norm": 1.1675026416778564, |
| "learning_rate": 4.486344335083775e-06, |
| "loss": 0.7488877296447753, |
| "memory(GiB)": 76.04, |
| "step": 2870, |
| "token_acc": 0.797289709130386, |
| "train_speed(iter/s)": 0.027757 |
| }, |
| { |
| "epoch": 0.7434223285280238, |
| "grad_norm": 1.6887255907058716, |
| "learning_rate": 4.484178500427762e-06, |
| "loss": 0.7432705402374268, |
| "memory(GiB)": 76.04, |
| "step": 2875, |
| "token_acc": 0.805889321374175, |
| "train_speed(iter/s)": 0.027756 |
| }, |
| { |
| "epoch": 0.7447152369254638, |
| "grad_norm": 1.2886244058609009, |
| "learning_rate": 4.482008634359316e-06, |
| "loss": 0.7218676567077636, |
| "memory(GiB)": 76.04, |
| "step": 2880, |
| "token_acc": 0.8163206292290787, |
| "train_speed(iter/s)": 0.027754 |
| }, |
| { |
| "epoch": 0.7460081453229038, |
| "grad_norm": 1.7008750438690186, |
| "learning_rate": 4.4798347412871226e-06, |
| "loss": 0.7312119960784912, |
| "memory(GiB)": 76.04, |
| "step": 2885, |
| "token_acc": 0.8356855218094915, |
| "train_speed(iter/s)": 0.027755 |
| }, |
| { |
| "epoch": 0.7473010537203439, |
| "grad_norm": 1.5202350616455078, |
| "learning_rate": 4.477656825628054e-06, |
| "loss": 0.7271114349365234, |
| "memory(GiB)": 76.04, |
| "step": 2890, |
| "token_acc": 0.8097763430943048, |
| "train_speed(iter/s)": 0.027754 |
| }, |
| { |
| "epoch": 0.748593962117784, |
| "grad_norm": 1.6034634113311768, |
| "learning_rate": 4.475474891807153e-06, |
| "loss": 0.6789961814880371, |
| "memory(GiB)": 76.04, |
| "step": 2895, |
| "token_acc": 0.78770261615017, |
| "train_speed(iter/s)": 0.027753 |
| }, |
| { |
| "epoch": 0.749886870515224, |
| "grad_norm": 1.1834633350372314, |
| "learning_rate": 4.473288944257627e-06, |
| "loss": 0.712617301940918, |
| "memory(GiB)": 76.04, |
| "step": 2900, |
| "token_acc": 0.7983367123174314, |
| "train_speed(iter/s)": 0.027753 |
| }, |
| { |
| "epoch": 0.7511797789126641, |
| "grad_norm": 2.4413537979125977, |
| "learning_rate": 4.471098987420841e-06, |
| "loss": 0.7433537483215332, |
| "memory(GiB)": 76.04, |
| "step": 2905, |
| "token_acc": 0.8024606971975393, |
| "train_speed(iter/s)": 0.027754 |
| }, |
| { |
| "epoch": 0.7524726873101041, |
| "grad_norm": 1.2915472984313965, |
| "learning_rate": 4.468905025746301e-06, |
| "loss": 0.7077127456665039, |
| "memory(GiB)": 76.04, |
| "step": 2910, |
| "token_acc": 0.8141985793699815, |
| "train_speed(iter/s)": 0.027756 |
| }, |
| { |
| "epoch": 0.7537655957075441, |
| "grad_norm": 1.2215969562530518, |
| "learning_rate": 4.466707063691653e-06, |
| "loss": 0.7059410095214844, |
| "memory(GiB)": 76.04, |
| "step": 2915, |
| "token_acc": 0.7902067464635474, |
| "train_speed(iter/s)": 0.027753 |
| }, |
| { |
| "epoch": 0.7550585041049842, |
| "grad_norm": 1.0937923192977905, |
| "learning_rate": 4.464505105722672e-06, |
| "loss": 0.7048573017120361, |
| "memory(GiB)": 76.04, |
| "step": 2920, |
| "token_acc": 0.7998023436397007, |
| "train_speed(iter/s)": 0.027752 |
| }, |
| { |
| "epoch": 0.7563514125024242, |
| "grad_norm": 1.2312453985214233, |
| "learning_rate": 4.4622991563132475e-06, |
| "loss": 0.6955265045166016, |
| "memory(GiB)": 76.04, |
| "step": 2925, |
| "token_acc": 0.808813281410125, |
| "train_speed(iter/s)": 0.027754 |
| }, |
| { |
| "epoch": 0.7576443208998642, |
| "grad_norm": 1.7371655702590942, |
| "learning_rate": 4.460089219945383e-06, |
| "loss": 0.6832226276397705, |
| "memory(GiB)": 76.04, |
| "step": 2930, |
| "token_acc": 0.8051185818094706, |
| "train_speed(iter/s)": 0.027754 |
| }, |
| { |
| "epoch": 0.7589372292973043, |
| "grad_norm": 1.2064937353134155, |
| "learning_rate": 4.457875301109181e-06, |
| "loss": 0.6924856662750244, |
| "memory(GiB)": 76.04, |
| "step": 2935, |
| "token_acc": 0.8090518665345227, |
| "train_speed(iter/s)": 0.027753 |
| }, |
| { |
| "epoch": 0.7602301376947443, |
| "grad_norm": 1.9841270446777344, |
| "learning_rate": 4.455657404302836e-06, |
| "loss": 0.6858362197875977, |
| "memory(GiB)": 76.04, |
| "step": 2940, |
| "token_acc": 0.8241852487135506, |
| "train_speed(iter/s)": 0.027753 |
| }, |
| { |
| "epoch": 0.7615230460921844, |
| "grad_norm": 1.5064440965652466, |
| "learning_rate": 4.4534355340326226e-06, |
| "loss": 0.6784512519836425, |
| "memory(GiB)": 76.04, |
| "step": 2945, |
| "token_acc": 0.8192387024189012, |
| "train_speed(iter/s)": 0.027752 |
| }, |
| { |
| "epoch": 0.7628159544896244, |
| "grad_norm": 1.823947548866272, |
| "learning_rate": 4.451209694812893e-06, |
| "loss": 0.6957567214965821, |
| "memory(GiB)": 76.04, |
| "step": 2950, |
| "token_acc": 0.8207297541953903, |
| "train_speed(iter/s)": 0.027753 |
| }, |
| { |
| "epoch": 0.7641088628870645, |
| "grad_norm": 1.7657442092895508, |
| "learning_rate": 4.448979891166059e-06, |
| "loss": 0.7199502944946289, |
| "memory(GiB)": 76.04, |
| "step": 2955, |
| "token_acc": 0.8217210270645385, |
| "train_speed(iter/s)": 0.027754 |
| }, |
| { |
| "epoch": 0.7654017712845045, |
| "grad_norm": 1.6712024211883545, |
| "learning_rate": 4.44674612762259e-06, |
| "loss": 0.700252914428711, |
| "memory(GiB)": 76.04, |
| "step": 2960, |
| "token_acc": 0.8259719184364637, |
| "train_speed(iter/s)": 0.027753 |
| }, |
| { |
| "epoch": 0.7666946796819445, |
| "grad_norm": 1.6742796897888184, |
| "learning_rate": 4.444508408720999e-06, |
| "loss": 0.7040081977844238, |
| "memory(GiB)": 76.04, |
| "step": 2965, |
| "token_acc": 0.8206386483928634, |
| "train_speed(iter/s)": 0.027753 |
| }, |
| { |
| "epoch": 0.7679875880793846, |
| "grad_norm": 1.0268195867538452, |
| "learning_rate": 4.442266739007838e-06, |
| "loss": 0.725772476196289, |
| "memory(GiB)": 76.04, |
| "step": 2970, |
| "token_acc": 0.7764441447516296, |
| "train_speed(iter/s)": 0.027754 |
| }, |
| { |
| "epoch": 0.7692804964768246, |
| "grad_norm": 1.271381139755249, |
| "learning_rate": 4.440021123037683e-06, |
| "loss": 0.7173772335052491, |
| "memory(GiB)": 76.04, |
| "step": 2975, |
| "token_acc": 0.8247627142654766, |
| "train_speed(iter/s)": 0.027754 |
| }, |
| { |
| "epoch": 0.7705734048742646, |
| "grad_norm": 1.742287039756775, |
| "learning_rate": 4.437771565373131e-06, |
| "loss": 0.6777096748352051, |
| "memory(GiB)": 76.04, |
| "step": 2980, |
| "token_acc": 0.8065741348588578, |
| "train_speed(iter/s)": 0.027755 |
| }, |
| { |
| "epoch": 0.7718663132717047, |
| "grad_norm": 1.113531470298767, |
| "learning_rate": 4.4355180705847854e-06, |
| "loss": 0.6992631912231445, |
| "memory(GiB)": 76.04, |
| "step": 2985, |
| "token_acc": 0.8095548168203159, |
| "train_speed(iter/s)": 0.027755 |
| }, |
| { |
| "epoch": 0.7731592216691447, |
| "grad_norm": 1.5363075733184814, |
| "learning_rate": 4.43326064325125e-06, |
| "loss": 0.6818428993225097, |
| "memory(GiB)": 76.04, |
| "step": 2990, |
| "token_acc": 0.8093805660003958, |
| "train_speed(iter/s)": 0.027756 |
| }, |
| { |
| "epoch": 0.7744521300665848, |
| "grad_norm": 1.2914507389068604, |
| "learning_rate": 4.43099928795912e-06, |
| "loss": 0.6791769027709961, |
| "memory(GiB)": 76.04, |
| "step": 2995, |
| "token_acc": 0.7912520619379556, |
| "train_speed(iter/s)": 0.027756 |
| }, |
| { |
| "epoch": 0.7757450384640249, |
| "grad_norm": 1.2839219570159912, |
| "learning_rate": 4.428734009302968e-06, |
| "loss": 0.6807722091674805, |
| "memory(GiB)": 76.04, |
| "step": 3000, |
| "token_acc": 0.8037732367729139, |
| "train_speed(iter/s)": 0.027757 |
| }, |
| { |
| "epoch": 0.7770379468614649, |
| "grad_norm": 1.6240931749343872, |
| "learning_rate": 4.42646481188534e-06, |
| "loss": 0.6738556861877442, |
| "memory(GiB)": 76.04, |
| "step": 3005, |
| "token_acc": 0.8336527405136067, |
| "train_speed(iter/s)": 0.02771 |
| }, |
| { |
| "epoch": 0.7783308552589049, |
| "grad_norm": 2.119504690170288, |
| "learning_rate": 4.424191700316745e-06, |
| "loss": 0.7054489135742188, |
| "memory(GiB)": 76.04, |
| "step": 3010, |
| "token_acc": 0.8144756176741961, |
| "train_speed(iter/s)": 0.02771 |
| }, |
| { |
| "epoch": 0.7796237636563449, |
| "grad_norm": 1.3605269193649292, |
| "learning_rate": 4.421914679215643e-06, |
| "loss": 0.6763367652893066, |
| "memory(GiB)": 76.04, |
| "step": 3015, |
| "token_acc": 0.832611100866679, |
| "train_speed(iter/s)": 0.02771 |
| }, |
| { |
| "epoch": 0.780916672053785, |
| "grad_norm": 1.5182582139968872, |
| "learning_rate": 4.419633753208438e-06, |
| "loss": 0.6742976188659668, |
| "memory(GiB)": 76.04, |
| "step": 3020, |
| "token_acc": 0.8088533082175653, |
| "train_speed(iter/s)": 0.027711 |
| }, |
| { |
| "epoch": 0.782209580451225, |
| "grad_norm": 1.180389404296875, |
| "learning_rate": 4.417348926929467e-06, |
| "loss": 0.6577554702758789, |
| "memory(GiB)": 76.04, |
| "step": 3025, |
| "token_acc": 0.7916683734076106, |
| "train_speed(iter/s)": 0.027712 |
| }, |
| { |
| "epoch": 0.783502488848665, |
| "grad_norm": 1.0676547288894653, |
| "learning_rate": 4.4150602050209935e-06, |
| "loss": 0.6725570678710937, |
| "memory(GiB)": 76.04, |
| "step": 3030, |
| "token_acc": 0.8131301520575388, |
| "train_speed(iter/s)": 0.027712 |
| }, |
| { |
| "epoch": 0.7847953972461051, |
| "grad_norm": 1.066395878791809, |
| "learning_rate": 4.412767592133195e-06, |
| "loss": 0.6555842399597168, |
| "memory(GiB)": 76.04, |
| "step": 3035, |
| "token_acc": 0.8660530809527944, |
| "train_speed(iter/s)": 0.027711 |
| }, |
| { |
| "epoch": 0.7860883056435451, |
| "grad_norm": 5.532017230987549, |
| "learning_rate": 4.410471092924154e-06, |
| "loss": 0.6637729167938232, |
| "memory(GiB)": 76.04, |
| "step": 3040, |
| "token_acc": 0.814479006834984, |
| "train_speed(iter/s)": 0.02771 |
| }, |
| { |
| "epoch": 0.7873812140409852, |
| "grad_norm": 1.9172098636627197, |
| "learning_rate": 4.408170712059848e-06, |
| "loss": 0.706690502166748, |
| "memory(GiB)": 76.04, |
| "step": 3045, |
| "token_acc": 0.7951164898437917, |
| "train_speed(iter/s)": 0.027708 |
| }, |
| { |
| "epoch": 0.7886741224384253, |
| "grad_norm": 2.5375490188598633, |
| "learning_rate": 4.405866454214145e-06, |
| "loss": 0.6923388481140137, |
| "memory(GiB)": 76.04, |
| "step": 3050, |
| "token_acc": 0.7856790394210209, |
| "train_speed(iter/s)": 0.027708 |
| }, |
| { |
| "epoch": 0.7899670308358653, |
| "grad_norm": 1.3066571950912476, |
| "learning_rate": 4.403558324068787e-06, |
| "loss": 0.6584675788879395, |
| "memory(GiB)": 76.04, |
| "step": 3055, |
| "token_acc": 0.8082852648138438, |
| "train_speed(iter/s)": 0.027709 |
| }, |
| { |
| "epoch": 0.7912599392333053, |
| "grad_norm": 1.8484247922897339, |
| "learning_rate": 4.401246326313386e-06, |
| "loss": 0.6835250854492188, |
| "memory(GiB)": 76.04, |
| "step": 3060, |
| "token_acc": 0.8032010726107177, |
| "train_speed(iter/s)": 0.02771 |
| }, |
| { |
| "epoch": 0.7925528476307454, |
| "grad_norm": 1.7470216751098633, |
| "learning_rate": 4.398930465645409e-06, |
| "loss": 0.6875529289245605, |
| "memory(GiB)": 76.04, |
| "step": 3065, |
| "token_acc": 0.8029278650053081, |
| "train_speed(iter/s)": 0.02771 |
| }, |
| { |
| "epoch": 0.7938457560281854, |
| "grad_norm": 1.1840174198150635, |
| "learning_rate": 4.396610746770173e-06, |
| "loss": 0.6479888916015625, |
| "memory(GiB)": 76.04, |
| "step": 3070, |
| "token_acc": 0.8107086371176175, |
| "train_speed(iter/s)": 0.02771 |
| }, |
| { |
| "epoch": 0.7951386644256254, |
| "grad_norm": 1.2682684659957886, |
| "learning_rate": 4.394287174400838e-06, |
| "loss": 0.6412975788116455, |
| "memory(GiB)": 76.04, |
| "step": 3075, |
| "token_acc": 0.829871190130624, |
| "train_speed(iter/s)": 0.027708 |
| }, |
| { |
| "epoch": 0.7964315728230655, |
| "grad_norm": 1.5862990617752075, |
| "learning_rate": 4.3919597532583845e-06, |
| "loss": 0.680488395690918, |
| "memory(GiB)": 76.04, |
| "step": 3080, |
| "token_acc": 0.82756076566791, |
| "train_speed(iter/s)": 0.027709 |
| }, |
| { |
| "epoch": 0.7977244812205055, |
| "grad_norm": 1.4510713815689087, |
| "learning_rate": 4.389628488071622e-06, |
| "loss": 0.644444751739502, |
| "memory(GiB)": 76.04, |
| "step": 3085, |
| "token_acc": 0.800807537012113, |
| "train_speed(iter/s)": 0.027708 |
| }, |
| { |
| "epoch": 0.7990173896179456, |
| "grad_norm": 1.2568798065185547, |
| "learning_rate": 4.387293383577165e-06, |
| "loss": 0.6682034015655518, |
| "memory(GiB)": 76.04, |
| "step": 3090, |
| "token_acc": 0.8065676636686886, |
| "train_speed(iter/s)": 0.027707 |
| }, |
| { |
| "epoch": 0.8003102980153856, |
| "grad_norm": 1.0545753240585327, |
| "learning_rate": 4.38495444451943e-06, |
| "loss": 0.6688919544219971, |
| "memory(GiB)": 76.04, |
| "step": 3095, |
| "token_acc": 0.7727210465036641, |
| "train_speed(iter/s)": 0.027708 |
| }, |
| { |
| "epoch": 0.8016032064128257, |
| "grad_norm": 1.586976170539856, |
| "learning_rate": 4.382611675650626e-06, |
| "loss": 0.6349334716796875, |
| "memory(GiB)": 76.04, |
| "step": 3100, |
| "token_acc": 0.806030889924001, |
| "train_speed(iter/s)": 0.027707 |
| }, |
| { |
| "epoch": 0.8028961148102657, |
| "grad_norm": 2.7589170932769775, |
| "learning_rate": 4.380265081730739e-06, |
| "loss": 0.6485045433044434, |
| "memory(GiB)": 76.04, |
| "step": 3105, |
| "token_acc": 0.8114932360204947, |
| "train_speed(iter/s)": 0.027707 |
| }, |
| { |
| "epoch": 0.8041890232077057, |
| "grad_norm": 1.262620210647583, |
| "learning_rate": 4.377914667527532e-06, |
| "loss": 0.6574973106384278, |
| "memory(GiB)": 76.04, |
| "step": 3110, |
| "token_acc": 0.8018425922280404, |
| "train_speed(iter/s)": 0.027707 |
| }, |
| { |
| "epoch": 0.8054819316051458, |
| "grad_norm": 1.670192003250122, |
| "learning_rate": 4.375560437816527e-06, |
| "loss": 0.6576096534729003, |
| "memory(GiB)": 76.04, |
| "step": 3115, |
| "token_acc": 0.8066886816886817, |
| "train_speed(iter/s)": 0.027706 |
| }, |
| { |
| "epoch": 0.8067748400025858, |
| "grad_norm": 1.9839909076690674, |
| "learning_rate": 4.373202397380998e-06, |
| "loss": 0.6304091930389404, |
| "memory(GiB)": 76.04, |
| "step": 3120, |
| "token_acc": 0.8234421364985163, |
| "train_speed(iter/s)": 0.027704 |
| }, |
| { |
| "epoch": 0.8080677484000258, |
| "grad_norm": 1.1080540418624878, |
| "learning_rate": 4.370840551011963e-06, |
| "loss": 0.6576041221618653, |
| "memory(GiB)": 76.04, |
| "step": 3125, |
| "token_acc": 0.8217494089834515, |
| "train_speed(iter/s)": 0.027706 |
| }, |
| { |
| "epoch": 0.8093606567974659, |
| "grad_norm": 1.1593878269195557, |
| "learning_rate": 4.3684749035081705e-06, |
| "loss": 0.6419290542602539, |
| "memory(GiB)": 76.04, |
| "step": 3130, |
| "token_acc": 0.7999515151515152, |
| "train_speed(iter/s)": 0.027706 |
| }, |
| { |
| "epoch": 0.810653565194906, |
| "grad_norm": 1.1493967771530151, |
| "learning_rate": 4.366105459676097e-06, |
| "loss": 0.646766471862793, |
| "memory(GiB)": 76.04, |
| "step": 3135, |
| "token_acc": 0.8102760440126118, |
| "train_speed(iter/s)": 0.027707 |
| }, |
| { |
| "epoch": 0.811946473592346, |
| "grad_norm": 1.3651187419891357, |
| "learning_rate": 4.3637322243299255e-06, |
| "loss": 0.6666352272033691, |
| "memory(GiB)": 76.04, |
| "step": 3140, |
| "token_acc": 0.8152125937913786, |
| "train_speed(iter/s)": 0.027707 |
| }, |
| { |
| "epoch": 0.8132393819897861, |
| "grad_norm": 1.128293514251709, |
| "learning_rate": 4.361355202291548e-06, |
| "loss": 0.6353740692138672, |
| "memory(GiB)": 76.04, |
| "step": 3145, |
| "token_acc": 0.8045256453234998, |
| "train_speed(iter/s)": 0.027707 |
| }, |
| { |
| "epoch": 0.8145322903872261, |
| "grad_norm": 1.6029019355773926, |
| "learning_rate": 4.358974398390548e-06, |
| "loss": 0.6691800117492676, |
| "memory(GiB)": 76.04, |
| "step": 3150, |
| "token_acc": 0.8306377243385117, |
| "train_speed(iter/s)": 0.027707 |
| }, |
| { |
| "epoch": 0.8158251987846661, |
| "grad_norm": 1.3377734422683716, |
| "learning_rate": 4.356589817464193e-06, |
| "loss": 0.6470844745635986, |
| "memory(GiB)": 76.04, |
| "step": 3155, |
| "token_acc": 0.8250958558747833, |
| "train_speed(iter/s)": 0.027707 |
| }, |
| { |
| "epoch": 0.8171181071821062, |
| "grad_norm": 19.943740844726562, |
| "learning_rate": 4.354201464357424e-06, |
| "loss": 0.6441401481628418, |
| "memory(GiB)": 76.04, |
| "step": 3160, |
| "token_acc": 0.8164092901323066, |
| "train_speed(iter/s)": 0.027705 |
| }, |
| { |
| "epoch": 0.8184110155795462, |
| "grad_norm": 2.096036672592163, |
| "learning_rate": 4.3518093439228484e-06, |
| "loss": 0.6595673561096191, |
| "memory(GiB)": 76.04, |
| "step": 3165, |
| "token_acc": 0.8180080986396105, |
| "train_speed(iter/s)": 0.027705 |
| }, |
| { |
| "epoch": 0.8197039239769862, |
| "grad_norm": 1.3042539358139038, |
| "learning_rate": 4.349413461020725e-06, |
| "loss": 0.6536635398864746, |
| "memory(GiB)": 76.04, |
| "step": 3170, |
| "token_acc": 0.7721032106415942, |
| "train_speed(iter/s)": 0.027707 |
| }, |
| { |
| "epoch": 0.8209968323744262, |
| "grad_norm": 1.1923153400421143, |
| "learning_rate": 4.347013820518959e-06, |
| "loss": 0.6662230491638184, |
| "memory(GiB)": 76.04, |
| "step": 3175, |
| "token_acc": 0.7864515044545302, |
| "train_speed(iter/s)": 0.027706 |
| }, |
| { |
| "epoch": 0.8222897407718663, |
| "grad_norm": 0.9587339162826538, |
| "learning_rate": 4.344610427293091e-06, |
| "loss": 0.637930154800415, |
| "memory(GiB)": 76.04, |
| "step": 3180, |
| "token_acc": 0.8349569816879248, |
| "train_speed(iter/s)": 0.027703 |
| }, |
| { |
| "epoch": 0.8235826491693063, |
| "grad_norm": 1.4377241134643555, |
| "learning_rate": 4.342203286226284e-06, |
| "loss": 0.6546686172485352, |
| "memory(GiB)": 76.04, |
| "step": 3185, |
| "token_acc": 0.8348570317058938, |
| "train_speed(iter/s)": 0.027704 |
| }, |
| { |
| "epoch": 0.8248755575667464, |
| "grad_norm": 1.3020581007003784, |
| "learning_rate": 4.339792402209318e-06, |
| "loss": 0.6620816707611084, |
| "memory(GiB)": 76.04, |
| "step": 3190, |
| "token_acc": 0.8184861571423789, |
| "train_speed(iter/s)": 0.027705 |
| }, |
| { |
| "epoch": 0.8261684659641865, |
| "grad_norm": 1.6828721761703491, |
| "learning_rate": 4.337377780140575e-06, |
| "loss": 0.6277073860168457, |
| "memory(GiB)": 76.04, |
| "step": 3195, |
| "token_acc": 0.8200602270094973, |
| "train_speed(iter/s)": 0.027704 |
| }, |
| { |
| "epoch": 0.8274613743616265, |
| "grad_norm": 1.6351348161697388, |
| "learning_rate": 4.334959424926036e-06, |
| "loss": 0.6136197566986084, |
| "memory(GiB)": 76.04, |
| "step": 3200, |
| "token_acc": 0.805330584597261, |
| "train_speed(iter/s)": 0.027705 |
| }, |
| { |
| "epoch": 0.8287542827590665, |
| "grad_norm": 1.1907836198806763, |
| "learning_rate": 4.3325373414792625e-06, |
| "loss": 0.647891902923584, |
| "memory(GiB)": 76.04, |
| "step": 3205, |
| "token_acc": 0.8155163083583411, |
| "train_speed(iter/s)": 0.027705 |
| }, |
| { |
| "epoch": 0.8300471911565066, |
| "grad_norm": 1.5931166410446167, |
| "learning_rate": 4.330111534721394e-06, |
| "loss": 0.6463868141174316, |
| "memory(GiB)": 76.04, |
| "step": 3210, |
| "token_acc": 0.8210717829970228, |
| "train_speed(iter/s)": 0.027707 |
| }, |
| { |
| "epoch": 0.8313400995539466, |
| "grad_norm": 1.8650577068328857, |
| "learning_rate": 4.327682009581134e-06, |
| "loss": 0.6347787380218506, |
| "memory(GiB)": 76.04, |
| "step": 3215, |
| "token_acc": 0.8082674179217684, |
| "train_speed(iter/s)": 0.027707 |
| }, |
| { |
| "epoch": 0.8326330079513866, |
| "grad_norm": 2.3283803462982178, |
| "learning_rate": 4.325248770994741e-06, |
| "loss": 0.6708244800567627, |
| "memory(GiB)": 76.04, |
| "step": 3220, |
| "token_acc": 0.7994605743296843, |
| "train_speed(iter/s)": 0.027706 |
| }, |
| { |
| "epoch": 0.8339259163488267, |
| "grad_norm": 1.4204248189926147, |
| "learning_rate": 4.322811823906018e-06, |
| "loss": 0.6237285137176514, |
| "memory(GiB)": 76.04, |
| "step": 3225, |
| "token_acc": 0.8479689603294006, |
| "train_speed(iter/s)": 0.027704 |
| }, |
| { |
| "epoch": 0.8352188247462667, |
| "grad_norm": 2.9352431297302246, |
| "learning_rate": 4.3203711732663035e-06, |
| "loss": 0.6458423614501954, |
| "memory(GiB)": 76.04, |
| "step": 3230, |
| "token_acc": 0.8232549095858448, |
| "train_speed(iter/s)": 0.027703 |
| }, |
| { |
| "epoch": 0.8365117331437067, |
| "grad_norm": 1.1581929922103882, |
| "learning_rate": 4.31792682403446e-06, |
| "loss": 0.6388854026794434, |
| "memory(GiB)": 76.04, |
| "step": 3235, |
| "token_acc": 0.7934440464560947, |
| "train_speed(iter/s)": 0.027701 |
| }, |
| { |
| "epoch": 0.8378046415411468, |
| "grad_norm": 1.1175485849380493, |
| "learning_rate": 4.315478781176867e-06, |
| "loss": 0.6145687103271484, |
| "memory(GiB)": 76.04, |
| "step": 3240, |
| "token_acc": 0.8398847580708817, |
| "train_speed(iter/s)": 0.027701 |
| }, |
| { |
| "epoch": 0.8390975499385869, |
| "grad_norm": 1.2444353103637695, |
| "learning_rate": 4.313027049667405e-06, |
| "loss": 0.6328566074371338, |
| "memory(GiB)": 76.04, |
| "step": 3245, |
| "token_acc": 0.8066215947504474, |
| "train_speed(iter/s)": 0.027699 |
| }, |
| { |
| "epoch": 0.8403904583360269, |
| "grad_norm": 1.141342043876648, |
| "learning_rate": 4.310571634487451e-06, |
| "loss": 0.629487419128418, |
| "memory(GiB)": 76.04, |
| "step": 3250, |
| "token_acc": 0.8341686379856461, |
| "train_speed(iter/s)": 0.0277 |
| }, |
| { |
| "epoch": 0.8416833667334669, |
| "grad_norm": 1.3321287631988525, |
| "learning_rate": 4.3081125406258655e-06, |
| "loss": 0.6453184604644775, |
| "memory(GiB)": 76.04, |
| "step": 3255, |
| "token_acc": 0.7997035782341732, |
| "train_speed(iter/s)": 0.0277 |
| }, |
| { |
| "epoch": 0.842976275130907, |
| "grad_norm": 1.0039650201797485, |
| "learning_rate": 4.305649773078987e-06, |
| "loss": 0.666410255432129, |
| "memory(GiB)": 76.04, |
| "step": 3260, |
| "token_acc": 0.8168428282519937, |
| "train_speed(iter/s)": 0.027699 |
| }, |
| { |
| "epoch": 0.844269183528347, |
| "grad_norm": 1.2001808881759644, |
| "learning_rate": 4.303183336850612e-06, |
| "loss": 0.660033893585205, |
| "memory(GiB)": 76.04, |
| "step": 3265, |
| "token_acc": 0.8161585530947095, |
| "train_speed(iter/s)": 0.027699 |
| }, |
| { |
| "epoch": 0.845562091925787, |
| "grad_norm": 1.2713844776153564, |
| "learning_rate": 4.300713236951996e-06, |
| "loss": 0.6356592655181885, |
| "memory(GiB)": 76.04, |
| "step": 3270, |
| "token_acc": 0.8038586795618277, |
| "train_speed(iter/s)": 0.027701 |
| }, |
| { |
| "epoch": 0.8468550003232271, |
| "grad_norm": 1.3221766948699951, |
| "learning_rate": 4.298239478401836e-06, |
| "loss": 0.6444936275482178, |
| "memory(GiB)": 76.04, |
| "step": 3275, |
| "token_acc": 0.8171707402848603, |
| "train_speed(iter/s)": 0.027701 |
| }, |
| { |
| "epoch": 0.8481479087206671, |
| "grad_norm": 1.2477511167526245, |
| "learning_rate": 4.295762066226262e-06, |
| "loss": 0.611814022064209, |
| "memory(GiB)": 76.04, |
| "step": 3280, |
| "token_acc": 0.8180022127390548, |
| "train_speed(iter/s)": 0.027701 |
| }, |
| { |
| "epoch": 0.8494408171181071, |
| "grad_norm": 1.2124427556991577, |
| "learning_rate": 4.293281005458831e-06, |
| "loss": 0.6272024631500244, |
| "memory(GiB)": 76.04, |
| "step": 3285, |
| "token_acc": 0.8499176225558768, |
| "train_speed(iter/s)": 0.027699 |
| }, |
| { |
| "epoch": 0.8507337255155473, |
| "grad_norm": 4.538881778717041, |
| "learning_rate": 4.290796301140506e-06, |
| "loss": 0.6252808094024658, |
| "memory(GiB)": 76.04, |
| "step": 3290, |
| "token_acc": 0.8332118523213436, |
| "train_speed(iter/s)": 0.027698 |
| }, |
| { |
| "epoch": 0.8520266339129873, |
| "grad_norm": 1.4854230880737305, |
| "learning_rate": 4.288307958319662e-06, |
| "loss": 0.6353150367736816, |
| "memory(GiB)": 76.04, |
| "step": 3295, |
| "token_acc": 0.7681834998150203, |
| "train_speed(iter/s)": 0.027696 |
| }, |
| { |
| "epoch": 0.8533195423104273, |
| "grad_norm": 1.1235853433609009, |
| "learning_rate": 4.285815982052058e-06, |
| "loss": 0.6190371036529541, |
| "memory(GiB)": 76.04, |
| "step": 3300, |
| "token_acc": 0.8698982508288556, |
| "train_speed(iter/s)": 0.027695 |
| }, |
| { |
| "epoch": 0.8546124507078674, |
| "grad_norm": 1.0786458253860474, |
| "learning_rate": 4.283320377400842e-06, |
| "loss": 0.6302780151367188, |
| "memory(GiB)": 76.04, |
| "step": 3305, |
| "token_acc": 0.792910447761194, |
| "train_speed(iter/s)": 0.027693 |
| }, |
| { |
| "epoch": 0.8559053591053074, |
| "grad_norm": 1.0524226427078247, |
| "learning_rate": 4.280821149436531e-06, |
| "loss": 0.629145622253418, |
| "memory(GiB)": 76.04, |
| "step": 3310, |
| "token_acc": 0.8330289590399165, |
| "train_speed(iter/s)": 0.027692 |
| }, |
| { |
| "epoch": 0.8571982675027474, |
| "grad_norm": 1.4898467063903809, |
| "learning_rate": 4.278318303237003e-06, |
| "loss": 0.6266490459442139, |
| "memory(GiB)": 76.04, |
| "step": 3315, |
| "token_acc": 0.8104506584124652, |
| "train_speed(iter/s)": 0.027692 |
| }, |
| { |
| "epoch": 0.8584911759001874, |
| "grad_norm": 1.1593666076660156, |
| "learning_rate": 4.275811843887491e-06, |
| "loss": 0.6542300224304199, |
| "memory(GiB)": 76.04, |
| "step": 3320, |
| "token_acc": 0.8437677735485847, |
| "train_speed(iter/s)": 0.027692 |
| }, |
| { |
| "epoch": 0.8597840842976275, |
| "grad_norm": 1.2023606300354004, |
| "learning_rate": 4.273301776480564e-06, |
| "loss": 0.6109468936920166, |
| "memory(GiB)": 76.04, |
| "step": 3325, |
| "token_acc": 0.8550740689464211, |
| "train_speed(iter/s)": 0.02769 |
| }, |
| { |
| "epoch": 0.8610769926950675, |
| "grad_norm": 1.4408375024795532, |
| "learning_rate": 4.270788106116125e-06, |
| "loss": 0.6247062683105469, |
| "memory(GiB)": 76.04, |
| "step": 3330, |
| "token_acc": 0.8023508574188873, |
| "train_speed(iter/s)": 0.027689 |
| }, |
| { |
| "epoch": 0.8623699010925076, |
| "grad_norm": 1.197199821472168, |
| "learning_rate": 4.268270837901399e-06, |
| "loss": 0.638817024230957, |
| "memory(GiB)": 76.04, |
| "step": 3335, |
| "token_acc": 0.8134350688210652, |
| "train_speed(iter/s)": 0.02769 |
| }, |
| { |
| "epoch": 0.8636628094899477, |
| "grad_norm": 1.215605616569519, |
| "learning_rate": 4.265749976950917e-06, |
| "loss": 0.6219228744506836, |
| "memory(GiB)": 76.04, |
| "step": 3340, |
| "token_acc": 0.830684302174799, |
| "train_speed(iter/s)": 0.027689 |
| }, |
| { |
| "epoch": 0.8649557178873877, |
| "grad_norm": 1.1016297340393066, |
| "learning_rate": 4.263225528386512e-06, |
| "loss": 0.6183833122253418, |
| "memory(GiB)": 76.04, |
| "step": 3345, |
| "token_acc": 0.8504693786320966, |
| "train_speed(iter/s)": 0.027689 |
| }, |
| { |
| "epoch": 0.8662486262848277, |
| "grad_norm": 1.1827194690704346, |
| "learning_rate": 4.260697497337306e-06, |
| "loss": 0.6260892868041992, |
| "memory(GiB)": 76.04, |
| "step": 3350, |
| "token_acc": 0.8206773446545735, |
| "train_speed(iter/s)": 0.027689 |
| }, |
| { |
| "epoch": 0.8675415346822678, |
| "grad_norm": 1.1964573860168457, |
| "learning_rate": 4.2581658889397e-06, |
| "loss": 0.6217505931854248, |
| "memory(GiB)": 76.04, |
| "step": 3355, |
| "token_acc": 0.8112748538011696, |
| "train_speed(iter/s)": 0.027691 |
| }, |
| { |
| "epoch": 0.8688344430797078, |
| "grad_norm": 1.9450868368148804, |
| "learning_rate": 4.2556307083373635e-06, |
| "loss": 0.6057548522949219, |
| "memory(GiB)": 76.04, |
| "step": 3360, |
| "token_acc": 0.8432432432432433, |
| "train_speed(iter/s)": 0.027691 |
| }, |
| { |
| "epoch": 0.8701273514771478, |
| "grad_norm": 1.085252285003662, |
| "learning_rate": 4.253091960681222e-06, |
| "loss": 0.650747537612915, |
| "memory(GiB)": 76.04, |
| "step": 3365, |
| "token_acc": 0.8127441586201813, |
| "train_speed(iter/s)": 0.027692 |
| }, |
| { |
| "epoch": 0.8714202598745879, |
| "grad_norm": 1.4419254064559937, |
| "learning_rate": 4.250549651129451e-06, |
| "loss": 0.6490330696105957, |
| "memory(GiB)": 76.04, |
| "step": 3370, |
| "token_acc": 0.817296827466319, |
| "train_speed(iter/s)": 0.027693 |
| }, |
| { |
| "epoch": 0.8727131682720279, |
| "grad_norm": 0.9393129348754883, |
| "learning_rate": 4.248003784847462e-06, |
| "loss": 0.5855797290802002, |
| "memory(GiB)": 76.04, |
| "step": 3375, |
| "token_acc": 0.8437131244263799, |
| "train_speed(iter/s)": 0.027693 |
| }, |
| { |
| "epoch": 0.874006076669468, |
| "grad_norm": 1.4661402702331543, |
| "learning_rate": 4.245454367007893e-06, |
| "loss": 0.6375166416168213, |
| "memory(GiB)": 76.04, |
| "step": 3380, |
| "token_acc": 0.8220987966001851, |
| "train_speed(iter/s)": 0.027691 |
| }, |
| { |
| "epoch": 0.8752989850669081, |
| "grad_norm": 1.0783532857894897, |
| "learning_rate": 4.242901402790597e-06, |
| "loss": 0.5942583084106445, |
| "memory(GiB)": 76.04, |
| "step": 3385, |
| "token_acc": 0.8271346924848588, |
| "train_speed(iter/s)": 0.027691 |
| }, |
| { |
| "epoch": 0.8765918934643481, |
| "grad_norm": 1.002106785774231, |
| "learning_rate": 4.240344897382633e-06, |
| "loss": 0.6190349578857421, |
| "memory(GiB)": 76.04, |
| "step": 3390, |
| "token_acc": 0.8104156272786583, |
| "train_speed(iter/s)": 0.02769 |
| }, |
| { |
| "epoch": 0.8778848018617881, |
| "grad_norm": 1.0779801607131958, |
| "learning_rate": 4.237784855978258e-06, |
| "loss": 0.6126032829284668, |
| "memory(GiB)": 76.04, |
| "step": 3395, |
| "token_acc": 0.8469879143753689, |
| "train_speed(iter/s)": 0.02769 |
| }, |
| { |
| "epoch": 0.8791777102592281, |
| "grad_norm": 1.1293411254882812, |
| "learning_rate": 4.2352212837789086e-06, |
| "loss": 0.6498593330383301, |
| "memory(GiB)": 76.04, |
| "step": 3400, |
| "token_acc": 0.8174972974987885, |
| "train_speed(iter/s)": 0.02769 |
| }, |
| { |
| "epoch": 0.8804706186566682, |
| "grad_norm": 1.9169739484786987, |
| "learning_rate": 4.232654185993197e-06, |
| "loss": 0.6312263965606689, |
| "memory(GiB)": 76.04, |
| "step": 3405, |
| "token_acc": 0.8185292511864264, |
| "train_speed(iter/s)": 0.02769 |
| }, |
| { |
| "epoch": 0.8817635270541082, |
| "grad_norm": 2.342643976211548, |
| "learning_rate": 4.2300835678369005e-06, |
| "loss": 0.5902108192443848, |
| "memory(GiB)": 76.04, |
| "step": 3410, |
| "token_acc": 0.8093889113719142, |
| "train_speed(iter/s)": 0.02769 |
| }, |
| { |
| "epoch": 0.8830564354515482, |
| "grad_norm": 1.2619035243988037, |
| "learning_rate": 4.227509434532945e-06, |
| "loss": 0.6150105953216553, |
| "memory(GiB)": 76.04, |
| "step": 3415, |
| "token_acc": 0.8166153846153846, |
| "train_speed(iter/s)": 0.027689 |
| }, |
| { |
| "epoch": 0.8843493438489883, |
| "grad_norm": 1.1575284004211426, |
| "learning_rate": 4.224931791311403e-06, |
| "loss": 0.6235898017883301, |
| "memory(GiB)": 76.04, |
| "step": 3420, |
| "token_acc": 0.8300420709195501, |
| "train_speed(iter/s)": 0.027689 |
| }, |
| { |
| "epoch": 0.8856422522464283, |
| "grad_norm": 1.627013921737671, |
| "learning_rate": 4.2223506434094754e-06, |
| "loss": 0.601617431640625, |
| "memory(GiB)": 76.04, |
| "step": 3425, |
| "token_acc": 0.8208349821923229, |
| "train_speed(iter/s)": 0.027689 |
| }, |
| { |
| "epoch": 0.8869351606438683, |
| "grad_norm": 1.0518633127212524, |
| "learning_rate": 4.219765996071483e-06, |
| "loss": 0.6408526420593261, |
| "memory(GiB)": 76.04, |
| "step": 3430, |
| "token_acc": 0.8019846954820224, |
| "train_speed(iter/s)": 0.02769 |
| }, |
| { |
| "epoch": 0.8882280690413085, |
| "grad_norm": 0.9839140176773071, |
| "learning_rate": 4.217177854548862e-06, |
| "loss": 0.6014208793640137, |
| "memory(GiB)": 76.04, |
| "step": 3435, |
| "token_acc": 0.8176200504021818, |
| "train_speed(iter/s)": 0.027689 |
| }, |
| { |
| "epoch": 0.8895209774387485, |
| "grad_norm": 1.1011220216751099, |
| "learning_rate": 4.21458622410014e-06, |
| "loss": 0.6313972473144531, |
| "memory(GiB)": 76.04, |
| "step": 3440, |
| "token_acc": 0.8165993852079553, |
| "train_speed(iter/s)": 0.027689 |
| }, |
| { |
| "epoch": 0.8908138858361885, |
| "grad_norm": 1.156149983406067, |
| "learning_rate": 4.211991109990941e-06, |
| "loss": 0.6519000053405761, |
| "memory(GiB)": 76.04, |
| "step": 3445, |
| "token_acc": 0.7982918203025058, |
| "train_speed(iter/s)": 0.027691 |
| }, |
| { |
| "epoch": 0.8921067942336286, |
| "grad_norm": 1.144892692565918, |
| "learning_rate": 4.2093925174939606e-06, |
| "loss": 0.6042433738708496, |
| "memory(GiB)": 76.04, |
| "step": 3450, |
| "token_acc": 0.8215976553693545, |
| "train_speed(iter/s)": 0.02769 |
| }, |
| { |
| "epoch": 0.8933997026310686, |
| "grad_norm": 1.2235312461853027, |
| "learning_rate": 4.206790451888968e-06, |
| "loss": 0.6446715354919433, |
| "memory(GiB)": 76.04, |
| "step": 3455, |
| "token_acc": 0.8082164853885467, |
| "train_speed(iter/s)": 0.02769 |
| }, |
| { |
| "epoch": 0.8946926110285086, |
| "grad_norm": 1.1150991916656494, |
| "learning_rate": 4.204184918462783e-06, |
| "loss": 0.628176212310791, |
| "memory(GiB)": 76.04, |
| "step": 3460, |
| "token_acc": 0.803219563687544, |
| "train_speed(iter/s)": 0.027688 |
| }, |
| { |
| "epoch": 0.8959855194259486, |
| "grad_norm": 1.0735828876495361, |
| "learning_rate": 4.201575922509277e-06, |
| "loss": 0.6142620086669922, |
| "memory(GiB)": 76.04, |
| "step": 3465, |
| "token_acc": 0.8073942988329826, |
| "train_speed(iter/s)": 0.027688 |
| }, |
| { |
| "epoch": 0.8972784278233887, |
| "grad_norm": 0.9149400591850281, |
| "learning_rate": 4.198963469329351e-06, |
| "loss": 0.5981680870056152, |
| "memory(GiB)": 76.04, |
| "step": 3470, |
| "token_acc": 0.8314239727324371, |
| "train_speed(iter/s)": 0.027688 |
| }, |
| { |
| "epoch": 0.8985713362208287, |
| "grad_norm": 0.9329715371131897, |
| "learning_rate": 4.196347564230933e-06, |
| "loss": 0.6357330322265625, |
| "memory(GiB)": 76.04, |
| "step": 3475, |
| "token_acc": 0.815760798500632, |
| "train_speed(iter/s)": 0.027685 |
| }, |
| { |
| "epoch": 0.8998642446182687, |
| "grad_norm": 2.6730282306671143, |
| "learning_rate": 4.193728212528965e-06, |
| "loss": 0.6184768676757812, |
| "memory(GiB)": 76.04, |
| "step": 3480, |
| "token_acc": 0.8195593938666986, |
| "train_speed(iter/s)": 0.027686 |
| }, |
| { |
| "epoch": 0.9011571530157089, |
| "grad_norm": 1.3464089632034302, |
| "learning_rate": 4.191105419545391e-06, |
| "loss": 0.6040889263153076, |
| "memory(GiB)": 76.04, |
| "step": 3485, |
| "token_acc": 0.8135844450257215, |
| "train_speed(iter/s)": 0.027687 |
| }, |
| { |
| "epoch": 0.9024500614131489, |
| "grad_norm": 1.3222928047180176, |
| "learning_rate": 4.188479190609146e-06, |
| "loss": 0.6070952415466309, |
| "memory(GiB)": 76.04, |
| "step": 3490, |
| "token_acc": 0.8631930567568373, |
| "train_speed(iter/s)": 0.027685 |
| }, |
| { |
| "epoch": 0.9037429698105889, |
| "grad_norm": 1.5099354982376099, |
| "learning_rate": 4.185849531056149e-06, |
| "loss": 0.6029548645019531, |
| "memory(GiB)": 76.04, |
| "step": 3495, |
| "token_acc": 0.8064048588584444, |
| "train_speed(iter/s)": 0.027686 |
| }, |
| { |
| "epoch": 0.905035878208029, |
| "grad_norm": 0.861173689365387, |
| "learning_rate": 4.1832164462292865e-06, |
| "loss": 0.6235533714294433, |
| "memory(GiB)": 76.04, |
| "step": 3500, |
| "token_acc": 0.8324725253388218, |
| "train_speed(iter/s)": 0.027686 |
| }, |
| { |
| "epoch": 0.906328786605469, |
| "grad_norm": 1.607367992401123, |
| "learning_rate": 4.1805799414784044e-06, |
| "loss": 0.6227012634277344, |
| "memory(GiB)": 76.04, |
| "step": 3505, |
| "token_acc": 0.7834105927606273, |
| "train_speed(iter/s)": 0.027685 |
| }, |
| { |
| "epoch": 0.907621695002909, |
| "grad_norm": 1.4191631078720093, |
| "learning_rate": 4.177940022160299e-06, |
| "loss": 0.6287036895751953, |
| "memory(GiB)": 76.04, |
| "step": 3510, |
| "token_acc": 0.8294466536361799, |
| "train_speed(iter/s)": 0.027686 |
| }, |
| { |
| "epoch": 0.9089146034003491, |
| "grad_norm": 1.4229148626327515, |
| "learning_rate": 4.175296693638703e-06, |
| "loss": 0.6371709823608398, |
| "memory(GiB)": 76.04, |
| "step": 3515, |
| "token_acc": 0.7917865974784124, |
| "train_speed(iter/s)": 0.027687 |
| }, |
| { |
| "epoch": 0.9102075117977891, |
| "grad_norm": 1.202496886253357, |
| "learning_rate": 4.172649961284276e-06, |
| "loss": 0.6231961250305176, |
| "memory(GiB)": 76.04, |
| "step": 3520, |
| "token_acc": 0.7966687617850409, |
| "train_speed(iter/s)": 0.027686 |
| }, |
| { |
| "epoch": 0.9115004201952291, |
| "grad_norm": 7.718347549438477, |
| "learning_rate": 4.169999830474594e-06, |
| "loss": 0.6057699203491211, |
| "memory(GiB)": 76.04, |
| "step": 3525, |
| "token_acc": 0.8327813797285322, |
| "train_speed(iter/s)": 0.027687 |
| }, |
| { |
| "epoch": 0.9127933285926693, |
| "grad_norm": 2.148632526397705, |
| "learning_rate": 4.167346306594136e-06, |
| "loss": 0.6129049777984619, |
| "memory(GiB)": 76.04, |
| "step": 3530, |
| "token_acc": 0.8131457736835553, |
| "train_speed(iter/s)": 0.027687 |
| }, |
| { |
| "epoch": 0.9140862369901093, |
| "grad_norm": 3.352047920227051, |
| "learning_rate": 4.1646893950342785e-06, |
| "loss": 0.6119277000427246, |
| "memory(GiB)": 76.04, |
| "step": 3535, |
| "token_acc": 0.8336779068938476, |
| "train_speed(iter/s)": 0.027686 |
| }, |
| { |
| "epoch": 0.9153791453875493, |
| "grad_norm": 1.2060611248016357, |
| "learning_rate": 4.1620291011932765e-06, |
| "loss": 0.6040964126586914, |
| "memory(GiB)": 76.04, |
| "step": 3540, |
| "token_acc": 0.8219741053244108, |
| "train_speed(iter/s)": 0.027686 |
| }, |
| { |
| "epoch": 0.9166720537849893, |
| "grad_norm": 1.5047026872634888, |
| "learning_rate": 4.159365430476262e-06, |
| "loss": 0.6265472412109375, |
| "memory(GiB)": 76.04, |
| "step": 3545, |
| "token_acc": 0.8295702534832969, |
| "train_speed(iter/s)": 0.027688 |
| }, |
| { |
| "epoch": 0.9179649621824294, |
| "grad_norm": 1.3251553773880005, |
| "learning_rate": 4.156698388295222e-06, |
| "loss": 0.6167987823486328, |
| "memory(GiB)": 76.04, |
| "step": 3550, |
| "token_acc": 0.8122264371170119, |
| "train_speed(iter/s)": 0.027688 |
| }, |
| { |
| "epoch": 0.9192578705798694, |
| "grad_norm": 0.998990535736084, |
| "learning_rate": 4.154027980069002e-06, |
| "loss": 0.5864760398864746, |
| "memory(GiB)": 76.04, |
| "step": 3555, |
| "token_acc": 0.8300649626616478, |
| "train_speed(iter/s)": 0.027686 |
| }, |
| { |
| "epoch": 0.9205507789773094, |
| "grad_norm": 1.6433701515197754, |
| "learning_rate": 4.151354211223278e-06, |
| "loss": 0.5933123588562011, |
| "memory(GiB)": 76.04, |
| "step": 3560, |
| "token_acc": 0.8285720878715156, |
| "train_speed(iter/s)": 0.027685 |
| }, |
| { |
| "epoch": 0.9218436873747495, |
| "grad_norm": 1.4250352382659912, |
| "learning_rate": 4.148677087190559e-06, |
| "loss": 0.6165533065795898, |
| "memory(GiB)": 76.04, |
| "step": 3565, |
| "token_acc": 0.8124648441894476, |
| "train_speed(iter/s)": 0.027686 |
| }, |
| { |
| "epoch": 0.9231365957721895, |
| "grad_norm": 1.9832043647766113, |
| "learning_rate": 4.145996613410169e-06, |
| "loss": 0.601347017288208, |
| "memory(GiB)": 76.04, |
| "step": 3570, |
| "token_acc": 0.8363061287980919, |
| "train_speed(iter/s)": 0.027686 |
| }, |
| { |
| "epoch": 0.9244295041696295, |
| "grad_norm": 1.1738619804382324, |
| "learning_rate": 4.143312795328239e-06, |
| "loss": 0.5805646419525147, |
| "memory(GiB)": 76.04, |
| "step": 3575, |
| "token_acc": 0.7983795574945466, |
| "train_speed(iter/s)": 0.027683 |
| }, |
| { |
| "epoch": 0.9257224125670697, |
| "grad_norm": 1.162148356437683, |
| "learning_rate": 4.1406256383976945e-06, |
| "loss": 0.6304599285125733, |
| "memory(GiB)": 76.04, |
| "step": 3580, |
| "token_acc": 0.7998939233217154, |
| "train_speed(iter/s)": 0.027683 |
| }, |
| { |
| "epoch": 0.9270153209645097, |
| "grad_norm": 1.555344820022583, |
| "learning_rate": 4.1379351480782445e-06, |
| "loss": 0.6200345039367676, |
| "memory(GiB)": 76.04, |
| "step": 3585, |
| "token_acc": 0.8321557607386592, |
| "train_speed(iter/s)": 0.027683 |
| }, |
| { |
| "epoch": 0.9283082293619497, |
| "grad_norm": 1.4574919939041138, |
| "learning_rate": 4.135241329836372e-06, |
| "loss": 0.6034027099609375, |
| "memory(GiB)": 76.04, |
| "step": 3590, |
| "token_acc": 0.8022632918173296, |
| "train_speed(iter/s)": 0.027682 |
| }, |
| { |
| "epoch": 0.9296011377593898, |
| "grad_norm": 1.1182024478912354, |
| "learning_rate": 4.132544189145321e-06, |
| "loss": 0.6192724227905273, |
| "memory(GiB)": 76.04, |
| "step": 3595, |
| "token_acc": 0.8323038628192126, |
| "train_speed(iter/s)": 0.027684 |
| }, |
| { |
| "epoch": 0.9308940461568298, |
| "grad_norm": 1.55838143825531, |
| "learning_rate": 4.129843731485084e-06, |
| "loss": 0.6345338821411133, |
| "memory(GiB)": 76.04, |
| "step": 3600, |
| "token_acc": 0.7966842932685436, |
| "train_speed(iter/s)": 0.027684 |
| }, |
| { |
| "epoch": 0.9321869545542698, |
| "grad_norm": 5.046814918518066, |
| "learning_rate": 4.127139962342395e-06, |
| "loss": 0.5721908569335937, |
| "memory(GiB)": 76.04, |
| "step": 3605, |
| "token_acc": 0.8486438258386867, |
| "train_speed(iter/s)": 0.027683 |
| }, |
| { |
| "epoch": 0.9334798629517099, |
| "grad_norm": 1.2368015050888062, |
| "learning_rate": 4.124432887210715e-06, |
| "loss": 0.6120264053344726, |
| "memory(GiB)": 76.04, |
| "step": 3610, |
| "token_acc": 0.7992660550458716, |
| "train_speed(iter/s)": 0.027684 |
| }, |
| { |
| "epoch": 0.9347727713491499, |
| "grad_norm": 0.935610294342041, |
| "learning_rate": 4.121722511590225e-06, |
| "loss": 0.5871891975402832, |
| "memory(GiB)": 76.04, |
| "step": 3615, |
| "token_acc": 0.814580080217997, |
| "train_speed(iter/s)": 0.027681 |
| }, |
| { |
| "epoch": 0.9360656797465899, |
| "grad_norm": 1.0046820640563965, |
| "learning_rate": 4.119008840987807e-06, |
| "loss": 0.6071587562561035, |
| "memory(GiB)": 76.04, |
| "step": 3620, |
| "token_acc": 0.836279004654744, |
| "train_speed(iter/s)": 0.027681 |
| }, |
| { |
| "epoch": 0.93735858814403, |
| "grad_norm": 1.2669122219085693, |
| "learning_rate": 4.116291880917042e-06, |
| "loss": 0.6148792266845703, |
| "memory(GiB)": 76.04, |
| "step": 3625, |
| "token_acc": 0.8264746964650264, |
| "train_speed(iter/s)": 0.027682 |
| }, |
| { |
| "epoch": 0.9386514965414701, |
| "grad_norm": 1.177242636680603, |
| "learning_rate": 4.113571636898191e-06, |
| "loss": 0.6176681518554688, |
| "memory(GiB)": 76.04, |
| "step": 3630, |
| "token_acc": 0.8233010616902857, |
| "train_speed(iter/s)": 0.027684 |
| }, |
| { |
| "epoch": 0.9399444049389101, |
| "grad_norm": 1.3925117254257202, |
| "learning_rate": 4.110848114458191e-06, |
| "loss": 0.5971219062805175, |
| "memory(GiB)": 76.04, |
| "step": 3635, |
| "token_acc": 0.8236563174186287, |
| "train_speed(iter/s)": 0.027685 |
| }, |
| { |
| "epoch": 0.9412373133363501, |
| "grad_norm": 1.1656811237335205, |
| "learning_rate": 4.108121319130638e-06, |
| "loss": 0.6168715476989746, |
| "memory(GiB)": 76.04, |
| "step": 3640, |
| "token_acc": 0.8150085866048964, |
| "train_speed(iter/s)": 0.027684 |
| }, |
| { |
| "epoch": 0.9425302217337902, |
| "grad_norm": 1.0381889343261719, |
| "learning_rate": 4.105391256455776e-06, |
| "loss": 0.6066938400268554, |
| "memory(GiB)": 76.04, |
| "step": 3645, |
| "token_acc": 0.8139741020502543, |
| "train_speed(iter/s)": 0.027684 |
| }, |
| { |
| "epoch": 0.9438231301312302, |
| "grad_norm": 1.0766489505767822, |
| "learning_rate": 4.1026579319804894e-06, |
| "loss": 0.60537691116333, |
| "memory(GiB)": 76.04, |
| "step": 3650, |
| "token_acc": 0.8072617246596067, |
| "train_speed(iter/s)": 0.027684 |
| }, |
| { |
| "epoch": 0.9451160385286702, |
| "grad_norm": 1.0981967449188232, |
| "learning_rate": 4.099921351258292e-06, |
| "loss": 0.6052407264709473, |
| "memory(GiB)": 76.04, |
| "step": 3655, |
| "token_acc": 0.7947088678415858, |
| "train_speed(iter/s)": 0.027682 |
| }, |
| { |
| "epoch": 0.9464089469261103, |
| "grad_norm": 1.5217511653900146, |
| "learning_rate": 4.097181519849309e-06, |
| "loss": 0.5945847034454346, |
| "memory(GiB)": 76.04, |
| "step": 3660, |
| "token_acc": 0.8231042745613357, |
| "train_speed(iter/s)": 0.027681 |
| }, |
| { |
| "epoch": 0.9477018553235503, |
| "grad_norm": 1.4334150552749634, |
| "learning_rate": 4.094438443320274e-06, |
| "loss": 0.6149433135986329, |
| "memory(GiB)": 76.04, |
| "step": 3665, |
| "token_acc": 0.8234104473930844, |
| "train_speed(iter/s)": 0.02768 |
| }, |
| { |
| "epoch": 0.9489947637209903, |
| "grad_norm": 1.6359714269638062, |
| "learning_rate": 4.091692127244511e-06, |
| "loss": 0.6281001567840576, |
| "memory(GiB)": 76.04, |
| "step": 3670, |
| "token_acc": 0.8364228557642044, |
| "train_speed(iter/s)": 0.027681 |
| }, |
| { |
| "epoch": 0.9502876721184305, |
| "grad_norm": 1.234947681427002, |
| "learning_rate": 4.088942577201931e-06, |
| "loss": 0.5957602977752685, |
| "memory(GiB)": 76.04, |
| "step": 3675, |
| "token_acc": 0.841389663306884, |
| "train_speed(iter/s)": 0.027681 |
| }, |
| { |
| "epoch": 0.9515805805158705, |
| "grad_norm": 1.0987111330032349, |
| "learning_rate": 4.086189798779008e-06, |
| "loss": 0.6053364753723145, |
| "memory(GiB)": 76.04, |
| "step": 3680, |
| "token_acc": 0.8306053185547966, |
| "train_speed(iter/s)": 0.027681 |
| }, |
| { |
| "epoch": 0.9528734889133105, |
| "grad_norm": 1.273268222808838, |
| "learning_rate": 4.083433797568783e-06, |
| "loss": 0.6212119579315185, |
| "memory(GiB)": 76.04, |
| "step": 3685, |
| "token_acc": 0.8268924889543446, |
| "train_speed(iter/s)": 0.027681 |
| }, |
| { |
| "epoch": 0.9541663973107505, |
| "grad_norm": 1.1013809442520142, |
| "learning_rate": 4.0806745791708406e-06, |
| "loss": 0.6078325271606445, |
| "memory(GiB)": 76.04, |
| "step": 3690, |
| "token_acc": 0.8134150493701056, |
| "train_speed(iter/s)": 0.027682 |
| }, |
| { |
| "epoch": 0.9554593057081906, |
| "grad_norm": 1.449779987335205, |
| "learning_rate": 4.0779121491913035e-06, |
| "loss": 0.6228477478027343, |
| "memory(GiB)": 76.04, |
| "step": 3695, |
| "token_acc": 0.7949391768744967, |
| "train_speed(iter/s)": 0.027683 |
| }, |
| { |
| "epoch": 0.9567522141056306, |
| "grad_norm": 1.177632451057434, |
| "learning_rate": 4.075146513242818e-06, |
| "loss": 0.6086900711059571, |
| "memory(GiB)": 76.04, |
| "step": 3700, |
| "token_acc": 0.8241206030150754, |
| "train_speed(iter/s)": 0.027683 |
| }, |
| { |
| "epoch": 0.9580451225030706, |
| "grad_norm": 1.7194976806640625, |
| "learning_rate": 4.072377676944545e-06, |
| "loss": 0.6042545318603516, |
| "memory(GiB)": 76.04, |
| "step": 3705, |
| "token_acc": 0.8258380709664772, |
| "train_speed(iter/s)": 0.027682 |
| }, |
| { |
| "epoch": 0.9593380309005107, |
| "grad_norm": 1.1962262392044067, |
| "learning_rate": 4.069605645922152e-06, |
| "loss": 0.5851446151733398, |
| "memory(GiB)": 76.04, |
| "step": 3710, |
| "token_acc": 0.8344977304124729, |
| "train_speed(iter/s)": 0.027682 |
| }, |
| { |
| "epoch": 0.9606309392979507, |
| "grad_norm": 1.1156598329544067, |
| "learning_rate": 4.066830425807789e-06, |
| "loss": 0.5880330085754395, |
| "memory(GiB)": 76.04, |
| "step": 3715, |
| "token_acc": 0.8415927377759439, |
| "train_speed(iter/s)": 0.027682 |
| }, |
| { |
| "epoch": 0.9619238476953907, |
| "grad_norm": 2.626760244369507, |
| "learning_rate": 4.0640520222400945e-06, |
| "loss": 0.6129249095916748, |
| "memory(GiB)": 76.04, |
| "step": 3720, |
| "token_acc": 0.8348955352032055, |
| "train_speed(iter/s)": 0.027683 |
| }, |
| { |
| "epoch": 0.9632167560928309, |
| "grad_norm": 1.0791536569595337, |
| "learning_rate": 4.0612704408641675e-06, |
| "loss": 0.6016806125640869, |
| "memory(GiB)": 76.04, |
| "step": 3725, |
| "token_acc": 0.8637572233842663, |
| "train_speed(iter/s)": 0.027684 |
| }, |
| { |
| "epoch": 0.9645096644902709, |
| "grad_norm": 1.5433270931243896, |
| "learning_rate": 4.058485687331569e-06, |
| "loss": 0.6169820785522461, |
| "memory(GiB)": 76.04, |
| "step": 3730, |
| "token_acc": 0.8325906172146391, |
| "train_speed(iter/s)": 0.027684 |
| }, |
| { |
| "epoch": 0.9658025728877109, |
| "grad_norm": 1.101804256439209, |
| "learning_rate": 4.055697767300302e-06, |
| "loss": 0.5675209999084473, |
| "memory(GiB)": 76.04, |
| "step": 3735, |
| "token_acc": 0.8326423357664233, |
| "train_speed(iter/s)": 0.027683 |
| }, |
| { |
| "epoch": 0.967095481285151, |
| "grad_norm": 1.0612066984176636, |
| "learning_rate": 4.0529066864348046e-06, |
| "loss": 0.5953152656555176, |
| "memory(GiB)": 76.04, |
| "step": 3740, |
| "token_acc": 0.8497927240323893, |
| "train_speed(iter/s)": 0.027684 |
| }, |
| { |
| "epoch": 0.968388389682591, |
| "grad_norm": 1.5735872983932495, |
| "learning_rate": 4.050112450405937e-06, |
| "loss": 0.6017944812774658, |
| "memory(GiB)": 76.04, |
| "step": 3745, |
| "token_acc": 0.8264524103831892, |
| "train_speed(iter/s)": 0.027682 |
| }, |
| { |
| "epoch": 0.969681298080031, |
| "grad_norm": 1.157410979270935, |
| "learning_rate": 4.047315064890968e-06, |
| "loss": 0.5977309226989747, |
| "memory(GiB)": 76.04, |
| "step": 3750, |
| "token_acc": 0.815944055944056, |
| "train_speed(iter/s)": 0.027682 |
| }, |
| { |
| "epoch": 0.9709742064774711, |
| "grad_norm": 1.57929265499115, |
| "learning_rate": 4.044514535573569e-06, |
| "loss": 0.589405632019043, |
| "memory(GiB)": 76.04, |
| "step": 3755, |
| "token_acc": 0.8179965511835711, |
| "train_speed(iter/s)": 0.027684 |
| }, |
| { |
| "epoch": 0.9722671148749111, |
| "grad_norm": 1.016497015953064, |
| "learning_rate": 4.041710868143796e-06, |
| "loss": 0.589882230758667, |
| "memory(GiB)": 76.04, |
| "step": 3760, |
| "token_acc": 0.8092984293193717, |
| "train_speed(iter/s)": 0.027683 |
| }, |
| { |
| "epoch": 0.9735600232723511, |
| "grad_norm": 1.266408085823059, |
| "learning_rate": 4.038904068298083e-06, |
| "loss": 0.5920291423797608, |
| "memory(GiB)": 76.04, |
| "step": 3765, |
| "token_acc": 0.823118662159758, |
| "train_speed(iter/s)": 0.027685 |
| }, |
| { |
| "epoch": 0.9748529316697911, |
| "grad_norm": 1.1780680418014526, |
| "learning_rate": 4.036094141739225e-06, |
| "loss": 0.6140639305114746, |
| "memory(GiB)": 76.04, |
| "step": 3770, |
| "token_acc": 0.7946187371681734, |
| "train_speed(iter/s)": 0.027686 |
| }, |
| { |
| "epoch": 0.9761458400672313, |
| "grad_norm": 1.0114781856536865, |
| "learning_rate": 4.0332810941763745e-06, |
| "loss": 0.5897872924804688, |
| "memory(GiB)": 76.04, |
| "step": 3775, |
| "token_acc": 0.8349316002363314, |
| "train_speed(iter/s)": 0.027685 |
| }, |
| { |
| "epoch": 0.9774387484646713, |
| "grad_norm": 0.9569077491760254, |
| "learning_rate": 4.030464931325021e-06, |
| "loss": 0.6214170455932617, |
| "memory(GiB)": 76.04, |
| "step": 3780, |
| "token_acc": 0.8027143591975626, |
| "train_speed(iter/s)": 0.027686 |
| }, |
| { |
| "epoch": 0.9787316568621113, |
| "grad_norm": 1.5879004001617432, |
| "learning_rate": 4.027645658906986e-06, |
| "loss": 0.6039529800415039, |
| "memory(GiB)": 76.04, |
| "step": 3785, |
| "token_acc": 0.815490288962577, |
| "train_speed(iter/s)": 0.027686 |
| }, |
| { |
| "epoch": 0.9800245652595514, |
| "grad_norm": 1.1456199884414673, |
| "learning_rate": 4.02482328265041e-06, |
| "loss": 0.5806800842285156, |
| "memory(GiB)": 76.04, |
| "step": 3790, |
| "token_acc": 0.8437443809112813, |
| "train_speed(iter/s)": 0.027686 |
| }, |
| { |
| "epoch": 0.9813174736569914, |
| "grad_norm": 0.8533855676651001, |
| "learning_rate": 4.0219978082897355e-06, |
| "loss": 0.593365478515625, |
| "memory(GiB)": 76.04, |
| "step": 3795, |
| "token_acc": 0.8269325803035651, |
| "train_speed(iter/s)": 0.027687 |
| }, |
| { |
| "epoch": 0.9826103820544314, |
| "grad_norm": 1.1695212125778198, |
| "learning_rate": 4.019169241565704e-06, |
| "loss": 0.6025032043457031, |
| "memory(GiB)": 76.04, |
| "step": 3800, |
| "token_acc": 0.8349052595802532, |
| "train_speed(iter/s)": 0.027688 |
| }, |
| { |
| "epoch": 0.9839032904518715, |
| "grad_norm": 1.4339202642440796, |
| "learning_rate": 4.0163375882253366e-06, |
| "loss": 0.6019165992736817, |
| "memory(GiB)": 76.04, |
| "step": 3805, |
| "token_acc": 0.8244785353007565, |
| "train_speed(iter/s)": 0.027688 |
| }, |
| { |
| "epoch": 0.9851961988493115, |
| "grad_norm": 1.4449810981750488, |
| "learning_rate": 4.013502854021929e-06, |
| "loss": 0.606717872619629, |
| "memory(GiB)": 76.04, |
| "step": 3810, |
| "token_acc": 0.8192271272038598, |
| "train_speed(iter/s)": 0.027689 |
| }, |
| { |
| "epoch": 0.9864891072467515, |
| "grad_norm": 1.1717056035995483, |
| "learning_rate": 4.010665044715034e-06, |
| "loss": 0.609653091430664, |
| "memory(GiB)": 76.04, |
| "step": 3815, |
| "token_acc": 0.8458090195926885, |
| "train_speed(iter/s)": 0.027688 |
| }, |
| { |
| "epoch": 0.9877820156441917, |
| "grad_norm": 4.467617988586426, |
| "learning_rate": 4.007824166070455e-06, |
| "loss": 0.6024861335754395, |
| "memory(GiB)": 76.04, |
| "step": 3820, |
| "token_acc": 0.8408324188107141, |
| "train_speed(iter/s)": 0.027687 |
| }, |
| { |
| "epoch": 0.9890749240416317, |
| "grad_norm": 1.2757420539855957, |
| "learning_rate": 4.004980223860228e-06, |
| "loss": 0.6156288146972656, |
| "memory(GiB)": 76.04, |
| "step": 3825, |
| "token_acc": 0.826805096743747, |
| "train_speed(iter/s)": 0.027687 |
| }, |
| { |
| "epoch": 0.9903678324390717, |
| "grad_norm": 1.178419828414917, |
| "learning_rate": 4.002133223862615e-06, |
| "loss": 0.5892780303955079, |
| "memory(GiB)": 76.04, |
| "step": 3830, |
| "token_acc": 0.809322033898305, |
| "train_speed(iter/s)": 0.027687 |
| }, |
| { |
| "epoch": 0.9916607408365117, |
| "grad_norm": 1.8065563440322876, |
| "learning_rate": 3.999283171862093e-06, |
| "loss": 0.6025252342224121, |
| "memory(GiB)": 76.04, |
| "step": 3835, |
| "token_acc": 0.804472722092968, |
| "train_speed(iter/s)": 0.02769 |
| }, |
| { |
| "epoch": 0.9929536492339518, |
| "grad_norm": 1.030576229095459, |
| "learning_rate": 3.996430073649338e-06, |
| "loss": 0.5885412216186523, |
| "memory(GiB)": 76.04, |
| "step": 3840, |
| "token_acc": 0.8501988939555641, |
| "train_speed(iter/s)": 0.02769 |
| }, |
| { |
| "epoch": 0.9942465576313918, |
| "grad_norm": 1.0532441139221191, |
| "learning_rate": 3.993573935021213e-06, |
| "loss": 0.6129741191864013, |
| "memory(GiB)": 76.04, |
| "step": 3845, |
| "token_acc": 0.8317618076792389, |
| "train_speed(iter/s)": 0.027689 |
| }, |
| { |
| "epoch": 0.9955394660288318, |
| "grad_norm": 1.620641827583313, |
| "learning_rate": 3.990714761780763e-06, |
| "loss": 0.583595085144043, |
| "memory(GiB)": 76.04, |
| "step": 3850, |
| "token_acc": 0.8565638488261922, |
| "train_speed(iter/s)": 0.027689 |
| }, |
| { |
| "epoch": 0.9968323744262719, |
| "grad_norm": 1.6248869895935059, |
| "learning_rate": 3.987852559737196e-06, |
| "loss": 0.6013848304748535, |
| "memory(GiB)": 76.04, |
| "step": 3855, |
| "token_acc": 0.7915984036967024, |
| "train_speed(iter/s)": 0.027689 |
| }, |
| { |
| "epoch": 0.9981252828237119, |
| "grad_norm": 1.3243690729141235, |
| "learning_rate": 3.984987334705875e-06, |
| "loss": 0.5860433101654052, |
| "memory(GiB)": 76.04, |
| "step": 3860, |
| "token_acc": 0.834250554154246, |
| "train_speed(iter/s)": 0.027689 |
| }, |
| { |
| "epoch": 0.9994181912211519, |
| "grad_norm": 1.1566941738128662, |
| "learning_rate": 3.9821190925083025e-06, |
| "loss": 0.5727869033813476, |
| "memory(GiB)": 76.04, |
| "step": 3865, |
| "token_acc": 0.8439064677644144, |
| "train_speed(iter/s)": 0.027688 |
| }, |
| { |
| "epoch": 1.000517163358976, |
| "grad_norm": 2.703934669494629, |
| "learning_rate": 3.979247838972114e-06, |
| "loss": 0.604734230041504, |
| "memory(GiB)": 76.04, |
| "step": 3870, |
| "token_acc": 0.8727201521763456, |
| "train_speed(iter/s)": 0.027694 |
| }, |
| { |
| "epoch": 1.001810071756416, |
| "grad_norm": 1.0446311235427856, |
| "learning_rate": 3.976373579931063e-06, |
| "loss": 0.5894432544708252, |
| "memory(GiB)": 76.04, |
| "step": 3875, |
| "token_acc": 0.7687516615396301, |
| "train_speed(iter/s)": 0.027693 |
| }, |
| { |
| "epoch": 1.003102980153856, |
| "grad_norm": 1.3841272592544556, |
| "learning_rate": 3.97349632122501e-06, |
| "loss": 0.5918097019195556, |
| "memory(GiB)": 76.04, |
| "step": 3880, |
| "token_acc": 0.8106176985459612, |
| "train_speed(iter/s)": 0.027694 |
| }, |
| { |
| "epoch": 1.0043958885512962, |
| "grad_norm": 1.3004447221755981, |
| "learning_rate": 3.970616068699906e-06, |
| "loss": 0.5655845642089844, |
| "memory(GiB)": 76.04, |
| "step": 3885, |
| "token_acc": 0.8302924727239388, |
| "train_speed(iter/s)": 0.027694 |
| }, |
| { |
| "epoch": 1.0056887969487363, |
| "grad_norm": 1.2096521854400635, |
| "learning_rate": 3.96773282820779e-06, |
| "loss": 0.5523721218109131, |
| "memory(GiB)": 76.04, |
| "step": 3890, |
| "token_acc": 0.8321216960121024, |
| "train_speed(iter/s)": 0.027693 |
| }, |
| { |
| "epoch": 1.0069817053461763, |
| "grad_norm": 0.9291108846664429, |
| "learning_rate": 3.9648466056067705e-06, |
| "loss": 0.5470512390136719, |
| "memory(GiB)": 76.04, |
| "step": 3895, |
| "token_acc": 0.8546372106154715, |
| "train_speed(iter/s)": 0.027694 |
| }, |
| { |
| "epoch": 1.0082746137436163, |
| "grad_norm": 1.7991231679916382, |
| "learning_rate": 3.961957406761012e-06, |
| "loss": 0.5519303321838379, |
| "memory(GiB)": 76.04, |
| "step": 3900, |
| "token_acc": 0.8346437931856088, |
| "train_speed(iter/s)": 0.027695 |
| }, |
| { |
| "epoch": 1.0095675221410563, |
| "grad_norm": 0.9921211004257202, |
| "learning_rate": 3.9590652375407305e-06, |
| "loss": 0.5495428562164306, |
| "memory(GiB)": 76.04, |
| "step": 3905, |
| "token_acc": 0.8289614561027837, |
| "train_speed(iter/s)": 0.027694 |
| }, |
| { |
| "epoch": 1.0108604305384963, |
| "grad_norm": 0.9098636507987976, |
| "learning_rate": 3.956170103822174e-06, |
| "loss": 0.5806960105895996, |
| "memory(GiB)": 76.04, |
| "step": 3910, |
| "token_acc": 0.8316886778453777, |
| "train_speed(iter/s)": 0.027694 |
| }, |
| { |
| "epoch": 1.0121533389359363, |
| "grad_norm": 1.403108835220337, |
| "learning_rate": 3.953272011487615e-06, |
| "loss": 0.5835510730743408, |
| "memory(GiB)": 76.04, |
| "step": 3915, |
| "token_acc": 0.8022167487684729, |
| "train_speed(iter/s)": 0.027695 |
| }, |
| { |
| "epoch": 1.0134462473333765, |
| "grad_norm": 0.8766242861747742, |
| "learning_rate": 3.950370966425336e-06, |
| "loss": 0.5739788055419922, |
| "memory(GiB)": 76.04, |
| "step": 3920, |
| "token_acc": 0.8167596743207391, |
| "train_speed(iter/s)": 0.027695 |
| }, |
| { |
| "epoch": 1.0147391557308165, |
| "grad_norm": 1.0786027908325195, |
| "learning_rate": 3.947466974529622e-06, |
| "loss": 0.57960524559021, |
| "memory(GiB)": 76.04, |
| "step": 3925, |
| "token_acc": 0.806325589127634, |
| "train_speed(iter/s)": 0.027695 |
| }, |
| { |
| "epoch": 1.0160320641282565, |
| "grad_norm": 1.3315753936767578, |
| "learning_rate": 3.9445600417007416e-06, |
| "loss": 0.5844710826873779, |
| "memory(GiB)": 76.04, |
| "step": 3930, |
| "token_acc": 0.8209227957971676, |
| "train_speed(iter/s)": 0.027695 |
| }, |
| { |
| "epoch": 1.0173249725256965, |
| "grad_norm": 1.4551076889038086, |
| "learning_rate": 3.941650173844939e-06, |
| "loss": 0.5371768951416016, |
| "memory(GiB)": 76.04, |
| "step": 3935, |
| "token_acc": 0.8264791248046439, |
| "train_speed(iter/s)": 0.027695 |
| }, |
| { |
| "epoch": 1.0186178809231365, |
| "grad_norm": 1.0872153043746948, |
| "learning_rate": 3.938737376874425e-06, |
| "loss": 0.5733814239501953, |
| "memory(GiB)": 76.04, |
| "step": 3940, |
| "token_acc": 0.8316657328103738, |
| "train_speed(iter/s)": 0.027697 |
| }, |
| { |
| "epoch": 1.0199107893205765, |
| "grad_norm": 0.9862046241760254, |
| "learning_rate": 3.935821656707359e-06, |
| "loss": 0.5849019050598144, |
| "memory(GiB)": 76.04, |
| "step": 3945, |
| "token_acc": 0.8450649147505084, |
| "train_speed(iter/s)": 0.027697 |
| }, |
| { |
| "epoch": 1.0212036977180168, |
| "grad_norm": 1.2747979164123535, |
| "learning_rate": 3.93290301926784e-06, |
| "loss": 0.5715857982635498, |
| "memory(GiB)": 76.04, |
| "step": 3950, |
| "token_acc": 0.8034803940358005, |
| "train_speed(iter/s)": 0.027699 |
| }, |
| { |
| "epoch": 1.0224966061154568, |
| "grad_norm": 0.8877357840538025, |
| "learning_rate": 3.929981470485897e-06, |
| "loss": 0.5560395240783691, |
| "memory(GiB)": 76.04, |
| "step": 3955, |
| "token_acc": 0.8330184222957014, |
| "train_speed(iter/s)": 0.027699 |
| }, |
| { |
| "epoch": 1.0237895145128968, |
| "grad_norm": 3.0129168033599854, |
| "learning_rate": 3.927057016297466e-06, |
| "loss": 0.5378780364990234, |
| "memory(GiB)": 76.04, |
| "step": 3960, |
| "token_acc": 0.8276883389862896, |
| "train_speed(iter/s)": 0.027697 |
| }, |
| { |
| "epoch": 1.0250824229103368, |
| "grad_norm": 1.279038429260254, |
| "learning_rate": 3.924129662644398e-06, |
| "loss": 0.5460095405578613, |
| "memory(GiB)": 76.04, |
| "step": 3965, |
| "token_acc": 0.8445347544377564, |
| "train_speed(iter/s)": 0.027698 |
| }, |
| { |
| "epoch": 1.0263753313077768, |
| "grad_norm": 1.0856847763061523, |
| "learning_rate": 3.921199415474426e-06, |
| "loss": 0.5677762985229492, |
| "memory(GiB)": 76.04, |
| "step": 3970, |
| "token_acc": 0.8344854941069809, |
| "train_speed(iter/s)": 0.027698 |
| }, |
| { |
| "epoch": 1.0276682397052168, |
| "grad_norm": 1.0214816331863403, |
| "learning_rate": 3.918266280741166e-06, |
| "loss": 0.5525214195251464, |
| "memory(GiB)": 76.04, |
| "step": 3975, |
| "token_acc": 0.8022044260742272, |
| "train_speed(iter/s)": 0.027699 |
| }, |
| { |
| "epoch": 1.0289611481026568, |
| "grad_norm": 2.20583176612854, |
| "learning_rate": 3.915330264404098e-06, |
| "loss": 0.5635844230651855, |
| "memory(GiB)": 76.04, |
| "step": 3980, |
| "token_acc": 0.8163771712158809, |
| "train_speed(iter/s)": 0.0277 |
| }, |
| { |
| "epoch": 1.030254056500097, |
| "grad_norm": 1.1798099279403687, |
| "learning_rate": 3.912391372428561e-06, |
| "loss": 0.563462209701538, |
| "memory(GiB)": 76.04, |
| "step": 3985, |
| "token_acc": 0.8396355353075171, |
| "train_speed(iter/s)": 0.027699 |
| }, |
| { |
| "epoch": 1.031546964897537, |
| "grad_norm": 0.9924677014350891, |
| "learning_rate": 3.9094496107857336e-06, |
| "loss": 0.5675541400909424, |
| "memory(GiB)": 76.04, |
| "step": 3990, |
| "token_acc": 0.8223220012828736, |
| "train_speed(iter/s)": 0.027699 |
| }, |
| { |
| "epoch": 1.032839873294977, |
| "grad_norm": 1.259884238243103, |
| "learning_rate": 3.906504985452626e-06, |
| "loss": 0.5578344345092774, |
| "memory(GiB)": 76.04, |
| "step": 3995, |
| "token_acc": 0.8310387984981227, |
| "train_speed(iter/s)": 0.027698 |
| }, |
| { |
| "epoch": 1.034132781692417, |
| "grad_norm": 1.3022695779800415, |
| "learning_rate": 3.903557502412065e-06, |
| "loss": 0.5636180877685547, |
| "memory(GiB)": 76.04, |
| "step": 4000, |
| "token_acc": 0.8399380474257637, |
| "train_speed(iter/s)": 0.027698 |
| }, |
| { |
| "epoch": 1.035425690089857, |
| "grad_norm": 1.0616282224655151, |
| "learning_rate": 3.900607167652687e-06, |
| "loss": 0.5414395809173584, |
| "memory(GiB)": 76.04, |
| "step": 4005, |
| "token_acc": 0.8558815464765561, |
| "train_speed(iter/s)": 0.027663 |
| }, |
| { |
| "epoch": 1.036718598487297, |
| "grad_norm": 1.1525382995605469, |
| "learning_rate": 3.897653987168919e-06, |
| "loss": 0.5726981163024902, |
| "memory(GiB)": 76.04, |
| "step": 4010, |
| "token_acc": 0.8494477021682804, |
| "train_speed(iter/s)": 0.027663 |
| }, |
| { |
| "epoch": 1.0380115068847373, |
| "grad_norm": 1.1002613306045532, |
| "learning_rate": 3.894697966960972e-06, |
| "loss": 0.5688316345214843, |
| "memory(GiB)": 76.04, |
| "step": 4015, |
| "token_acc": 0.8253162219554981, |
| "train_speed(iter/s)": 0.027664 |
| }, |
| { |
| "epoch": 1.0393044152821773, |
| "grad_norm": 0.9993764758110046, |
| "learning_rate": 3.891739113034826e-06, |
| "loss": 0.5663973331451416, |
| "memory(GiB)": 76.04, |
| "step": 4020, |
| "token_acc": 0.847761685319289, |
| "train_speed(iter/s)": 0.027665 |
| }, |
| { |
| "epoch": 1.0405973236796173, |
| "grad_norm": 1.1270966529846191, |
| "learning_rate": 3.888777431402219e-06, |
| "loss": 0.5679460525512695, |
| "memory(GiB)": 76.04, |
| "step": 4025, |
| "token_acc": 0.8138078016016533, |
| "train_speed(iter/s)": 0.027665 |
| }, |
| { |
| "epoch": 1.0418902320770573, |
| "grad_norm": 1.4055637121200562, |
| "learning_rate": 3.885812928080634e-06, |
| "loss": 0.5653609275817871, |
| "memory(GiB)": 76.04, |
| "step": 4030, |
| "token_acc": 0.8330082979618371, |
| "train_speed(iter/s)": 0.027665 |
| }, |
| { |
| "epoch": 1.0431831404744973, |
| "grad_norm": 1.0064263343811035, |
| "learning_rate": 3.8828456090932855e-06, |
| "loss": 0.5649868011474609, |
| "memory(GiB)": 76.04, |
| "step": 4035, |
| "token_acc": 0.8298977309044423, |
| "train_speed(iter/s)": 0.027665 |
| }, |
| { |
| "epoch": 1.0444760488719373, |
| "grad_norm": 1.3120592832565308, |
| "learning_rate": 3.879875480469112e-06, |
| "loss": 0.558688735961914, |
| "memory(GiB)": 76.04, |
| "step": 4040, |
| "token_acc": 0.8430891302155129, |
| "train_speed(iter/s)": 0.027664 |
| }, |
| { |
| "epoch": 1.0457689572693774, |
| "grad_norm": 1.1473355293273926, |
| "learning_rate": 3.876902548242758e-06, |
| "loss": 0.5573469161987304, |
| "memory(GiB)": 76.04, |
| "step": 4045, |
| "token_acc": 0.8069763883930848, |
| "train_speed(iter/s)": 0.027665 |
| }, |
| { |
| "epoch": 1.0470618656668176, |
| "grad_norm": 1.2132424116134644, |
| "learning_rate": 3.873926818454565e-06, |
| "loss": 0.6102540016174316, |
| "memory(GiB)": 76.04, |
| "step": 4050, |
| "token_acc": 0.8502197995428169, |
| "train_speed(iter/s)": 0.027665 |
| }, |
| { |
| "epoch": 1.0483547740642576, |
| "grad_norm": 6.2116618156433105, |
| "learning_rate": 3.87094829715056e-06, |
| "loss": 0.548386812210083, |
| "memory(GiB)": 76.04, |
| "step": 4055, |
| "token_acc": 0.8210007451137732, |
| "train_speed(iter/s)": 0.027665 |
| }, |
| { |
| "epoch": 1.0496476824616976, |
| "grad_norm": 1.0829912424087524, |
| "learning_rate": 3.867966990382438e-06, |
| "loss": 0.5702716827392578, |
| "memory(GiB)": 76.04, |
| "step": 4060, |
| "token_acc": 0.7936434822662367, |
| "train_speed(iter/s)": 0.027665 |
| }, |
| { |
| "epoch": 1.0509405908591376, |
| "grad_norm": 1.3445277214050293, |
| "learning_rate": 3.864982904207557e-06, |
| "loss": 0.5754476547241211, |
| "memory(GiB)": 76.04, |
| "step": 4065, |
| "token_acc": 0.824864653316809, |
| "train_speed(iter/s)": 0.027663 |
| }, |
| { |
| "epoch": 1.0522334992565776, |
| "grad_norm": 4.769752025604248, |
| "learning_rate": 3.861996044688922e-06, |
| "loss": 0.5743865013122559, |
| "memory(GiB)": 76.04, |
| "step": 4070, |
| "token_acc": 0.8401046687784052, |
| "train_speed(iter/s)": 0.027664 |
| }, |
| { |
| "epoch": 1.0535264076540176, |
| "grad_norm": 3.4262094497680664, |
| "learning_rate": 3.8590064178951695e-06, |
| "loss": 0.5537999153137207, |
| "memory(GiB)": 76.04, |
| "step": 4075, |
| "token_acc": 0.852727935517842, |
| "train_speed(iter/s)": 0.027663 |
| }, |
| { |
| "epoch": 1.0548193160514578, |
| "grad_norm": 1.150668978691101, |
| "learning_rate": 3.856014029900563e-06, |
| "loss": 0.541869068145752, |
| "memory(GiB)": 76.04, |
| "step": 4080, |
| "token_acc": 0.8340331114524663, |
| "train_speed(iter/s)": 0.027661 |
| }, |
| { |
| "epoch": 1.0561122244488979, |
| "grad_norm": 1.7679377794265747, |
| "learning_rate": 3.853018886784973e-06, |
| "loss": 0.5608885765075684, |
| "memory(GiB)": 76.04, |
| "step": 4085, |
| "token_acc": 0.8438836612489307, |
| "train_speed(iter/s)": 0.027661 |
| }, |
| { |
| "epoch": 1.0574051328463379, |
| "grad_norm": 3.0141847133636475, |
| "learning_rate": 3.850020994633869e-06, |
| "loss": 0.5597274303436279, |
| "memory(GiB)": 76.04, |
| "step": 4090, |
| "token_acc": 0.8566967231141412, |
| "train_speed(iter/s)": 0.02766 |
| }, |
| { |
| "epoch": 1.0586980412437779, |
| "grad_norm": 1.5420576333999634, |
| "learning_rate": 3.8470203595383034e-06, |
| "loss": 0.5814280986785889, |
| "memory(GiB)": 76.04, |
| "step": 4095, |
| "token_acc": 0.8152106326752682, |
| "train_speed(iter/s)": 0.027659 |
| }, |
| { |
| "epoch": 1.0599909496412179, |
| "grad_norm": 3.600728988647461, |
| "learning_rate": 3.8440169875949075e-06, |
| "loss": 0.55950927734375, |
| "memory(GiB)": 76.04, |
| "step": 4100, |
| "token_acc": 0.8275422378068218, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.0612838580386579, |
| "grad_norm": 1.3479124307632446, |
| "learning_rate": 3.841010884905868e-06, |
| "loss": 0.5699577331542969, |
| "memory(GiB)": 76.04, |
| "step": 4105, |
| "token_acc": 0.8080579942442898, |
| "train_speed(iter/s)": 0.027659 |
| }, |
| { |
| "epoch": 1.062576766436098, |
| "grad_norm": 0.989827036857605, |
| "learning_rate": 3.838002057578921e-06, |
| "loss": 0.5578522682189941, |
| "memory(GiB)": 76.04, |
| "step": 4110, |
| "token_acc": 0.8228656838896867, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.0638696748335381, |
| "grad_norm": 1.1100108623504639, |
| "learning_rate": 3.834990511727341e-06, |
| "loss": 0.5745999813079834, |
| "memory(GiB)": 76.04, |
| "step": 4115, |
| "token_acc": 0.8142663088493522, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.0651625832309781, |
| "grad_norm": 0.9445801973342896, |
| "learning_rate": 3.831976253469921e-06, |
| "loss": 0.5575265884399414, |
| "memory(GiB)": 76.04, |
| "step": 4120, |
| "token_acc": 0.813193334855056, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.0664554916284181, |
| "grad_norm": 1.1623046398162842, |
| "learning_rate": 3.828959288930971e-06, |
| "loss": 0.5857258796691894, |
| "memory(GiB)": 76.04, |
| "step": 4125, |
| "token_acc": 0.8250850433446725, |
| "train_speed(iter/s)": 0.027659 |
| }, |
| { |
| "epoch": 1.0677484000258581, |
| "grad_norm": 1.0290391445159912, |
| "learning_rate": 3.825939624240294e-06, |
| "loss": 0.5558303833007813, |
| "memory(GiB)": 76.04, |
| "step": 4130, |
| "token_acc": 0.8497815003641661, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.0690413084232981, |
| "grad_norm": 1.3947314023971558, |
| "learning_rate": 3.822917265533184e-06, |
| "loss": 0.5638031959533691, |
| "memory(GiB)": 76.04, |
| "step": 4135, |
| "token_acc": 0.8299896824486989, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.0703342168207381, |
| "grad_norm": 1.2008622884750366, |
| "learning_rate": 3.819892218950403e-06, |
| "loss": 0.5699079513549805, |
| "memory(GiB)": 76.04, |
| "step": 4140, |
| "token_acc": 0.8410833741230217, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.0716271252181784, |
| "grad_norm": 1.117075800895691, |
| "learning_rate": 3.816864490638181e-06, |
| "loss": 0.546845531463623, |
| "memory(GiB)": 76.04, |
| "step": 4145, |
| "token_acc": 0.8247305985692294, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.0729200336156184, |
| "grad_norm": 1.193162202835083, |
| "learning_rate": 3.8138340867481914e-06, |
| "loss": 0.540710735321045, |
| "memory(GiB)": 76.04, |
| "step": 4150, |
| "token_acc": 0.8468981429794202, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.0742129420130584, |
| "grad_norm": 1.0668905973434448, |
| "learning_rate": 3.810801013437546e-06, |
| "loss": 0.5506375312805176, |
| "memory(GiB)": 76.04, |
| "step": 4155, |
| "token_acc": 0.8278915329275042, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.0755058504104984, |
| "grad_norm": 1.1097651720046997, |
| "learning_rate": 3.807765276868779e-06, |
| "loss": 0.5460940361022949, |
| "memory(GiB)": 76.04, |
| "step": 4160, |
| "token_acc": 0.8142139418044798, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.0767987588079384, |
| "grad_norm": 1.4875537157058716, |
| "learning_rate": 3.8047268832098376e-06, |
| "loss": 0.5787097454071045, |
| "memory(GiB)": 76.04, |
| "step": 4165, |
| "token_acc": 0.8459223372238127, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.0780916672053784, |
| "grad_norm": 1.1992076635360718, |
| "learning_rate": 3.801685838634066e-06, |
| "loss": 0.5527867794036865, |
| "memory(GiB)": 76.04, |
| "step": 4170, |
| "token_acc": 0.8586858373272209, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.0793845756028186, |
| "grad_norm": 1.046899676322937, |
| "learning_rate": 3.7986421493201952e-06, |
| "loss": 0.5584450721740722, |
| "memory(GiB)": 76.04, |
| "step": 4175, |
| "token_acc": 0.813550135501355, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.0806774840002586, |
| "grad_norm": 1.144406795501709, |
| "learning_rate": 3.7955958214523297e-06, |
| "loss": 0.5506217002868652, |
| "memory(GiB)": 76.04, |
| "step": 4180, |
| "token_acc": 0.8326120340639397, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.0819703923976987, |
| "grad_norm": 1.1309776306152344, |
| "learning_rate": 3.7925468612199344e-06, |
| "loss": 0.5434449195861817, |
| "memory(GiB)": 76.04, |
| "step": 4185, |
| "token_acc": 0.8455437400857764, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.0832633007951387, |
| "grad_norm": 1.772002100944519, |
| "learning_rate": 3.7894952748178238e-06, |
| "loss": 0.5281466484069824, |
| "memory(GiB)": 76.04, |
| "step": 4190, |
| "token_acc": 0.8599103788530303, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.0845562091925787, |
| "grad_norm": 1.2275793552398682, |
| "learning_rate": 3.786441068446146e-06, |
| "loss": 0.5290435791015625, |
| "memory(GiB)": 76.04, |
| "step": 4195, |
| "token_acc": 0.8503279666070364, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.0858491175900187, |
| "grad_norm": 1.002516746520996, |
| "learning_rate": 3.7833842483103754e-06, |
| "loss": 0.553908109664917, |
| "memory(GiB)": 76.04, |
| "step": 4200, |
| "token_acc": 0.83946196437169, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.087142025987459, |
| "grad_norm": 0.9520712494850159, |
| "learning_rate": 3.7803248206212943e-06, |
| "loss": 0.5496163368225098, |
| "memory(GiB)": 76.04, |
| "step": 4205, |
| "token_acc": 0.8473314975085234, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.088434934384899, |
| "grad_norm": 1.1912646293640137, |
| "learning_rate": 3.7772627915949844e-06, |
| "loss": 0.5416050910949707, |
| "memory(GiB)": 76.04, |
| "step": 4210, |
| "token_acc": 0.862577306575792, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.089727842782339, |
| "grad_norm": 0.875792920589447, |
| "learning_rate": 3.7741981674528116e-06, |
| "loss": 0.5520293235778808, |
| "memory(GiB)": 76.04, |
| "step": 4215, |
| "token_acc": 0.8360961569212728, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.091020751179779, |
| "grad_norm": 1.6698824167251587, |
| "learning_rate": 3.7711309544214163e-06, |
| "loss": 0.5539298534393311, |
| "memory(GiB)": 76.04, |
| "step": 4220, |
| "token_acc": 0.8197564955441194, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.092313659577219, |
| "grad_norm": 1.9617213010787964, |
| "learning_rate": 3.768061158732697e-06, |
| "loss": 0.543891191482544, |
| "memory(GiB)": 76.04, |
| "step": 4225, |
| "token_acc": 0.8655219780219781, |
| "train_speed(iter/s)": 0.027659 |
| }, |
| { |
| "epoch": 1.093606567974659, |
| "grad_norm": 1.0107512474060059, |
| "learning_rate": 3.764988786623801e-06, |
| "loss": 0.5563596725463867, |
| "memory(GiB)": 76.04, |
| "step": 4230, |
| "token_acc": 0.8210617141917989, |
| "train_speed(iter/s)": 0.027659 |
| }, |
| { |
| "epoch": 1.094899476372099, |
| "grad_norm": 1.1788944005966187, |
| "learning_rate": 3.76191384433711e-06, |
| "loss": 0.5319845676422119, |
| "memory(GiB)": 76.04, |
| "step": 4235, |
| "token_acc": 0.8385356134816536, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.0961923847695392, |
| "grad_norm": 1.3050668239593506, |
| "learning_rate": 3.7588363381202264e-06, |
| "loss": 0.5554252624511719, |
| "memory(GiB)": 76.04, |
| "step": 4240, |
| "token_acc": 0.8417724746315843, |
| "train_speed(iter/s)": 0.027659 |
| }, |
| { |
| "epoch": 1.0974852931669792, |
| "grad_norm": 1.0011587142944336, |
| "learning_rate": 3.7557562742259635e-06, |
| "loss": 0.5328820705413818, |
| "memory(GiB)": 76.04, |
| "step": 4245, |
| "token_acc": 0.8045835662381219, |
| "train_speed(iter/s)": 0.027659 |
| }, |
| { |
| "epoch": 1.0987782015644192, |
| "grad_norm": 0.953105092048645, |
| "learning_rate": 3.752673658912331e-06, |
| "loss": 0.5456388473510743, |
| "memory(GiB)": 76.04, |
| "step": 4250, |
| "token_acc": 0.8526051825020897, |
| "train_speed(iter/s)": 0.027659 |
| }, |
| { |
| "epoch": 1.1000711099618592, |
| "grad_norm": 1.271561622619629, |
| "learning_rate": 3.7495884984425235e-06, |
| "loss": 0.5330571174621582, |
| "memory(GiB)": 76.04, |
| "step": 4255, |
| "token_acc": 0.8340923877683799, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.1013640183592992, |
| "grad_norm": 1.6461479663848877, |
| "learning_rate": 3.746500799084904e-06, |
| "loss": 0.5901468753814697, |
| "memory(GiB)": 76.04, |
| "step": 4260, |
| "token_acc": 0.8404071670932793, |
| "train_speed(iter/s)": 0.02766 |
| }, |
| { |
| "epoch": 1.1026569267567392, |
| "grad_norm": 1.140838384628296, |
| "learning_rate": 3.7434105671129962e-06, |
| "loss": 0.5382442474365234, |
| "memory(GiB)": 76.04, |
| "step": 4265, |
| "token_acc": 0.8247861227962376, |
| "train_speed(iter/s)": 0.02766 |
| }, |
| { |
| "epoch": 1.1039498351541792, |
| "grad_norm": 1.1467549800872803, |
| "learning_rate": 3.7403178088054676e-06, |
| "loss": 0.5643450260162354, |
| "memory(GiB)": 76.04, |
| "step": 4270, |
| "token_acc": 0.8297176451105407, |
| "train_speed(iter/s)": 0.02766 |
| }, |
| { |
| "epoch": 1.1052427435516194, |
| "grad_norm": 1.270693302154541, |
| "learning_rate": 3.737222530446122e-06, |
| "loss": 0.5628186225891113, |
| "memory(GiB)": 76.04, |
| "step": 4275, |
| "token_acc": 0.801499403646277, |
| "train_speed(iter/s)": 0.02766 |
| }, |
| { |
| "epoch": 1.1065356519490595, |
| "grad_norm": 1.3963029384613037, |
| "learning_rate": 3.7341247383238793e-06, |
| "loss": 0.5608326911926269, |
| "memory(GiB)": 76.04, |
| "step": 4280, |
| "token_acc": 0.8412363787523383, |
| "train_speed(iter/s)": 0.027659 |
| }, |
| { |
| "epoch": 1.1078285603464995, |
| "grad_norm": 1.8522865772247314, |
| "learning_rate": 3.731024438732771e-06, |
| "loss": 0.5282313346862793, |
| "memory(GiB)": 76.04, |
| "step": 4285, |
| "token_acc": 0.8429379193156183, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.1091214687439395, |
| "grad_norm": 1.071329951286316, |
| "learning_rate": 3.7279216379719194e-06, |
| "loss": 0.5438883781433106, |
| "memory(GiB)": 76.04, |
| "step": 4290, |
| "token_acc": 0.8636459342232703, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.1104143771413795, |
| "grad_norm": 1.016403317451477, |
| "learning_rate": 3.7248163423455307e-06, |
| "loss": 0.5469881057739258, |
| "memory(GiB)": 76.04, |
| "step": 4295, |
| "token_acc": 0.8709039687639005, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.1117072855388195, |
| "grad_norm": 1.8317582607269287, |
| "learning_rate": 3.721708558162881e-06, |
| "loss": 0.5621847152709961, |
| "memory(GiB)": 76.04, |
| "step": 4300, |
| "token_acc": 0.8440362706347361, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.1130001939362597, |
| "grad_norm": 0.949685275554657, |
| "learning_rate": 3.7185982917382986e-06, |
| "loss": 0.5375046730041504, |
| "memory(GiB)": 76.04, |
| "step": 4305, |
| "token_acc": 0.8541533400347254, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.1142931023336997, |
| "grad_norm": 1.1784414052963257, |
| "learning_rate": 3.7154855493911596e-06, |
| "loss": 0.5650627136230468, |
| "memory(GiB)": 76.04, |
| "step": 4310, |
| "token_acc": 0.837442021839962, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.1155860107311397, |
| "grad_norm": 1.3990085124969482, |
| "learning_rate": 3.7123703374458685e-06, |
| "loss": 0.5586078643798829, |
| "memory(GiB)": 76.04, |
| "step": 4315, |
| "token_acc": 0.8209936463113308, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.1168789191285797, |
| "grad_norm": 1.3244526386260986, |
| "learning_rate": 3.709252662231849e-06, |
| "loss": 0.5645613670349121, |
| "memory(GiB)": 76.04, |
| "step": 4320, |
| "token_acc": 0.8429337789112655, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.1181718275260197, |
| "grad_norm": 0.8955655097961426, |
| "learning_rate": 3.706132530083527e-06, |
| "loss": 0.5438594818115234, |
| "memory(GiB)": 76.04, |
| "step": 4325, |
| "token_acc": 0.8423168980373384, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.1194647359234597, |
| "grad_norm": 1.5076687335968018, |
| "learning_rate": 3.703009947340322e-06, |
| "loss": 0.5539616584777832, |
| "memory(GiB)": 76.04, |
| "step": 4330, |
| "token_acc": 0.831388096935139, |
| "train_speed(iter/s)": 0.02766 |
| }, |
| { |
| "epoch": 1.1207576443209, |
| "grad_norm": 1.3437058925628662, |
| "learning_rate": 3.6998849203466324e-06, |
| "loss": 0.5941734313964844, |
| "memory(GiB)": 76.04, |
| "step": 4335, |
| "token_acc": 0.8199184374329255, |
| "train_speed(iter/s)": 0.027661 |
| }, |
| { |
| "epoch": 1.12205055271834, |
| "grad_norm": 1.2628297805786133, |
| "learning_rate": 3.6967574554518237e-06, |
| "loss": 0.5422052383422852, |
| "memory(GiB)": 76.04, |
| "step": 4340, |
| "token_acc": 0.8556654985226414, |
| "train_speed(iter/s)": 0.02766 |
| }, |
| { |
| "epoch": 1.12334346111578, |
| "grad_norm": 1.053979516029358, |
| "learning_rate": 3.6936275590102133e-06, |
| "loss": 0.5253170967102051, |
| "memory(GiB)": 76.04, |
| "step": 4345, |
| "token_acc": 0.839918890776566, |
| "train_speed(iter/s)": 0.027659 |
| }, |
| { |
| "epoch": 1.12463636951322, |
| "grad_norm": 1.1738144159317017, |
| "learning_rate": 3.6904952373810586e-06, |
| "loss": 0.5661196231842041, |
| "memory(GiB)": 76.04, |
| "step": 4350, |
| "token_acc": 0.8402832743178504, |
| "train_speed(iter/s)": 0.027659 |
| }, |
| { |
| "epoch": 1.12592927791066, |
| "grad_norm": 1.5563236474990845, |
| "learning_rate": 3.6873604969285466e-06, |
| "loss": 0.5621729850769043, |
| "memory(GiB)": 76.04, |
| "step": 4355, |
| "token_acc": 0.8411411300726107, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.1272221863081, |
| "grad_norm": 0.9622613787651062, |
| "learning_rate": 3.6842233440217757e-06, |
| "loss": 0.554353904724121, |
| "memory(GiB)": 76.04, |
| "step": 4360, |
| "token_acc": 0.845444059976932, |
| "train_speed(iter/s)": 0.027659 |
| }, |
| { |
| "epoch": 1.12851509470554, |
| "grad_norm": 1.0929608345031738, |
| "learning_rate": 3.68108378503475e-06, |
| "loss": 0.5726329803466796, |
| "memory(GiB)": 76.04, |
| "step": 4365, |
| "token_acc": 0.8360975096088032, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.1298080031029802, |
| "grad_norm": 1.5161771774291992, |
| "learning_rate": 3.677941826346358e-06, |
| "loss": 0.5386641502380372, |
| "memory(GiB)": 76.04, |
| "step": 4370, |
| "token_acc": 0.8529266398361929, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.1311009115004202, |
| "grad_norm": 1.0926306247711182, |
| "learning_rate": 3.674797474340367e-06, |
| "loss": 0.567785120010376, |
| "memory(GiB)": 76.04, |
| "step": 4375, |
| "token_acc": 0.8242666666666667, |
| "train_speed(iter/s)": 0.027659 |
| }, |
| { |
| "epoch": 1.1323938198978603, |
| "grad_norm": 1.2848386764526367, |
| "learning_rate": 3.6716507354054044e-06, |
| "loss": 0.5367423534393311, |
| "memory(GiB)": 76.04, |
| "step": 4380, |
| "token_acc": 0.8228389830508475, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.1336867282953003, |
| "grad_norm": 1.2967411279678345, |
| "learning_rate": 3.6685016159349483e-06, |
| "loss": 0.5374815940856934, |
| "memory(GiB)": 76.04, |
| "step": 4385, |
| "token_acc": 0.8508662193411426, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.1349796366927403, |
| "grad_norm": 0.8757297396659851, |
| "learning_rate": 3.665350122327316e-06, |
| "loss": 0.5277114391326905, |
| "memory(GiB)": 76.04, |
| "step": 4390, |
| "token_acc": 0.8365678150894025, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.1362725450901803, |
| "grad_norm": 1.1371816396713257, |
| "learning_rate": 3.662196260985646e-06, |
| "loss": 0.5421219825744629, |
| "memory(GiB)": 76.04, |
| "step": 4395, |
| "token_acc": 0.8501142154278686, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.1375654534876203, |
| "grad_norm": 1.695295810699463, |
| "learning_rate": 3.6590400383178866e-06, |
| "loss": 0.5642148971557617, |
| "memory(GiB)": 76.04, |
| "step": 4400, |
| "token_acc": 0.8535144713526285, |
| "train_speed(iter/s)": 0.027659 |
| }, |
| { |
| "epoch": 1.1388583618850605, |
| "grad_norm": 1.0313785076141357, |
| "learning_rate": 3.6558814607367854e-06, |
| "loss": 0.5805719375610352, |
| "memory(GiB)": 76.04, |
| "step": 4405, |
| "token_acc": 0.8027235587834771, |
| "train_speed(iter/s)": 0.027659 |
| }, |
| { |
| "epoch": 1.1401512702825005, |
| "grad_norm": 4.604849815368652, |
| "learning_rate": 3.6527205346598754e-06, |
| "loss": 0.5551558017730713, |
| "memory(GiB)": 76.04, |
| "step": 4410, |
| "token_acc": 0.8510540083089706, |
| "train_speed(iter/s)": 0.027659 |
| }, |
| { |
| "epoch": 1.1414441786799405, |
| "grad_norm": 0.8938732147216797, |
| "learning_rate": 3.649557266509458e-06, |
| "loss": 0.5434865951538086, |
| "memory(GiB)": 76.04, |
| "step": 4415, |
| "token_acc": 0.8330908429571702, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.1427370870773805, |
| "grad_norm": 1.1276806592941284, |
| "learning_rate": 3.646391662712598e-06, |
| "loss": 0.5468146324157714, |
| "memory(GiB)": 76.04, |
| "step": 4420, |
| "token_acc": 0.8307084785133566, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.1440299954748205, |
| "grad_norm": 0.9285451173782349, |
| "learning_rate": 3.6432237297011016e-06, |
| "loss": 0.5583270072937012, |
| "memory(GiB)": 76.04, |
| "step": 4425, |
| "token_acc": 0.8463666452600899, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.1453229038722608, |
| "grad_norm": 1.3203446865081787, |
| "learning_rate": 3.640053473911509e-06, |
| "loss": 0.546565055847168, |
| "memory(GiB)": 76.04, |
| "step": 4430, |
| "token_acc": 0.8101812275602667, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.1466158122697008, |
| "grad_norm": 1.0341274738311768, |
| "learning_rate": 3.6368809017850796e-06, |
| "loss": 0.5599943161010742, |
| "memory(GiB)": 76.04, |
| "step": 4435, |
| "token_acc": 0.8432740304620504, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.1479087206671408, |
| "grad_norm": 1.6448158025741577, |
| "learning_rate": 3.6337060197677803e-06, |
| "loss": 0.5772030830383301, |
| "memory(GiB)": 76.04, |
| "step": 4440, |
| "token_acc": 0.8330186134340437, |
| "train_speed(iter/s)": 0.027659 |
| }, |
| { |
| "epoch": 1.1492016290645808, |
| "grad_norm": 1.2664222717285156, |
| "learning_rate": 3.6305288343102686e-06, |
| "loss": 0.5556002616882324, |
| "memory(GiB)": 76.04, |
| "step": 4445, |
| "token_acc": 0.8380842848927955, |
| "train_speed(iter/s)": 0.027659 |
| }, |
| { |
| "epoch": 1.1504945374620208, |
| "grad_norm": 1.121952772140503, |
| "learning_rate": 3.6273493518678843e-06, |
| "loss": 0.5274020671844483, |
| "memory(GiB)": 76.04, |
| "step": 4450, |
| "token_acc": 0.8465437496040044, |
| "train_speed(iter/s)": 0.027659 |
| }, |
| { |
| "epoch": 1.1517874458594608, |
| "grad_norm": 2.5075345039367676, |
| "learning_rate": 3.624167578900633e-06, |
| "loss": 0.5526081085205078, |
| "memory(GiB)": 76.04, |
| "step": 4455, |
| "token_acc": 0.8559080459770115, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.1530803542569008, |
| "grad_norm": 1.5982297658920288, |
| "learning_rate": 3.6209835218731753e-06, |
| "loss": 0.5586674213409424, |
| "memory(GiB)": 76.04, |
| "step": 4460, |
| "token_acc": 0.8431690299347288, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.154373262654341, |
| "grad_norm": 0.9923174977302551, |
| "learning_rate": 3.6177971872548116e-06, |
| "loss": 0.5380115032196044, |
| "memory(GiB)": 76.04, |
| "step": 4465, |
| "token_acc": 0.8524468348607622, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.155666171051781, |
| "grad_norm": 1.332160234451294, |
| "learning_rate": 3.6146085815194694e-06, |
| "loss": 0.5499836444854737, |
| "memory(GiB)": 76.04, |
| "step": 4470, |
| "token_acc": 0.8119310724416783, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.156959079449221, |
| "grad_norm": 1.0057822465896606, |
| "learning_rate": 3.6114177111456916e-06, |
| "loss": 0.5390474319458007, |
| "memory(GiB)": 76.04, |
| "step": 4475, |
| "token_acc": 0.8468805191604715, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.158251987846661, |
| "grad_norm": 1.1006532907485962, |
| "learning_rate": 3.608224582616622e-06, |
| "loss": 0.5385686874389648, |
| "memory(GiB)": 76.04, |
| "step": 4480, |
| "token_acc": 0.8392769471100201, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.159544896244101, |
| "grad_norm": 3.348762035369873, |
| "learning_rate": 3.6050292024199916e-06, |
| "loss": 0.5231637001037598, |
| "memory(GiB)": 76.04, |
| "step": 4485, |
| "token_acc": 0.8456874336819766, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.160837804641541, |
| "grad_norm": 1.867474913597107, |
| "learning_rate": 3.601831577048109e-06, |
| "loss": 0.5361900329589844, |
| "memory(GiB)": 76.04, |
| "step": 4490, |
| "token_acc": 0.848513334725994, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.162130713038981, |
| "grad_norm": 1.6171462535858154, |
| "learning_rate": 3.598631712997841e-06, |
| "loss": 0.5521645545959473, |
| "memory(GiB)": 76.04, |
| "step": 4495, |
| "token_acc": 0.8328944218338521, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.1634236214364213, |
| "grad_norm": 1.5987505912780762, |
| "learning_rate": 3.5954296167706054e-06, |
| "loss": 0.5655074119567871, |
| "memory(GiB)": 76.04, |
| "step": 4500, |
| "token_acc": 0.8466187172830574, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.1647165298338613, |
| "grad_norm": 0.9721993803977966, |
| "learning_rate": 3.5922252948723547e-06, |
| "loss": 0.5404928684234619, |
| "memory(GiB)": 76.04, |
| "step": 4505, |
| "token_acc": 0.8404960207292245, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.1660094382313013, |
| "grad_norm": 1.1576437950134277, |
| "learning_rate": 3.5890187538135616e-06, |
| "loss": 0.5581830024719239, |
| "memory(GiB)": 76.04, |
| "step": 4510, |
| "token_acc": 0.8301479321887485, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.1673023466287413, |
| "grad_norm": 1.2888849973678589, |
| "learning_rate": 3.5858100001092117e-06, |
| "loss": 0.5397047996520996, |
| "memory(GiB)": 76.04, |
| "step": 4515, |
| "token_acc": 0.8300223392372746, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.1685952550261813, |
| "grad_norm": 1.0602697134017944, |
| "learning_rate": 3.5825990402787815e-06, |
| "loss": 0.5373691558837891, |
| "memory(GiB)": 76.04, |
| "step": 4520, |
| "token_acc": 0.8421267268185757, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.1698881634236213, |
| "grad_norm": 1.1042633056640625, |
| "learning_rate": 3.579385880846232e-06, |
| "loss": 0.5380208015441894, |
| "memory(GiB)": 76.04, |
| "step": 4525, |
| "token_acc": 0.8591904314733356, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.1711810718210616, |
| "grad_norm": 1.2820953130722046, |
| "learning_rate": 3.576170528339996e-06, |
| "loss": 0.5534794807434082, |
| "memory(GiB)": 76.04, |
| "step": 4530, |
| "token_acc": 0.8538792049463793, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.1724739802185016, |
| "grad_norm": 1.0828604698181152, |
| "learning_rate": 3.5729529892929577e-06, |
| "loss": 0.5525107383728027, |
| "memory(GiB)": 76.04, |
| "step": 4535, |
| "token_acc": 0.8491177281499862, |
| "train_speed(iter/s)": 0.027659 |
| }, |
| { |
| "epoch": 1.1737668886159416, |
| "grad_norm": 1.0604639053344727, |
| "learning_rate": 3.569733270242446e-06, |
| "loss": 0.5319010734558105, |
| "memory(GiB)": 76.04, |
| "step": 4540, |
| "token_acc": 0.8434462262398613, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.1750597970133816, |
| "grad_norm": 1.1371124982833862, |
| "learning_rate": 3.5665113777302184e-06, |
| "loss": 0.5360076904296875, |
| "memory(GiB)": 76.04, |
| "step": 4545, |
| "token_acc": 0.8431543594888123, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.1763527054108216, |
| "grad_norm": 1.3616864681243896, |
| "learning_rate": 3.56328731830245e-06, |
| "loss": 0.5336400508880615, |
| "memory(GiB)": 76.04, |
| "step": 4550, |
| "token_acc": 0.8413301476636246, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.1776456138082616, |
| "grad_norm": 1.6478408575057983, |
| "learning_rate": 3.5600610985097158e-06, |
| "loss": 0.5487207412719727, |
| "memory(GiB)": 76.04, |
| "step": 4555, |
| "token_acc": 0.8626700118843259, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.1789385222057018, |
| "grad_norm": 1.0522022247314453, |
| "learning_rate": 3.5568327249069835e-06, |
| "loss": 0.5672080993652344, |
| "memory(GiB)": 76.04, |
| "step": 4560, |
| "token_acc": 0.8288274920616079, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.1802314306031418, |
| "grad_norm": 1.0040467977523804, |
| "learning_rate": 3.553602204053593e-06, |
| "loss": 0.5410587787628174, |
| "memory(GiB)": 76.04, |
| "step": 4565, |
| "token_acc": 0.8245939675174014, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.1815243390005818, |
| "grad_norm": 1.081748366355896, |
| "learning_rate": 3.550369542513252e-06, |
| "loss": 0.5334537982940674, |
| "memory(GiB)": 76.04, |
| "step": 4570, |
| "token_acc": 0.8730332603067118, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.1828172473980219, |
| "grad_norm": 2.2112412452697754, |
| "learning_rate": 3.5471347468540124e-06, |
| "loss": 0.5522329330444335, |
| "memory(GiB)": 76.04, |
| "step": 4575, |
| "token_acc": 0.8477234082750803, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.1841101557954619, |
| "grad_norm": 0.9928748607635498, |
| "learning_rate": 3.5438978236482656e-06, |
| "loss": 0.5604439735412597, |
| "memory(GiB)": 76.04, |
| "step": 4580, |
| "token_acc": 0.7849382585192644, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.1854030641929019, |
| "grad_norm": 1.1116896867752075, |
| "learning_rate": 3.540658779472723e-06, |
| "loss": 0.5413738250732422, |
| "memory(GiB)": 76.04, |
| "step": 4585, |
| "token_acc": 0.8287749204588575, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.1866959725903419, |
| "grad_norm": 1.1231666803359985, |
| "learning_rate": 3.5374176209084087e-06, |
| "loss": 0.5632248401641846, |
| "memory(GiB)": 76.04, |
| "step": 4590, |
| "token_acc": 0.8709531013615733, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.187988880987782, |
| "grad_norm": 1.3912053108215332, |
| "learning_rate": 3.5341743545406403e-06, |
| "loss": 0.5327963829040527, |
| "memory(GiB)": 76.04, |
| "step": 4595, |
| "token_acc": 0.834390750074118, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.189281789385222, |
| "grad_norm": 0.8718860149383545, |
| "learning_rate": 3.530928986959019e-06, |
| "loss": 0.5319995880126953, |
| "memory(GiB)": 76.04, |
| "step": 4600, |
| "token_acc": 0.8448853130778072, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.1905746977826621, |
| "grad_norm": 1.055916666984558, |
| "learning_rate": 3.5276815247574148e-06, |
| "loss": 0.5589988708496094, |
| "memory(GiB)": 76.04, |
| "step": 4605, |
| "token_acc": 0.8575937187283504, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.1918676061801021, |
| "grad_norm": 0.9413480758666992, |
| "learning_rate": 3.5244319745339524e-06, |
| "loss": 0.5528499126434326, |
| "memory(GiB)": 76.04, |
| "step": 4610, |
| "token_acc": 0.8506312722563937, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.1931605145775421, |
| "grad_norm": 1.0575560331344604, |
| "learning_rate": 3.5211803428910015e-06, |
| "loss": 0.513238525390625, |
| "memory(GiB)": 76.04, |
| "step": 4615, |
| "token_acc": 0.8514719699342311, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.1944534229749821, |
| "grad_norm": 1.3240203857421875, |
| "learning_rate": 3.5179266364351584e-06, |
| "loss": 0.522664737701416, |
| "memory(GiB)": 76.04, |
| "step": 4620, |
| "token_acc": 0.87151792998951, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.1957463313724221, |
| "grad_norm": 1.1790480613708496, |
| "learning_rate": 3.5146708617772362e-06, |
| "loss": 0.5358052253723145, |
| "memory(GiB)": 76.04, |
| "step": 4625, |
| "token_acc": 0.835999462293319, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.1970392397698624, |
| "grad_norm": 0.9325648546218872, |
| "learning_rate": 3.51141302553225e-06, |
| "loss": 0.5524285316467286, |
| "memory(GiB)": 76.04, |
| "step": 4630, |
| "token_acc": 0.805278226398473, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.1983321481673024, |
| "grad_norm": 2.2591135501861572, |
| "learning_rate": 3.508153134319404e-06, |
| "loss": 0.5479226112365723, |
| "memory(GiB)": 76.04, |
| "step": 4635, |
| "token_acc": 0.8172845227062094, |
| "train_speed(iter/s)": 0.027654 |
| }, |
| { |
| "epoch": 1.1996250565647424, |
| "grad_norm": 0.9014770984649658, |
| "learning_rate": 3.5048911947620774e-06, |
| "loss": 0.5491894245147705, |
| "memory(GiB)": 76.04, |
| "step": 4640, |
| "token_acc": 0.8255224825839139, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.2009179649621824, |
| "grad_norm": 0.9961432814598083, |
| "learning_rate": 3.5016272134878084e-06, |
| "loss": 0.5200064182281494, |
| "memory(GiB)": 76.04, |
| "step": 4645, |
| "token_acc": 0.8566537085189094, |
| "train_speed(iter/s)": 0.027654 |
| }, |
| { |
| "epoch": 1.2022108733596224, |
| "grad_norm": 0.9640424847602844, |
| "learning_rate": 3.4983611971282882e-06, |
| "loss": 0.5232643604278564, |
| "memory(GiB)": 76.04, |
| "step": 4650, |
| "token_acc": 0.8389979490184588, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.2035037817570626, |
| "grad_norm": 14.804332733154297, |
| "learning_rate": 3.49509315231934e-06, |
| "loss": 0.5426907062530517, |
| "memory(GiB)": 76.04, |
| "step": 4655, |
| "token_acc": 0.8573487661061368, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.2047966901545026, |
| "grad_norm": 1.2045928239822388, |
| "learning_rate": 3.4918230857009083e-06, |
| "loss": 0.5525260448455811, |
| "memory(GiB)": 76.04, |
| "step": 4660, |
| "token_acc": 0.8139168327847573, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.2060895985519426, |
| "grad_norm": 6.34891414642334, |
| "learning_rate": 3.488551003917048e-06, |
| "loss": 0.5549496650695801, |
| "memory(GiB)": 76.04, |
| "step": 4665, |
| "token_acc": 0.8387813757424794, |
| "train_speed(iter/s)": 0.027654 |
| }, |
| { |
| "epoch": 1.2073825069493827, |
| "grad_norm": 2.3981356620788574, |
| "learning_rate": 3.4852769136159047e-06, |
| "loss": 0.536187744140625, |
| "memory(GiB)": 76.04, |
| "step": 4670, |
| "token_acc": 0.8524989411266413, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.2086754153468227, |
| "grad_norm": 1.0487481355667114, |
| "learning_rate": 3.482000821449707e-06, |
| "loss": 0.5361638069152832, |
| "memory(GiB)": 76.04, |
| "step": 4675, |
| "token_acc": 0.8482098061573546, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.2099683237442627, |
| "grad_norm": 1.1350358724594116, |
| "learning_rate": 3.4787227340747514e-06, |
| "loss": 0.5472620010375977, |
| "memory(GiB)": 76.04, |
| "step": 4680, |
| "token_acc": 0.8297029702970297, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.2112612321417027, |
| "grad_norm": 1.4464890956878662, |
| "learning_rate": 3.4754426581513866e-06, |
| "loss": 0.5401841163635254, |
| "memory(GiB)": 76.04, |
| "step": 4685, |
| "token_acc": 0.8511260213910848, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.212554140539143, |
| "grad_norm": 1.922498106956482, |
| "learning_rate": 3.4721606003440023e-06, |
| "loss": 0.5158808708190918, |
| "memory(GiB)": 76.04, |
| "step": 4690, |
| "token_acc": 0.8568056902683479, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.213847048936583, |
| "grad_norm": 0.9439219236373901, |
| "learning_rate": 3.4688765673210155e-06, |
| "loss": 0.5801658630371094, |
| "memory(GiB)": 76.04, |
| "step": 4695, |
| "token_acc": 0.8500309427215843, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.215139957334023, |
| "grad_norm": 1.2153284549713135, |
| "learning_rate": 3.465590565754856e-06, |
| "loss": 0.5326606273651123, |
| "memory(GiB)": 76.04, |
| "step": 4700, |
| "token_acc": 0.8437681640787179, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.216432865731463, |
| "grad_norm": 1.3219401836395264, |
| "learning_rate": 3.462302602321953e-06, |
| "loss": 0.5341041088104248, |
| "memory(GiB)": 76.04, |
| "step": 4705, |
| "token_acc": 0.8587982960469481, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.217725774128903, |
| "grad_norm": 0.9187490940093994, |
| "learning_rate": 3.4590126837027216e-06, |
| "loss": 0.5361604690551758, |
| "memory(GiB)": 76.04, |
| "step": 4710, |
| "token_acc": 0.8289933797317942, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.219018682526343, |
| "grad_norm": 1.189947247505188, |
| "learning_rate": 3.4557208165815503e-06, |
| "loss": 0.5369776725769043, |
| "memory(GiB)": 76.04, |
| "step": 4715, |
| "token_acc": 0.784606727522821, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.220311590923783, |
| "grad_norm": 0.8782621026039124, |
| "learning_rate": 3.4524270076467846e-06, |
| "loss": 0.5394928455352783, |
| "memory(GiB)": 76.04, |
| "step": 4720, |
| "token_acc": 0.8381009137862535, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.2216044993212232, |
| "grad_norm": 1.1077133417129517, |
| "learning_rate": 3.449131263590718e-06, |
| "loss": 0.5199668884277344, |
| "memory(GiB)": 76.04, |
| "step": 4725, |
| "token_acc": 0.8437195256220705, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.2228974077186632, |
| "grad_norm": 2.1963064670562744, |
| "learning_rate": 3.445833591109574e-06, |
| "loss": 0.533887529373169, |
| "memory(GiB)": 76.04, |
| "step": 4730, |
| "token_acc": 0.8215962441314554, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.2241903161161032, |
| "grad_norm": 1.0866763591766357, |
| "learning_rate": 3.4425339969034955e-06, |
| "loss": 0.5230364322662353, |
| "memory(GiB)": 76.04, |
| "step": 4735, |
| "token_acc": 0.8641819515774027, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.2254832245135432, |
| "grad_norm": 1.1297239065170288, |
| "learning_rate": 3.439232487676527e-06, |
| "loss": 0.5545130729675293, |
| "memory(GiB)": 76.04, |
| "step": 4740, |
| "token_acc": 0.8013548084891723, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.2267761329109832, |
| "grad_norm": 1.2349060773849487, |
| "learning_rate": 3.435929070136609e-06, |
| "loss": 0.5242255210876465, |
| "memory(GiB)": 76.04, |
| "step": 4745, |
| "token_acc": 0.8695360580716427, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.2280690413084232, |
| "grad_norm": 0.9875677227973938, |
| "learning_rate": 3.4326237509955533e-06, |
| "loss": 0.5407393932342529, |
| "memory(GiB)": 76.04, |
| "step": 4750, |
| "token_acc": 0.8353607552258935, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.2293619497058634, |
| "grad_norm": 1.4724373817443848, |
| "learning_rate": 3.4293165369690406e-06, |
| "loss": 0.5200931549072265, |
| "memory(GiB)": 76.04, |
| "step": 4755, |
| "token_acc": 0.8424033399891088, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.2306548581033034, |
| "grad_norm": 0.8519977331161499, |
| "learning_rate": 3.4260074347765975e-06, |
| "loss": 0.5357259750366211, |
| "memory(GiB)": 76.04, |
| "step": 4760, |
| "token_acc": 0.8267593859249126, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.2319477665007434, |
| "grad_norm": 0.9893440008163452, |
| "learning_rate": 3.42269645114159e-06, |
| "loss": 0.5508286952972412, |
| "memory(GiB)": 76.04, |
| "step": 4765, |
| "token_acc": 0.8041002277904328, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.2332406748981835, |
| "grad_norm": 1.4743452072143555, |
| "learning_rate": 3.419383592791205e-06, |
| "loss": 0.5639371871948242, |
| "memory(GiB)": 76.04, |
| "step": 4770, |
| "token_acc": 0.8497330282227308, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.2345335832956235, |
| "grad_norm": 1.2142781019210815, |
| "learning_rate": 3.4160688664564382e-06, |
| "loss": 0.5326876640319824, |
| "memory(GiB)": 76.04, |
| "step": 4775, |
| "token_acc": 0.8382480707313333, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.2358264916930635, |
| "grad_norm": 8.9053955078125, |
| "learning_rate": 3.4127522788720836e-06, |
| "loss": 0.5383922100067139, |
| "memory(GiB)": 76.04, |
| "step": 4780, |
| "token_acc": 0.8079113088728835, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.2371194000905037, |
| "grad_norm": 1.0299571752548218, |
| "learning_rate": 3.4094338367767117e-06, |
| "loss": 0.5383823394775391, |
| "memory(GiB)": 76.04, |
| "step": 4785, |
| "token_acc": 0.8180522825669974, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.2384123084879437, |
| "grad_norm": 2.690765619277954, |
| "learning_rate": 3.4061135469126654e-06, |
| "loss": 0.5509030818939209, |
| "memory(GiB)": 76.04, |
| "step": 4790, |
| "token_acc": 0.8323850658249927, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.2397052168853837, |
| "grad_norm": 1.1407406330108643, |
| "learning_rate": 3.40279141602604e-06, |
| "loss": 0.5402188777923584, |
| "memory(GiB)": 76.04, |
| "step": 4795, |
| "token_acc": 0.8609082248332804, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.2409981252828237, |
| "grad_norm": 0.979908287525177, |
| "learning_rate": 3.3994674508666715e-06, |
| "loss": 0.5451946258544922, |
| "memory(GiB)": 76.04, |
| "step": 4800, |
| "token_acc": 0.8271758253130498, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.2422910336802637, |
| "grad_norm": 1.4777721166610718, |
| "learning_rate": 3.3961416581881236e-06, |
| "loss": 0.566465187072754, |
| "memory(GiB)": 76.04, |
| "step": 4805, |
| "token_acc": 0.8578219364893824, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.2435839420777037, |
| "grad_norm": 1.3482059240341187, |
| "learning_rate": 3.3928140447476722e-06, |
| "loss": 0.5285268783569336, |
| "memory(GiB)": 76.04, |
| "step": 4810, |
| "token_acc": 0.8426216288863005, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.2448768504751437, |
| "grad_norm": 1.7287280559539795, |
| "learning_rate": 3.3894846173062917e-06, |
| "loss": 0.5343065738677979, |
| "memory(GiB)": 76.04, |
| "step": 4815, |
| "token_acc": 0.8432345137847502, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.246169758872584, |
| "grad_norm": 2.6334922313690186, |
| "learning_rate": 3.386153382628644e-06, |
| "loss": 0.5239715576171875, |
| "memory(GiB)": 76.04, |
| "step": 4820, |
| "token_acc": 0.8302545572652349, |
| "train_speed(iter/s)": 0.027659 |
| }, |
| { |
| "epoch": 1.247462667270024, |
| "grad_norm": 1.018120527267456, |
| "learning_rate": 3.3828203474830623e-06, |
| "loss": 0.5379975318908692, |
| "memory(GiB)": 76.04, |
| "step": 4825, |
| "token_acc": 0.8100699300699301, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.248755575667464, |
| "grad_norm": 1.9307032823562622, |
| "learning_rate": 3.3794855186415374e-06, |
| "loss": 0.5401200771331787, |
| "memory(GiB)": 76.04, |
| "step": 4830, |
| "token_acc": 0.8570304677442426, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.250048484064904, |
| "grad_norm": 0.8735835552215576, |
| "learning_rate": 3.3761489028797063e-06, |
| "loss": 0.5682656288146972, |
| "memory(GiB)": 76.04, |
| "step": 4835, |
| "token_acc": 0.8328065512535019, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.251341392462344, |
| "grad_norm": 0.9113880395889282, |
| "learning_rate": 3.372810506976833e-06, |
| "loss": 0.519595718383789, |
| "memory(GiB)": 76.04, |
| "step": 4840, |
| "token_acc": 0.8493732447427906, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.2526343008597842, |
| "grad_norm": 1.123384952545166, |
| "learning_rate": 3.369470337715802e-06, |
| "loss": 0.5394314765930176, |
| "memory(GiB)": 76.04, |
| "step": 4845, |
| "token_acc": 0.8533837894922116, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.253927209257224, |
| "grad_norm": 2.7480571269989014, |
| "learning_rate": 3.3661284018830986e-06, |
| "loss": 0.5219066619873047, |
| "memory(GiB)": 76.04, |
| "step": 4850, |
| "token_acc": 0.8398635428686099, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.2552201176546642, |
| "grad_norm": 1.892354130744934, |
| "learning_rate": 3.3627847062687996e-06, |
| "loss": 0.5399574756622314, |
| "memory(GiB)": 76.04, |
| "step": 4855, |
| "token_acc": 0.8358763125833962, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.2565130260521042, |
| "grad_norm": 1.2802281379699707, |
| "learning_rate": 3.359439257666554e-06, |
| "loss": 0.5371671676635742, |
| "memory(GiB)": 76.04, |
| "step": 4860, |
| "token_acc": 0.8427890861844954, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.2578059344495442, |
| "grad_norm": 0.9374321699142456, |
| "learning_rate": 3.356092062873576e-06, |
| "loss": 0.5454726219177246, |
| "memory(GiB)": 76.04, |
| "step": 4865, |
| "token_acc": 0.8636550683553564, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.2590988428469843, |
| "grad_norm": 0.8962191939353943, |
| "learning_rate": 3.3527431286906248e-06, |
| "loss": 0.5191185951232911, |
| "memory(GiB)": 76.04, |
| "step": 4870, |
| "token_acc": 0.8448458652748329, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.2603917512444243, |
| "grad_norm": 1.248224139213562, |
| "learning_rate": 3.3493924619219964e-06, |
| "loss": 0.5477604866027832, |
| "memory(GiB)": 76.04, |
| "step": 4875, |
| "token_acc": 0.8294104944936299, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.2616846596418645, |
| "grad_norm": 0.9002890586853027, |
| "learning_rate": 3.3460400693755047e-06, |
| "loss": 0.5323681831359863, |
| "memory(GiB)": 76.04, |
| "step": 4880, |
| "token_acc": 0.8577657555815738, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.2629775680393043, |
| "grad_norm": 1.059833288192749, |
| "learning_rate": 3.3426859578624705e-06, |
| "loss": 0.5649502754211426, |
| "memory(GiB)": 76.04, |
| "step": 4885, |
| "token_acc": 0.7916213275299239, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.2642704764367445, |
| "grad_norm": 1.3469425439834595, |
| "learning_rate": 3.339330134197708e-06, |
| "loss": 0.5313740730285644, |
| "memory(GiB)": 76.04, |
| "step": 4890, |
| "token_acc": 0.8403429238296153, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.2655633848341845, |
| "grad_norm": 1.0573686361312866, |
| "learning_rate": 3.3359726051995097e-06, |
| "loss": 0.5383338451385498, |
| "memory(GiB)": 76.04, |
| "step": 4895, |
| "token_acc": 0.8299968790405279, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.2668562932316245, |
| "grad_norm": 1.5502042770385742, |
| "learning_rate": 3.332613377689632e-06, |
| "loss": 0.5520769119262695, |
| "memory(GiB)": 76.04, |
| "step": 4900, |
| "token_acc": 0.8267734765697351, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.2681492016290645, |
| "grad_norm": 1.3487634658813477, |
| "learning_rate": 3.3292524584932846e-06, |
| "loss": 0.5057527542114257, |
| "memory(GiB)": 76.04, |
| "step": 4905, |
| "token_acc": 0.8129584979223311, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.2694421100265045, |
| "grad_norm": 1.016811490058899, |
| "learning_rate": 3.325889854439112e-06, |
| "loss": 0.5458771228790283, |
| "memory(GiB)": 76.04, |
| "step": 4910, |
| "token_acc": 0.8184902798291486, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.2707350184239448, |
| "grad_norm": 1.8867390155792236, |
| "learning_rate": 3.322525572359183e-06, |
| "loss": 0.5553860664367676, |
| "memory(GiB)": 76.04, |
| "step": 4915, |
| "token_acc": 0.8386983751587614, |
| "train_speed(iter/s)": 0.027654 |
| }, |
| { |
| "epoch": 1.2720279268213848, |
| "grad_norm": 1.2835884094238281, |
| "learning_rate": 3.3191596190889762e-06, |
| "loss": 0.5246952056884766, |
| "memory(GiB)": 76.04, |
| "step": 4920, |
| "token_acc": 0.8337373292199207, |
| "train_speed(iter/s)": 0.027654 |
| }, |
| { |
| "epoch": 1.2733208352188248, |
| "grad_norm": 1.0106348991394043, |
| "learning_rate": 3.3157920014673646e-06, |
| "loss": 0.5335243225097657, |
| "memory(GiB)": 76.04, |
| "step": 4925, |
| "token_acc": 0.8471834913552705, |
| "train_speed(iter/s)": 0.027654 |
| }, |
| { |
| "epoch": 1.2746137436162648, |
| "grad_norm": 1.2083282470703125, |
| "learning_rate": 3.3124227263366036e-06, |
| "loss": 0.557880973815918, |
| "memory(GiB)": 76.04, |
| "step": 4930, |
| "token_acc": 0.8243945635852616, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.2759066520137048, |
| "grad_norm": 1.0607614517211914, |
| "learning_rate": 3.3090518005423157e-06, |
| "loss": 0.5547267436981201, |
| "memory(GiB)": 76.04, |
| "step": 4935, |
| "token_acc": 0.8197203446674578, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.2771995604111448, |
| "grad_norm": 1.0059987306594849, |
| "learning_rate": 3.305679230933478e-06, |
| "loss": 0.5478557586669922, |
| "memory(GiB)": 76.04, |
| "step": 4940, |
| "token_acc": 0.7879940655076654, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.2784924688085848, |
| "grad_norm": 0.9902651906013489, |
| "learning_rate": 3.3023050243624066e-06, |
| "loss": 0.5528521537780762, |
| "memory(GiB)": 76.04, |
| "step": 4945, |
| "token_acc": 0.832831287809007, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.279785377206025, |
| "grad_norm": 1.2956874370574951, |
| "learning_rate": 3.298929187684744e-06, |
| "loss": 0.5243937969207764, |
| "memory(GiB)": 76.04, |
| "step": 4950, |
| "token_acc": 0.8375224024129038, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.281078285603465, |
| "grad_norm": 0.8811606764793396, |
| "learning_rate": 3.2955517277594453e-06, |
| "loss": 0.5211551666259766, |
| "memory(GiB)": 76.04, |
| "step": 4955, |
| "token_acc": 0.8267743146826887, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.282371194000905, |
| "grad_norm": 0.9188509583473206, |
| "learning_rate": 3.292172651448761e-06, |
| "loss": 0.5098612785339356, |
| "memory(GiB)": 76.04, |
| "step": 4960, |
| "token_acc": 0.8693684341651787, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.283664102398345, |
| "grad_norm": 0.9744674563407898, |
| "learning_rate": 3.2887919656182304e-06, |
| "loss": 0.5251672744750977, |
| "memory(GiB)": 76.04, |
| "step": 4965, |
| "token_acc": 0.8267029592406476, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.284957010795785, |
| "grad_norm": 0.9584162831306458, |
| "learning_rate": 3.2854096771366584e-06, |
| "loss": 0.5332806587219239, |
| "memory(GiB)": 76.04, |
| "step": 4970, |
| "token_acc": 0.8322600222529418, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.2862499191932253, |
| "grad_norm": 0.8617119789123535, |
| "learning_rate": 3.28202579287611e-06, |
| "loss": 0.5289664745330811, |
| "memory(GiB)": 76.04, |
| "step": 4975, |
| "token_acc": 0.829112426035503, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.287542827590665, |
| "grad_norm": 1.0189121961593628, |
| "learning_rate": 3.278640319711889e-06, |
| "loss": 0.5311687469482422, |
| "memory(GiB)": 76.04, |
| "step": 4980, |
| "token_acc": 0.8320498040119898, |
| "train_speed(iter/s)": 0.027658 |
| }, |
| { |
| "epoch": 1.2888357359881053, |
| "grad_norm": 0.9284194707870483, |
| "learning_rate": 3.275253264522529e-06, |
| "loss": 0.5279128074645996, |
| "memory(GiB)": 76.04, |
| "step": 4985, |
| "token_acc": 0.8646108400841427, |
| "train_speed(iter/s)": 0.027656 |
| }, |
| { |
| "epoch": 1.2901286443855453, |
| "grad_norm": 1.1427520513534546, |
| "learning_rate": 3.2718646341897796e-06, |
| "loss": 0.5510351181030273, |
| "memory(GiB)": 76.04, |
| "step": 4990, |
| "token_acc": 0.8475239880886732, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.2914215527829853, |
| "grad_norm": 0.9549011588096619, |
| "learning_rate": 3.268474435598587e-06, |
| "loss": 0.5165416240692139, |
| "memory(GiB)": 76.04, |
| "step": 4995, |
| "token_acc": 0.856951293364478, |
| "train_speed(iter/s)": 0.027655 |
| }, |
| { |
| "epoch": 1.2927144611804253, |
| "grad_norm": 1.217895746231079, |
| "learning_rate": 3.265082675637087e-06, |
| "loss": 0.535146427154541, |
| "memory(GiB)": 76.04, |
| "step": 5000, |
| "token_acc": 0.8132745913451641, |
| "train_speed(iter/s)": 0.027657 |
| }, |
| { |
| "epoch": 1.2940073695778653, |
| "grad_norm": 0.9800029993057251, |
| "learning_rate": 3.2616893611965865e-06, |
| "loss": 0.5271368503570557, |
| "memory(GiB)": 76.04, |
| "step": 5005, |
| "token_acc": 0.8222583265637693, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.2953002779753056, |
| "grad_norm": 0.9374005198478699, |
| "learning_rate": 3.258294499171552e-06, |
| "loss": 0.5365757942199707, |
| "memory(GiB)": 76.04, |
| "step": 5010, |
| "token_acc": 0.8411453966124434, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.2965931863727456, |
| "grad_norm": 1.7360183000564575, |
| "learning_rate": 3.254898096459591e-06, |
| "loss": 0.5575047492980957, |
| "memory(GiB)": 76.04, |
| "step": 5015, |
| "token_acc": 0.8461703497103625, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.2978860947701856, |
| "grad_norm": 1.3810652494430542, |
| "learning_rate": 3.251500159961446e-06, |
| "loss": 0.5436039924621582, |
| "memory(GiB)": 76.04, |
| "step": 5020, |
| "token_acc": 0.8282478766907833, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.2991790031676256, |
| "grad_norm": 1.2262240648269653, |
| "learning_rate": 3.2481006965809713e-06, |
| "loss": 0.5245812892913818, |
| "memory(GiB)": 76.04, |
| "step": 5025, |
| "token_acc": 0.8353918706490007, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.3004719115650656, |
| "grad_norm": 1.2243238687515259, |
| "learning_rate": 3.2446997132251267e-06, |
| "loss": 0.5234585762023926, |
| "memory(GiB)": 76.04, |
| "step": 5030, |
| "token_acc": 0.8347611572101368, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.3017648199625056, |
| "grad_norm": 0.8756423592567444, |
| "learning_rate": 3.241297216803959e-06, |
| "loss": 0.5213943004608155, |
| "memory(GiB)": 76.04, |
| "step": 5035, |
| "token_acc": 0.8430144773070433, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.3030577283599456, |
| "grad_norm": 1.070388913154602, |
| "learning_rate": 3.2378932142305896e-06, |
| "loss": 0.5314732551574707, |
| "memory(GiB)": 76.04, |
| "step": 5040, |
| "token_acc": 0.8426463389048185, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.3043506367573858, |
| "grad_norm": 1.6048212051391602, |
| "learning_rate": 3.2344877124211986e-06, |
| "loss": 0.5154043674468994, |
| "memory(GiB)": 76.04, |
| "step": 5045, |
| "token_acc": 0.836419641239355, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.3056435451548258, |
| "grad_norm": 0.8205667734146118, |
| "learning_rate": 3.2310807182950157e-06, |
| "loss": 0.5318900585174561, |
| "memory(GiB)": 76.04, |
| "step": 5050, |
| "token_acc": 0.839140860160196, |
| "train_speed(iter/s)": 0.027628 |
| }, |
| { |
| "epoch": 1.3069364535522658, |
| "grad_norm": 2.0000104904174805, |
| "learning_rate": 3.2276722387742986e-06, |
| "loss": 0.5485349178314209, |
| "memory(GiB)": 76.04, |
| "step": 5055, |
| "token_acc": 0.832, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.3082293619497058, |
| "grad_norm": 1.5011872053146362, |
| "learning_rate": 3.2242622807843256e-06, |
| "loss": 0.5459944725036621, |
| "memory(GiB)": 76.04, |
| "step": 5060, |
| "token_acc": 0.8583078032077852, |
| "train_speed(iter/s)": 0.027628 |
| }, |
| { |
| "epoch": 1.3095222703471459, |
| "grad_norm": 1.206041932106018, |
| "learning_rate": 3.2208508512533777e-06, |
| "loss": 0.5489155769348144, |
| "memory(GiB)": 76.04, |
| "step": 5065, |
| "token_acc": 0.8435306288332225, |
| "train_speed(iter/s)": 0.027628 |
| }, |
| { |
| "epoch": 1.310815178744586, |
| "grad_norm": 0.9339408874511719, |
| "learning_rate": 3.2174379571127255e-06, |
| "loss": 0.5105900764465332, |
| "memory(GiB)": 76.04, |
| "step": 5070, |
| "token_acc": 0.8589012405348799, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.3121080871420259, |
| "grad_norm": 0.9643262624740601, |
| "learning_rate": 3.214023605296618e-06, |
| "loss": 0.5285213947296142, |
| "memory(GiB)": 76.04, |
| "step": 5075, |
| "token_acc": 0.8527266411948593, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.313400995539466, |
| "grad_norm": 0.9289806485176086, |
| "learning_rate": 3.2106078027422617e-06, |
| "loss": 0.546751070022583, |
| "memory(GiB)": 76.04, |
| "step": 5080, |
| "token_acc": 0.8559926386013342, |
| "train_speed(iter/s)": 0.02763 |
| }, |
| { |
| "epoch": 1.314693903936906, |
| "grad_norm": 0.9898268580436707, |
| "learning_rate": 3.2071905563898147e-06, |
| "loss": 0.5333544731140136, |
| "memory(GiB)": 76.04, |
| "step": 5085, |
| "token_acc": 0.8701183055590892, |
| "train_speed(iter/s)": 0.02763 |
| }, |
| { |
| "epoch": 1.3159868123343461, |
| "grad_norm": 1.0481353998184204, |
| "learning_rate": 3.2037718731823654e-06, |
| "loss": 0.5345610618591309, |
| "memory(GiB)": 76.04, |
| "step": 5090, |
| "token_acc": 0.8575830948712304, |
| "train_speed(iter/s)": 0.02763 |
| }, |
| { |
| "epoch": 1.3172797207317861, |
| "grad_norm": 1.0228970050811768, |
| "learning_rate": 3.200351760065924e-06, |
| "loss": 0.5261023998260498, |
| "memory(GiB)": 76.04, |
| "step": 5095, |
| "token_acc": 0.8451571927596062, |
| "train_speed(iter/s)": 0.027628 |
| }, |
| { |
| "epoch": 1.3185726291292261, |
| "grad_norm": 1.2880408763885498, |
| "learning_rate": 3.196930223989404e-06, |
| "loss": 0.5189993858337403, |
| "memory(GiB)": 76.04, |
| "step": 5100, |
| "token_acc": 0.8433385103653184, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.3198655375266664, |
| "grad_norm": 1.5179208517074585, |
| "learning_rate": 3.193507271904612e-06, |
| "loss": 0.5425951957702637, |
| "memory(GiB)": 76.04, |
| "step": 5105, |
| "token_acc": 0.8408305921052631, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.3211584459241061, |
| "grad_norm": 1.4803640842437744, |
| "learning_rate": 3.1900829107662296e-06, |
| "loss": 0.5434229373931885, |
| "memory(GiB)": 76.04, |
| "step": 5110, |
| "token_acc": 0.8403665573028624, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.3224513543215464, |
| "grad_norm": 2.6232545375823975, |
| "learning_rate": 3.186657147531802e-06, |
| "loss": 0.5110975742340088, |
| "memory(GiB)": 76.04, |
| "step": 5115, |
| "token_acc": 0.8574821852731591, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.3237442627189864, |
| "grad_norm": 0.9406218528747559, |
| "learning_rate": 3.1832299891617245e-06, |
| "loss": 0.5422788143157959, |
| "memory(GiB)": 76.04, |
| "step": 5120, |
| "token_acc": 0.8556760308854937, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.3250371711164264, |
| "grad_norm": 1.6263813972473145, |
| "learning_rate": 3.179801442619225e-06, |
| "loss": 0.5206321716308594, |
| "memory(GiB)": 76.04, |
| "step": 5125, |
| "token_acc": 0.8325710236423371, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.3263300795138664, |
| "grad_norm": 1.0195496082305908, |
| "learning_rate": 3.176371514870354e-06, |
| "loss": 0.5497357368469238, |
| "memory(GiB)": 76.04, |
| "step": 5130, |
| "token_acc": 0.8564178043952697, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.3276229879113064, |
| "grad_norm": 1.1096144914627075, |
| "learning_rate": 3.172940212883965e-06, |
| "loss": 0.5373088836669921, |
| "memory(GiB)": 76.04, |
| "step": 5135, |
| "token_acc": 0.8881567463780764, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.3289158963087466, |
| "grad_norm": 1.392109751701355, |
| "learning_rate": 3.1695075436317073e-06, |
| "loss": 0.5438241004943848, |
| "memory(GiB)": 76.04, |
| "step": 5140, |
| "token_acc": 0.8368131622479545, |
| "train_speed(iter/s)": 0.027624 |
| }, |
| { |
| "epoch": 1.3302088047061866, |
| "grad_norm": 1.4390590190887451, |
| "learning_rate": 3.166073514088006e-06, |
| "loss": 0.5391247272491455, |
| "memory(GiB)": 76.04, |
| "step": 5145, |
| "token_acc": 0.8375243285325029, |
| "train_speed(iter/s)": 0.027624 |
| }, |
| { |
| "epoch": 1.3315017131036266, |
| "grad_norm": 1.352954387664795, |
| "learning_rate": 3.1626381312300516e-06, |
| "loss": 0.5338696479797364, |
| "memory(GiB)": 76.04, |
| "step": 5150, |
| "token_acc": 0.847240778978906, |
| "train_speed(iter/s)": 0.027625 |
| }, |
| { |
| "epoch": 1.3327946215010666, |
| "grad_norm": 3.8461802005767822, |
| "learning_rate": 3.1592014020377815e-06, |
| "loss": 0.5344533920288086, |
| "memory(GiB)": 76.04, |
| "step": 5155, |
| "token_acc": 0.8609710100434191, |
| "train_speed(iter/s)": 0.027625 |
| }, |
| { |
| "epoch": 1.3340875298985067, |
| "grad_norm": 3.0712478160858154, |
| "learning_rate": 3.1557633334938712e-06, |
| "loss": 0.5250087261199952, |
| "memory(GiB)": 76.04, |
| "step": 5160, |
| "token_acc": 0.8473618090452262, |
| "train_speed(iter/s)": 0.027625 |
| }, |
| { |
| "epoch": 1.3353804382959467, |
| "grad_norm": 0.8653470873832703, |
| "learning_rate": 3.1523239325837174e-06, |
| "loss": 0.5317577362060547, |
| "memory(GiB)": 76.04, |
| "step": 5165, |
| "token_acc": 0.8672264497507216, |
| "train_speed(iter/s)": 0.027624 |
| }, |
| { |
| "epoch": 1.3366733466933867, |
| "grad_norm": 1.0058352947235107, |
| "learning_rate": 3.1488832062954213e-06, |
| "loss": 0.5124196529388427, |
| "memory(GiB)": 76.04, |
| "step": 5170, |
| "token_acc": 0.8276955161626695, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.337966255090827, |
| "grad_norm": 1.0088118314743042, |
| "learning_rate": 3.145441161619779e-06, |
| "loss": 0.5366281509399414, |
| "memory(GiB)": 76.04, |
| "step": 5175, |
| "token_acc": 0.8506988094357761, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.339259163488267, |
| "grad_norm": 0.955906331539154, |
| "learning_rate": 3.1419978055502666e-06, |
| "loss": 0.5448675155639648, |
| "memory(GiB)": 76.04, |
| "step": 5180, |
| "token_acc": 0.79640928536363, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.340552071885707, |
| "grad_norm": 1.171993374824524, |
| "learning_rate": 3.138553145083022e-06, |
| "loss": 0.5282750129699707, |
| "memory(GiB)": 76.04, |
| "step": 5185, |
| "token_acc": 0.8676538311665308, |
| "train_speed(iter/s)": 0.027621 |
| }, |
| { |
| "epoch": 1.341844980283147, |
| "grad_norm": 1.1022040843963623, |
| "learning_rate": 3.135107187216834e-06, |
| "loss": 0.534688663482666, |
| "memory(GiB)": 76.04, |
| "step": 5190, |
| "token_acc": 0.8357040716489802, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.343137888680587, |
| "grad_norm": 2.8540091514587402, |
| "learning_rate": 3.1316599389531282e-06, |
| "loss": 0.5261801719665528, |
| "memory(GiB)": 76.04, |
| "step": 5195, |
| "token_acc": 0.8275160272718022, |
| "train_speed(iter/s)": 0.027621 |
| }, |
| { |
| "epoch": 1.3444307970780272, |
| "grad_norm": 1.3484967947006226, |
| "learning_rate": 3.128211407295951e-06, |
| "loss": 0.5323428630828857, |
| "memory(GiB)": 76.04, |
| "step": 5200, |
| "token_acc": 0.8412020736880043, |
| "train_speed(iter/s)": 0.027621 |
| }, |
| { |
| "epoch": 1.345723705475467, |
| "grad_norm": 1.8615549802780151, |
| "learning_rate": 3.1247615992519587e-06, |
| "loss": 0.5560379981994629, |
| "memory(GiB)": 76.04, |
| "step": 5205, |
| "token_acc": 0.8468941382327209, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.3470166138729072, |
| "grad_norm": 2.5111136436462402, |
| "learning_rate": 3.1213105218303972e-06, |
| "loss": 0.534544563293457, |
| "memory(GiB)": 76.04, |
| "step": 5210, |
| "token_acc": 0.8330609679446889, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.3483095222703472, |
| "grad_norm": 0.9236442446708679, |
| "learning_rate": 3.1178581820430957e-06, |
| "loss": 0.5287897109985351, |
| "memory(GiB)": 76.04, |
| "step": 5215, |
| "token_acc": 0.8456866092341895, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.3496024306677872, |
| "grad_norm": 1.2912098169326782, |
| "learning_rate": 3.1144045869044437e-06, |
| "loss": 0.5496071815490723, |
| "memory(GiB)": 76.04, |
| "step": 5220, |
| "token_acc": 0.8341737438075018, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.3508953390652272, |
| "grad_norm": 0.8818367123603821, |
| "learning_rate": 3.1109497434313857e-06, |
| "loss": 0.5452832698822021, |
| "memory(GiB)": 76.04, |
| "step": 5225, |
| "token_acc": 0.8284331373254931, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.3521882474626672, |
| "grad_norm": 1.3674169778823853, |
| "learning_rate": 3.1074936586433994e-06, |
| "loss": 0.537296199798584, |
| "memory(GiB)": 76.04, |
| "step": 5230, |
| "token_acc": 0.8586094734702175, |
| "train_speed(iter/s)": 0.027623 |
| }, |
| { |
| "epoch": 1.3534811558601074, |
| "grad_norm": 1.324703335762024, |
| "learning_rate": 3.1040363395624854e-06, |
| "loss": 0.49640579223632814, |
| "memory(GiB)": 76.04, |
| "step": 5235, |
| "token_acc": 0.8524991832734401, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.3547740642575474, |
| "grad_norm": 0.9162821173667908, |
| "learning_rate": 3.1005777932131535e-06, |
| "loss": 0.5111923217773438, |
| "memory(GiB)": 76.04, |
| "step": 5240, |
| "token_acc": 0.8507638072855465, |
| "train_speed(iter/s)": 0.027621 |
| }, |
| { |
| "epoch": 1.3560669726549874, |
| "grad_norm": 1.0688142776489258, |
| "learning_rate": 3.097118026622405e-06, |
| "loss": 0.5468463897705078, |
| "memory(GiB)": 76.04, |
| "step": 5245, |
| "token_acc": 0.832568012476174, |
| "train_speed(iter/s)": 0.027621 |
| }, |
| { |
| "epoch": 1.3573598810524274, |
| "grad_norm": 1.0131880044937134, |
| "learning_rate": 3.093657046819722e-06, |
| "loss": 0.4972386360168457, |
| "memory(GiB)": 76.04, |
| "step": 5250, |
| "token_acc": 0.8424430280275911, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.3586527894498674, |
| "grad_norm": 1.027860164642334, |
| "learning_rate": 3.0901948608370503e-06, |
| "loss": 0.5250637054443359, |
| "memory(GiB)": 76.04, |
| "step": 5255, |
| "token_acc": 0.8416179528424026, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.3599456978473075, |
| "grad_norm": 0.9048755764961243, |
| "learning_rate": 3.086731475708788e-06, |
| "loss": 0.5370029449462891, |
| "memory(GiB)": 76.04, |
| "step": 5260, |
| "token_acc": 0.8346947027901335, |
| "train_speed(iter/s)": 0.027621 |
| }, |
| { |
| "epoch": 1.3612386062447475, |
| "grad_norm": 2.5965473651885986, |
| "learning_rate": 3.0832668984717675e-06, |
| "loss": 0.5500319480895997, |
| "memory(GiB)": 76.04, |
| "step": 5265, |
| "token_acc": 0.8267131242740999, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.3625315146421877, |
| "grad_norm": 1.969552993774414, |
| "learning_rate": 3.079801136165246e-06, |
| "loss": 0.5336560726165771, |
| "memory(GiB)": 76.04, |
| "step": 5270, |
| "token_acc": 0.8175245806824755, |
| "train_speed(iter/s)": 0.027623 |
| }, |
| { |
| "epoch": 1.3638244230396277, |
| "grad_norm": 1.069265604019165, |
| "learning_rate": 3.0763341958308853e-06, |
| "loss": 0.5203034400939941, |
| "memory(GiB)": 76.04, |
| "step": 5275, |
| "token_acc": 0.8421536276680172, |
| "train_speed(iter/s)": 0.027624 |
| }, |
| { |
| "epoch": 1.3651173314370677, |
| "grad_norm": 1.2012121677398682, |
| "learning_rate": 3.072866084512743e-06, |
| "loss": 0.5232099533081055, |
| "memory(GiB)": 76.04, |
| "step": 5280, |
| "token_acc": 0.8650010324179228, |
| "train_speed(iter/s)": 0.027624 |
| }, |
| { |
| "epoch": 1.3664102398345077, |
| "grad_norm": 1.19133722782135, |
| "learning_rate": 3.069396809257256e-06, |
| "loss": 0.5404583930969238, |
| "memory(GiB)": 76.04, |
| "step": 5285, |
| "token_acc": 0.8128638853481241, |
| "train_speed(iter/s)": 0.027624 |
| }, |
| { |
| "epoch": 1.3677031482319477, |
| "grad_norm": 1.1554311513900757, |
| "learning_rate": 3.065926377113224e-06, |
| "loss": 0.5264840126037598, |
| "memory(GiB)": 76.04, |
| "step": 5290, |
| "token_acc": 0.8585351063368996, |
| "train_speed(iter/s)": 0.027624 |
| }, |
| { |
| "epoch": 1.3689960566293877, |
| "grad_norm": 1.4480676651000977, |
| "learning_rate": 3.0624547951318e-06, |
| "loss": 0.5401974678039551, |
| "memory(GiB)": 76.04, |
| "step": 5295, |
| "token_acc": 0.8407013111993263, |
| "train_speed(iter/s)": 0.027624 |
| }, |
| { |
| "epoch": 1.3702889650268277, |
| "grad_norm": 0.975131094455719, |
| "learning_rate": 3.0589820703664707e-06, |
| "loss": 0.5349632263183594, |
| "memory(GiB)": 76.04, |
| "step": 5300, |
| "token_acc": 0.8640092475203222, |
| "train_speed(iter/s)": 0.027624 |
| }, |
| { |
| "epoch": 1.371581873424268, |
| "grad_norm": 1.336971640586853, |
| "learning_rate": 3.0555082098730464e-06, |
| "loss": 0.5260316371917725, |
| "memory(GiB)": 76.04, |
| "step": 5305, |
| "token_acc": 0.8318684124147488, |
| "train_speed(iter/s)": 0.027624 |
| }, |
| { |
| "epoch": 1.372874781821708, |
| "grad_norm": 1.4911202192306519, |
| "learning_rate": 3.0520332207096433e-06, |
| "loss": 0.5175662994384765, |
| "memory(GiB)": 76.04, |
| "step": 5310, |
| "token_acc": 0.8419973789441849, |
| "train_speed(iter/s)": 0.027625 |
| }, |
| { |
| "epoch": 1.374167690219148, |
| "grad_norm": 1.1500071287155151, |
| "learning_rate": 3.0485571099366724e-06, |
| "loss": 0.5503662586212158, |
| "memory(GiB)": 76.04, |
| "step": 5315, |
| "token_acc": 0.800382509562739, |
| "train_speed(iter/s)": 0.027624 |
| }, |
| { |
| "epoch": 1.375460598616588, |
| "grad_norm": 0.9023737907409668, |
| "learning_rate": 3.0450798846168227e-06, |
| "loss": 0.5276325225830079, |
| "memory(GiB)": 76.04, |
| "step": 5320, |
| "token_acc": 0.8494656224308771, |
| "train_speed(iter/s)": 0.027625 |
| }, |
| { |
| "epoch": 1.376753507014028, |
| "grad_norm": 1.091489315032959, |
| "learning_rate": 3.0416015518150494e-06, |
| "loss": 0.5327792167663574, |
| "memory(GiB)": 76.04, |
| "step": 5325, |
| "token_acc": 0.8433721260289526, |
| "train_speed(iter/s)": 0.027625 |
| }, |
| { |
| "epoch": 1.3780464154114682, |
| "grad_norm": 1.429714322090149, |
| "learning_rate": 3.0381221185985543e-06, |
| "loss": 0.5325508117675781, |
| "memory(GiB)": 76.04, |
| "step": 5330, |
| "token_acc": 0.8607167276676185, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.379339323808908, |
| "grad_norm": 0.9919387698173523, |
| "learning_rate": 3.034641592036779e-06, |
| "loss": 0.5155058860778808, |
| "memory(GiB)": 76.04, |
| "step": 5335, |
| "token_acc": 0.8486006657625447, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.3806322322063482, |
| "grad_norm": 2.1436212062835693, |
| "learning_rate": 3.031159979201383e-06, |
| "loss": 0.5232511043548584, |
| "memory(GiB)": 76.04, |
| "step": 5340, |
| "token_acc": 0.8501789414202298, |
| "train_speed(iter/s)": 0.027625 |
| }, |
| { |
| "epoch": 1.3819251406037882, |
| "grad_norm": 1.056612253189087, |
| "learning_rate": 3.027677287166235e-06, |
| "loss": 0.5240641117095948, |
| "memory(GiB)": 76.04, |
| "step": 5345, |
| "token_acc": 0.8404334212261042, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.3832180490012282, |
| "grad_norm": 1.0267983675003052, |
| "learning_rate": 3.0241935230073977e-06, |
| "loss": 0.5429930210113525, |
| "memory(GiB)": 76.04, |
| "step": 5350, |
| "token_acc": 0.8286713286713286, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.3845109573986683, |
| "grad_norm": 1.0295692682266235, |
| "learning_rate": 3.020708693803108e-06, |
| "loss": 0.5250686645507813, |
| "memory(GiB)": 76.04, |
| "step": 5355, |
| "token_acc": 0.8244736210071252, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.3858038657961083, |
| "grad_norm": 0.999599277973175, |
| "learning_rate": 3.0172228066337704e-06, |
| "loss": 0.5352205276489258, |
| "memory(GiB)": 76.04, |
| "step": 5360, |
| "token_acc": 0.84011528503737, |
| "train_speed(iter/s)": 0.027625 |
| }, |
| { |
| "epoch": 1.3870967741935485, |
| "grad_norm": 1.8036699295043945, |
| "learning_rate": 3.013735868581937e-06, |
| "loss": 0.5204336166381835, |
| "memory(GiB)": 76.04, |
| "step": 5365, |
| "token_acc": 0.8598814043234085, |
| "train_speed(iter/s)": 0.027625 |
| }, |
| { |
| "epoch": 1.3883896825909885, |
| "grad_norm": 1.0070078372955322, |
| "learning_rate": 3.0102478867322967e-06, |
| "loss": 0.5483356952667237, |
| "memory(GiB)": 76.04, |
| "step": 5370, |
| "token_acc": 0.824980503222099, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.3896825909884285, |
| "grad_norm": 1.2133702039718628, |
| "learning_rate": 3.0067588681716563e-06, |
| "loss": 0.5264020919799804, |
| "memory(GiB)": 76.04, |
| "step": 5375, |
| "token_acc": 0.8479607640681466, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.3909754993858685, |
| "grad_norm": 1.567028522491455, |
| "learning_rate": 3.0032688199889328e-06, |
| "loss": 0.5459973335266113, |
| "memory(GiB)": 76.04, |
| "step": 5380, |
| "token_acc": 0.8320722155847604, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.3922684077833085, |
| "grad_norm": 0.9477059245109558, |
| "learning_rate": 2.9997777492751313e-06, |
| "loss": 0.522393798828125, |
| "memory(GiB)": 76.04, |
| "step": 5385, |
| "token_acc": 0.8648985404058384, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.3935613161807485, |
| "grad_norm": 0.9237920045852661, |
| "learning_rate": 2.9962856631233388e-06, |
| "loss": 0.5231618404388427, |
| "memory(GiB)": 76.04, |
| "step": 5390, |
| "token_acc": 0.8635757044267358, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.3948542245781885, |
| "grad_norm": 1.1687318086624146, |
| "learning_rate": 2.9927925686287006e-06, |
| "loss": 0.5056675434112549, |
| "memory(GiB)": 76.04, |
| "step": 5395, |
| "token_acc": 0.8491524700055506, |
| "train_speed(iter/s)": 0.027628 |
| }, |
| { |
| "epoch": 1.3961471329756288, |
| "grad_norm": 1.1914643049240112, |
| "learning_rate": 2.9892984728884155e-06, |
| "loss": 0.5470870018005372, |
| "memory(GiB)": 76.04, |
| "step": 5400, |
| "token_acc": 0.8393457238872505, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.3974400413730688, |
| "grad_norm": 0.9498441219329834, |
| "learning_rate": 2.9858033830017127e-06, |
| "loss": 0.5178772926330566, |
| "memory(GiB)": 76.04, |
| "step": 5405, |
| "token_acc": 0.8383829302646169, |
| "train_speed(iter/s)": 0.027628 |
| }, |
| { |
| "epoch": 1.3987329497705088, |
| "grad_norm": 1.407654881477356, |
| "learning_rate": 2.982307306069842e-06, |
| "loss": 0.5494901180267334, |
| "memory(GiB)": 76.04, |
| "step": 5410, |
| "token_acc": 0.8369012373794883, |
| "train_speed(iter/s)": 0.027628 |
| }, |
| { |
| "epoch": 1.4000258581679488, |
| "grad_norm": 1.148308515548706, |
| "learning_rate": 2.9788102491960606e-06, |
| "loss": 0.5415051460266114, |
| "memory(GiB)": 76.04, |
| "step": 5415, |
| "token_acc": 0.8146666105050335, |
| "train_speed(iter/s)": 0.027628 |
| }, |
| { |
| "epoch": 1.4013187665653888, |
| "grad_norm": 1.0424379110336304, |
| "learning_rate": 2.975312219485616e-06, |
| "loss": 0.5347636699676513, |
| "memory(GiB)": 76.04, |
| "step": 5420, |
| "token_acc": 0.8373481740260795, |
| "train_speed(iter/s)": 0.027628 |
| }, |
| { |
| "epoch": 1.402611674962829, |
| "grad_norm": 1.1721105575561523, |
| "learning_rate": 2.971813224045732e-06, |
| "loss": 0.5278305053710938, |
| "memory(GiB)": 76.04, |
| "step": 5425, |
| "token_acc": 0.8396724598930482, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.4039045833602688, |
| "grad_norm": 1.179748296737671, |
| "learning_rate": 2.9683132699855933e-06, |
| "loss": 0.5224045276641845, |
| "memory(GiB)": 76.04, |
| "step": 5430, |
| "token_acc": 0.8568342151675485, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.405197491757709, |
| "grad_norm": 2.302943706512451, |
| "learning_rate": 2.9648123644163344e-06, |
| "loss": 0.51423659324646, |
| "memory(GiB)": 76.04, |
| "step": 5435, |
| "token_acc": 0.8518541896796591, |
| "train_speed(iter/s)": 0.02763 |
| }, |
| { |
| "epoch": 1.406490400155149, |
| "grad_norm": 1.695887804031372, |
| "learning_rate": 2.961310514451021e-06, |
| "loss": 0.5096250534057617, |
| "memory(GiB)": 76.04, |
| "step": 5440, |
| "token_acc": 0.8589175232620451, |
| "train_speed(iter/s)": 0.02763 |
| }, |
| { |
| "epoch": 1.407783308552589, |
| "grad_norm": 1.0010126829147339, |
| "learning_rate": 2.9578077272046407e-06, |
| "loss": 0.5219532012939453, |
| "memory(GiB)": 76.04, |
| "step": 5445, |
| "token_acc": 0.8385249390550633, |
| "train_speed(iter/s)": 0.027631 |
| }, |
| { |
| "epoch": 1.409076216950029, |
| "grad_norm": 1.0724811553955078, |
| "learning_rate": 2.954304009794082e-06, |
| "loss": 0.5457123279571533, |
| "memory(GiB)": 76.04, |
| "step": 5450, |
| "token_acc": 0.8312751004016065, |
| "train_speed(iter/s)": 0.02763 |
| }, |
| { |
| "epoch": 1.410369125347469, |
| "grad_norm": 1.078969120979309, |
| "learning_rate": 2.9507993693381245e-06, |
| "loss": 0.4943378925323486, |
| "memory(GiB)": 76.04, |
| "step": 5455, |
| "token_acc": 0.8496458467482292, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.4116620337449093, |
| "grad_norm": 0.9450307488441467, |
| "learning_rate": 2.9472938129574248e-06, |
| "loss": 0.5415146827697754, |
| "memory(GiB)": 76.04, |
| "step": 5460, |
| "token_acc": 0.8225661328054705, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.4129549421423493, |
| "grad_norm": 1.5226620435714722, |
| "learning_rate": 2.9437873477744973e-06, |
| "loss": 0.5119266033172607, |
| "memory(GiB)": 76.04, |
| "step": 5465, |
| "token_acc": 0.8384682058151446, |
| "train_speed(iter/s)": 0.027628 |
| }, |
| { |
| "epoch": 1.4142478505397893, |
| "grad_norm": 1.004164457321167, |
| "learning_rate": 2.9402799809137066e-06, |
| "loss": 0.5116465091705322, |
| "memory(GiB)": 76.04, |
| "step": 5470, |
| "token_acc": 0.8437827370559665, |
| "train_speed(iter/s)": 0.027628 |
| }, |
| { |
| "epoch": 1.4155407589372293, |
| "grad_norm": 0.9910258054733276, |
| "learning_rate": 2.936771719501246e-06, |
| "loss": 0.5433405876159668, |
| "memory(GiB)": 76.04, |
| "step": 5475, |
| "token_acc": 0.844466902475998, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.4168336673346693, |
| "grad_norm": 0.9276953935623169, |
| "learning_rate": 2.9332625706651287e-06, |
| "loss": 0.5179524898529053, |
| "memory(GiB)": 76.04, |
| "step": 5480, |
| "token_acc": 0.8560186436098352, |
| "train_speed(iter/s)": 0.02763 |
| }, |
| { |
| "epoch": 1.4181265757321093, |
| "grad_norm": 0.9490028619766235, |
| "learning_rate": 2.929752541535169e-06, |
| "loss": 0.5286359786987305, |
| "memory(GiB)": 76.04, |
| "step": 5485, |
| "token_acc": 0.8138392178714351, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.4194194841295493, |
| "grad_norm": 0.9604682326316833, |
| "learning_rate": 2.9262416392429727e-06, |
| "loss": 0.5103157043457032, |
| "memory(GiB)": 76.04, |
| "step": 5490, |
| "token_acc": 0.8366208149493901, |
| "train_speed(iter/s)": 0.02763 |
| }, |
| { |
| "epoch": 1.4207123925269896, |
| "grad_norm": 1.2919334173202515, |
| "learning_rate": 2.922729870921916e-06, |
| "loss": 0.5384269714355469, |
| "memory(GiB)": 76.04, |
| "step": 5495, |
| "token_acc": 0.8337518834756403, |
| "train_speed(iter/s)": 0.02763 |
| }, |
| { |
| "epoch": 1.4220053009244296, |
| "grad_norm": 8.227481842041016, |
| "learning_rate": 2.919217243707137e-06, |
| "loss": 0.5168218612670898, |
| "memory(GiB)": 76.04, |
| "step": 5500, |
| "token_acc": 0.848703986059682, |
| "train_speed(iter/s)": 0.027631 |
| }, |
| { |
| "epoch": 1.4232982093218696, |
| "grad_norm": 1.9908632040023804, |
| "learning_rate": 2.915703764735518e-06, |
| "loss": 0.5363755226135254, |
| "memory(GiB)": 76.04, |
| "step": 5505, |
| "token_acc": 0.8440745986779982, |
| "train_speed(iter/s)": 0.027631 |
| }, |
| { |
| "epoch": 1.4245911177193096, |
| "grad_norm": 4.587954521179199, |
| "learning_rate": 2.9121894411456727e-06, |
| "loss": 0.5621316432952881, |
| "memory(GiB)": 76.04, |
| "step": 5510, |
| "token_acc": 0.831285065455517, |
| "train_speed(iter/s)": 0.027631 |
| }, |
| { |
| "epoch": 1.4258840261167496, |
| "grad_norm": 1.0272570848464966, |
| "learning_rate": 2.90867428007793e-06, |
| "loss": 0.5223082542419434, |
| "memory(GiB)": 76.04, |
| "step": 5515, |
| "token_acc": 0.8410167818361303, |
| "train_speed(iter/s)": 0.027631 |
| }, |
| { |
| "epoch": 1.4271769345141896, |
| "grad_norm": 1.0505872964859009, |
| "learning_rate": 2.90515828867432e-06, |
| "loss": 0.5363224983215332, |
| "memory(GiB)": 76.04, |
| "step": 5520, |
| "token_acc": 0.8203096575979302, |
| "train_speed(iter/s)": 0.027631 |
| }, |
| { |
| "epoch": 1.4284698429116296, |
| "grad_norm": 1.0748521089553833, |
| "learning_rate": 2.9016414740785625e-06, |
| "loss": 0.5091330051422119, |
| "memory(GiB)": 76.04, |
| "step": 5525, |
| "token_acc": 0.8322848205813095, |
| "train_speed(iter/s)": 0.027631 |
| }, |
| { |
| "epoch": 1.4297627513090698, |
| "grad_norm": 1.0750956535339355, |
| "learning_rate": 2.8981238434360467e-06, |
| "loss": 0.5427698135375977, |
| "memory(GiB)": 76.04, |
| "step": 5530, |
| "token_acc": 0.8349636803874092, |
| "train_speed(iter/s)": 0.027631 |
| }, |
| { |
| "epoch": 1.4310556597065098, |
| "grad_norm": 2.610778331756592, |
| "learning_rate": 2.894605403893821e-06, |
| "loss": 0.4974540710449219, |
| "memory(GiB)": 76.04, |
| "step": 5535, |
| "token_acc": 0.8535060294774452, |
| "train_speed(iter/s)": 0.027631 |
| }, |
| { |
| "epoch": 1.4323485681039498, |
| "grad_norm": 1.1820318698883057, |
| "learning_rate": 2.8910861626005774e-06, |
| "loss": 0.5238434314727783, |
| "memory(GiB)": 76.04, |
| "step": 5540, |
| "token_acc": 0.8497934516523867, |
| "train_speed(iter/s)": 0.027632 |
| }, |
| { |
| "epoch": 1.4336414765013898, |
| "grad_norm": 3.4977128505706787, |
| "learning_rate": 2.887566126706638e-06, |
| "loss": 0.5260235786437988, |
| "memory(GiB)": 76.04, |
| "step": 5545, |
| "token_acc": 0.8721027400272683, |
| "train_speed(iter/s)": 0.027632 |
| }, |
| { |
| "epoch": 1.4349343848988299, |
| "grad_norm": 1.0233287811279297, |
| "learning_rate": 2.884045303363936e-06, |
| "loss": 0.5392961978912354, |
| "memory(GiB)": 76.04, |
| "step": 5550, |
| "token_acc": 0.8287411925544221, |
| "train_speed(iter/s)": 0.027632 |
| }, |
| { |
| "epoch": 1.43622729329627, |
| "grad_norm": 2.892608642578125, |
| "learning_rate": 2.8805236997260083e-06, |
| "loss": 0.5215497016906738, |
| "memory(GiB)": 76.04, |
| "step": 5555, |
| "token_acc": 0.8498609823911029, |
| "train_speed(iter/s)": 0.027631 |
| }, |
| { |
| "epoch": 1.4375202016937099, |
| "grad_norm": 1.1967157125473022, |
| "learning_rate": 2.877001322947975e-06, |
| "loss": 0.5007841110229492, |
| "memory(GiB)": 76.04, |
| "step": 5560, |
| "token_acc": 0.861061495279408, |
| "train_speed(iter/s)": 0.027631 |
| }, |
| { |
| "epoch": 1.43881311009115, |
| "grad_norm": 0.955826461315155, |
| "learning_rate": 2.8734781801865295e-06, |
| "loss": 0.5322293281555176, |
| "memory(GiB)": 76.04, |
| "step": 5565, |
| "token_acc": 0.8267916342651332, |
| "train_speed(iter/s)": 0.027632 |
| }, |
| { |
| "epoch": 1.44010601848859, |
| "grad_norm": 0.9877298474311829, |
| "learning_rate": 2.8699542785999174e-06, |
| "loss": 0.5368639469146729, |
| "memory(GiB)": 76.04, |
| "step": 5570, |
| "token_acc": 0.8380945422663841, |
| "train_speed(iter/s)": 0.027632 |
| }, |
| { |
| "epoch": 1.44139892688603, |
| "grad_norm": 1.6299902200698853, |
| "learning_rate": 2.866429625347929e-06, |
| "loss": 0.5405995368957519, |
| "memory(GiB)": 76.04, |
| "step": 5575, |
| "token_acc": 0.8694860906491031, |
| "train_speed(iter/s)": 0.027633 |
| }, |
| { |
| "epoch": 1.4426918352834701, |
| "grad_norm": 1.1087404489517212, |
| "learning_rate": 2.8629042275918816e-06, |
| "loss": 0.5386042118072509, |
| "memory(GiB)": 76.04, |
| "step": 5580, |
| "token_acc": 0.8234553974314475, |
| "train_speed(iter/s)": 0.027633 |
| }, |
| { |
| "epoch": 1.4439847436809101, |
| "grad_norm": 1.3870139122009277, |
| "learning_rate": 2.8593780924946035e-06, |
| "loss": 0.5439047813415527, |
| "memory(GiB)": 76.04, |
| "step": 5585, |
| "token_acc": 0.8323816375162987, |
| "train_speed(iter/s)": 0.027633 |
| }, |
| { |
| "epoch": 1.4452776520783503, |
| "grad_norm": 1.3395311832427979, |
| "learning_rate": 2.8558512272204236e-06, |
| "loss": 0.5457947254180908, |
| "memory(GiB)": 76.04, |
| "step": 5590, |
| "token_acc": 0.8384256861729674, |
| "train_speed(iter/s)": 0.027634 |
| }, |
| { |
| "epoch": 1.4465705604757904, |
| "grad_norm": 1.6546262502670288, |
| "learning_rate": 2.852323638935153e-06, |
| "loss": 0.5411076545715332, |
| "memory(GiB)": 76.04, |
| "step": 5595, |
| "token_acc": 0.7911689027752251, |
| "train_speed(iter/s)": 0.027635 |
| }, |
| { |
| "epoch": 1.4478634688732304, |
| "grad_norm": 1.460876226425171, |
| "learning_rate": 2.8487953348060717e-06, |
| "loss": 0.5316921710968018, |
| "memory(GiB)": 76.04, |
| "step": 5600, |
| "token_acc": 0.841174282138871, |
| "train_speed(iter/s)": 0.027635 |
| }, |
| { |
| "epoch": 1.4491563772706704, |
| "grad_norm": 1.1629871129989624, |
| "learning_rate": 2.845266322001914e-06, |
| "loss": 0.5173054695129394, |
| "memory(GiB)": 76.04, |
| "step": 5605, |
| "token_acc": 0.8505799971707455, |
| "train_speed(iter/s)": 0.027635 |
| }, |
| { |
| "epoch": 1.4504492856681104, |
| "grad_norm": 1.3548821210861206, |
| "learning_rate": 2.841736607692855e-06, |
| "loss": 0.5308181762695312, |
| "memory(GiB)": 76.04, |
| "step": 5610, |
| "token_acc": 0.8199830736297108, |
| "train_speed(iter/s)": 0.027636 |
| }, |
| { |
| "epoch": 1.4517421940655504, |
| "grad_norm": 1.4260450601577759, |
| "learning_rate": 2.8382061990504937e-06, |
| "loss": 0.5264840126037598, |
| "memory(GiB)": 76.04, |
| "step": 5615, |
| "token_acc": 0.8558112625353561, |
| "train_speed(iter/s)": 0.027636 |
| }, |
| { |
| "epoch": 1.4530351024629904, |
| "grad_norm": 1.11582612991333, |
| "learning_rate": 2.8346751032478416e-06, |
| "loss": 0.5299251556396485, |
| "memory(GiB)": 76.04, |
| "step": 5620, |
| "token_acc": 0.8517095224639729, |
| "train_speed(iter/s)": 0.027635 |
| }, |
| { |
| "epoch": 1.4543280108604306, |
| "grad_norm": 1.011853814125061, |
| "learning_rate": 2.831143327459304e-06, |
| "loss": 0.5147687911987304, |
| "memory(GiB)": 76.04, |
| "step": 5625, |
| "token_acc": 0.8489732511286956, |
| "train_speed(iter/s)": 0.027635 |
| }, |
| { |
| "epoch": 1.4556209192578706, |
| "grad_norm": 1.401329755783081, |
| "learning_rate": 2.8276108788606716e-06, |
| "loss": 0.5251947402954101, |
| "memory(GiB)": 76.04, |
| "step": 5630, |
| "token_acc": 0.8668347467338987, |
| "train_speed(iter/s)": 0.027636 |
| }, |
| { |
| "epoch": 1.4569138276553106, |
| "grad_norm": 1.0562440156936646, |
| "learning_rate": 2.8240777646290973e-06, |
| "loss": 0.5131159782409668, |
| "memory(GiB)": 76.04, |
| "step": 5635, |
| "token_acc": 0.8574029383123757, |
| "train_speed(iter/s)": 0.027636 |
| }, |
| { |
| "epoch": 1.4582067360527506, |
| "grad_norm": 1.0854851007461548, |
| "learning_rate": 2.82054399194309e-06, |
| "loss": 0.5298294067382813, |
| "memory(GiB)": 76.04, |
| "step": 5640, |
| "token_acc": 0.8360400339911246, |
| "train_speed(iter/s)": 0.027637 |
| }, |
| { |
| "epoch": 1.4594996444501906, |
| "grad_norm": 1.0677274465560913, |
| "learning_rate": 2.817009567982495e-06, |
| "loss": 0.5486864566802978, |
| "memory(GiB)": 76.04, |
| "step": 5645, |
| "token_acc": 0.8450833930215901, |
| "train_speed(iter/s)": 0.027637 |
| }, |
| { |
| "epoch": 1.4607925528476309, |
| "grad_norm": 1.0018072128295898, |
| "learning_rate": 2.81347449992848e-06, |
| "loss": 0.5392383098602295, |
| "memory(GiB)": 76.04, |
| "step": 5650, |
| "token_acc": 0.8489272284892723, |
| "train_speed(iter/s)": 0.027638 |
| }, |
| { |
| "epoch": 1.4620854612450707, |
| "grad_norm": 1.0016893148422241, |
| "learning_rate": 2.8099387949635244e-06, |
| "loss": 0.5180238723754883, |
| "memory(GiB)": 76.04, |
| "step": 5655, |
| "token_acc": 0.8308455244235061, |
| "train_speed(iter/s)": 0.027638 |
| }, |
| { |
| "epoch": 1.463378369642511, |
| "grad_norm": 0.9236847162246704, |
| "learning_rate": 2.8064024602713978e-06, |
| "loss": 0.5212345600128174, |
| "memory(GiB)": 76.04, |
| "step": 5660, |
| "token_acc": 0.8309616213885296, |
| "train_speed(iter/s)": 0.027638 |
| }, |
| { |
| "epoch": 1.464671278039951, |
| "grad_norm": 1.1901302337646484, |
| "learning_rate": 2.802865503037153e-06, |
| "loss": 0.5204244613647461, |
| "memory(GiB)": 76.04, |
| "step": 5665, |
| "token_acc": 0.8307556954991665, |
| "train_speed(iter/s)": 0.027638 |
| }, |
| { |
| "epoch": 1.465964186437391, |
| "grad_norm": 1.3907729387283325, |
| "learning_rate": 2.799327930447105e-06, |
| "loss": 0.5336479187011719, |
| "memory(GiB)": 76.04, |
| "step": 5670, |
| "token_acc": 0.8514618825974964, |
| "train_speed(iter/s)": 0.027638 |
| }, |
| { |
| "epoch": 1.467257094834831, |
| "grad_norm": 1.4721412658691406, |
| "learning_rate": 2.79578974968882e-06, |
| "loss": 0.5241554737091064, |
| "memory(GiB)": 76.04, |
| "step": 5675, |
| "token_acc": 0.8389457435252415, |
| "train_speed(iter/s)": 0.027639 |
| }, |
| { |
| "epoch": 1.468550003232271, |
| "grad_norm": 1.0565476417541504, |
| "learning_rate": 2.792250967951099e-06, |
| "loss": 0.5248475074768066, |
| "memory(GiB)": 76.04, |
| "step": 5680, |
| "token_acc": 0.8303552659239016, |
| "train_speed(iter/s)": 0.02764 |
| }, |
| { |
| "epoch": 1.4698429116297111, |
| "grad_norm": 1.4000303745269775, |
| "learning_rate": 2.788711592423966e-06, |
| "loss": 0.5044834613800049, |
| "memory(GiB)": 76.04, |
| "step": 5685, |
| "token_acc": 0.8474695172874561, |
| "train_speed(iter/s)": 0.027639 |
| }, |
| { |
| "epoch": 1.4711358200271512, |
| "grad_norm": 1.4237065315246582, |
| "learning_rate": 2.785171630298649e-06, |
| "loss": 0.527522611618042, |
| "memory(GiB)": 76.04, |
| "step": 5690, |
| "token_acc": 0.8151931330472103, |
| "train_speed(iter/s)": 0.027638 |
| }, |
| { |
| "epoch": 1.4724287284245912, |
| "grad_norm": 1.0766621828079224, |
| "learning_rate": 2.7816310887675697e-06, |
| "loss": 0.5117476940155029, |
| "memory(GiB)": 76.04, |
| "step": 5695, |
| "token_acc": 0.8338809784592918, |
| "train_speed(iter/s)": 0.027639 |
| }, |
| { |
| "epoch": 1.4737216368220312, |
| "grad_norm": 1.4094700813293457, |
| "learning_rate": 2.7780899750243275e-06, |
| "loss": 0.5268692970275879, |
| "memory(GiB)": 76.04, |
| "step": 5700, |
| "token_acc": 0.8330804888327012, |
| "train_speed(iter/s)": 0.027639 |
| }, |
| { |
| "epoch": 1.4750145452194712, |
| "grad_norm": 1.1132041215896606, |
| "learning_rate": 2.7745482962636815e-06, |
| "loss": 0.4945709228515625, |
| "memory(GiB)": 76.04, |
| "step": 5705, |
| "token_acc": 0.860883552163992, |
| "train_speed(iter/s)": 0.027639 |
| }, |
| { |
| "epoch": 1.4763074536169112, |
| "grad_norm": 1.0947760343551636, |
| "learning_rate": 2.7710060596815425e-06, |
| "loss": 0.5298891067504883, |
| "memory(GiB)": 76.04, |
| "step": 5710, |
| "token_acc": 0.8435150568998808, |
| "train_speed(iter/s)": 0.027639 |
| }, |
| { |
| "epoch": 1.4776003620143512, |
| "grad_norm": 0.9912996292114258, |
| "learning_rate": 2.767463272474951e-06, |
| "loss": 0.48708510398864746, |
| "memory(GiB)": 76.04, |
| "step": 5715, |
| "token_acc": 0.8264887063655031, |
| "train_speed(iter/s)": 0.027639 |
| }, |
| { |
| "epoch": 1.4788932704117914, |
| "grad_norm": 0.8707374930381775, |
| "learning_rate": 2.763919941842069e-06, |
| "loss": 0.5079801559448243, |
| "memory(GiB)": 76.04, |
| "step": 5720, |
| "token_acc": 0.8312231452305929, |
| "train_speed(iter/s)": 0.02764 |
| }, |
| { |
| "epoch": 1.4801861788092314, |
| "grad_norm": 1.5692007541656494, |
| "learning_rate": 2.760376074982161e-06, |
| "loss": 0.5193423748016357, |
| "memory(GiB)": 76.04, |
| "step": 5725, |
| "token_acc": 0.8429825267734923, |
| "train_speed(iter/s)": 0.027641 |
| }, |
| { |
| "epoch": 1.4814790872066714, |
| "grad_norm": 1.1025525331497192, |
| "learning_rate": 2.756831679095583e-06, |
| "loss": 0.5138895034790039, |
| "memory(GiB)": 76.04, |
| "step": 5730, |
| "token_acc": 0.8342220895013012, |
| "train_speed(iter/s)": 0.027641 |
| }, |
| { |
| "epoch": 1.4827719956041114, |
| "grad_norm": 1.114014983177185, |
| "learning_rate": 2.7532867613837632e-06, |
| "loss": 0.5035554885864257, |
| "memory(GiB)": 76.04, |
| "step": 5735, |
| "token_acc": 0.8443557981664217, |
| "train_speed(iter/s)": 0.027642 |
| }, |
| { |
| "epoch": 1.4840649040015514, |
| "grad_norm": 0.9390487670898438, |
| "learning_rate": 2.7497413290491927e-06, |
| "loss": 0.5343178749084473, |
| "memory(GiB)": 76.04, |
| "step": 5740, |
| "token_acc": 0.8542599136238712, |
| "train_speed(iter/s)": 0.027641 |
| }, |
| { |
| "epoch": 1.4853578123989915, |
| "grad_norm": 0.9856504797935486, |
| "learning_rate": 2.746195389295406e-06, |
| "loss": 0.5330347537994384, |
| "memory(GiB)": 76.04, |
| "step": 5745, |
| "token_acc": 0.8207178164624973, |
| "train_speed(iter/s)": 0.027641 |
| }, |
| { |
| "epoch": 1.4866507207964315, |
| "grad_norm": 1.0896226167678833, |
| "learning_rate": 2.7426489493269693e-06, |
| "loss": 0.538813591003418, |
| "memory(GiB)": 76.04, |
| "step": 5750, |
| "token_acc": 0.8037091060637633, |
| "train_speed(iter/s)": 0.027642 |
| }, |
| { |
| "epoch": 1.4879436291938717, |
| "grad_norm": 1.0768758058547974, |
| "learning_rate": 2.739102016349465e-06, |
| "loss": 0.5243756294250488, |
| "memory(GiB)": 76.04, |
| "step": 5755, |
| "token_acc": 0.8149051903817803, |
| "train_speed(iter/s)": 0.027642 |
| }, |
| { |
| "epoch": 1.4892365375913117, |
| "grad_norm": 1.063176155090332, |
| "learning_rate": 2.7355545975694777e-06, |
| "loss": 0.5046000480651855, |
| "memory(GiB)": 76.04, |
| "step": 5760, |
| "token_acc": 0.8376587897828166, |
| "train_speed(iter/s)": 0.027641 |
| }, |
| { |
| "epoch": 1.4905294459887517, |
| "grad_norm": 1.0583608150482178, |
| "learning_rate": 2.73200670019458e-06, |
| "loss": 0.5038406372070312, |
| "memory(GiB)": 76.04, |
| "step": 5765, |
| "token_acc": 0.835580538569638, |
| "train_speed(iter/s)": 0.027642 |
| }, |
| { |
| "epoch": 1.4918223543861917, |
| "grad_norm": 1.7861460447311401, |
| "learning_rate": 2.7284583314333136e-06, |
| "loss": 0.5076050758361816, |
| "memory(GiB)": 76.04, |
| "step": 5770, |
| "token_acc": 0.8467831009250311, |
| "train_speed(iter/s)": 0.027642 |
| }, |
| { |
| "epoch": 1.4931152627836317, |
| "grad_norm": 6.293168067932129, |
| "learning_rate": 2.7249094984951817e-06, |
| "loss": 0.5296279430389405, |
| "memory(GiB)": 76.04, |
| "step": 5775, |
| "token_acc": 0.8256261520112762, |
| "train_speed(iter/s)": 0.027642 |
| }, |
| { |
| "epoch": 1.494408171181072, |
| "grad_norm": 1.0335216522216797, |
| "learning_rate": 2.7213602085906284e-06, |
| "loss": 0.5116629600524902, |
| "memory(GiB)": 76.04, |
| "step": 5780, |
| "token_acc": 0.8385129247749056, |
| "train_speed(iter/s)": 0.027642 |
| }, |
| { |
| "epoch": 1.4957010795785117, |
| "grad_norm": 1.0121421813964844, |
| "learning_rate": 2.7178104689310268e-06, |
| "loss": 0.49023923873901365, |
| "memory(GiB)": 76.04, |
| "step": 5785, |
| "token_acc": 0.8403039150163565, |
| "train_speed(iter/s)": 0.027641 |
| }, |
| { |
| "epoch": 1.496993987975952, |
| "grad_norm": 2.905419111251831, |
| "learning_rate": 2.714260286728663e-06, |
| "loss": 0.5063573837280273, |
| "memory(GiB)": 76.04, |
| "step": 5790, |
| "token_acc": 0.8344622697563874, |
| "train_speed(iter/s)": 0.027641 |
| }, |
| { |
| "epoch": 1.498286896373392, |
| "grad_norm": 1.3976821899414062, |
| "learning_rate": 2.7107096691967242e-06, |
| "loss": 0.5138403892517089, |
| "memory(GiB)": 76.04, |
| "step": 5795, |
| "token_acc": 0.8758511480601742, |
| "train_speed(iter/s)": 0.02764 |
| }, |
| { |
| "epoch": 1.499579804770832, |
| "grad_norm": 1.0694046020507812, |
| "learning_rate": 2.70715862354928e-06, |
| "loss": 0.5170317649841308, |
| "memory(GiB)": 76.04, |
| "step": 5800, |
| "token_acc": 0.8628498120412913, |
| "train_speed(iter/s)": 0.027641 |
| }, |
| { |
| "epoch": 1.500872713168272, |
| "grad_norm": 1.158471941947937, |
| "learning_rate": 2.703607157001273e-06, |
| "loss": 0.5261846542358398, |
| "memory(GiB)": 76.04, |
| "step": 5805, |
| "token_acc": 0.8195343894257913, |
| "train_speed(iter/s)": 0.02764 |
| }, |
| { |
| "epoch": 1.502165621565712, |
| "grad_norm": 0.9272586107254028, |
| "learning_rate": 2.7000552767684962e-06, |
| "loss": 0.5037094116210937, |
| "memory(GiB)": 76.04, |
| "step": 5810, |
| "token_acc": 0.8422422339722406, |
| "train_speed(iter/s)": 0.027641 |
| }, |
| { |
| "epoch": 1.5034585299631522, |
| "grad_norm": 1.1119998693466187, |
| "learning_rate": 2.696502990067586e-06, |
| "loss": 0.5135734558105469, |
| "memory(GiB)": 76.04, |
| "step": 5815, |
| "token_acc": 0.8626723760072827, |
| "train_speed(iter/s)": 0.02764 |
| }, |
| { |
| "epoch": 1.504751438360592, |
| "grad_norm": 1.0358091592788696, |
| "learning_rate": 2.6929503041160054e-06, |
| "loss": 0.5373703956604003, |
| "memory(GiB)": 76.04, |
| "step": 5820, |
| "token_acc": 0.8301167050647732, |
| "train_speed(iter/s)": 0.027641 |
| }, |
| { |
| "epoch": 1.5060443467580322, |
| "grad_norm": 1.039354920387268, |
| "learning_rate": 2.6893972261320265e-06, |
| "loss": 0.5479695320129394, |
| "memory(GiB)": 76.04, |
| "step": 5825, |
| "token_acc": 0.8479183638468465, |
| "train_speed(iter/s)": 0.027642 |
| }, |
| { |
| "epoch": 1.5073372551554722, |
| "grad_norm": 1.3886367082595825, |
| "learning_rate": 2.6858437633347197e-06, |
| "loss": 0.49077515602111815, |
| "memory(GiB)": 76.04, |
| "step": 5830, |
| "token_acc": 0.8483184202406665, |
| "train_speed(iter/s)": 0.027641 |
| }, |
| { |
| "epoch": 1.5086301635529122, |
| "grad_norm": 1.0695741176605225, |
| "learning_rate": 2.6822899229439354e-06, |
| "loss": 0.5208306789398194, |
| "memory(GiB)": 76.04, |
| "step": 5835, |
| "token_acc": 0.8321777497636307, |
| "train_speed(iter/s)": 0.027642 |
| }, |
| { |
| "epoch": 1.5099230719503525, |
| "grad_norm": 1.3519343137741089, |
| "learning_rate": 2.678735712180294e-06, |
| "loss": 0.5065782070159912, |
| "memory(GiB)": 76.04, |
| "step": 5840, |
| "token_acc": 0.865064039408867, |
| "train_speed(iter/s)": 0.027642 |
| }, |
| { |
| "epoch": 1.5112159803477923, |
| "grad_norm": 1.1784507036209106, |
| "learning_rate": 2.6751811382651656e-06, |
| "loss": 0.5237961769104004, |
| "memory(GiB)": 76.04, |
| "step": 5845, |
| "token_acc": 0.8759957417128593, |
| "train_speed(iter/s)": 0.027642 |
| }, |
| { |
| "epoch": 1.5125088887452325, |
| "grad_norm": 1.2745177745819092, |
| "learning_rate": 2.6716262084206596e-06, |
| "loss": 0.5225517272949218, |
| "memory(GiB)": 76.04, |
| "step": 5850, |
| "token_acc": 0.8607184154574956, |
| "train_speed(iter/s)": 0.027642 |
| }, |
| { |
| "epoch": 1.5138017971426725, |
| "grad_norm": 1.0925981998443604, |
| "learning_rate": 2.6680709298696075e-06, |
| "loss": 0.5313197135925293, |
| "memory(GiB)": 76.04, |
| "step": 5855, |
| "token_acc": 0.8273417489937798, |
| "train_speed(iter/s)": 0.027642 |
| }, |
| { |
| "epoch": 1.5150947055401125, |
| "grad_norm": 1.1206554174423218, |
| "learning_rate": 2.66451530983555e-06, |
| "loss": 0.5206215858459473, |
| "memory(GiB)": 76.04, |
| "step": 5860, |
| "token_acc": 0.8360609797107947, |
| "train_speed(iter/s)": 0.027642 |
| }, |
| { |
| "epoch": 1.5163876139375525, |
| "grad_norm": 1.0352177619934082, |
| "learning_rate": 2.6609593555427233e-06, |
| "loss": 0.5028391361236573, |
| "memory(GiB)": 76.04, |
| "step": 5865, |
| "token_acc": 0.8328511593764844, |
| "train_speed(iter/s)": 0.027642 |
| }, |
| { |
| "epoch": 1.5176805223349925, |
| "grad_norm": 1.2911217212677002, |
| "learning_rate": 2.6574030742160397e-06, |
| "loss": 0.510726022720337, |
| "memory(GiB)": 76.04, |
| "step": 5870, |
| "token_acc": 0.8490013110202822, |
| "train_speed(iter/s)": 0.027642 |
| }, |
| { |
| "epoch": 1.5189734307324327, |
| "grad_norm": 1.0364927053451538, |
| "learning_rate": 2.6538464730810774e-06, |
| "loss": 0.5217413902282715, |
| "memory(GiB)": 76.04, |
| "step": 5875, |
| "token_acc": 0.8393885789449812, |
| "train_speed(iter/s)": 0.027642 |
| }, |
| { |
| "epoch": 1.5202663391298725, |
| "grad_norm": 1.238673210144043, |
| "learning_rate": 2.6502895593640643e-06, |
| "loss": 0.5099982738494873, |
| "memory(GiB)": 76.04, |
| "step": 5880, |
| "token_acc": 0.8526344031928095, |
| "train_speed(iter/s)": 0.027642 |
| }, |
| { |
| "epoch": 1.5215592475273128, |
| "grad_norm": 0.8736827969551086, |
| "learning_rate": 2.646732340291864e-06, |
| "loss": 0.5140372753143311, |
| "memory(GiB)": 76.04, |
| "step": 5885, |
| "token_acc": 0.8392264114084782, |
| "train_speed(iter/s)": 0.027641 |
| }, |
| { |
| "epoch": 1.5228521559247528, |
| "grad_norm": 1.5109645128250122, |
| "learning_rate": 2.6431748230919583e-06, |
| "loss": 0.5010466575622559, |
| "memory(GiB)": 76.04, |
| "step": 5890, |
| "token_acc": 0.8540109197816044, |
| "train_speed(iter/s)": 0.02764 |
| }, |
| { |
| "epoch": 1.5241450643221928, |
| "grad_norm": 1.3737353086471558, |
| "learning_rate": 2.639617014992438e-06, |
| "loss": 0.5450526237487793, |
| "memory(GiB)": 76.04, |
| "step": 5895, |
| "token_acc": 0.8286816981515336, |
| "train_speed(iter/s)": 0.02764 |
| }, |
| { |
| "epoch": 1.5254379727196328, |
| "grad_norm": 1.299264669418335, |
| "learning_rate": 2.6360589232219826e-06, |
| "loss": 0.5287326812744141, |
| "memory(GiB)": 76.04, |
| "step": 5900, |
| "token_acc": 0.8438716156839771, |
| "train_speed(iter/s)": 0.02764 |
| }, |
| { |
| "epoch": 1.5267308811170728, |
| "grad_norm": 1.595326542854309, |
| "learning_rate": 2.632500555009849e-06, |
| "loss": 0.5352768898010254, |
| "memory(GiB)": 76.04, |
| "step": 5905, |
| "token_acc": 0.8498021897138651, |
| "train_speed(iter/s)": 0.02764 |
| }, |
| { |
| "epoch": 1.528023789514513, |
| "grad_norm": 0.9794163107872009, |
| "learning_rate": 2.6289419175858557e-06, |
| "loss": 0.5425346374511719, |
| "memory(GiB)": 76.04, |
| "step": 5910, |
| "token_acc": 0.8340483277884784, |
| "train_speed(iter/s)": 0.02764 |
| }, |
| { |
| "epoch": 1.5293166979119528, |
| "grad_norm": 1.0976084470748901, |
| "learning_rate": 2.625383018180367e-06, |
| "loss": 0.521512794494629, |
| "memory(GiB)": 76.04, |
| "step": 5915, |
| "token_acc": 0.8385939188146319, |
| "train_speed(iter/s)": 0.02764 |
| }, |
| { |
| "epoch": 1.530609606309393, |
| "grad_norm": 0.8977828621864319, |
| "learning_rate": 2.6218238640242804e-06, |
| "loss": 0.5215116500854492, |
| "memory(GiB)": 76.04, |
| "step": 5920, |
| "token_acc": 0.867619533775736, |
| "train_speed(iter/s)": 0.027639 |
| }, |
| { |
| "epoch": 1.531902514706833, |
| "grad_norm": 0.9642647504806519, |
| "learning_rate": 2.6182644623490123e-06, |
| "loss": 0.5066309928894043, |
| "memory(GiB)": 76.04, |
| "step": 5925, |
| "token_acc": 0.8547594142259414, |
| "train_speed(iter/s)": 0.02764 |
| }, |
| { |
| "epoch": 1.533195423104273, |
| "grad_norm": 1.0187339782714844, |
| "learning_rate": 2.6147048203864785e-06, |
| "loss": 0.5130214691162109, |
| "memory(GiB)": 76.04, |
| "step": 5930, |
| "token_acc": 0.8448008040935673, |
| "train_speed(iter/s)": 0.02764 |
| }, |
| { |
| "epoch": 1.5344883315017133, |
| "grad_norm": 1.0723613500595093, |
| "learning_rate": 2.6111449453690867e-06, |
| "loss": 0.5088356971740723, |
| "memory(GiB)": 76.04, |
| "step": 5935, |
| "token_acc": 0.8498759764540372, |
| "train_speed(iter/s)": 0.02764 |
| }, |
| { |
| "epoch": 1.535781239899153, |
| "grad_norm": 0.9338003993034363, |
| "learning_rate": 2.607584844529717e-06, |
| "loss": 0.5098363399505615, |
| "memory(GiB)": 76.04, |
| "step": 5940, |
| "token_acc": 0.8526082509376065, |
| "train_speed(iter/s)": 0.027641 |
| }, |
| { |
| "epoch": 1.5370741482965933, |
| "grad_norm": 0.8586558103561401, |
| "learning_rate": 2.604024525101707e-06, |
| "loss": 0.5505722045898438, |
| "memory(GiB)": 76.04, |
| "step": 5945, |
| "token_acc": 0.8450296382094433, |
| "train_speed(iter/s)": 0.027641 |
| }, |
| { |
| "epoch": 1.538367056694033, |
| "grad_norm": 1.0932189226150513, |
| "learning_rate": 2.6004639943188397e-06, |
| "loss": 0.51469407081604, |
| "memory(GiB)": 76.04, |
| "step": 5950, |
| "token_acc": 0.840957878166293, |
| "train_speed(iter/s)": 0.027641 |
| }, |
| { |
| "epoch": 1.5396599650914733, |
| "grad_norm": 1.4429826736450195, |
| "learning_rate": 2.5969032594153267e-06, |
| "loss": 0.5273025512695313, |
| "memory(GiB)": 76.04, |
| "step": 5955, |
| "token_acc": 0.8560200279459711, |
| "train_speed(iter/s)": 0.027642 |
| }, |
| { |
| "epoch": 1.5409528734889133, |
| "grad_norm": 1.6431050300598145, |
| "learning_rate": 2.5933423276257957e-06, |
| "loss": 0.5339940071105957, |
| "memory(GiB)": 76.04, |
| "step": 5960, |
| "token_acc": 0.8337239801328199, |
| "train_speed(iter/s)": 0.027641 |
| }, |
| { |
| "epoch": 1.5422457818863533, |
| "grad_norm": 1.0177254676818848, |
| "learning_rate": 2.5897812061852728e-06, |
| "loss": 0.523937177658081, |
| "memory(GiB)": 76.04, |
| "step": 5965, |
| "token_acc": 0.8334849487264308, |
| "train_speed(iter/s)": 0.027641 |
| }, |
| { |
| "epoch": 1.5435386902837935, |
| "grad_norm": 1.0822439193725586, |
| "learning_rate": 2.58621990232917e-06, |
| "loss": 0.5013058662414551, |
| "memory(GiB)": 76.04, |
| "step": 5970, |
| "token_acc": 0.8428224266620379, |
| "train_speed(iter/s)": 0.027642 |
| }, |
| { |
| "epoch": 1.5448315986812333, |
| "grad_norm": 1.0421521663665771, |
| "learning_rate": 2.5826584232932707e-06, |
| "loss": 0.5094140052795411, |
| "memory(GiB)": 76.04, |
| "step": 5975, |
| "token_acc": 0.828722488626583, |
| "train_speed(iter/s)": 0.027643 |
| }, |
| { |
| "epoch": 1.5461245070786735, |
| "grad_norm": 1.2964197397232056, |
| "learning_rate": 2.5790967763137136e-06, |
| "loss": 0.5127614498138428, |
| "memory(GiB)": 76.04, |
| "step": 5980, |
| "token_acc": 0.842072213500785, |
| "train_speed(iter/s)": 0.027643 |
| }, |
| { |
| "epoch": 1.5474174154761136, |
| "grad_norm": 0.8814839124679565, |
| "learning_rate": 2.575534968626978e-06, |
| "loss": 0.5174202919006348, |
| "memory(GiB)": 76.04, |
| "step": 5985, |
| "token_acc": 0.8393448656606551, |
| "train_speed(iter/s)": 0.027644 |
| }, |
| { |
| "epoch": 1.5487103238735536, |
| "grad_norm": 1.192781686782837, |
| "learning_rate": 2.5719730074698718e-06, |
| "loss": 0.5092106342315674, |
| "memory(GiB)": 76.04, |
| "step": 5990, |
| "token_acc": 0.8536305586357206, |
| "train_speed(iter/s)": 0.027644 |
| }, |
| { |
| "epoch": 1.5500032322709936, |
| "grad_norm": 1.0610737800598145, |
| "learning_rate": 2.5684109000795114e-06, |
| "loss": 0.4976038932800293, |
| "memory(GiB)": 76.04, |
| "step": 5995, |
| "token_acc": 0.8407422307150759, |
| "train_speed(iter/s)": 0.027645 |
| }, |
| { |
| "epoch": 1.5512961406684336, |
| "grad_norm": 0.9621560573577881, |
| "learning_rate": 2.564848653693313e-06, |
| "loss": 0.5234485626220703, |
| "memory(GiB)": 76.04, |
| "step": 6000, |
| "token_acc": 0.8350293049512783, |
| "train_speed(iter/s)": 0.027645 |
| }, |
| { |
| "epoch": 1.5525890490658738, |
| "grad_norm": 0.9867120385169983, |
| "learning_rate": 2.5612862755489754e-06, |
| "loss": 0.5299267292022705, |
| "memory(GiB)": 76.04, |
| "step": 6005, |
| "token_acc": 0.8546105977748444, |
| "train_speed(iter/s)": 0.027621 |
| }, |
| { |
| "epoch": 1.5538819574633136, |
| "grad_norm": 1.0978645086288452, |
| "learning_rate": 2.5577237728844624e-06, |
| "loss": 0.5120854854583741, |
| "memory(GiB)": 76.04, |
| "step": 6010, |
| "token_acc": 0.826845756196704, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.5551748658607538, |
| "grad_norm": 1.5117709636688232, |
| "learning_rate": 2.554161152937994e-06, |
| "loss": 0.49729576110839846, |
| "memory(GiB)": 76.04, |
| "step": 6015, |
| "token_acc": 0.8334134348774447, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.5564677742581938, |
| "grad_norm": 0.8881126642227173, |
| "learning_rate": 2.5505984229480257e-06, |
| "loss": 0.5338102340698242, |
| "memory(GiB)": 76.04, |
| "step": 6020, |
| "token_acc": 0.8419421487603306, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.5577606826556338, |
| "grad_norm": 1.0090521574020386, |
| "learning_rate": 2.547035590153239e-06, |
| "loss": 0.5258452892303467, |
| "memory(GiB)": 76.04, |
| "step": 6025, |
| "token_acc": 0.8578943772631004, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.5590535910530738, |
| "grad_norm": 0.9812294244766235, |
| "learning_rate": 2.5434726617925214e-06, |
| "loss": 0.5136911392211914, |
| "memory(GiB)": 76.04, |
| "step": 6030, |
| "token_acc": 0.827103274559194, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.5603464994505138, |
| "grad_norm": 0.9446991682052612, |
| "learning_rate": 2.5399096451049586e-06, |
| "loss": 0.5100172996520996, |
| "memory(GiB)": 76.04, |
| "step": 6035, |
| "token_acc": 0.8508031581813231, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.561639407847954, |
| "grad_norm": 1.0973703861236572, |
| "learning_rate": 2.536346547329812e-06, |
| "loss": 0.5151572704315186, |
| "memory(GiB)": 76.04, |
| "step": 6040, |
| "token_acc": 0.8401378579003181, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.5629323162453939, |
| "grad_norm": 1.1219217777252197, |
| "learning_rate": 2.5327833757065102e-06, |
| "loss": 0.5503729343414306, |
| "memory(GiB)": 76.04, |
| "step": 6045, |
| "token_acc": 0.8555493103895543, |
| "train_speed(iter/s)": 0.027621 |
| }, |
| { |
| "epoch": 1.564225224642834, |
| "grad_norm": 0.9930498600006104, |
| "learning_rate": 2.5292201374746306e-06, |
| "loss": 0.5092242240905762, |
| "memory(GiB)": 76.04, |
| "step": 6050, |
| "token_acc": 0.8612712103502479, |
| "train_speed(iter/s)": 0.027621 |
| }, |
| { |
| "epoch": 1.565518133040274, |
| "grad_norm": 1.1532442569732666, |
| "learning_rate": 2.525656839873885e-06, |
| "loss": 0.509462833404541, |
| "memory(GiB)": 76.04, |
| "step": 6055, |
| "token_acc": 0.8444057905958927, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.566811041437714, |
| "grad_norm": 0.9126574397087097, |
| "learning_rate": 2.522093490144109e-06, |
| "loss": 0.5357399940490722, |
| "memory(GiB)": 76.04, |
| "step": 6060, |
| "token_acc": 0.8304055410560128, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.5681039498351543, |
| "grad_norm": 4.396268844604492, |
| "learning_rate": 2.5185300955252406e-06, |
| "loss": 0.5380908489227295, |
| "memory(GiB)": 76.04, |
| "step": 6065, |
| "token_acc": 0.8541631222566266, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.5693968582325941, |
| "grad_norm": 1.2452019453048706, |
| "learning_rate": 2.514966663257311e-06, |
| "loss": 0.5378365516662598, |
| "memory(GiB)": 76.04, |
| "step": 6070, |
| "token_acc": 0.8556809966075302, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.5706897666300343, |
| "grad_norm": 0.9279251098632812, |
| "learning_rate": 2.511403200580428e-06, |
| "loss": 0.5115952014923095, |
| "memory(GiB)": 76.04, |
| "step": 6075, |
| "token_acc": 0.8463237893248498, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.5719826750274744, |
| "grad_norm": 2.242185592651367, |
| "learning_rate": 2.50783971473476e-06, |
| "loss": 0.5192525386810303, |
| "memory(GiB)": 76.04, |
| "step": 6080, |
| "token_acc": 0.8700274811911519, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.5732755834249144, |
| "grad_norm": 2.6423373222351074, |
| "learning_rate": 2.5042762129605235e-06, |
| "loss": 0.5067386150360107, |
| "memory(GiB)": 76.04, |
| "step": 6085, |
| "token_acc": 0.8241304899720742, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.5745684918223544, |
| "grad_norm": 0.9732599854469299, |
| "learning_rate": 2.500712702497967e-06, |
| "loss": 0.4948467254638672, |
| "memory(GiB)": 76.04, |
| "step": 6090, |
| "token_acc": 0.8798655462184874, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.5758614002197944, |
| "grad_norm": 1.5327101945877075, |
| "learning_rate": 2.497149190587356e-06, |
| "loss": 0.5227193355560302, |
| "memory(GiB)": 76.04, |
| "step": 6095, |
| "token_acc": 0.8608474068152293, |
| "train_speed(iter/s)": 0.027623 |
| }, |
| { |
| "epoch": 1.5771543086172346, |
| "grad_norm": 1.2196455001831055, |
| "learning_rate": 2.4935856844689605e-06, |
| "loss": 0.519383716583252, |
| "memory(GiB)": 76.04, |
| "step": 6100, |
| "token_acc": 0.8335468679663424, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.5784472170146744, |
| "grad_norm": 1.0245038270950317, |
| "learning_rate": 2.4900221913830368e-06, |
| "loss": 0.5222830772399902, |
| "memory(GiB)": 76.04, |
| "step": 6105, |
| "token_acc": 0.8626334519572953, |
| "train_speed(iter/s)": 0.027623 |
| }, |
| { |
| "epoch": 1.5797401254121146, |
| "grad_norm": 1.114092469215393, |
| "learning_rate": 2.486458718569817e-06, |
| "loss": 0.5028997898101807, |
| "memory(GiB)": 76.04, |
| "step": 6110, |
| "token_acc": 0.8522080471050049, |
| "train_speed(iter/s)": 0.027623 |
| }, |
| { |
| "epoch": 1.5810330338095546, |
| "grad_norm": 1.0639675855636597, |
| "learning_rate": 2.4828952732694887e-06, |
| "loss": 0.5147637367248535, |
| "memory(GiB)": 76.04, |
| "step": 6115, |
| "token_acc": 0.863227909435292, |
| "train_speed(iter/s)": 0.027623 |
| }, |
| { |
| "epoch": 1.5823259422069946, |
| "grad_norm": 0.8807838559150696, |
| "learning_rate": 2.479331862722188e-06, |
| "loss": 0.5280374526977539, |
| "memory(GiB)": 76.04, |
| "step": 6120, |
| "token_acc": 0.8349608197709464, |
| "train_speed(iter/s)": 0.027623 |
| }, |
| { |
| "epoch": 1.5836188506044346, |
| "grad_norm": 1.1169236898422241, |
| "learning_rate": 2.4757684941679767e-06, |
| "loss": 0.5291852474212646, |
| "memory(GiB)": 76.04, |
| "step": 6125, |
| "token_acc": 0.83143130614048, |
| "train_speed(iter/s)": 0.027624 |
| }, |
| { |
| "epoch": 1.5849117590018746, |
| "grad_norm": 1.1640690565109253, |
| "learning_rate": 2.4722051748468336e-06, |
| "loss": 0.54544095993042, |
| "memory(GiB)": 76.04, |
| "step": 6130, |
| "token_acc": 0.8492234388601274, |
| "train_speed(iter/s)": 0.027623 |
| }, |
| { |
| "epoch": 1.5862046673993149, |
| "grad_norm": 1.0770084857940674, |
| "learning_rate": 2.4686419119986337e-06, |
| "loss": 0.5241689205169677, |
| "memory(GiB)": 76.04, |
| "step": 6135, |
| "token_acc": 0.8025563166443048, |
| "train_speed(iter/s)": 0.027624 |
| }, |
| { |
| "epoch": 1.5874975757967547, |
| "grad_norm": 0.8977876901626587, |
| "learning_rate": 2.4650787128631433e-06, |
| "loss": 0.47954139709472654, |
| "memory(GiB)": 76.04, |
| "step": 6140, |
| "token_acc": 0.8597583511016347, |
| "train_speed(iter/s)": 0.027623 |
| }, |
| { |
| "epoch": 1.5887904841941949, |
| "grad_norm": 5.924169540405273, |
| "learning_rate": 2.461515584679995e-06, |
| "loss": 0.5163521766662598, |
| "memory(GiB)": 76.04, |
| "step": 6145, |
| "token_acc": 0.8557573765102326, |
| "train_speed(iter/s)": 0.027624 |
| }, |
| { |
| "epoch": 1.590083392591635, |
| "grad_norm": 1.4680997133255005, |
| "learning_rate": 2.457952534688678e-06, |
| "loss": 0.5192079544067383, |
| "memory(GiB)": 76.04, |
| "step": 6150, |
| "token_acc": 0.8392393432144142, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.591376300989075, |
| "grad_norm": 1.3616418838500977, |
| "learning_rate": 2.4543895701285214e-06, |
| "loss": 0.521982479095459, |
| "memory(GiB)": 76.04, |
| "step": 6155, |
| "token_acc": 0.8226835625056169, |
| "train_speed(iter/s)": 0.027623 |
| }, |
| { |
| "epoch": 1.5926692093865151, |
| "grad_norm": 0.9679275751113892, |
| "learning_rate": 2.450826698238685e-06, |
| "loss": 0.5229485034942627, |
| "memory(GiB)": 76.04, |
| "step": 6160, |
| "token_acc": 0.8353462704120866, |
| "train_speed(iter/s)": 0.027623 |
| }, |
| { |
| "epoch": 1.593962117783955, |
| "grad_norm": 1.0366556644439697, |
| "learning_rate": 2.447263926258136e-06, |
| "loss": 0.518170976638794, |
| "memory(GiB)": 76.04, |
| "step": 6165, |
| "token_acc": 0.8254823304680038, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.5952550261813951, |
| "grad_norm": 0.9990222454071045, |
| "learning_rate": 2.4437012614256394e-06, |
| "loss": 0.5325229167938232, |
| "memory(GiB)": 76.04, |
| "step": 6170, |
| "token_acc": 0.8040033620770165, |
| "train_speed(iter/s)": 0.027623 |
| }, |
| { |
| "epoch": 1.596547934578835, |
| "grad_norm": 1.1222083568572998, |
| "learning_rate": 2.4401387109797446e-06, |
| "loss": 0.5065582275390625, |
| "memory(GiB)": 76.04, |
| "step": 6175, |
| "token_acc": 0.8548062202884538, |
| "train_speed(iter/s)": 0.027623 |
| }, |
| { |
| "epoch": 1.5978408429762752, |
| "grad_norm": 1.0990639925003052, |
| "learning_rate": 2.4365762821587656e-06, |
| "loss": 0.5230794906616211, |
| "memory(GiB)": 76.04, |
| "step": 6180, |
| "token_acc": 0.8195025958800871, |
| "train_speed(iter/s)": 0.027623 |
| }, |
| { |
| "epoch": 1.5991337513737152, |
| "grad_norm": 1.0618468523025513, |
| "learning_rate": 2.4330139822007726e-06, |
| "loss": 0.5022711753845215, |
| "memory(GiB)": 76.04, |
| "step": 6185, |
| "token_acc": 0.8767551452202347, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.6004266597711552, |
| "grad_norm": 1.059678077697754, |
| "learning_rate": 2.4294518183435715e-06, |
| "loss": 0.5181986808776855, |
| "memory(GiB)": 76.04, |
| "step": 6190, |
| "token_acc": 0.8563752841496177, |
| "train_speed(iter/s)": 0.027623 |
| }, |
| { |
| "epoch": 1.6017195681685954, |
| "grad_norm": 0.8919258117675781, |
| "learning_rate": 2.4258897978246925e-06, |
| "loss": 0.49803409576416013, |
| "memory(GiB)": 76.04, |
| "step": 6195, |
| "token_acc": 0.8634100953710165, |
| "train_speed(iter/s)": 0.027623 |
| }, |
| { |
| "epoch": 1.6030124765660352, |
| "grad_norm": 1.0404894351959229, |
| "learning_rate": 2.4223279278813736e-06, |
| "loss": 0.5113819122314454, |
| "memory(GiB)": 76.04, |
| "step": 6200, |
| "token_acc": 0.828187138284458, |
| "train_speed(iter/s)": 0.027623 |
| }, |
| { |
| "epoch": 1.6043053849634754, |
| "grad_norm": 1.3208954334259033, |
| "learning_rate": 2.418766215750549e-06, |
| "loss": 0.5281610012054443, |
| "memory(GiB)": 76.04, |
| "step": 6205, |
| "token_acc": 0.835780681665095, |
| "train_speed(iter/s)": 0.027624 |
| }, |
| { |
| "epoch": 1.6055982933609154, |
| "grad_norm": 1.3880317211151123, |
| "learning_rate": 2.4152046686688305e-06, |
| "loss": 0.5289054870605469, |
| "memory(GiB)": 76.04, |
| "step": 6210, |
| "token_acc": 0.8374174516442627, |
| "train_speed(iter/s)": 0.027625 |
| }, |
| { |
| "epoch": 1.6068912017583554, |
| "grad_norm": 1.5898443460464478, |
| "learning_rate": 2.4116432938724953e-06, |
| "loss": 0.5337974548339843, |
| "memory(GiB)": 76.04, |
| "step": 6215, |
| "token_acc": 0.8592785422089996, |
| "train_speed(iter/s)": 0.027625 |
| }, |
| { |
| "epoch": 1.6081841101557954, |
| "grad_norm": 1.666271686553955, |
| "learning_rate": 2.4080820985974707e-06, |
| "loss": 0.5134634017944336, |
| "memory(GiB)": 76.04, |
| "step": 6220, |
| "token_acc": 0.8779267140307283, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.6094770185532354, |
| "grad_norm": 0.9248247742652893, |
| "learning_rate": 2.4045210900793167e-06, |
| "loss": 0.5277139186859131, |
| "memory(GiB)": 76.04, |
| "step": 6225, |
| "token_acc": 0.8437402643642352, |
| "train_speed(iter/s)": 0.027625 |
| }, |
| { |
| "epoch": 1.6107699269506757, |
| "grad_norm": 1.5387322902679443, |
| "learning_rate": 2.4009602755532188e-06, |
| "loss": 0.5056108474731446, |
| "memory(GiB)": 76.04, |
| "step": 6230, |
| "token_acc": 0.8501114918148692, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.6120628353481155, |
| "grad_norm": 1.0052211284637451, |
| "learning_rate": 2.3973996622539646e-06, |
| "loss": 0.5336996078491211, |
| "memory(GiB)": 76.04, |
| "step": 6235, |
| "token_acc": 0.8721888153938665, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.6133557437455557, |
| "grad_norm": 1.027434229850769, |
| "learning_rate": 2.393839257415933e-06, |
| "loss": 0.49329376220703125, |
| "memory(GiB)": 76.04, |
| "step": 6240, |
| "token_acc": 0.8676368108218897, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.6146486521429957, |
| "grad_norm": 1.1870218515396118, |
| "learning_rate": 2.3902790682730806e-06, |
| "loss": 0.5256915092468262, |
| "memory(GiB)": 76.04, |
| "step": 6245, |
| "token_acc": 0.8572780020181635, |
| "train_speed(iter/s)": 0.027625 |
| }, |
| { |
| "epoch": 1.6159415605404357, |
| "grad_norm": 1.0639426708221436, |
| "learning_rate": 2.3867191020589264e-06, |
| "loss": 0.5284603118896485, |
| "memory(GiB)": 76.04, |
| "step": 6250, |
| "token_acc": 0.8446619622126109, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.6172344689378757, |
| "grad_norm": 1.0155668258666992, |
| "learning_rate": 2.3831593660065345e-06, |
| "loss": 0.5121121406555176, |
| "memory(GiB)": 76.04, |
| "step": 6255, |
| "token_acc": 0.8595757910736493, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.6185273773353157, |
| "grad_norm": 1.0661635398864746, |
| "learning_rate": 2.3795998673485025e-06, |
| "loss": 0.5248492240905762, |
| "memory(GiB)": 76.04, |
| "step": 6260, |
| "token_acc": 0.8348161428909712, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.619820285732756, |
| "grad_norm": 1.0434828996658325, |
| "learning_rate": 2.376040613316944e-06, |
| "loss": 0.5192477226257324, |
| "memory(GiB)": 76.04, |
| "step": 6265, |
| "token_acc": 0.8534787948847626, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.6211131941301957, |
| "grad_norm": 0.9453509449958801, |
| "learning_rate": 2.37248161114348e-06, |
| "loss": 0.5361949920654296, |
| "memory(GiB)": 76.04, |
| "step": 6270, |
| "token_acc": 0.8283499021225963, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.622406102527636, |
| "grad_norm": 1.1677709817886353, |
| "learning_rate": 2.3689228680592138e-06, |
| "loss": 0.52266845703125, |
| "memory(GiB)": 76.04, |
| "step": 6275, |
| "token_acc": 0.8238867321306235, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.623699010925076, |
| "grad_norm": 1.974316954612732, |
| "learning_rate": 2.3653643912947276e-06, |
| "loss": 0.5168787479400635, |
| "memory(GiB)": 76.04, |
| "step": 6280, |
| "token_acc": 0.8214940319191738, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.624991919322516, |
| "grad_norm": 1.1270877122879028, |
| "learning_rate": 2.3618061880800586e-06, |
| "loss": 0.48665618896484375, |
| "memory(GiB)": 76.04, |
| "step": 6285, |
| "token_acc": 0.8658167398627041, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.6262848277199562, |
| "grad_norm": 1.4701924324035645, |
| "learning_rate": 2.3582482656446897e-06, |
| "loss": 0.5326834678649902, |
| "memory(GiB)": 76.04, |
| "step": 6290, |
| "token_acc": 0.841709722874589, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.627577736117396, |
| "grad_norm": 0.9950865507125854, |
| "learning_rate": 2.3546906312175347e-06, |
| "loss": 0.597511100769043, |
| "memory(GiB)": 76.04, |
| "step": 6295, |
| "token_acc": 0.7920300141959035, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.6288706445148362, |
| "grad_norm": 1.3875091075897217, |
| "learning_rate": 2.35113329202692e-06, |
| "loss": 0.5079882621765137, |
| "memory(GiB)": 76.04, |
| "step": 6300, |
| "token_acc": 0.8363686840644087, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.6301635529122762, |
| "grad_norm": 1.17184317111969, |
| "learning_rate": 2.3475762553005727e-06, |
| "loss": 0.5145916938781738, |
| "memory(GiB)": 76.04, |
| "step": 6305, |
| "token_acc": 0.841187863137508, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.6314564613097162, |
| "grad_norm": 1.8713252544403076, |
| "learning_rate": 2.344019528265607e-06, |
| "loss": 0.5273695468902588, |
| "memory(GiB)": 76.04, |
| "step": 6310, |
| "token_acc": 0.8691662296801258, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.6327493697071562, |
| "grad_norm": 1.0383869409561157, |
| "learning_rate": 2.3404631181485053e-06, |
| "loss": 0.5135766983032226, |
| "memory(GiB)": 76.04, |
| "step": 6315, |
| "token_acc": 0.8497678608551036, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.6340422781045962, |
| "grad_norm": 0.8791532516479492, |
| "learning_rate": 2.3369070321751085e-06, |
| "loss": 0.5190924167633056, |
| "memory(GiB)": 76.04, |
| "step": 6320, |
| "token_acc": 0.8457278865303347, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.6353351865020365, |
| "grad_norm": 1.2614482641220093, |
| "learning_rate": 2.3333512775705975e-06, |
| "loss": 0.5101301193237304, |
| "memory(GiB)": 76.04, |
| "step": 6325, |
| "token_acc": 0.8530480522450639, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.6366280948994762, |
| "grad_norm": 1.5320963859558105, |
| "learning_rate": 2.3297958615594786e-06, |
| "loss": 0.4884361743927002, |
| "memory(GiB)": 76.04, |
| "step": 6330, |
| "token_acc": 0.8361637380975754, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.6379210032969165, |
| "grad_norm": 1.157752275466919, |
| "learning_rate": 2.326240791365575e-06, |
| "loss": 0.4981177806854248, |
| "memory(GiB)": 76.04, |
| "step": 6335, |
| "token_acc": 0.8416252072968491, |
| "train_speed(iter/s)": 0.027628 |
| }, |
| { |
| "epoch": 1.6392139116943565, |
| "grad_norm": 1.2564195394515991, |
| "learning_rate": 2.3226860742120017e-06, |
| "loss": 0.538153886795044, |
| "memory(GiB)": 76.04, |
| "step": 6340, |
| "token_acc": 0.8415269756303705, |
| "train_speed(iter/s)": 0.027628 |
| }, |
| { |
| "epoch": 1.6405068200917965, |
| "grad_norm": 0.9583210945129395, |
| "learning_rate": 2.319131717321159e-06, |
| "loss": 0.4883336067199707, |
| "memory(GiB)": 76.04, |
| "step": 6345, |
| "token_acc": 0.8325664381632079, |
| "train_speed(iter/s)": 0.027628 |
| }, |
| { |
| "epoch": 1.6417997284892365, |
| "grad_norm": 1.2164779901504517, |
| "learning_rate": 2.3155777279147156e-06, |
| "loss": 0.5153134346008301, |
| "memory(GiB)": 76.04, |
| "step": 6350, |
| "token_acc": 0.8470916505687915, |
| "train_speed(iter/s)": 0.027628 |
| }, |
| { |
| "epoch": 1.6430926368866765, |
| "grad_norm": 1.1382580995559692, |
| "learning_rate": 2.312024113213592e-06, |
| "loss": 0.5252164840698242, |
| "memory(GiB)": 76.04, |
| "step": 6355, |
| "token_acc": 0.8316279498525073, |
| "train_speed(iter/s)": 0.027628 |
| }, |
| { |
| "epoch": 1.6443855452841167, |
| "grad_norm": 0.8894354104995728, |
| "learning_rate": 2.3084708804379497e-06, |
| "loss": 0.5195868015289307, |
| "memory(GiB)": 76.04, |
| "step": 6360, |
| "token_acc": 0.8409683261916332, |
| "train_speed(iter/s)": 0.027628 |
| }, |
| { |
| "epoch": 1.6456784536815565, |
| "grad_norm": 1.683902382850647, |
| "learning_rate": 2.3049180368071724e-06, |
| "loss": 0.5006110191345214, |
| "memory(GiB)": 76.04, |
| "step": 6365, |
| "token_acc": 0.8291754756871036, |
| "train_speed(iter/s)": 0.027628 |
| }, |
| { |
| "epoch": 1.6469713620789967, |
| "grad_norm": 1.7893975973129272, |
| "learning_rate": 2.301365589539853e-06, |
| "loss": 0.49852724075317384, |
| "memory(GiB)": 76.04, |
| "step": 6370, |
| "token_acc": 0.8365970585845454, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.6482642704764368, |
| "grad_norm": 0.9394116997718811, |
| "learning_rate": 2.2978135458537793e-06, |
| "loss": 0.5331932067871094, |
| "memory(GiB)": 76.04, |
| "step": 6375, |
| "token_acc": 0.8245893719806763, |
| "train_speed(iter/s)": 0.027628 |
| }, |
| { |
| "epoch": 1.6495571788738768, |
| "grad_norm": 1.047746181488037, |
| "learning_rate": 2.2942619129659205e-06, |
| "loss": 0.5376855850219726, |
| "memory(GiB)": 76.04, |
| "step": 6380, |
| "token_acc": 0.8324474924989285, |
| "train_speed(iter/s)": 0.027628 |
| }, |
| { |
| "epoch": 1.650850087271317, |
| "grad_norm": 0.8962509632110596, |
| "learning_rate": 2.2907106980924104e-06, |
| "loss": 0.4863112449645996, |
| "memory(GiB)": 76.04, |
| "step": 6385, |
| "token_acc": 0.8503153721391241, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.6521429956687568, |
| "grad_norm": 1.389729380607605, |
| "learning_rate": 2.2871599084485325e-06, |
| "loss": 0.5152921676635742, |
| "memory(GiB)": 76.04, |
| "step": 6390, |
| "token_acc": 0.8414910086935811, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.653435904066197, |
| "grad_norm": 0.9997110366821289, |
| "learning_rate": 2.2836095512487063e-06, |
| "loss": 0.5211985588073731, |
| "memory(GiB)": 76.04, |
| "step": 6395, |
| "token_acc": 0.857375318849503, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.6547288124636368, |
| "grad_norm": 1.0818804502487183, |
| "learning_rate": 2.280059633706475e-06, |
| "loss": 0.5084996223449707, |
| "memory(GiB)": 76.04, |
| "step": 6400, |
| "token_acc": 0.8613592233009709, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.656021720861077, |
| "grad_norm": 0.9981624484062195, |
| "learning_rate": 2.276510163034486e-06, |
| "loss": 0.5429449081420898, |
| "memory(GiB)": 76.04, |
| "step": 6405, |
| "token_acc": 0.8164522088613749, |
| "train_speed(iter/s)": 0.02763 |
| }, |
| { |
| "epoch": 1.657314629258517, |
| "grad_norm": 0.9858782887458801, |
| "learning_rate": 2.2729611464444797e-06, |
| "loss": 0.5149686813354493, |
| "memory(GiB)": 76.04, |
| "step": 6410, |
| "token_acc": 0.8206152336907014, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.658607537655957, |
| "grad_norm": 0.9807013273239136, |
| "learning_rate": 2.2694125911472743e-06, |
| "loss": 0.5264925479888916, |
| "memory(GiB)": 76.04, |
| "step": 6415, |
| "token_acc": 0.8599959754502465, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.6599004460533973, |
| "grad_norm": 1.4319406747817993, |
| "learning_rate": 2.265864504352749e-06, |
| "loss": 0.5101997375488281, |
| "memory(GiB)": 76.04, |
| "step": 6420, |
| "token_acc": 0.8523102555710927, |
| "train_speed(iter/s)": 0.02763 |
| }, |
| { |
| "epoch": 1.661193354450837, |
| "grad_norm": 1.1590079069137573, |
| "learning_rate": 2.2623168932698347e-06, |
| "loss": 0.4951170444488525, |
| "memory(GiB)": 76.04, |
| "step": 6425, |
| "token_acc": 0.852101487651052, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.6624862628482773, |
| "grad_norm": 0.9497671127319336, |
| "learning_rate": 2.258769765106492e-06, |
| "loss": 0.5196887969970703, |
| "memory(GiB)": 76.04, |
| "step": 6430, |
| "token_acc": 0.8118867658795361, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.6637791712457173, |
| "grad_norm": 1.0885018110275269, |
| "learning_rate": 2.255223127069702e-06, |
| "loss": 0.5309447765350341, |
| "memory(GiB)": 76.04, |
| "step": 6435, |
| "token_acc": 0.8558985773734636, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.6650720796431573, |
| "grad_norm": 1.0342893600463867, |
| "learning_rate": 2.251676986365449e-06, |
| "loss": 0.49361910820007326, |
| "memory(GiB)": 76.04, |
| "step": 6440, |
| "token_acc": 0.8603108210435222, |
| "train_speed(iter/s)": 0.027631 |
| }, |
| { |
| "epoch": 1.6663649880405973, |
| "grad_norm": 1.072338342666626, |
| "learning_rate": 2.2481313501987103e-06, |
| "loss": 0.5142477035522461, |
| "memory(GiB)": 76.04, |
| "step": 6445, |
| "token_acc": 0.8642217409120178, |
| "train_speed(iter/s)": 0.02763 |
| }, |
| { |
| "epoch": 1.6676578964380373, |
| "grad_norm": 1.2066073417663574, |
| "learning_rate": 2.2445862257734317e-06, |
| "loss": 0.5130002975463868, |
| "memory(GiB)": 76.04, |
| "step": 6450, |
| "token_acc": 0.8323050805349675, |
| "train_speed(iter/s)": 0.02763 |
| }, |
| { |
| "epoch": 1.6689508048354775, |
| "grad_norm": 1.054640769958496, |
| "learning_rate": 2.2410416202925262e-06, |
| "loss": 0.5043740749359131, |
| "memory(GiB)": 76.04, |
| "step": 6455, |
| "token_acc": 0.8686844613918017, |
| "train_speed(iter/s)": 0.027631 |
| }, |
| { |
| "epoch": 1.6702437132329173, |
| "grad_norm": 2.2036914825439453, |
| "learning_rate": 2.237497540957848e-06, |
| "loss": 0.5211320877075195, |
| "memory(GiB)": 76.04, |
| "step": 6460, |
| "token_acc": 0.8593575418994414, |
| "train_speed(iter/s)": 0.027631 |
| }, |
| { |
| "epoch": 1.6715366216303575, |
| "grad_norm": 1.2252447605133057, |
| "learning_rate": 2.2339539949701817e-06, |
| "loss": 0.5284463882446289, |
| "memory(GiB)": 76.04, |
| "step": 6465, |
| "token_acc": 0.8268600408188509, |
| "train_speed(iter/s)": 0.027631 |
| }, |
| { |
| "epoch": 1.6728295300277976, |
| "grad_norm": 1.165474772453308, |
| "learning_rate": 2.230410989529233e-06, |
| "loss": 0.5352771759033204, |
| "memory(GiB)": 76.04, |
| "step": 6470, |
| "token_acc": 0.8386907812843231, |
| "train_speed(iter/s)": 0.02763 |
| }, |
| { |
| "epoch": 1.6741224384252376, |
| "grad_norm": 0.8712031841278076, |
| "learning_rate": 2.226868531833605e-06, |
| "loss": 0.5065167903900146, |
| "memory(GiB)": 76.04, |
| "step": 6475, |
| "token_acc": 0.8473345970687503, |
| "train_speed(iter/s)": 0.02763 |
| }, |
| { |
| "epoch": 1.6754153468226776, |
| "grad_norm": 1.0779868364334106, |
| "learning_rate": 2.2233266290807886e-06, |
| "loss": 0.5394890785217286, |
| "memory(GiB)": 76.04, |
| "step": 6480, |
| "token_acc": 0.8533062727144003, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.6767082552201176, |
| "grad_norm": 1.0535589456558228, |
| "learning_rate": 2.2197852884671487e-06, |
| "loss": 0.5131864547729492, |
| "memory(GiB)": 76.04, |
| "step": 6485, |
| "token_acc": 0.8391084472747705, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.6780011636175578, |
| "grad_norm": 0.8883035778999329, |
| "learning_rate": 2.2162445171879067e-06, |
| "loss": 0.5062174320220947, |
| "memory(GiB)": 76.04, |
| "step": 6490, |
| "token_acc": 0.853890824622532, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.6792940720149976, |
| "grad_norm": 0.983790934085846, |
| "learning_rate": 2.212704322437129e-06, |
| "loss": 0.500247859954834, |
| "memory(GiB)": 76.04, |
| "step": 6495, |
| "token_acc": 0.860179981923213, |
| "train_speed(iter/s)": 0.027628 |
| }, |
| { |
| "epoch": 1.6805869804124378, |
| "grad_norm": 3.652728319168091, |
| "learning_rate": 2.2091647114077083e-06, |
| "loss": 0.5243520736694336, |
| "memory(GiB)": 76.04, |
| "step": 6500, |
| "token_acc": 0.8531156542628818, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.6818798888098778, |
| "grad_norm": 1.112502098083496, |
| "learning_rate": 2.2056256912913508e-06, |
| "loss": 0.5279044151306153, |
| "memory(GiB)": 76.04, |
| "step": 6505, |
| "token_acc": 0.8433751743375174, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.6831727972073178, |
| "grad_norm": 1.060985803604126, |
| "learning_rate": 2.2020872692785666e-06, |
| "loss": 0.5015209197998047, |
| "memory(GiB)": 76.04, |
| "step": 6510, |
| "token_acc": 0.8586926542245105, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.684465705604758, |
| "grad_norm": 1.1359707117080688, |
| "learning_rate": 2.1985494525586458e-06, |
| "loss": 0.4859332084655762, |
| "memory(GiB)": 76.04, |
| "step": 6515, |
| "token_acc": 0.844207331995497, |
| "train_speed(iter/s)": 0.02763 |
| }, |
| { |
| "epoch": 1.6857586140021978, |
| "grad_norm": 1.0586234331130981, |
| "learning_rate": 2.1950122483196513e-06, |
| "loss": 0.5136495590209961, |
| "memory(GiB)": 76.04, |
| "step": 6520, |
| "token_acc": 0.8458574181117534, |
| "train_speed(iter/s)": 0.02763 |
| }, |
| { |
| "epoch": 1.687051522399638, |
| "grad_norm": 2.7810006141662598, |
| "learning_rate": 2.191475663748401e-06, |
| "loss": 0.5169890403747559, |
| "memory(GiB)": 76.04, |
| "step": 6525, |
| "token_acc": 0.8014549325762953, |
| "train_speed(iter/s)": 0.02763 |
| }, |
| { |
| "epoch": 1.688344430797078, |
| "grad_norm": 1.60906982421875, |
| "learning_rate": 2.1879397060304518e-06, |
| "loss": 0.5097242832183838, |
| "memory(GiB)": 76.04, |
| "step": 6530, |
| "token_acc": 0.8751048951048951, |
| "train_speed(iter/s)": 0.02763 |
| }, |
| { |
| "epoch": 1.689637339194518, |
| "grad_norm": 1.4863407611846924, |
| "learning_rate": 2.1844043823500912e-06, |
| "loss": 0.5065485954284668, |
| "memory(GiB)": 76.04, |
| "step": 6535, |
| "token_acc": 0.8393135069196147, |
| "train_speed(iter/s)": 0.02763 |
| }, |
| { |
| "epoch": 1.690930247591958, |
| "grad_norm": 1.1122289896011353, |
| "learning_rate": 2.1808696998903147e-06, |
| "loss": 0.4878704071044922, |
| "memory(GiB)": 76.04, |
| "step": 6540, |
| "token_acc": 0.8160763559053693, |
| "train_speed(iter/s)": 0.02763 |
| }, |
| { |
| "epoch": 1.692223155989398, |
| "grad_norm": 2.09844970703125, |
| "learning_rate": 2.177335665832816e-06, |
| "loss": 0.5010098457336426, |
| "memory(GiB)": 76.04, |
| "step": 6545, |
| "token_acc": 0.844059639520619, |
| "train_speed(iter/s)": 0.02763 |
| }, |
| { |
| "epoch": 1.6935160643868383, |
| "grad_norm": 1.2505282163619995, |
| "learning_rate": 2.1738022873579724e-06, |
| "loss": 0.5115324020385742, |
| "memory(GiB)": 76.04, |
| "step": 6550, |
| "token_acc": 0.8518385971190838, |
| "train_speed(iter/s)": 0.02763 |
| }, |
| { |
| "epoch": 1.6948089727842781, |
| "grad_norm": 35.658302307128906, |
| "learning_rate": 2.1702695716448276e-06, |
| "loss": 0.5169626235961914, |
| "memory(GiB)": 76.04, |
| "step": 6555, |
| "token_acc": 0.8498464176012572, |
| "train_speed(iter/s)": 0.02763 |
| }, |
| { |
| "epoch": 1.6961018811817183, |
| "grad_norm": 1.1642698049545288, |
| "learning_rate": 2.166737525871081e-06, |
| "loss": 0.5165857315063477, |
| "memory(GiB)": 76.04, |
| "step": 6560, |
| "token_acc": 0.8605155555555556, |
| "train_speed(iter/s)": 0.02763 |
| }, |
| { |
| "epoch": 1.6973947895791583, |
| "grad_norm": 1.4594037532806396, |
| "learning_rate": 2.1632061572130687e-06, |
| "loss": 0.48950824737548826, |
| "memory(GiB)": 76.04, |
| "step": 6565, |
| "token_acc": 0.832944099378882, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.6986876979765984, |
| "grad_norm": 1.3144365549087524, |
| "learning_rate": 2.1596754728457508e-06, |
| "loss": 0.5155162811279297, |
| "memory(GiB)": 76.04, |
| "step": 6570, |
| "token_acc": 0.8293847917462743, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.6999806063740384, |
| "grad_norm": 1.3902095556259155, |
| "learning_rate": 2.1561454799426997e-06, |
| "loss": 0.5293027877807617, |
| "memory(GiB)": 76.04, |
| "step": 6575, |
| "token_acc": 0.8462088378535365, |
| "train_speed(iter/s)": 0.02763 |
| }, |
| { |
| "epoch": 1.7012735147714784, |
| "grad_norm": 1.1059478521347046, |
| "learning_rate": 2.1526161856760806e-06, |
| "loss": 0.5223227500915527, |
| "memory(GiB)": 76.04, |
| "step": 6580, |
| "token_acc": 0.8502826247235193, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.7025664231689186, |
| "grad_norm": 1.1676130294799805, |
| "learning_rate": 2.1490875972166394e-06, |
| "loss": 0.5052920341491699, |
| "memory(GiB)": 76.04, |
| "step": 6585, |
| "token_acc": 0.8628404326533489, |
| "train_speed(iter/s)": 0.027629 |
| }, |
| { |
| "epoch": 1.7038593315663584, |
| "grad_norm": 1.3875575065612793, |
| "learning_rate": 2.1455597217336895e-06, |
| "loss": 0.5150994777679443, |
| "memory(GiB)": 76.04, |
| "step": 6590, |
| "token_acc": 0.8165382212039158, |
| "train_speed(iter/s)": 0.027628 |
| }, |
| { |
| "epoch": 1.7051522399637986, |
| "grad_norm": 1.5124651193618774, |
| "learning_rate": 2.1420325663950923e-06, |
| "loss": 0.4880176544189453, |
| "memory(GiB)": 76.04, |
| "step": 6595, |
| "token_acc": 0.8497203061189471, |
| "train_speed(iter/s)": 0.027628 |
| }, |
| { |
| "epoch": 1.7064451483612386, |
| "grad_norm": 1.1382073163986206, |
| "learning_rate": 2.138506138367252e-06, |
| "loss": 0.496349573135376, |
| "memory(GiB)": 76.04, |
| "step": 6600, |
| "token_acc": 0.8560916156924068, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.7077380567586786, |
| "grad_norm": 1.3284364938735962, |
| "learning_rate": 2.134980444815089e-06, |
| "loss": 0.5333932876586914, |
| "memory(GiB)": 76.04, |
| "step": 6605, |
| "token_acc": 0.8525549959102846, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.7090309651561189, |
| "grad_norm": 0.9929280281066895, |
| "learning_rate": 2.1314554929020335e-06, |
| "loss": 0.49078850746154784, |
| "memory(GiB)": 76.04, |
| "step": 6610, |
| "token_acc": 0.8561151079136691, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.7103238735535586, |
| "grad_norm": 1.0720187425613403, |
| "learning_rate": 2.1279312897900097e-06, |
| "loss": 0.5510223388671875, |
| "memory(GiB)": 76.04, |
| "step": 6615, |
| "token_acc": 0.8111014442317731, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.7116167819509989, |
| "grad_norm": 0.9900431036949158, |
| "learning_rate": 2.124407842639421e-06, |
| "loss": 0.535820198059082, |
| "memory(GiB)": 76.04, |
| "step": 6620, |
| "token_acc": 0.8021880945909214, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.7129096903484387, |
| "grad_norm": 1.2437286376953125, |
| "learning_rate": 2.120885158609132e-06, |
| "loss": 0.5138998985290527, |
| "memory(GiB)": 76.04, |
| "step": 6625, |
| "token_acc": 0.8522361238259926, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.7142025987458789, |
| "grad_norm": 1.634427547454834, |
| "learning_rate": 2.1173632448564603e-06, |
| "loss": 0.4958186149597168, |
| "memory(GiB)": 76.04, |
| "step": 6630, |
| "token_acc": 0.8392738961898459, |
| "train_speed(iter/s)": 0.027627 |
| }, |
| { |
| "epoch": 1.7154955071433189, |
| "grad_norm": 0.9783524870872498, |
| "learning_rate": 2.113842108537155e-06, |
| "loss": 0.51722412109375, |
| "memory(GiB)": 76.04, |
| "step": 6635, |
| "token_acc": 0.8480223559759243, |
| "train_speed(iter/s)": 0.027628 |
| }, |
| { |
| "epoch": 1.716788415540759, |
| "grad_norm": 0.919309139251709, |
| "learning_rate": 2.110321756805388e-06, |
| "loss": 0.4969566822052002, |
| "memory(GiB)": 76.04, |
| "step": 6640, |
| "token_acc": 0.8367924528301887, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.7180813239381991, |
| "grad_norm": 1.3210502862930298, |
| "learning_rate": 2.1068021968137367e-06, |
| "loss": 0.509549903869629, |
| "memory(GiB)": 76.04, |
| "step": 6645, |
| "token_acc": 0.8161617605030008, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.719374232335639, |
| "grad_norm": 1.611215591430664, |
| "learning_rate": 2.103283435713169e-06, |
| "loss": 0.49874000549316405, |
| "memory(GiB)": 76.04, |
| "step": 6650, |
| "token_acc": 0.8636546184738956, |
| "train_speed(iter/s)": 0.027624 |
| }, |
| { |
| "epoch": 1.7206671407330791, |
| "grad_norm": 1.5470666885375977, |
| "learning_rate": 2.0997654806530314e-06, |
| "loss": 0.5100409984588623, |
| "memory(GiB)": 76.04, |
| "step": 6655, |
| "token_acc": 0.8550865800865801, |
| "train_speed(iter/s)": 0.027625 |
| }, |
| { |
| "epoch": 1.7219600491305191, |
| "grad_norm": 1.1041886806488037, |
| "learning_rate": 2.0962483387810293e-06, |
| "loss": 0.5100605964660645, |
| "memory(GiB)": 76.04, |
| "step": 6660, |
| "token_acc": 0.8544809228039042, |
| "train_speed(iter/s)": 0.027625 |
| }, |
| { |
| "epoch": 1.7232529575279592, |
| "grad_norm": 1.3654582500457764, |
| "learning_rate": 2.092732017243221e-06, |
| "loss": 0.5010916709899902, |
| "memory(GiB)": 76.04, |
| "step": 6665, |
| "token_acc": 0.8365678065576336, |
| "train_speed(iter/s)": 0.027625 |
| }, |
| { |
| "epoch": 1.7245458659253992, |
| "grad_norm": 1.7149609327316284, |
| "learning_rate": 2.0892165231839935e-06, |
| "loss": 0.5101409912109375, |
| "memory(GiB)": 76.04, |
| "step": 6670, |
| "token_acc": 0.866217041193058, |
| "train_speed(iter/s)": 0.027625 |
| }, |
| { |
| "epoch": 1.7258387743228392, |
| "grad_norm": 1.0402779579162598, |
| "learning_rate": 2.085701863746054e-06, |
| "loss": 0.5074934005737305, |
| "memory(GiB)": 76.04, |
| "step": 6675, |
| "token_acc": 0.8241552132337115, |
| "train_speed(iter/s)": 0.027624 |
| }, |
| { |
| "epoch": 1.7271316827202794, |
| "grad_norm": 1.1275187730789185, |
| "learning_rate": 2.082188046070414e-06, |
| "loss": 0.48826584815979, |
| "memory(GiB)": 76.04, |
| "step": 6680, |
| "token_acc": 0.8426791277258567, |
| "train_speed(iter/s)": 0.027625 |
| }, |
| { |
| "epoch": 1.7284245911177192, |
| "grad_norm": 0.9774495363235474, |
| "learning_rate": 2.0786750772963758e-06, |
| "loss": 0.49518957138061526, |
| "memory(GiB)": 76.04, |
| "step": 6685, |
| "token_acc": 0.8610470275066548, |
| "train_speed(iter/s)": 0.027625 |
| }, |
| { |
| "epoch": 1.7297174995151594, |
| "grad_norm": 0.9467169642448425, |
| "learning_rate": 2.0751629645615155e-06, |
| "loss": 0.5169444561004639, |
| "memory(GiB)": 76.04, |
| "step": 6690, |
| "token_acc": 0.8478371242891958, |
| "train_speed(iter/s)": 0.027625 |
| }, |
| { |
| "epoch": 1.7310104079125994, |
| "grad_norm": 1.2938302755355835, |
| "learning_rate": 2.071651715001671e-06, |
| "loss": 0.549882173538208, |
| "memory(GiB)": 76.04, |
| "step": 6695, |
| "token_acc": 0.8068968578022369, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.7323033163100394, |
| "grad_norm": 2.241086721420288, |
| "learning_rate": 2.068141335750925e-06, |
| "loss": 0.49896945953369143, |
| "memory(GiB)": 76.04, |
| "step": 6700, |
| "token_acc": 0.845947499520981, |
| "train_speed(iter/s)": 0.027626 |
| }, |
| { |
| "epoch": 1.7335962247074794, |
| "grad_norm": 2.3480138778686523, |
| "learning_rate": 2.0646318339415917e-06, |
| "loss": 0.5186596393585206, |
| "memory(GiB)": 76.04, |
| "step": 6705, |
| "token_acc": 0.8365800865800865, |
| "train_speed(iter/s)": 0.027625 |
| }, |
| { |
| "epoch": 1.7348891331049194, |
| "grad_norm": 1.1465567350387573, |
| "learning_rate": 2.0611232167042062e-06, |
| "loss": 0.504915428161621, |
| "memory(GiB)": 76.04, |
| "step": 6710, |
| "token_acc": 0.8566171520890364, |
| "train_speed(iter/s)": 0.027624 |
| }, |
| { |
| "epoch": 1.7361820415023597, |
| "grad_norm": 0.9098281860351562, |
| "learning_rate": 2.0576154911675024e-06, |
| "loss": 0.49738254547119143, |
| "memory(GiB)": 76.04, |
| "step": 6715, |
| "token_acc": 0.8353113246970331, |
| "train_speed(iter/s)": 0.027624 |
| }, |
| { |
| "epoch": 1.7374749498997994, |
| "grad_norm": 2.8491294384002686, |
| "learning_rate": 2.0541086644584033e-06, |
| "loss": 0.48783140182495116, |
| "memory(GiB)": 76.04, |
| "step": 6720, |
| "token_acc": 0.8488560619708161, |
| "train_speed(iter/s)": 0.027624 |
| }, |
| { |
| "epoch": 1.7387678582972397, |
| "grad_norm": 1.6546626091003418, |
| "learning_rate": 2.0506027437020067e-06, |
| "loss": 0.5130843162536621, |
| "memory(GiB)": 76.04, |
| "step": 6725, |
| "token_acc": 0.8279507603186097, |
| "train_speed(iter/s)": 0.027624 |
| }, |
| { |
| "epoch": 1.7400607666946797, |
| "grad_norm": 1.783795952796936, |
| "learning_rate": 2.047097736021569e-06, |
| "loss": 0.5069493293762207, |
| "memory(GiB)": 76.04, |
| "step": 6730, |
| "token_acc": 0.8449707155589509, |
| "train_speed(iter/s)": 0.027624 |
| }, |
| { |
| "epoch": 1.7413536750921197, |
| "grad_norm": 1.0760575532913208, |
| "learning_rate": 2.043593648538492e-06, |
| "loss": 0.5043985366821289, |
| "memory(GiB)": 76.04, |
| "step": 6735, |
| "token_acc": 0.8538088715625329, |
| "train_speed(iter/s)": 0.027624 |
| }, |
| { |
| "epoch": 1.74264658348956, |
| "grad_norm": 0.9393293261528015, |
| "learning_rate": 2.0400904883723074e-06, |
| "loss": 0.5335483551025391, |
| "memory(GiB)": 76.04, |
| "step": 6740, |
| "token_acc": 0.8334933205343572, |
| "train_speed(iter/s)": 0.027623 |
| }, |
| { |
| "epoch": 1.7439394918869997, |
| "grad_norm": 1.1046473979949951, |
| "learning_rate": 2.036588262640661e-06, |
| "loss": 0.5038503170013428, |
| "memory(GiB)": 76.04, |
| "step": 6745, |
| "token_acc": 0.8518955250280055, |
| "train_speed(iter/s)": 0.027623 |
| }, |
| { |
| "epoch": 1.74523240028444, |
| "grad_norm": 3.058380603790283, |
| "learning_rate": 2.0330869784593054e-06, |
| "loss": 0.5195840835571289, |
| "memory(GiB)": 76.04, |
| "step": 6750, |
| "token_acc": 0.8470919324577861, |
| "train_speed(iter/s)": 0.027623 |
| }, |
| { |
| "epoch": 1.74652530868188, |
| "grad_norm": 0.9729762673377991, |
| "learning_rate": 2.029586642942074e-06, |
| "loss": 0.5047917366027832, |
| "memory(GiB)": 76.04, |
| "step": 6755, |
| "token_acc": 0.8124232148999951, |
| "train_speed(iter/s)": 0.027623 |
| }, |
| { |
| "epoch": 1.74781821707932, |
| "grad_norm": 1.6101481914520264, |
| "learning_rate": 2.026087263200876e-06, |
| "loss": 0.5221758365631104, |
| "memory(GiB)": 76.04, |
| "step": 6760, |
| "token_acc": 0.8239374739860333, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.74911112547676, |
| "grad_norm": 1.5293364524841309, |
| "learning_rate": 2.0225888463456787e-06, |
| "loss": 0.5044497489929199, |
| "memory(GiB)": 76.04, |
| "step": 6765, |
| "token_acc": 0.8550766191978082, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.7504040338742, |
| "grad_norm": 0.9731065034866333, |
| "learning_rate": 2.019091399484491e-06, |
| "loss": 0.499710750579834, |
| "memory(GiB)": 76.04, |
| "step": 6770, |
| "token_acc": 0.8571820068120425, |
| "train_speed(iter/s)": 0.027623 |
| }, |
| { |
| "epoch": 1.7516969422716402, |
| "grad_norm": 1.806395173072815, |
| "learning_rate": 2.0155949297233542e-06, |
| "loss": 0.5355013847351074, |
| "memory(GiB)": 76.04, |
| "step": 6775, |
| "token_acc": 0.813873528994754, |
| "train_speed(iter/s)": 0.027623 |
| }, |
| { |
| "epoch": 1.75298985066908, |
| "grad_norm": 1.0448716878890991, |
| "learning_rate": 2.012099444166322e-06, |
| "loss": 0.5205565929412842, |
| "memory(GiB)": 76.04, |
| "step": 6780, |
| "token_acc": 0.8436368468258978, |
| "train_speed(iter/s)": 0.027623 |
| }, |
| { |
| "epoch": 1.7542827590665202, |
| "grad_norm": 1.2543259859085083, |
| "learning_rate": 2.008604949915448e-06, |
| "loss": 0.5098013877868652, |
| "memory(GiB)": 76.04, |
| "step": 6785, |
| "token_acc": 0.8451558833389206, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.7555756674639602, |
| "grad_norm": 0.9221981167793274, |
| "learning_rate": 2.005111454070773e-06, |
| "loss": 0.5172914505004883, |
| "memory(GiB)": 76.04, |
| "step": 6790, |
| "token_acc": 0.8517071704916801, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.7568685758614002, |
| "grad_norm": 4.873886585235596, |
| "learning_rate": 2.0016189637303087e-06, |
| "loss": 0.5167638778686523, |
| "memory(GiB)": 76.04, |
| "step": 6795, |
| "token_acc": 0.842072213500785, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.7581614842588402, |
| "grad_norm": 0.9154864549636841, |
| "learning_rate": 1.9981274859900253e-06, |
| "loss": 0.49820308685302733, |
| "memory(GiB)": 76.04, |
| "step": 6800, |
| "token_acc": 0.8464762230585016, |
| "train_speed(iter/s)": 0.027622 |
| }, |
| { |
| "epoch": 1.7594543926562802, |
| "grad_norm": 1.0603070259094238, |
| "learning_rate": 1.9946370279438337e-06, |
| "loss": 0.5082100868225098, |
| "memory(GiB)": 76.04, |
| "step": 6805, |
| "token_acc": 0.8356841646066598, |
| "train_speed(iter/s)": 0.027621 |
| }, |
| { |
| "epoch": 1.7607473010537205, |
| "grad_norm": 1.0266915559768677, |
| "learning_rate": 1.9911475966835735e-06, |
| "loss": 0.5149668216705322, |
| "memory(GiB)": 76.04, |
| "step": 6810, |
| "token_acc": 0.8340411379451494, |
| "train_speed(iter/s)": 0.027621 |
| }, |
| { |
| "epoch": 1.7620402094511602, |
| "grad_norm": 1.4958763122558594, |
| "learning_rate": 1.987659199298997e-06, |
| "loss": 0.515727186203003, |
| "memory(GiB)": 76.04, |
| "step": 6815, |
| "token_acc": 0.8732497922127741, |
| "train_speed(iter/s)": 0.027621 |
| }, |
| { |
| "epoch": 1.7633331178486005, |
| "grad_norm": 1.098486304283142, |
| "learning_rate": 1.984171842877759e-06, |
| "loss": 0.49164752960205077, |
| "memory(GiB)": 76.04, |
| "step": 6820, |
| "token_acc": 0.850276862943795, |
| "train_speed(iter/s)": 0.02762 |
| }, |
| { |
| "epoch": 1.7646260262460405, |
| "grad_norm": 0.9649628400802612, |
| "learning_rate": 1.9806855345053964e-06, |
| "loss": 0.4989636898040771, |
| "memory(GiB)": 76.04, |
| "step": 6825, |
| "token_acc": 0.8244157210490719, |
| "train_speed(iter/s)": 0.02762 |
| }, |
| { |
| "epoch": 1.7659189346434805, |
| "grad_norm": 1.1349451541900635, |
| "learning_rate": 1.977200281265319e-06, |
| "loss": 0.5093589782714844, |
| "memory(GiB)": 76.04, |
| "step": 6830, |
| "token_acc": 0.8507517284266745, |
| "train_speed(iter/s)": 0.027619 |
| }, |
| { |
| "epoch": 1.7672118430409207, |
| "grad_norm": 0.9703465700149536, |
| "learning_rate": 1.9737160902387896e-06, |
| "loss": 0.5363808631896972, |
| "memory(GiB)": 76.04, |
| "step": 6835, |
| "token_acc": 0.8378486587481649, |
| "train_speed(iter/s)": 0.027619 |
| }, |
| { |
| "epoch": 1.7685047514383605, |
| "grad_norm": 1.3537312746047974, |
| "learning_rate": 1.9702329685049167e-06, |
| "loss": 0.4682920455932617, |
| "memory(GiB)": 76.04, |
| "step": 6840, |
| "token_acc": 0.8536826495304004, |
| "train_speed(iter/s)": 0.027618 |
| }, |
| { |
| "epoch": 1.7697976598358007, |
| "grad_norm": 1.2528867721557617, |
| "learning_rate": 1.9667509231406332e-06, |
| "loss": 0.5215599060058593, |
| "memory(GiB)": 76.04, |
| "step": 6845, |
| "token_acc": 0.8464350200378737, |
| "train_speed(iter/s)": 0.027618 |
| }, |
| { |
| "epoch": 1.7710905682332405, |
| "grad_norm": 1.063242793083191, |
| "learning_rate": 1.963269961220687e-06, |
| "loss": 0.5151140689849854, |
| "memory(GiB)": 76.04, |
| "step": 6850, |
| "token_acc": 0.8156099097207642, |
| "train_speed(iter/s)": 0.027617 |
| }, |
| { |
| "epoch": 1.7723834766306807, |
| "grad_norm": 1.098589539527893, |
| "learning_rate": 1.9597900898176212e-06, |
| "loss": 0.5092347145080567, |
| "memory(GiB)": 76.04, |
| "step": 6855, |
| "token_acc": 0.8469693605683837, |
| "train_speed(iter/s)": 0.027616 |
| }, |
| { |
| "epoch": 1.7736763850281207, |
| "grad_norm": 1.204325556755066, |
| "learning_rate": 1.9563113160017692e-06, |
| "loss": 0.5028075218200684, |
| "memory(GiB)": 76.04, |
| "step": 6860, |
| "token_acc": 0.8575784400511459, |
| "train_speed(iter/s)": 0.027616 |
| }, |
| { |
| "epoch": 1.7749692934255608, |
| "grad_norm": 1.1051974296569824, |
| "learning_rate": 1.952833646841229e-06, |
| "loss": 0.5096775531768799, |
| "memory(GiB)": 76.04, |
| "step": 6865, |
| "token_acc": 0.844632645043, |
| "train_speed(iter/s)": 0.027615 |
| }, |
| { |
| "epoch": 1.776262201823001, |
| "grad_norm": 1.019508957862854, |
| "learning_rate": 1.949357089401858e-06, |
| "loss": 0.5253026962280274, |
| "memory(GiB)": 76.04, |
| "step": 6870, |
| "token_acc": 0.8215040397762585, |
| "train_speed(iter/s)": 0.027615 |
| }, |
| { |
| "epoch": 1.7775551102204408, |
| "grad_norm": 1.104564905166626, |
| "learning_rate": 1.9458816507472508e-06, |
| "loss": 0.5215746879577636, |
| "memory(GiB)": 76.04, |
| "step": 6875, |
| "token_acc": 0.8430459464254035, |
| "train_speed(iter/s)": 0.027615 |
| }, |
| { |
| "epoch": 1.778848018617881, |
| "grad_norm": 1.5065202713012695, |
| "learning_rate": 1.942407337938731e-06, |
| "loss": 0.5007893562316894, |
| "memory(GiB)": 76.04, |
| "step": 6880, |
| "token_acc": 0.877871314353399, |
| "train_speed(iter/s)": 0.027615 |
| }, |
| { |
| "epoch": 1.780140927015321, |
| "grad_norm": 1.023169994354248, |
| "learning_rate": 1.9389341580353376e-06, |
| "loss": 0.5197202682495117, |
| "memory(GiB)": 76.04, |
| "step": 6885, |
| "token_acc": 0.8612209229744913, |
| "train_speed(iter/s)": 0.027615 |
| }, |
| { |
| "epoch": 1.781433835412761, |
| "grad_norm": 1.0600008964538574, |
| "learning_rate": 1.9354621180938025e-06, |
| "loss": 0.5054890155792237, |
| "memory(GiB)": 76.04, |
| "step": 6890, |
| "token_acc": 0.8306423761008461, |
| "train_speed(iter/s)": 0.027615 |
| }, |
| { |
| "epoch": 1.782726743810201, |
| "grad_norm": 1.1297129392623901, |
| "learning_rate": 1.931991225168544e-06, |
| "loss": 0.5017886161804199, |
| "memory(GiB)": 76.04, |
| "step": 6895, |
| "token_acc": 0.8641059027777778, |
| "train_speed(iter/s)": 0.027616 |
| }, |
| { |
| "epoch": 1.784019652207641, |
| "grad_norm": 1.0830426216125488, |
| "learning_rate": 1.92852148631165e-06, |
| "loss": 0.5002258777618408, |
| "memory(GiB)": 76.04, |
| "step": 6900, |
| "token_acc": 0.8582578976537965, |
| "train_speed(iter/s)": 0.027616 |
| }, |
| { |
| "epoch": 1.7853125606050813, |
| "grad_norm": 1.4555113315582275, |
| "learning_rate": 1.9250529085728656e-06, |
| "loss": 0.5128755569458008, |
| "memory(GiB)": 76.04, |
| "step": 6905, |
| "token_acc": 0.861249647125247, |
| "train_speed(iter/s)": 0.027616 |
| }, |
| { |
| "epoch": 1.786605469002521, |
| "grad_norm": 1.1025890111923218, |
| "learning_rate": 1.9215854989995726e-06, |
| "loss": 0.5137574195861816, |
| "memory(GiB)": 76.04, |
| "step": 6910, |
| "token_acc": 0.8645792423863758, |
| "train_speed(iter/s)": 0.027617 |
| }, |
| { |
| "epoch": 1.7878983773999613, |
| "grad_norm": 0.8277395367622375, |
| "learning_rate": 1.9181192646367815e-06, |
| "loss": 0.4998950958251953, |
| "memory(GiB)": 76.04, |
| "step": 6915, |
| "token_acc": 0.86408374778284, |
| "train_speed(iter/s)": 0.027616 |
| }, |
| { |
| "epoch": 1.7891912857974013, |
| "grad_norm": 4.725937843322754, |
| "learning_rate": 1.914654212527114e-06, |
| "loss": 0.48327035903930665, |
| "memory(GiB)": 76.04, |
| "step": 6920, |
| "token_acc": 0.8672415229525952, |
| "train_speed(iter/s)": 0.027616 |
| }, |
| { |
| "epoch": 1.7904841941948413, |
| "grad_norm": 1.076759934425354, |
| "learning_rate": 1.9111903497107924e-06, |
| "loss": 0.5146621704101563, |
| "memory(GiB)": 76.04, |
| "step": 6925, |
| "token_acc": 0.8483241482097674, |
| "train_speed(iter/s)": 0.027616 |
| }, |
| { |
| "epoch": 1.7917771025922813, |
| "grad_norm": 1.0654821395874023, |
| "learning_rate": 1.90772768322562e-06, |
| "loss": 0.5301095962524414, |
| "memory(GiB)": 76.04, |
| "step": 6930, |
| "token_acc": 0.8365392073218025, |
| "train_speed(iter/s)": 0.027616 |
| }, |
| { |
| "epoch": 1.7930700109897213, |
| "grad_norm": 0.9456325769424438, |
| "learning_rate": 1.9042662201069705e-06, |
| "loss": 0.4947515487670898, |
| "memory(GiB)": 76.04, |
| "step": 6935, |
| "token_acc": 0.853165902597834, |
| "train_speed(iter/s)": 0.027616 |
| }, |
| { |
| "epoch": 1.7943629193871615, |
| "grad_norm": 0.9174202680587769, |
| "learning_rate": 1.9008059673877728e-06, |
| "loss": 0.5024736404418946, |
| "memory(GiB)": 76.04, |
| "step": 6940, |
| "token_acc": 0.826625231817954, |
| "train_speed(iter/s)": 0.027615 |
| }, |
| { |
| "epoch": 1.7956558277846013, |
| "grad_norm": 1.0330584049224854, |
| "learning_rate": 1.8973469320984939e-06, |
| "loss": 0.5240283012390137, |
| "memory(GiB)": 76.04, |
| "step": 6945, |
| "token_acc": 0.8250224483687518, |
| "train_speed(iter/s)": 0.027616 |
| }, |
| { |
| "epoch": 1.7969487361820415, |
| "grad_norm": 1.1314250230789185, |
| "learning_rate": 1.893889121267132e-06, |
| "loss": 0.5122389793395996, |
| "memory(GiB)": 76.04, |
| "step": 6950, |
| "token_acc": 0.8462841506319767, |
| "train_speed(iter/s)": 0.027616 |
| }, |
| { |
| "epoch": 1.7982416445794815, |
| "grad_norm": 0.9879010319709778, |
| "learning_rate": 1.8904325419191941e-06, |
| "loss": 0.5107357025146484, |
| "memory(GiB)": 76.04, |
| "step": 6955, |
| "token_acc": 0.8432230939274413, |
| "train_speed(iter/s)": 0.027616 |
| }, |
| { |
| "epoch": 1.7995345529769216, |
| "grad_norm": 0.9637373089790344, |
| "learning_rate": 1.886977201077685e-06, |
| "loss": 0.5289700508117676, |
| "memory(GiB)": 76.04, |
| "step": 6960, |
| "token_acc": 0.8506177606177606, |
| "train_speed(iter/s)": 0.027616 |
| }, |
| { |
| "epoch": 1.8008274613743618, |
| "grad_norm": 0.8645473122596741, |
| "learning_rate": 1.8835231057630955e-06, |
| "loss": 0.5153064727783203, |
| "memory(GiB)": 76.04, |
| "step": 6965, |
| "token_acc": 0.8533177661023545, |
| "train_speed(iter/s)": 0.027617 |
| }, |
| { |
| "epoch": 1.8021203697718016, |
| "grad_norm": 0.9590107202529907, |
| "learning_rate": 1.8800702629933828e-06, |
| "loss": 0.4972332000732422, |
| "memory(GiB)": 76.04, |
| "step": 6970, |
| "token_acc": 0.8284552126624812, |
| "train_speed(iter/s)": 0.027618 |
| }, |
| { |
| "epoch": 1.8034132781692418, |
| "grad_norm": 0.996835470199585, |
| "learning_rate": 1.8766186797839625e-06, |
| "loss": 0.48930206298828127, |
| "memory(GiB)": 76.04, |
| "step": 6975, |
| "token_acc": 0.8765837634913186, |
| "train_speed(iter/s)": 0.027617 |
| }, |
| { |
| "epoch": 1.8047061865666818, |
| "grad_norm": 1.4224867820739746, |
| "learning_rate": 1.8731683631476885e-06, |
| "loss": 0.5020298480987548, |
| "memory(GiB)": 76.04, |
| "step": 6980, |
| "token_acc": 0.8626479614204297, |
| "train_speed(iter/s)": 0.027617 |
| }, |
| { |
| "epoch": 1.8059990949641218, |
| "grad_norm": 1.8809725046157837, |
| "learning_rate": 1.8697193200948415e-06, |
| "loss": 0.5353089332580566, |
| "memory(GiB)": 76.04, |
| "step": 6985, |
| "token_acc": 0.8270812946250589, |
| "train_speed(iter/s)": 0.027618 |
| }, |
| { |
| "epoch": 1.8072920033615618, |
| "grad_norm": 0.9029675126075745, |
| "learning_rate": 1.866271557633115e-06, |
| "loss": 0.4966177463531494, |
| "memory(GiB)": 76.04, |
| "step": 6990, |
| "token_acc": 0.8467085471597947, |
| "train_speed(iter/s)": 0.027617 |
| }, |
| { |
| "epoch": 1.8085849117590018, |
| "grad_norm": 1.1452031135559082, |
| "learning_rate": 1.862825082767602e-06, |
| "loss": 0.5193626403808593, |
| "memory(GiB)": 76.04, |
| "step": 6995, |
| "token_acc": 0.8338509316770186, |
| "train_speed(iter/s)": 0.027617 |
| }, |
| { |
| "epoch": 1.809877820156442, |
| "grad_norm": 1.0218499898910522, |
| "learning_rate": 1.8593799025007772e-06, |
| "loss": 0.4930767059326172, |
| "memory(GiB)": 76.04, |
| "step": 7000, |
| "token_acc": 0.8444614310877729, |
| "train_speed(iter/s)": 0.027616 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 11601, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.806726131996636e+19, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |