| { | |
| "best_metric": 37.65067359962184, | |
| "best_model_checkpoint": "./whisper-tiny-fr-micro-train/checkpoint-8222", | |
| "epoch": 0.08564583333333334, | |
| "eval_steps": 4111, | |
| "global_step": 8222, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 19.87736701965332, | |
| "learning_rate": 5e-09, | |
| "loss": 0.9486, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 18.398113250732422, | |
| "learning_rate": 1e-08, | |
| "loss": 0.9083, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 16.93355941772461, | |
| "learning_rate": 1.5e-08, | |
| "loss": 0.8204, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 11.64875602722168, | |
| "learning_rate": 2e-08, | |
| "loss": 0.7006, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 8.734498977661133, | |
| "learning_rate": 2.5e-08, | |
| "loss": 0.648, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 7.067263603210449, | |
| "learning_rate": 3e-08, | |
| "loss": 0.537, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 7.97986364364624, | |
| "learning_rate": 3.4999999999999996e-08, | |
| "loss": 0.5752, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 7.728494167327881, | |
| "learning_rate": 4e-08, | |
| "loss": 0.5863, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 9.38297176361084, | |
| "learning_rate": 4.5e-08, | |
| "loss": 0.5172, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 7.568984508514404, | |
| "learning_rate": 5e-08, | |
| "loss": 0.4682, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 8.042702674865723, | |
| "learning_rate": 5.5e-08, | |
| "loss": 0.4214, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 7.661986827850342, | |
| "learning_rate": 6e-08, | |
| "loss": 0.4431, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 9.154074668884277, | |
| "learning_rate": 6.5e-08, | |
| "loss": 0.4056, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 8.346107482910156, | |
| "learning_rate": 6.999999999999999e-08, | |
| "loss": 0.4079, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 6.246629238128662, | |
| "learning_rate": 7.5e-08, | |
| "loss": 0.3897, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 7.129103183746338, | |
| "learning_rate": 8e-08, | |
| "loss": 0.3536, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 6.834921836853027, | |
| "learning_rate": 8.5e-08, | |
| "loss": 0.3401, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 8.863313674926758, | |
| "learning_rate": 9e-08, | |
| "loss": 0.3627, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 7.284473896026611, | |
| "learning_rate": 9.499999999999999e-08, | |
| "loss": 0.356, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 5.816940784454346, | |
| "learning_rate": 1e-07, | |
| "loss": 0.3539, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 8.022565841674805, | |
| "learning_rate": 9.997382198952879e-08, | |
| "loss": 0.3577, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 7.51448917388916, | |
| "learning_rate": 9.994764397905758e-08, | |
| "loss": 0.3513, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 7.016752243041992, | |
| "learning_rate": 9.992146596858639e-08, | |
| "loss": 0.3687, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 6.761058330535889, | |
| "learning_rate": 9.989528795811518e-08, | |
| "loss": 0.3495, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 6.693453311920166, | |
| "learning_rate": 9.986910994764397e-08, | |
| "loss": 0.3325, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 6.049990653991699, | |
| "learning_rate": 9.984293193717277e-08, | |
| "loss": 0.3609, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 6.6787109375, | |
| "learning_rate": 9.981675392670157e-08, | |
| "loss": 0.3317, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 6.8440937995910645, | |
| "learning_rate": 9.979057591623035e-08, | |
| "loss": 0.332, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 6.892059326171875, | |
| "learning_rate": 9.976439790575916e-08, | |
| "loss": 0.3379, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 8.142931938171387, | |
| "learning_rate": 9.973821989528795e-08, | |
| "loss": 0.3308, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 7.152769565582275, | |
| "learning_rate": 9.971204188481675e-08, | |
| "loss": 0.3352, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 6.680529594421387, | |
| "learning_rate": 9.968586387434554e-08, | |
| "loss": 0.3343, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 6.2912445068359375, | |
| "learning_rate": 9.965968586387435e-08, | |
| "loss": 0.3233, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 6.337522983551025, | |
| "learning_rate": 9.963350785340313e-08, | |
| "loss": 0.3147, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 7.505101203918457, | |
| "learning_rate": 9.960732984293193e-08, | |
| "loss": 0.3384, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 6.080435276031494, | |
| "learning_rate": 9.958115183246073e-08, | |
| "loss": 0.3397, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 7.185611248016357, | |
| "learning_rate": 9.955497382198953e-08, | |
| "loss": 0.3448, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 6.759621620178223, | |
| "learning_rate": 9.952879581151831e-08, | |
| "loss": 0.3193, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 5.393013000488281, | |
| "learning_rate": 9.950261780104712e-08, | |
| "loss": 0.3134, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 6.275601387023926, | |
| "learning_rate": 9.947643979057591e-08, | |
| "loss": 0.3227, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 8.202522277832031, | |
| "learning_rate": 9.94502617801047e-08, | |
| "loss": 0.3366, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 6.580430507659912, | |
| "learning_rate": 9.94240837696335e-08, | |
| "loss": 0.4026, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 8.456107139587402, | |
| "learning_rate": 9.93979057591623e-08, | |
| "loss": 0.3868, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 8.176070213317871, | |
| "learning_rate": 9.937172774869109e-08, | |
| "loss": 0.4297, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 9.07604694366455, | |
| "learning_rate": 9.934554973821989e-08, | |
| "loss": 0.4861, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 8.36545181274414, | |
| "learning_rate": 9.931937172774869e-08, | |
| "loss": 0.4939, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 9.944046974182129, | |
| "learning_rate": 9.929319371727748e-08, | |
| "loss": 0.5198, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 12.03496265411377, | |
| "learning_rate": 9.926701570680629e-08, | |
| "loss": 0.5353, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 14.10308837890625, | |
| "learning_rate": 9.924083769633508e-08, | |
| "loss": 0.5005, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 12.214973449707031, | |
| "learning_rate": 9.921465968586387e-08, | |
| "loss": 0.5879, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 11.323634147644043, | |
| "learning_rate": 9.918848167539266e-08, | |
| "loss": 0.5031, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 10.742391586303711, | |
| "learning_rate": 9.916230366492147e-08, | |
| "loss": 0.5495, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 14.457928657531738, | |
| "learning_rate": 9.913612565445025e-08, | |
| "loss": 0.5263, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 11.978686332702637, | |
| "learning_rate": 9.910994764397906e-08, | |
| "loss": 0.5477, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 11.699676513671875, | |
| "learning_rate": 9.908376963350785e-08, | |
| "loss": 0.5293, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 11.737068176269531, | |
| "learning_rate": 9.905759162303664e-08, | |
| "loss": 0.5622, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 10.408597946166992, | |
| "learning_rate": 9.903141361256544e-08, | |
| "loss": 0.5639, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 11.709553718566895, | |
| "learning_rate": 9.900523560209424e-08, | |
| "loss": 0.5421, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 10.107832908630371, | |
| "learning_rate": 9.897905759162302e-08, | |
| "loss": 0.5291, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 11.955233573913574, | |
| "learning_rate": 9.895287958115183e-08, | |
| "loss": 0.5508, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 12.00100040435791, | |
| "learning_rate": 9.892670157068062e-08, | |
| "loss": 0.5271, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 11.11552619934082, | |
| "learning_rate": 9.890052356020942e-08, | |
| "loss": 0.5338, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 8.751993179321289, | |
| "learning_rate": 9.887434554973821e-08, | |
| "loss": 0.5178, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 10.523124694824219, | |
| "learning_rate": 9.884816753926702e-08, | |
| "loss": 0.5574, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 14.987282752990723, | |
| "learning_rate": 9.88219895287958e-08, | |
| "loss": 0.5406, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 12.370256423950195, | |
| "learning_rate": 9.87958115183246e-08, | |
| "loss": 0.4805, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 9.747725486755371, | |
| "learning_rate": 9.87696335078534e-08, | |
| "loss": 0.5259, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 12.413991928100586, | |
| "learning_rate": 9.87434554973822e-08, | |
| "loss": 0.5225, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 11.440505981445312, | |
| "learning_rate": 9.871727748691098e-08, | |
| "loss": 0.5511, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 11.07944107055664, | |
| "learning_rate": 9.869109947643979e-08, | |
| "loss": 0.4913, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 12.481764793395996, | |
| "learning_rate": 9.866492146596858e-08, | |
| "loss": 0.5618, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 10.33045768737793, | |
| "learning_rate": 9.863874345549738e-08, | |
| "loss": 0.5099, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 11.342964172363281, | |
| "learning_rate": 9.861256544502617e-08, | |
| "loss": 0.5454, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 10.811851501464844, | |
| "learning_rate": 9.858638743455498e-08, | |
| "loss": 0.5118, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 12.243831634521484, | |
| "learning_rate": 9.856020942408377e-08, | |
| "loss": 0.5191, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 10.51577377319336, | |
| "learning_rate": 9.853403141361256e-08, | |
| "loss": 0.4916, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 9.325318336486816, | |
| "learning_rate": 9.850785340314135e-08, | |
| "loss": 0.5178, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 9.348186492919922, | |
| "learning_rate": 9.848167539267015e-08, | |
| "loss": 0.5066, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 11.930258750915527, | |
| "learning_rate": 9.845549738219895e-08, | |
| "loss": 0.4747, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 11.170626640319824, | |
| "learning_rate": 9.842931937172775e-08, | |
| "loss": 0.5285, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 10.741945266723633, | |
| "learning_rate": 9.840314136125654e-08, | |
| "loss": 0.5686, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 10.902135848999023, | |
| "learning_rate": 9.837696335078533e-08, | |
| "loss": 0.5909, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 8.929906845092773, | |
| "learning_rate": 9.835078534031414e-08, | |
| "loss": 0.578, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 11.585110664367676, | |
| "learning_rate": 9.832460732984292e-08, | |
| "loss": 0.8891, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 11.564123153686523, | |
| "learning_rate": 9.829842931937173e-08, | |
| "loss": 1.0214, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 10.137656211853027, | |
| "learning_rate": 9.827225130890052e-08, | |
| "loss": 0.7967, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 9.988815307617188, | |
| "learning_rate": 9.824607329842931e-08, | |
| "loss": 0.7586, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 8.484098434448242, | |
| "learning_rate": 9.82198952879581e-08, | |
| "loss": 0.6455, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 8.596495628356934, | |
| "learning_rate": 9.819371727748691e-08, | |
| "loss": 0.6966, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 8.861817359924316, | |
| "learning_rate": 9.816753926701569e-08, | |
| "loss": 0.8732, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 9.609010696411133, | |
| "learning_rate": 9.81413612565445e-08, | |
| "loss": 0.803, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 8.113046646118164, | |
| "learning_rate": 9.811518324607329e-08, | |
| "loss": 0.8018, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 7.831557750701904, | |
| "learning_rate": 9.808900523560209e-08, | |
| "loss": 0.7681, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 9.451202392578125, | |
| "learning_rate": 9.806282722513088e-08, | |
| "loss": 0.6863, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 6.153475284576416, | |
| "learning_rate": 9.803664921465969e-08, | |
| "loss": 0.5344, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 6.556187152862549, | |
| "learning_rate": 9.801047120418847e-08, | |
| "loss": 0.5072, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 6.695789337158203, | |
| "learning_rate": 9.798429319371727e-08, | |
| "loss": 0.4882, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 5.124952793121338, | |
| "learning_rate": 9.795811518324607e-08, | |
| "loss": 0.4023, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 5.724789142608643, | |
| "learning_rate": 9.793193717277487e-08, | |
| "loss": 0.436, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 6.060319423675537, | |
| "learning_rate": 9.790575916230365e-08, | |
| "loss": 0.3939, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 5.216397285461426, | |
| "learning_rate": 9.787958115183246e-08, | |
| "loss": 0.3305, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 5.101900577545166, | |
| "learning_rate": 9.785340314136125e-08, | |
| "loss": 0.3212, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 4.3815484046936035, | |
| "learning_rate": 9.782722513089004e-08, | |
| "loss": 0.3153, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 7.231525897979736, | |
| "learning_rate": 9.780104712041885e-08, | |
| "loss": 0.4177, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 5.76875638961792, | |
| "learning_rate": 9.777486910994764e-08, | |
| "loss": 0.5512, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 5.580086708068848, | |
| "learning_rate": 9.774869109947644e-08, | |
| "loss": 0.5036, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 6.0146894454956055, | |
| "learning_rate": 9.772251308900523e-08, | |
| "loss": 0.5229, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 5.919321060180664, | |
| "learning_rate": 9.769633507853404e-08, | |
| "loss": 0.6031, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 7.249564170837402, | |
| "learning_rate": 9.767015706806282e-08, | |
| "loss": 0.6529, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 5.037733554840088, | |
| "learning_rate": 9.764397905759162e-08, | |
| "loss": 0.5845, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 5.596995830535889, | |
| "learning_rate": 9.761780104712042e-08, | |
| "loss": 0.4844, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 6.465144157409668, | |
| "learning_rate": 9.759162303664921e-08, | |
| "loss": 0.7091, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 8.079314231872559, | |
| "learning_rate": 9.7565445026178e-08, | |
| "loss": 1.0112, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 6.964105606079102, | |
| "learning_rate": 9.753926701570681e-08, | |
| "loss": 0.8826, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 5.710263252258301, | |
| "learning_rate": 9.751308900523559e-08, | |
| "loss": 0.7581, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 4.680161476135254, | |
| "learning_rate": 9.74869109947644e-08, | |
| "loss": 0.6771, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 6.1621198654174805, | |
| "learning_rate": 9.746073298429319e-08, | |
| "loss": 0.5596, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 5.297918796539307, | |
| "learning_rate": 9.743455497382198e-08, | |
| "loss": 0.4256, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 5.257850646972656, | |
| "learning_rate": 9.740837696335078e-08, | |
| "loss": 0.3977, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 5.193603992462158, | |
| "learning_rate": 9.738219895287958e-08, | |
| "loss": 0.3957, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 4.629751682281494, | |
| "learning_rate": 9.735602094240836e-08, | |
| "loss": 0.3431, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 4.40083122253418, | |
| "learning_rate": 9.732984293193717e-08, | |
| "loss": 0.3242, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 4.38535737991333, | |
| "learning_rate": 9.730366492146596e-08, | |
| "loss": 0.3124, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 4.32893180847168, | |
| "learning_rate": 9.727748691099476e-08, | |
| "loss": 0.2964, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 4.1819586753845215, | |
| "learning_rate": 9.725130890052355e-08, | |
| "loss": 0.2668, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 4.960391521453857, | |
| "learning_rate": 9.722513089005235e-08, | |
| "loss": 0.2789, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 4.490744113922119, | |
| "learning_rate": 9.719895287958115e-08, | |
| "loss": 0.2566, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 4.861118316650391, | |
| "learning_rate": 9.717277486910994e-08, | |
| "loss": 0.279, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 5.386078357696533, | |
| "learning_rate": 9.714659685863873e-08, | |
| "loss": 0.2882, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 6.265291213989258, | |
| "learning_rate": 9.712041884816754e-08, | |
| "loss": 0.3766, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 6.723097801208496, | |
| "learning_rate": 9.709424083769633e-08, | |
| "loss": 0.5386, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 5.949530601501465, | |
| "learning_rate": 9.706806282722513e-08, | |
| "loss": 0.4818, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 5.125253200531006, | |
| "learning_rate": 9.704188481675392e-08, | |
| "loss": 0.4862, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 5.845962047576904, | |
| "learning_rate": 9.701570680628271e-08, | |
| "loss": 0.4502, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 5.178995609283447, | |
| "learning_rate": 9.698952879581152e-08, | |
| "loss": 0.4086, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 4.950035095214844, | |
| "learning_rate": 9.696335078534031e-08, | |
| "loss": 0.4167, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 7.225176811218262, | |
| "learning_rate": 9.693717277486911e-08, | |
| "loss": 0.3945, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 6.313861846923828, | |
| "learning_rate": 9.69109947643979e-08, | |
| "loss": 0.4021, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 5.976010799407959, | |
| "learning_rate": 9.68848167539267e-08, | |
| "loss": 0.3821, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 6.867140769958496, | |
| "learning_rate": 9.685863874345549e-08, | |
| "loss": 0.3901, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 5.82126522064209, | |
| "learning_rate": 9.683246073298429e-08, | |
| "loss": 0.3798, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 5.741916656494141, | |
| "learning_rate": 9.680628272251309e-08, | |
| "loss": 0.3842, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 5.43148946762085, | |
| "learning_rate": 9.678010471204188e-08, | |
| "loss": 0.3762, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 4.983076095581055, | |
| "learning_rate": 9.675392670157067e-08, | |
| "loss": 0.3496, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 5.233561992645264, | |
| "learning_rate": 9.672774869109948e-08, | |
| "loss": 0.3225, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 4.534473419189453, | |
| "learning_rate": 9.670157068062826e-08, | |
| "loss": 0.3009, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 5.9857306480407715, | |
| "learning_rate": 9.667539267015707e-08, | |
| "loss": 0.4075, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 7.715012073516846, | |
| "learning_rate": 9.664921465968586e-08, | |
| "loss": 0.5876, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 7.0109405517578125, | |
| "learning_rate": 9.662303664921465e-08, | |
| "loss": 0.6805, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 6.128580093383789, | |
| "learning_rate": 9.659685863874345e-08, | |
| "loss": 0.5924, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 5.6104865074157715, | |
| "learning_rate": 9.657068062827225e-08, | |
| "loss": 0.4888, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 5.251614093780518, | |
| "learning_rate": 9.654450261780103e-08, | |
| "loss": 0.4286, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 5.3208184242248535, | |
| "learning_rate": 9.651832460732984e-08, | |
| "loss": 0.3889, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 5.454063892364502, | |
| "learning_rate": 9.649214659685863e-08, | |
| "loss": 0.4181, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 5.304567813873291, | |
| "learning_rate": 9.646596858638742e-08, | |
| "loss": 0.3816, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 4.866218566894531, | |
| "learning_rate": 9.643979057591623e-08, | |
| "loss": 0.3475, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 4.873610019683838, | |
| "learning_rate": 9.641361256544502e-08, | |
| "loss": 0.3369, | |
| "step": 3925 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 4.69268798828125, | |
| "learning_rate": 9.638743455497382e-08, | |
| "loss": 0.3515, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 5.367419719696045, | |
| "learning_rate": 9.636125654450261e-08, | |
| "loss": 0.3328, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 4.6179728507995605, | |
| "learning_rate": 9.633507853403142e-08, | |
| "loss": 0.3536, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 4.710158348083496, | |
| "learning_rate": 9.630890052356021e-08, | |
| "loss": 0.3446, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 4.824263095855713, | |
| "learning_rate": 9.6282722513089e-08, | |
| "loss": 0.3488, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 5.474529266357422, | |
| "learning_rate": 9.62565445026178e-08, | |
| "loss": 0.3648, | |
| "step": 4075 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 5.825191497802734, | |
| "learning_rate": 9.62303664921466e-08, | |
| "loss": 0.3782, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 0.7290233969688416, | |
| "eval_runtime": 275.8897, | |
| "eval_samples_per_second": 9.801, | |
| "eval_steps_per_second": 1.225, | |
| "eval_wer": 39.41739541479556, | |
| "step": 4111 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 4.9900221824646, | |
| "learning_rate": 9.620418848167538e-08, | |
| "loss": 0.3366, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 6.866960525512695, | |
| "learning_rate": 9.617801047120419e-08, | |
| "loss": 0.384, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 6.261806011199951, | |
| "learning_rate": 9.615183246073298e-08, | |
| "loss": 0.4945, | |
| "step": 4175 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 6.251190185546875, | |
| "learning_rate": 9.612565445026178e-08, | |
| "loss": 0.5011, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 7.063992023468018, | |
| "learning_rate": 9.609947643979057e-08, | |
| "loss": 0.4765, | |
| "step": 4225 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 6.903201103210449, | |
| "learning_rate": 9.607329842931938e-08, | |
| "loss": 0.4501, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 6.0563788414001465, | |
| "learning_rate": 9.604712041884816e-08, | |
| "loss": 0.459, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 6.9955363273620605, | |
| "learning_rate": 9.602094240837696e-08, | |
| "loss": 0.4237, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.026924133300781, | |
| "learning_rate": 9.599476439790576e-08, | |
| "loss": 0.3898, | |
| "step": 4325 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 5.720476150512695, | |
| "learning_rate": 9.596858638743455e-08, | |
| "loss": 0.4239, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.680058479309082, | |
| "learning_rate": 9.594240837696334e-08, | |
| "loss": 0.4321, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 8.304168701171875, | |
| "learning_rate": 9.591623036649215e-08, | |
| "loss": 0.5794, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 8.107504844665527, | |
| "learning_rate": 9.589005235602093e-08, | |
| "loss": 0.8686, | |
| "step": 4425 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 8.911792755126953, | |
| "learning_rate": 9.586387434554973e-08, | |
| "loss": 0.9618, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.706320285797119, | |
| "learning_rate": 9.583769633507853e-08, | |
| "loss": 0.8042, | |
| "step": 4475 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.71433687210083, | |
| "learning_rate": 9.581151832460732e-08, | |
| "loss": 0.6088, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.675333023071289, | |
| "learning_rate": 9.578534031413611e-08, | |
| "loss": 0.551, | |
| "step": 4525 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.766154766082764, | |
| "learning_rate": 9.575916230366492e-08, | |
| "loss": 0.5113, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.36196231842041, | |
| "learning_rate": 9.573298429319371e-08, | |
| "loss": 0.4785, | |
| "step": 4575 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.1699395179748535, | |
| "learning_rate": 9.570680628272251e-08, | |
| "loss": 0.4663, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.362920761108398, | |
| "learning_rate": 9.56806282722513e-08, | |
| "loss": 0.4394, | |
| "step": 4625 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.1348347663879395, | |
| "learning_rate": 9.56544502617801e-08, | |
| "loss": 0.4343, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.3059563636779785, | |
| "learning_rate": 9.56282722513089e-08, | |
| "loss": 0.4535, | |
| "step": 4675 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 7.463464260101318, | |
| "learning_rate": 9.560209424083769e-08, | |
| "loss": 0.7034, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 7.093417644500732, | |
| "learning_rate": 9.557591623036649e-08, | |
| "loss": 0.8337, | |
| "step": 4725 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.7604193687438965, | |
| "learning_rate": 9.554973821989528e-08, | |
| "loss": 0.7934, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.093296051025391, | |
| "learning_rate": 9.552356020942409e-08, | |
| "loss": 0.7055, | |
| "step": 4775 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.788339138031006, | |
| "learning_rate": 9.549738219895288e-08, | |
| "loss": 0.6884, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.2128496170043945, | |
| "learning_rate": 9.547120418848167e-08, | |
| "loss": 0.5722, | |
| "step": 4825 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.026149272918701, | |
| "learning_rate": 9.544502617801047e-08, | |
| "loss": 0.5802, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.711429119110107, | |
| "learning_rate": 9.541884816753927e-08, | |
| "loss": 0.5129, | |
| "step": 4875 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.273972988128662, | |
| "learning_rate": 9.539267015706805e-08, | |
| "loss": 0.4283, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 5.497582912445068, | |
| "learning_rate": 9.536649214659686e-08, | |
| "loss": 0.4075, | |
| "step": 4925 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 5.759308815002441, | |
| "learning_rate": 9.534031413612565e-08, | |
| "loss": 0.4438, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.2068305015563965, | |
| "learning_rate": 9.531413612565445e-08, | |
| "loss": 0.4686, | |
| "step": 4975 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 5.611216068267822, | |
| "learning_rate": 9.528795811518324e-08, | |
| "loss": 0.4714, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 5.2035040855407715, | |
| "learning_rate": 9.526178010471204e-08, | |
| "loss": 0.4933, | |
| "step": 5025 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.796937942504883, | |
| "learning_rate": 9.523560209424082e-08, | |
| "loss": 0.518, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 5.768625259399414, | |
| "learning_rate": 9.520942408376963e-08, | |
| "loss": 0.5254, | |
| "step": 5075 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 5.743659019470215, | |
| "learning_rate": 9.518324607329842e-08, | |
| "loss": 0.5098, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.624993801116943, | |
| "learning_rate": 9.515706806282722e-08, | |
| "loss": 0.4855, | |
| "step": 5125 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.45778751373291, | |
| "learning_rate": 9.513089005235601e-08, | |
| "loss": 0.5223, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 5.325904369354248, | |
| "learning_rate": 9.510471204188482e-08, | |
| "loss": 0.5041, | |
| "step": 5175 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 5.208452224731445, | |
| "learning_rate": 9.507853403141361e-08, | |
| "loss": 0.5157, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 5.317996501922607, | |
| "learning_rate": 9.50523560209424e-08, | |
| "loss": 0.5614, | |
| "step": 5225 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.383024215698242, | |
| "learning_rate": 9.50261780104712e-08, | |
| "loss": 0.5186, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 4.965906620025635, | |
| "learning_rate": 9.499999999999999e-08, | |
| "loss": 0.4887, | |
| "step": 5275 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 5.874698162078857, | |
| "learning_rate": 9.49738219895288e-08, | |
| "loss": 0.4882, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 5.321093559265137, | |
| "learning_rate": 9.494764397905759e-08, | |
| "loss": 0.4929, | |
| "step": 5325 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 6.657257556915283, | |
| "learning_rate": 9.492146596858638e-08, | |
| "loss": 0.4467, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 5.798694133758545, | |
| "learning_rate": 9.489528795811518e-08, | |
| "loss": 0.5027, | |
| "step": 5375 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 6.4486236572265625, | |
| "learning_rate": 9.486910994764398e-08, | |
| "loss": 0.5157, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 5.78603458404541, | |
| "learning_rate": 9.484293193717276e-08, | |
| "loss": 0.568, | |
| "step": 5425 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 6.391395568847656, | |
| "learning_rate": 9.481675392670157e-08, | |
| "loss": 0.5768, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 6.703619003295898, | |
| "learning_rate": 9.479057591623036e-08, | |
| "loss": 0.5885, | |
| "step": 5475 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 6.5529937744140625, | |
| "learning_rate": 9.476439790575916e-08, | |
| "loss": 0.5355, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 5.757615566253662, | |
| "learning_rate": 9.473821989528795e-08, | |
| "loss": 0.4787, | |
| "step": 5525 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 5.5016703605651855, | |
| "learning_rate": 9.471204188481676e-08, | |
| "loss": 0.4435, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 6.2132368087768555, | |
| "learning_rate": 9.468586387434555e-08, | |
| "loss": 0.5157, | |
| "step": 5575 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 5.654526710510254, | |
| "learning_rate": 9.465968586387434e-08, | |
| "loss": 0.5769, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 5.5138139724731445, | |
| "learning_rate": 9.463350785340314e-08, | |
| "loss": 0.5805, | |
| "step": 5625 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 5.938875198364258, | |
| "learning_rate": 9.460732984293194e-08, | |
| "loss": 0.6114, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 4.941293239593506, | |
| "learning_rate": 9.458115183246072e-08, | |
| "loss": 0.5762, | |
| "step": 5675 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 6.395961284637451, | |
| "learning_rate": 9.455497382198953e-08, | |
| "loss": 0.5745, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 5.585537910461426, | |
| "learning_rate": 9.452879581151832e-08, | |
| "loss": 0.5571, | |
| "step": 5725 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 5.933156490325928, | |
| "learning_rate": 9.450261780104711e-08, | |
| "loss": 0.4811, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 7.242075443267822, | |
| "learning_rate": 9.447643979057591e-08, | |
| "loss": 0.464, | |
| "step": 5775 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 5.613156318664551, | |
| "learning_rate": 9.445026178010471e-08, | |
| "loss": 0.5033, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 6.406403541564941, | |
| "learning_rate": 9.44240837696335e-08, | |
| "loss": 0.4344, | |
| "step": 5825 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 4.892160415649414, | |
| "learning_rate": 9.43979057591623e-08, | |
| "loss": 0.4187, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 5.776142120361328, | |
| "learning_rate": 9.43717277486911e-08, | |
| "loss": 0.4124, | |
| "step": 5875 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 5.972835063934326, | |
| "learning_rate": 9.434554973821989e-08, | |
| "loss": 0.4042, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 6.167374610900879, | |
| "learning_rate": 9.431937172774868e-08, | |
| "loss": 0.4208, | |
| "step": 5925 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 6.850512504577637, | |
| "learning_rate": 9.429319371727749e-08, | |
| "loss": 0.3989, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 6.674014091491699, | |
| "learning_rate": 9.426701570680628e-08, | |
| "loss": 0.3725, | |
| "step": 5975 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 7.24482536315918, | |
| "learning_rate": 9.424083769633507e-08, | |
| "loss": 0.376, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 6.7198710441589355, | |
| "learning_rate": 9.421465968586388e-08, | |
| "loss": 0.3846, | |
| "step": 6025 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 6.8929829597473145, | |
| "learning_rate": 9.418848167539266e-08, | |
| "loss": 0.3857, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 8.093165397644043, | |
| "learning_rate": 9.416230366492147e-08, | |
| "loss": 0.3766, | |
| "step": 6075 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 6.204592227935791, | |
| "learning_rate": 9.413612565445026e-08, | |
| "loss": 0.3779, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 5.946498870849609, | |
| "learning_rate": 9.410994764397905e-08, | |
| "loss": 0.3719, | |
| "step": 6125 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 7.825682163238525, | |
| "learning_rate": 9.408376963350785e-08, | |
| "loss": 0.3891, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 7.207645416259766, | |
| "learning_rate": 9.405759162303665e-08, | |
| "loss": 0.3901, | |
| "step": 6175 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 6.809023857116699, | |
| "learning_rate": 9.403141361256543e-08, | |
| "loss": 0.4059, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 6.104794979095459, | |
| "learning_rate": 9.400523560209424e-08, | |
| "loss": 0.4059, | |
| "step": 6225 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 6.525493621826172, | |
| "learning_rate": 9.397905759162303e-08, | |
| "loss": 0.4047, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 6.874316215515137, | |
| "learning_rate": 9.395287958115183e-08, | |
| "loss": 0.514, | |
| "step": 6275 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 5.96618127822876, | |
| "learning_rate": 9.392670157068062e-08, | |
| "loss": 0.4962, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 6.455708026885986, | |
| "learning_rate": 9.390052356020942e-08, | |
| "loss": 0.5045, | |
| "step": 6325 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 6.469492435455322, | |
| "learning_rate": 9.387434554973822e-08, | |
| "loss": 0.8458, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 9.225332260131836, | |
| "learning_rate": 9.384816753926701e-08, | |
| "loss": 0.8835, | |
| "step": 6375 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 6.529109954833984, | |
| "learning_rate": 9.38219895287958e-08, | |
| "loss": 0.7166, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 7.395893096923828, | |
| "learning_rate": 9.379581151832461e-08, | |
| "loss": 0.7075, | |
| "step": 6425 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 8.16038990020752, | |
| "learning_rate": 9.376963350785339e-08, | |
| "loss": 0.9168, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 7.322926044464111, | |
| "learning_rate": 9.37434554973822e-08, | |
| "loss": 0.7444, | |
| "step": 6475 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 7.18267297744751, | |
| "learning_rate": 9.371727748691099e-08, | |
| "loss": 0.6744, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 7.361169815063477, | |
| "learning_rate": 9.369109947643978e-08, | |
| "loss": 0.5583, | |
| "step": 6525 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 8.085954666137695, | |
| "learning_rate": 9.366492146596858e-08, | |
| "loss": 0.5442, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 7.492279052734375, | |
| "learning_rate": 9.363874345549738e-08, | |
| "loss": 0.5684, | |
| "step": 6575 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 6.951526641845703, | |
| "learning_rate": 9.361256544502618e-08, | |
| "loss": 0.5311, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 6.271228790283203, | |
| "learning_rate": 9.358638743455497e-08, | |
| "loss": 0.473, | |
| "step": 6625 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 5.724484443664551, | |
| "learning_rate": 9.356020942408376e-08, | |
| "loss": 0.4471, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 5.2642669677734375, | |
| "learning_rate": 9.353403141361256e-08, | |
| "loss": 0.4008, | |
| "step": 6675 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 5.970279216766357, | |
| "learning_rate": 9.350785340314136e-08, | |
| "loss": 0.3922, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 6.13707160949707, | |
| "learning_rate": 9.348167539267016e-08, | |
| "loss": 0.4149, | |
| "step": 6725 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 5.1920061111450195, | |
| "learning_rate": 9.345549738219895e-08, | |
| "loss": 0.3732, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 6.059106349945068, | |
| "learning_rate": 9.342931937172774e-08, | |
| "loss": 0.3783, | |
| "step": 6775 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 5.317996025085449, | |
| "learning_rate": 9.340314136125655e-08, | |
| "loss": 0.3701, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 5.347188472747803, | |
| "learning_rate": 9.337696335078533e-08, | |
| "loss": 0.3466, | |
| "step": 6825 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 5.118027687072754, | |
| "learning_rate": 9.335078534031414e-08, | |
| "loss": 0.363, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 4.868067264556885, | |
| "learning_rate": 9.332460732984293e-08, | |
| "loss": 0.3696, | |
| "step": 6875 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 5.714309215545654, | |
| "learning_rate": 9.329842931937172e-08, | |
| "loss": 0.3768, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 5.903509616851807, | |
| "learning_rate": 9.327225130890052e-08, | |
| "loss": 0.3625, | |
| "step": 6925 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 5.700974941253662, | |
| "learning_rate": 9.324607329842932e-08, | |
| "loss": 0.3717, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 6.056822776794434, | |
| "learning_rate": 9.32198952879581e-08, | |
| "loss": 0.3601, | |
| "step": 6975 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 6.140659809112549, | |
| "learning_rate": 9.319371727748691e-08, | |
| "loss": 0.3691, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 6.195953369140625, | |
| "learning_rate": 9.31675392670157e-08, | |
| "loss": 0.3632, | |
| "step": 7025 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 4.96120023727417, | |
| "learning_rate": 9.314136125654451e-08, | |
| "loss": 0.3449, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 6.803286075592041, | |
| "learning_rate": 9.311518324607329e-08, | |
| "loss": 0.3601, | |
| "step": 7075 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 5.16037654876709, | |
| "learning_rate": 9.30890052356021e-08, | |
| "loss": 0.3478, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 5.407104969024658, | |
| "learning_rate": 9.306282722513089e-08, | |
| "loss": 0.3498, | |
| "step": 7125 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 5.451097011566162, | |
| "learning_rate": 9.303664921465968e-08, | |
| "loss": 0.3574, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 5.362937927246094, | |
| "learning_rate": 9.301047120418847e-08, | |
| "loss": 0.3477, | |
| "step": 7175 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 5.407390117645264, | |
| "learning_rate": 9.298429319371728e-08, | |
| "loss": 0.3575, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 5.426994800567627, | |
| "learning_rate": 9.295811518324606e-08, | |
| "loss": 0.3454, | |
| "step": 7225 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 6.192265510559082, | |
| "learning_rate": 9.293193717277487e-08, | |
| "loss": 0.36, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 5.969931125640869, | |
| "learning_rate": 9.290575916230366e-08, | |
| "loss": 0.3479, | |
| "step": 7275 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 5.602126121520996, | |
| "learning_rate": 9.287958115183245e-08, | |
| "loss": 0.3527, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 6.191224575042725, | |
| "learning_rate": 9.285340314136125e-08, | |
| "loss": 0.3915, | |
| "step": 7325 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 5.79760217666626, | |
| "learning_rate": 9.282722513089005e-08, | |
| "loss": 0.3922, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 8.519009590148926, | |
| "learning_rate": 9.280104712041885e-08, | |
| "loss": 0.4254, | |
| "step": 7375 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 5.360806941986084, | |
| "learning_rate": 9.277486910994764e-08, | |
| "loss": 0.4391, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 5.539173603057861, | |
| "learning_rate": 9.274869109947645e-08, | |
| "loss": 0.3988, | |
| "step": 7425 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 7.067492961883545, | |
| "learning_rate": 9.272251308900523e-08, | |
| "loss": 0.3779, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 5.135078430175781, | |
| "learning_rate": 9.269633507853403e-08, | |
| "loss": 0.3904, | |
| "step": 7475 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 5.269252300262451, | |
| "learning_rate": 9.267015706806283e-08, | |
| "loss": 0.3597, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 7.094182014465332, | |
| "learning_rate": 9.264397905759162e-08, | |
| "loss": 0.3766, | |
| "step": 7525 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 5.993140697479248, | |
| "learning_rate": 9.261780104712041e-08, | |
| "loss": 0.3377, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 6.09189510345459, | |
| "learning_rate": 9.259162303664922e-08, | |
| "loss": 0.3779, | |
| "step": 7575 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 5.466849327087402, | |
| "learning_rate": 9.2565445026178e-08, | |
| "loss": 0.3602, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 5.297680854797363, | |
| "learning_rate": 9.25392670157068e-08, | |
| "loss": 0.3318, | |
| "step": 7625 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 5.143691539764404, | |
| "learning_rate": 9.25130890052356e-08, | |
| "loss": 0.334, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 5.337982654571533, | |
| "learning_rate": 9.248691099476439e-08, | |
| "loss": 0.3343, | |
| "step": 7675 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 5.539205551147461, | |
| "learning_rate": 9.246073298429318e-08, | |
| "loss": 0.3527, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 5.057958126068115, | |
| "learning_rate": 9.243455497382199e-08, | |
| "loss": 0.3441, | |
| "step": 7725 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 5.447077751159668, | |
| "learning_rate": 9.240837696335077e-08, | |
| "loss": 0.3368, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 5.604344844818115, | |
| "learning_rate": 9.238219895287958e-08, | |
| "loss": 0.3357, | |
| "step": 7775 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 6.193871021270752, | |
| "learning_rate": 9.235602094240837e-08, | |
| "loss": 0.3841, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 5.70228910446167, | |
| "learning_rate": 9.232984293193718e-08, | |
| "loss": 0.3991, | |
| "step": 7825 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 6.8992743492126465, | |
| "learning_rate": 9.230366492146596e-08, | |
| "loss": 0.4435, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 7.393523693084717, | |
| "learning_rate": 9.227748691099476e-08, | |
| "loss": 0.4094, | |
| "step": 7875 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 5.266127586364746, | |
| "learning_rate": 9.225130890052356e-08, | |
| "loss": 0.3806, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 5.960921287536621, | |
| "learning_rate": 9.222513089005235e-08, | |
| "loss": 0.3749, | |
| "step": 7925 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 6.215056896209717, | |
| "learning_rate": 9.219895287958114e-08, | |
| "loss": 0.3956, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 4.992290019989014, | |
| "learning_rate": 9.217277486910995e-08, | |
| "loss": 0.414, | |
| "step": 7975 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 5.627460479736328, | |
| "learning_rate": 9.214659685863874e-08, | |
| "loss": 0.4508, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 7.53002405166626, | |
| "learning_rate": 9.212041884816754e-08, | |
| "loss": 0.4771, | |
| "step": 8025 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 6.5475172996521, | |
| "learning_rate": 9.209424083769633e-08, | |
| "loss": 0.4636, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 6.499009132385254, | |
| "learning_rate": 9.206806282722512e-08, | |
| "loss": 0.5024, | |
| "step": 8075 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 5.928787708282471, | |
| "learning_rate": 9.204188481675393e-08, | |
| "loss": 0.482, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 6.647201061248779, | |
| "learning_rate": 9.201570680628272e-08, | |
| "loss": 0.4901, | |
| "step": 8125 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 7.4282355308532715, | |
| "learning_rate": 9.198952879581152e-08, | |
| "loss": 0.509, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 8.04277229309082, | |
| "learning_rate": 9.196335078534031e-08, | |
| "loss": 0.5403, | |
| "step": 8175 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 8.562540054321289, | |
| "learning_rate": 9.193717277486911e-08, | |
| "loss": 0.5798, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_loss": 0.7510205507278442, | |
| "eval_runtime": 275.1584, | |
| "eval_samples_per_second": 9.827, | |
| "eval_steps_per_second": 1.228, | |
| "eval_wer": 37.65067359962184, | |
| "step": 8222 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 96000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 9223372036854775807, | |
| "save_steps": 4111, | |
| "total_flos": 3.23866357530624e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |