[ { "epoch": 0.08, "grad_norm": 0.28229808807373047, "learning_rate": 0.0003304347826086957, "loss": 1.0758, "step": 20 }, { "epoch": 0.16, "grad_norm": 0.3910837173461914, "learning_rate": 0.0003995221430894122, "loss": 0.9148, "step": 40 }, { "epoch": 0.24, "grad_norm": 0.37663960456848145, "learning_rate": 0.00039758476229578745, "loss": 0.8888, "step": 60 }, { "epoch": 0.32, "grad_norm": 0.2441498339176178, "learning_rate": 0.0003941724426452488, "loss": 0.8392, "step": 80 }, { "epoch": 0.4, "grad_norm": 0.32805338501930237, "learning_rate": 0.0003893106565618147, "loss": 0.8178, "step": 100 }, { "epoch": 0.4, "eval_loss": 0.8086594939231873, "eval_runtime": 19.7643, "eval_samples_per_second": 25.298, "eval_steps_per_second": 3.188, "step": 100 }, { "epoch": 0.48, "grad_norm": 0.32320863008499146, "learning_rate": 0.0003830356965061241, "loss": 0.7901, "step": 120 }, { "epoch": 0.56, "grad_norm": 0.35026517510414124, "learning_rate": 0.0003753944040579839, "loss": 0.7661, "step": 140 }, { "epoch": 0.64, "grad_norm": 0.3722197115421295, "learning_rate": 0.00036644382025141837, "loss": 0.7126, "step": 160 }, { "epoch": 0.72, "grad_norm": 0.4616721570491791, "learning_rate": 0.0003562507597724135, "loss": 0.6517, "step": 180 }, { "epoch": 0.8, "grad_norm": 0.40086525678634644, "learning_rate": 0.0003448913121979015, "loss": 0.6215, "step": 200 }, { "epoch": 0.8, "eval_loss": 0.6392109394073486, "eval_runtime": 19.766, "eval_samples_per_second": 25.296, "eval_steps_per_second": 3.187, "step": 200 }, { "epoch": 0.88, "grad_norm": 0.4292043447494507, "learning_rate": 0.00033245027399915895, "loss": 0.5898, "step": 220 }, { "epoch": 0.96, "grad_norm": 0.5536438226699829, "learning_rate": 0.0003190205155496219, "loss": 0.5709, "step": 240 }, { "epoch": 1.04, "grad_norm": 0.6616698503494263, "learning_rate": 0.00030470228786230405, "loss": 0.4551, "step": 260 }, { "epoch": 1.12, "grad_norm": 0.5763731598854065, "learning_rate": 0.0002896024742319127, "loss": 0.3672, "step": 280 }, { "epoch": 1.2, "grad_norm": 0.5821401476860046, "learning_rate": 0.0002738337923680367, "loss": 0.3648, "step": 300 }, { "epoch": 1.2, "eval_loss": 0.4687094986438751, "eval_runtime": 19.7698, "eval_samples_per_second": 25.291, "eval_steps_per_second": 3.187, "step": 300 }, { "epoch": 1.28, "grad_norm": 0.47784799337387085, "learning_rate": 0.00025751395297535327, "loss": 0.3424, "step": 320 }, { "epoch": 1.3599999999999999, "grad_norm": 0.5968295931816101, "learning_rate": 0.00024076478106192076, "loss": 0.3253, "step": 340 }, { "epoch": 1.44, "grad_norm": 0.5316683053970337, "learning_rate": 0.00022371130653484945, "loss": 0.2952, "step": 360 }, { "epoch": 1.52, "grad_norm": 0.4502660930156708, "learning_rate": 0.0002064808308719107, "loss": 0.2728, "step": 380 }, { "epoch": 1.6, "grad_norm": 0.512885570526123, "learning_rate": 0.00018920197683623203, "loss": 0.2615, "step": 400 }, { "epoch": 1.6, "eval_loss": 0.3294866383075714, "eval_runtime": 19.7741, "eval_samples_per_second": 25.286, "eval_steps_per_second": 3.186, "step": 400 }, { "epoch": 1.6800000000000002, "grad_norm": 0.510636568069458, "learning_rate": 0.00017200372832780684, "loss": 0.2468, "step": 420 }, { "epoch": 1.76, "grad_norm": 0.38415294885635376, "learning_rate": 0.00015501446753917467, "loss": 0.2153, "step": 440 }, { "epoch": 1.8399999999999999, "grad_norm": 0.4481910765171051, "learning_rate": 0.00013836101660275217, "loss": 0.1996, "step": 460 }, { "epoch": 1.92, "grad_norm": 0.516516387462616, "learning_rate": 0.000122167690883765, "loss": 0.1803, "step": 480 }, { "epoch": 2.0, "grad_norm": 0.5795238018035889, "learning_rate": 0.00010655537098579868, "loss": 0.1915, "step": 500 }, { "epoch": 2.0, "eval_loss": 0.2246081531047821, "eval_runtime": 19.7628, "eval_samples_per_second": 25.3, "eval_steps_per_second": 3.188, "step": 500 }, { "epoch": 2.08, "grad_norm": 0.46038225293159485, "learning_rate": 9.164060039629896e-05, "loss": 0.1179, "step": 520 }, { "epoch": 2.16, "grad_norm": 0.43895432353019714, "learning_rate": 7.753471550795519e-05, "loss": 0.1153, "step": 540 }, { "epoch": 2.24, "grad_norm": 0.7098507285118103, "learning_rate": 6.434301451021892e-05, "loss": 0.1261, "step": 560 }, { "epoch": 2.32, "grad_norm": 0.3989202082157135, "learning_rate": 5.216397135505024e-05, "loss": 0.1121, "step": 580 }, { "epoch": 2.4, "grad_norm": 0.525729775428772, "learning_rate": 4.108850066451255e-05, "loss": 0.1186, "step": 600 }, { "epoch": 2.4, "eval_loss": 0.19236330687999725, "eval_runtime": 19.7599, "eval_samples_per_second": 25.304, "eval_steps_per_second": 3.188, "step": 600 }, { "epoch": 2.48, "grad_norm": 0.33528250455856323, "learning_rate": 3.1199279067563706e-05, "loss": 0.1075, "step": 620 }, { "epoch": 2.56, "grad_norm": 0.35482296347618103, "learning_rate": 2.2570128032157568e-05, "loss": 0.1106, "step": 640 }, { "epoch": 2.64, "grad_norm": 0.2427404522895813, "learning_rate": 1.526546279971466e-05, "loss": 0.0932, "step": 660 }, { "epoch": 2.7199999999999998, "grad_norm": 0.27014562487602234, "learning_rate": 9.339811535579768e-06, "loss": 0.1005, "step": 680 }, { "epoch": 2.8, "grad_norm": 0.3576093018054962, "learning_rate": 4.837408284931444e-06, "loss": 0.0948, "step": 700 }, { "epoch": 2.8, "eval_loss": 0.17951039969921112, "eval_runtime": 19.7723, "eval_samples_per_second": 25.288, "eval_steps_per_second": 3.186, "step": 700 }, { "epoch": 2.88, "grad_norm": 0.3262103199958801, "learning_rate": 1.7918627726630777e-06, "loss": 0.0939, "step": 720 }, { "epoch": 2.96, "grad_norm": 0.2655605971813202, "learning_rate": 2.259095121265542e-07, "loss": 0.0953, "step": 740 } ]