| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.19080659150043366, |
| "eval_steps": 10, |
| "global_step": 550, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.003469210754553339, |
| "grad_norm": 21.349376678466797, |
| "learning_rate": 0.00039272727272727273, |
| "loss": 0.7499, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.003469210754553339, |
| "eval_accuracy": 0.5052905678749084, |
| "eval_loss": 0.8751901388168335, |
| "eval_runtime": 684.1934, |
| "eval_samples_per_second": 8.426, |
| "eval_steps_per_second": 2.108, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.006938421509106678, |
| "grad_norm": 8.41540241241455, |
| "learning_rate": 0.0003854545454545455, |
| "loss": 0.8516, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.006938421509106678, |
| "eval_accuracy": 0.49470946192741394, |
| "eval_loss": 1.0566141605377197, |
| "eval_runtime": 674.6878, |
| "eval_samples_per_second": 8.545, |
| "eval_steps_per_second": 2.137, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.010407632263660017, |
| "grad_norm": 1.6247762441635132, |
| "learning_rate": 0.0003781818181818182, |
| "loss": 0.758, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.010407632263660017, |
| "eval_accuracy": 0.5052905678749084, |
| "eval_loss": 0.6859118938446045, |
| "eval_runtime": 671.8473, |
| "eval_samples_per_second": 8.581, |
| "eval_steps_per_second": 2.146, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.013876843018213356, |
| "grad_norm": 1.565369725227356, |
| "learning_rate": 0.0003709090909090909, |
| "loss": 0.7641, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.013876843018213356, |
| "eval_accuracy": 0.49470946192741394, |
| "eval_loss": 0.7000069618225098, |
| "eval_runtime": 669.6953, |
| "eval_samples_per_second": 8.608, |
| "eval_steps_per_second": 2.153, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.017346053772766695, |
| "grad_norm": 0.3613499104976654, |
| "learning_rate": 0.00036363636363636367, |
| "loss": 0.7147, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.017346053772766695, |
| "eval_accuracy": 0.5052905678749084, |
| "eval_loss": 0.6953065991401672, |
| "eval_runtime": 671.8252, |
| "eval_samples_per_second": 8.581, |
| "eval_steps_per_second": 2.146, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.020815264527320035, |
| "grad_norm": 0.978003203868866, |
| "learning_rate": 0.0003563636363636364, |
| "loss": 0.7172, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.020815264527320035, |
| "eval_accuracy": 0.5052905678749084, |
| "eval_loss": 0.6954382658004761, |
| "eval_runtime": 667.8042, |
| "eval_samples_per_second": 8.633, |
| "eval_steps_per_second": 2.159, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.024284475281873375, |
| "grad_norm": 1.575156807899475, |
| "learning_rate": 0.0003490909090909091, |
| "loss": 0.6646, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.024284475281873375, |
| "eval_accuracy": 0.6794449090957642, |
| "eval_loss": 0.6547678112983704, |
| "eval_runtime": 671.8486, |
| "eval_samples_per_second": 8.581, |
| "eval_steps_per_second": 2.146, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.027753686036426712, |
| "grad_norm": 1.7995883226394653, |
| "learning_rate": 0.0003418181818181818, |
| "loss": 0.6741, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.027753686036426712, |
| "eval_accuracy": 0.49470946192741394, |
| "eval_loss": 0.7157939076423645, |
| "eval_runtime": 669.7623, |
| "eval_samples_per_second": 8.608, |
| "eval_steps_per_second": 2.153, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.031222896790980052, |
| "grad_norm": 1.0423110723495483, |
| "learning_rate": 0.00033454545454545456, |
| "loss": 0.6865, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.031222896790980052, |
| "eval_accuracy": 0.49470946192741394, |
| "eval_loss": 0.7069694399833679, |
| "eval_runtime": 671.5859, |
| "eval_samples_per_second": 8.584, |
| "eval_steps_per_second": 2.147, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.03469210754553339, |
| "grad_norm": 0.3134685158729553, |
| "learning_rate": 0.0003272727272727273, |
| "loss": 0.6556, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.03469210754553339, |
| "eval_accuracy": 0.5052905678749084, |
| "eval_loss": 0.663440465927124, |
| "eval_runtime": 674.0916, |
| "eval_samples_per_second": 8.552, |
| "eval_steps_per_second": 2.139, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.03816131830008673, |
| "grad_norm": 0.2366938591003418, |
| "learning_rate": 0.00032, |
| "loss": 0.6998, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.03816131830008673, |
| "eval_accuracy": 0.4983521103858948, |
| "eval_loss": 0.7156180739402771, |
| "eval_runtime": 669.1584, |
| "eval_samples_per_second": 8.615, |
| "eval_steps_per_second": 2.155, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.04163052905464007, |
| "grad_norm": 0.8924188017845154, |
| "learning_rate": 0.00031272727272727273, |
| "loss": 0.673, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.04163052905464007, |
| "eval_accuracy": 0.7351257801055908, |
| "eval_loss": 0.5488757491111755, |
| "eval_runtime": 674.8075, |
| "eval_samples_per_second": 8.543, |
| "eval_steps_per_second": 2.137, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.045099739809193407, |
| "grad_norm": 1.685569405555725, |
| "learning_rate": 0.0003054545454545455, |
| "loss": 0.6757, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.045099739809193407, |
| "eval_accuracy": 0.6183868050575256, |
| "eval_loss": 0.6621639132499695, |
| "eval_runtime": 676.2118, |
| "eval_samples_per_second": 8.525, |
| "eval_steps_per_second": 2.132, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.04856895056374675, |
| "grad_norm": 0.9425109028816223, |
| "learning_rate": 0.0002981818181818182, |
| "loss": 0.7129, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.04856895056374675, |
| "eval_accuracy": 0.7564613819122314, |
| "eval_loss": 0.5819193720817566, |
| "eval_runtime": 671.3349, |
| "eval_samples_per_second": 8.587, |
| "eval_steps_per_second": 2.148, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.05203816131830009, |
| "grad_norm": 1.6216133832931519, |
| "learning_rate": 0.0002909090909090909, |
| "loss": 0.5978, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.05203816131830009, |
| "eval_accuracy": 0.5904596447944641, |
| "eval_loss": 0.6498456597328186, |
| "eval_runtime": 674.7362, |
| "eval_samples_per_second": 8.544, |
| "eval_steps_per_second": 2.137, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.055507372072853424, |
| "grad_norm": 0.8777738213539124, |
| "learning_rate": 0.0002836363636363637, |
| "loss": 0.6108, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.055507372072853424, |
| "eval_accuracy": 0.6152645349502563, |
| "eval_loss": 0.6110721230506897, |
| "eval_runtime": 672.1883, |
| "eval_samples_per_second": 8.576, |
| "eval_steps_per_second": 2.145, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.05897658282740677, |
| "grad_norm": 0.8297567367553711, |
| "learning_rate": 0.0002763636363636364, |
| "loss": 0.4967, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.05897658282740677, |
| "eval_accuracy": 0.7484822273254395, |
| "eval_loss": 0.5142812728881836, |
| "eval_runtime": 669.9409, |
| "eval_samples_per_second": 8.605, |
| "eval_steps_per_second": 2.152, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.062445793581960105, |
| "grad_norm": 0.6402560472488403, |
| "learning_rate": 0.0002690909090909091, |
| "loss": 0.3308, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.062445793581960105, |
| "eval_accuracy": 0.7941023707389832, |
| "eval_loss": 0.5033007264137268, |
| "eval_runtime": 674.7774, |
| "eval_samples_per_second": 8.544, |
| "eval_steps_per_second": 2.137, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.06591500433651344, |
| "grad_norm": 3.098665475845337, |
| "learning_rate": 0.00026181818181818185, |
| "loss": 0.5915, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.06591500433651344, |
| "eval_accuracy": 0.7613183259963989, |
| "eval_loss": 0.5313282012939453, |
| "eval_runtime": 674.9752, |
| "eval_samples_per_second": 8.541, |
| "eval_steps_per_second": 2.136, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.06938421509106678, |
| "grad_norm": 2.2409965991973877, |
| "learning_rate": 0.00025454545454545456, |
| "loss": 0.502, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.06938421509106678, |
| "eval_accuracy": 0.8124891519546509, |
| "eval_loss": 0.42527222633361816, |
| "eval_runtime": 675.4293, |
| "eval_samples_per_second": 8.535, |
| "eval_steps_per_second": 2.135, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.07285342584562012, |
| "grad_norm": 0.5789617896080017, |
| "learning_rate": 0.00024727272727272727, |
| "loss": 0.5021, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.07285342584562012, |
| "eval_accuracy": 0.8084995746612549, |
| "eval_loss": 0.4414755403995514, |
| "eval_runtime": 675.3173, |
| "eval_samples_per_second": 8.537, |
| "eval_steps_per_second": 2.135, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.07632263660017347, |
| "grad_norm": 0.26364243030548096, |
| "learning_rate": 0.00024, |
| "loss": 0.4129, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.07632263660017347, |
| "eval_accuracy": 0.7996530532836914, |
| "eval_loss": 0.4404090344905853, |
| "eval_runtime": 669.8525, |
| "eval_samples_per_second": 8.606, |
| "eval_steps_per_second": 2.153, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.0797918473547268, |
| "grad_norm": 1.726539134979248, |
| "learning_rate": 0.00023272727272727271, |
| "loss": 0.4352, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.0797918473547268, |
| "eval_accuracy": 0.7849089503288269, |
| "eval_loss": 0.49116840958595276, |
| "eval_runtime": 669.7677, |
| "eval_samples_per_second": 8.607, |
| "eval_steps_per_second": 2.153, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.08326105810928014, |
| "grad_norm": 0.7366547584533691, |
| "learning_rate": 0.00022545454545454545, |
| "loss": 0.6708, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.08326105810928014, |
| "eval_accuracy": 0.8705984354019165, |
| "eval_loss": 0.5467382073402405, |
| "eval_runtime": 667.648, |
| "eval_samples_per_second": 8.635, |
| "eval_steps_per_second": 2.16, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.08673026886383348, |
| "grad_norm": 4.697054862976074, |
| "learning_rate": 0.00021818181818181818, |
| "loss": 0.6336, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.08673026886383348, |
| "eval_accuracy": 0.8832610845565796, |
| "eval_loss": 0.3458012044429779, |
| "eval_runtime": 666.8811, |
| "eval_samples_per_second": 8.645, |
| "eval_steps_per_second": 2.162, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.09019947961838681, |
| "grad_norm": 0.3141838014125824, |
| "learning_rate": 0.0002109090909090909, |
| "loss": 0.4794, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.09019947961838681, |
| "eval_accuracy": 0.8700780868530273, |
| "eval_loss": 0.327676922082901, |
| "eval_runtime": 671.2259, |
| "eval_samples_per_second": 8.589, |
| "eval_steps_per_second": 2.148, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.09366869037294015, |
| "grad_norm": 4.525466442108154, |
| "learning_rate": 0.00020363636363636363, |
| "loss": 0.4008, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.09366869037294015, |
| "eval_accuracy": 0.8185603022575378, |
| "eval_loss": 0.5368410348892212, |
| "eval_runtime": 668.9566, |
| "eval_samples_per_second": 8.618, |
| "eval_steps_per_second": 2.156, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.0971379011274935, |
| "grad_norm": 3.764827251434326, |
| "learning_rate": 0.00019636363636363636, |
| "loss": 0.3972, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.0971379011274935, |
| "eval_accuracy": 0.9283608198165894, |
| "eval_loss": 0.20311547815799713, |
| "eval_runtime": 673.3421, |
| "eval_samples_per_second": 8.562, |
| "eval_steps_per_second": 2.142, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.10060711188204684, |
| "grad_norm": 0.8228343725204468, |
| "learning_rate": 0.0001890909090909091, |
| "loss": 0.2846, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.10060711188204684, |
| "eval_accuracy": 0.960797905921936, |
| "eval_loss": 0.13612762093544006, |
| "eval_runtime": 673.4488, |
| "eval_samples_per_second": 8.56, |
| "eval_steps_per_second": 2.141, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.10407632263660017, |
| "grad_norm": 2.2025303840637207, |
| "learning_rate": 0.00018181818181818183, |
| "loss": 0.2268, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.10407632263660017, |
| "eval_accuracy": 0.957328736782074, |
| "eval_loss": 0.12012926489114761, |
| "eval_runtime": 673.2169, |
| "eval_samples_per_second": 8.563, |
| "eval_steps_per_second": 2.142, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.10754553339115351, |
| "grad_norm": 6.165393829345703, |
| "learning_rate": 0.00017454545454545454, |
| "loss": 0.283, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.10754553339115351, |
| "eval_accuracy": 0.8777103424072266, |
| "eval_loss": 0.3603801131248474, |
| "eval_runtime": 677.3459, |
| "eval_samples_per_second": 8.511, |
| "eval_steps_per_second": 2.129, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.11101474414570685, |
| "grad_norm": 1.3905977010726929, |
| "learning_rate": 0.00016727272727272728, |
| "loss": 0.5713, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.11101474414570685, |
| "eval_accuracy": 0.9710320830345154, |
| "eval_loss": 0.1216258704662323, |
| "eval_runtime": 674.6755, |
| "eval_samples_per_second": 8.545, |
| "eval_steps_per_second": 2.137, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.11448395490026018, |
| "grad_norm": 0.8541626930236816, |
| "learning_rate": 0.00016, |
| "loss": 0.1798, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.11448395490026018, |
| "eval_accuracy": 0.96131831407547, |
| "eval_loss": 0.179531529545784, |
| "eval_runtime": 675.7124, |
| "eval_samples_per_second": 8.532, |
| "eval_steps_per_second": 2.134, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.11795316565481354, |
| "grad_norm": 0.44364920258522034, |
| "learning_rate": 0.00015272727272727275, |
| "loss": 0.1716, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.11795316565481354, |
| "eval_accuracy": 0.9774501323699951, |
| "eval_loss": 0.10663458704948425, |
| "eval_runtime": 678.1501, |
| "eval_samples_per_second": 8.501, |
| "eval_steps_per_second": 2.126, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.12142237640936687, |
| "grad_norm": 0.32369139790534973, |
| "learning_rate": 0.00014545454545454546, |
| "loss": 0.1507, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.12142237640936687, |
| "eval_accuracy": 0.9649609923362732, |
| "eval_loss": 0.1186453253030777, |
| "eval_runtime": 679.7025, |
| "eval_samples_per_second": 8.482, |
| "eval_steps_per_second": 2.122, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.12489158716392021, |
| "grad_norm": 0.11126814782619476, |
| "learning_rate": 0.0001381818181818182, |
| "loss": 0.1163, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.12489158716392021, |
| "eval_accuracy": 0.9701647758483887, |
| "eval_loss": 0.0995645821094513, |
| "eval_runtime": 680.1145, |
| "eval_samples_per_second": 8.477, |
| "eval_steps_per_second": 2.12, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.12836079791847355, |
| "grad_norm": 0.15057525038719177, |
| "learning_rate": 0.00013090909090909093, |
| "loss": 0.0297, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.12836079791847355, |
| "eval_accuracy": 0.9784908890724182, |
| "eval_loss": 0.07483232766389847, |
| "eval_runtime": 676.5662, |
| "eval_samples_per_second": 8.521, |
| "eval_steps_per_second": 2.131, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.13183000867302688, |
| "grad_norm": 0.17313919961452484, |
| "learning_rate": 0.00012363636363636364, |
| "loss": 0.0742, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.13183000867302688, |
| "eval_accuracy": 0.9791847467422485, |
| "eval_loss": 0.08153310418128967, |
| "eval_runtime": 678.0332, |
| "eval_samples_per_second": 8.503, |
| "eval_steps_per_second": 2.127, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.13529921942758022, |
| "grad_norm": 0.024125000461935997, |
| "learning_rate": 0.00011636363636363636, |
| "loss": 0.0745, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.13529921942758022, |
| "eval_accuracy": 0.9774501323699951, |
| "eval_loss": 0.10447587072849274, |
| "eval_runtime": 682.8191, |
| "eval_samples_per_second": 8.443, |
| "eval_steps_per_second": 2.112, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.13876843018213356, |
| "grad_norm": 0.07723889499902725, |
| "learning_rate": 0.00010909090909090909, |
| "loss": 0.2459, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.13876843018213356, |
| "eval_accuracy": 0.9791847467422485, |
| "eval_loss": 0.09286625683307648, |
| "eval_runtime": 680.3478, |
| "eval_samples_per_second": 8.474, |
| "eval_steps_per_second": 2.12, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1422376409366869, |
| "grad_norm": 0.28492438793182373, |
| "learning_rate": 0.00010181818181818181, |
| "loss": 0.0129, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.1422376409366869, |
| "eval_accuracy": 0.9798786044120789, |
| "eval_loss": 0.08954403549432755, |
| "eval_runtime": 675.1468, |
| "eval_samples_per_second": 8.539, |
| "eval_steps_per_second": 2.136, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.14570685169124023, |
| "grad_norm": 0.06717664748430252, |
| "learning_rate": 9.454545454545455e-05, |
| "loss": 0.027, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.14570685169124023, |
| "eval_accuracy": 0.9805724024772644, |
| "eval_loss": 0.06631265580654144, |
| "eval_runtime": 677.1592, |
| "eval_samples_per_second": 8.514, |
| "eval_steps_per_second": 2.129, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.1491760624457936, |
| "grad_norm": 0.09406604617834091, |
| "learning_rate": 8.727272727272727e-05, |
| "loss": 0.0684, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.1491760624457936, |
| "eval_accuracy": 0.9798786044120789, |
| "eval_loss": 0.06788154691457748, |
| "eval_runtime": 678.5247, |
| "eval_samples_per_second": 8.496, |
| "eval_steps_per_second": 2.125, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.15264527320034693, |
| "grad_norm": 0.06969747692346573, |
| "learning_rate": 8e-05, |
| "loss": 0.3329, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.15264527320034693, |
| "eval_accuracy": 0.9781439900398254, |
| "eval_loss": 0.06968674063682556, |
| "eval_runtime": 684.4687, |
| "eval_samples_per_second": 8.423, |
| "eval_steps_per_second": 2.107, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.15611448395490027, |
| "grad_norm": 0.11187425255775452, |
| "learning_rate": 7.272727272727273e-05, |
| "loss": 0.2407, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.15611448395490027, |
| "eval_accuracy": 0.9810928106307983, |
| "eval_loss": 0.06031050533056259, |
| "eval_runtime": 681.7986, |
| "eval_samples_per_second": 8.456, |
| "eval_steps_per_second": 2.115, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.1595836947094536, |
| "grad_norm": 0.10134902596473694, |
| "learning_rate": 6.545454545454546e-05, |
| "loss": 0.131, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.1595836947094536, |
| "eval_accuracy": 0.9805724024772644, |
| "eval_loss": 0.08596213907003403, |
| "eval_runtime": 680.6363, |
| "eval_samples_per_second": 8.47, |
| "eval_steps_per_second": 2.119, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.16305290546400694, |
| "grad_norm": 3.860629081726074, |
| "learning_rate": 5.818181818181818e-05, |
| "loss": 0.2148, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.16305290546400694, |
| "eval_accuracy": 0.9774501323699951, |
| "eval_loss": 0.11235988885164261, |
| "eval_runtime": 686.097, |
| "eval_samples_per_second": 8.403, |
| "eval_steps_per_second": 2.102, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.16652211621856028, |
| "grad_norm": 2.909680128097534, |
| "learning_rate": 5.090909090909091e-05, |
| "loss": 0.1593, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.16652211621856028, |
| "eval_accuracy": 0.985949695110321, |
| "eval_loss": 0.06438818573951721, |
| "eval_runtime": 680.9029, |
| "eval_samples_per_second": 8.467, |
| "eval_steps_per_second": 2.118, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.16999132697311362, |
| "grad_norm": 0.10226955264806747, |
| "learning_rate": 4.3636363636363636e-05, |
| "loss": 0.0383, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.16999132697311362, |
| "eval_accuracy": 0.9862965941429138, |
| "eval_loss": 0.050194237381219864, |
| "eval_runtime": 688.0192, |
| "eval_samples_per_second": 8.379, |
| "eval_steps_per_second": 2.096, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.17346053772766695, |
| "grad_norm": 0.13847249746322632, |
| "learning_rate": 3.6363636363636364e-05, |
| "loss": 0.0461, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.17346053772766695, |
| "eval_accuracy": 0.9845620393753052, |
| "eval_loss": 0.04632818326354027, |
| "eval_runtime": 686.0902, |
| "eval_samples_per_second": 8.403, |
| "eval_steps_per_second": 2.102, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.1769297484822203, |
| "grad_norm": 0.061877623200416565, |
| "learning_rate": 2.909090909090909e-05, |
| "loss": 0.0163, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.1769297484822203, |
| "eval_accuracy": 0.9830008745193481, |
| "eval_loss": 0.05002644658088684, |
| "eval_runtime": 683.5794, |
| "eval_samples_per_second": 8.434, |
| "eval_steps_per_second": 2.109, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.18039895923677363, |
| "grad_norm": 0.07484019547700882, |
| "learning_rate": 2.1818181818181818e-05, |
| "loss": 0.0373, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.18039895923677363, |
| "eval_accuracy": 0.9835212230682373, |
| "eval_loss": 0.04930433630943298, |
| "eval_runtime": 685.8885, |
| "eval_samples_per_second": 8.405, |
| "eval_steps_per_second": 2.102, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.18386816999132696, |
| "grad_norm": 0.07109741866588593, |
| "learning_rate": 1.4545454545454545e-05, |
| "loss": 0.0101, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.18386816999132696, |
| "eval_accuracy": 0.9845620393753052, |
| "eval_loss": 0.04636286944150925, |
| "eval_runtime": 684.6998, |
| "eval_samples_per_second": 8.42, |
| "eval_steps_per_second": 2.106, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.1873373807458803, |
| "grad_norm": 3.584275484085083, |
| "learning_rate": 7.272727272727272e-06, |
| "loss": 0.1887, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.1873373807458803, |
| "eval_accuracy": 0.986470103263855, |
| "eval_loss": 0.04420817643404007, |
| "eval_runtime": 684.4017, |
| "eval_samples_per_second": 8.423, |
| "eval_steps_per_second": 2.107, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.19080659150043366, |
| "grad_norm": 0.20406727492809296, |
| "learning_rate": 0.0, |
| "loss": 0.0363, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.19080659150043366, |
| "eval_accuracy": 0.9876843094825745, |
| "eval_loss": 0.043817631900310516, |
| "eval_runtime": 685.8982, |
| "eval_samples_per_second": 8.405, |
| "eval_steps_per_second": 2.102, |
| "step": 550 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 550, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 10, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.214558191809199e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|