| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 5000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002, |
| "grad_norm": 0.1880519837141037, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 1.5832, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.004, |
| "grad_norm": 0.4404986798763275, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 2.834, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.006, |
| "grad_norm": 0.8119221925735474, |
| "learning_rate": 3e-06, |
| "loss": 2.3087, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.008, |
| "grad_norm": 1.038776159286499, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 1.8821, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 0.41819652915000916, |
| "learning_rate": 5e-06, |
| "loss": 4.2502, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.012, |
| "grad_norm": 0.6414370536804199, |
| "learning_rate": 6e-06, |
| "loss": 3.6424, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.014, |
| "grad_norm": 1.6446870565414429, |
| "learning_rate": 7.000000000000001e-06, |
| "loss": 3.1319, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 3.257716655731201, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 1.7977, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.018, |
| "grad_norm": 0.3212912678718567, |
| "learning_rate": 9e-06, |
| "loss": 2.3069, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 0.817084014415741, |
| "learning_rate": 1e-05, |
| "loss": 1.9409, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.022, |
| "grad_norm": 6.956598281860352, |
| "learning_rate": 1.1000000000000001e-05, |
| "loss": 3.9241, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.024, |
| "grad_norm": 0.4231731593608856, |
| "learning_rate": 1.2e-05, |
| "loss": 1.6482, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.026, |
| "grad_norm": 2.118069887161255, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 3.1564, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.028, |
| "grad_norm": 0.23121468722820282, |
| "learning_rate": 1.4000000000000001e-05, |
| "loss": 2.426, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 0.7299257516860962, |
| "learning_rate": 1.5e-05, |
| "loss": 1.3026, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 2.510737895965576, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 1.1178, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.034, |
| "grad_norm": 2.180232524871826, |
| "learning_rate": 1.7000000000000003e-05, |
| "loss": 4.0788, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.036, |
| "grad_norm": 1.0466824769973755, |
| "learning_rate": 1.8e-05, |
| "loss": 1.6576, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.038, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9e-05, |
| "loss": 1.4615, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 0.35433244705200195, |
| "learning_rate": 2e-05, |
| "loss": 2.2587, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.042, |
| "grad_norm": 0.0, |
| "learning_rate": 2.1e-05, |
| "loss": 2.3845, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.044, |
| "grad_norm": 1.116017460823059, |
| "learning_rate": 2.2000000000000003e-05, |
| "loss": 1.3799, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.046, |
| "grad_norm": 0.6432040333747864, |
| "learning_rate": 2.3000000000000003e-05, |
| "loss": 1.4828, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 0.34233808517456055, |
| "learning_rate": 2.4e-05, |
| "loss": 2.1998, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.6910992860794067, |
| "learning_rate": 2.5e-05, |
| "loss": 2.2553, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.052, |
| "grad_norm": 0.4735572338104248, |
| "learning_rate": 2.6000000000000002e-05, |
| "loss": 1.2649, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.054, |
| "grad_norm": 0.5137017965316772, |
| "learning_rate": 2.7000000000000002e-05, |
| "loss": 0.943, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.056, |
| "grad_norm": 1.4285893440246582, |
| "learning_rate": 2.8000000000000003e-05, |
| "loss": 0.8752, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.058, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9e-05, |
| "loss": 1.1963, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 0.4973921477794647, |
| "learning_rate": 3e-05, |
| "loss": 1.115, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.062, |
| "grad_norm": 0.6414042115211487, |
| "learning_rate": 3.1e-05, |
| "loss": 2.2816, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 0.0, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 0.4115, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.066, |
| "grad_norm": 0.5332188010215759, |
| "learning_rate": 3.3e-05, |
| "loss": 1.3494, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.068, |
| "grad_norm": 3.6028225421905518, |
| "learning_rate": 3.4000000000000007e-05, |
| "loss": 1.6002, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 0.7303054928779602, |
| "learning_rate": 3.5e-05, |
| "loss": 1.2186, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.072, |
| "grad_norm": 0.922789990901947, |
| "learning_rate": 3.6e-05, |
| "loss": 0.792, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.074, |
| "grad_norm": 1.9681700468063354, |
| "learning_rate": 3.7e-05, |
| "loss": 2.0036, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.076, |
| "grad_norm": 0.6458554863929749, |
| "learning_rate": 3.8e-05, |
| "loss": 1.4322, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.078, |
| "grad_norm": 11.577519416809082, |
| "learning_rate": 3.9000000000000006e-05, |
| "loss": 3.1319, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 4.925251483917236, |
| "learning_rate": 4e-05, |
| "loss": 1.1777, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.082, |
| "grad_norm": 0.41002562642097473, |
| "learning_rate": 4.1e-05, |
| "loss": 0.7938, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.084, |
| "grad_norm": 1.1357100009918213, |
| "learning_rate": 4.2e-05, |
| "loss": 1.0942, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.086, |
| "grad_norm": 0.2831510603427887, |
| "learning_rate": 4.3e-05, |
| "loss": 1.276, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.088, |
| "grad_norm": 0.47275394201278687, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 2.1237, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 0.25966623425483704, |
| "learning_rate": 4.5e-05, |
| "loss": 0.9086, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.092, |
| "grad_norm": 3.9267749786376953, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 2.0123, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.094, |
| "grad_norm": 0.0, |
| "learning_rate": 4.7e-05, |
| "loss": 0.4836, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 3.281909942626953, |
| "learning_rate": 4.8e-05, |
| "loss": 1.2907, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.098, |
| "grad_norm": 16.264896392822266, |
| "learning_rate": 4.9e-05, |
| "loss": 1.8895, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.467863917350769, |
| "learning_rate": 5e-05, |
| "loss": 1.4207, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.102, |
| "grad_norm": 4.497130870819092, |
| "learning_rate": 4.999939076763487e-05, |
| "loss": 1.5309, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.104, |
| "grad_norm": 0.6231054663658142, |
| "learning_rate": 4.999756310023261e-05, |
| "loss": 2.412, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.106, |
| "grad_norm": 2.1034438610076904, |
| "learning_rate": 4.999451708687114e-05, |
| "loss": 1.599, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.108, |
| "grad_norm": 1.5209999084472656, |
| "learning_rate": 4.999025287600886e-05, |
| "loss": 0.9927, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 0.0, |
| "learning_rate": 4.99847706754774e-05, |
| "loss": 0.9149, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 2.1328773498535156, |
| "learning_rate": 4.997807075247146e-05, |
| "loss": 1.8017, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.114, |
| "grad_norm": 1.0151857137680054, |
| "learning_rate": 4.997015343353585e-05, |
| "loss": 0.948, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.116, |
| "grad_norm": 0.2761138081550598, |
| "learning_rate": 4.996101910454953e-05, |
| "loss": 0.6686, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.118, |
| "grad_norm": 4.962229251861572, |
| "learning_rate": 4.995066821070679e-05, |
| "loss": 1.2988, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 1.0377299785614014, |
| "learning_rate": 4.993910125649561e-05, |
| "loss": 1.6317, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.122, |
| "grad_norm": 3.0478384494781494, |
| "learning_rate": 4.992631880567301e-05, |
| "loss": 1.2735, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.124, |
| "grad_norm": 1.3416221141815186, |
| "learning_rate": 4.991232148123761e-05, |
| "loss": 0.9292, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.126, |
| "grad_norm": 0.15793165564537048, |
| "learning_rate": 4.989710996539926e-05, |
| "loss": 1.7753, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 8.417360305786133, |
| "learning_rate": 4.988068499954578e-05, |
| "loss": 1.3691, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 2.0238218307495117, |
| "learning_rate": 4.9863047384206835e-05, |
| "loss": 1.1858, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.132, |
| "grad_norm": 0.5965118408203125, |
| "learning_rate": 4.984419797901491e-05, |
| "loss": 0.4702, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.134, |
| "grad_norm": 0.0, |
| "learning_rate": 4.982413770266342e-05, |
| "loss": 1.8529, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.136, |
| "grad_norm": 5.714442729949951, |
| "learning_rate": 4.980286753286195e-05, |
| "loss": 1.3363, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.138, |
| "grad_norm": 1.3500193357467651, |
| "learning_rate": 4.978038850628854e-05, |
| "loss": 1.3565, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 12.879951477050781, |
| "learning_rate": 4.975670171853926e-05, |
| "loss": 1.7252, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.142, |
| "grad_norm": 4.59855318069458, |
| "learning_rate": 4.9731808324074717e-05, |
| "loss": 1.3579, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 2.8700144290924072, |
| "learning_rate": 4.9705709536163824e-05, |
| "loss": 1.9292, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.146, |
| "grad_norm": 3.4623522758483887, |
| "learning_rate": 4.96784066268247e-05, |
| "loss": 0.6254, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.148, |
| "grad_norm": 8.593720436096191, |
| "learning_rate": 4.964990092676263e-05, |
| "loss": 1.6293, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 9.260454177856445, |
| "learning_rate": 4.962019382530521e-05, |
| "loss": 1.3849, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.152, |
| "grad_norm": 0.13983963429927826, |
| "learning_rate": 4.9589286770334654e-05, |
| "loss": 0.8336, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.154, |
| "grad_norm": 0.0, |
| "learning_rate": 4.9557181268217227e-05, |
| "loss": 0.9091, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.156, |
| "grad_norm": 0.8305982351303101, |
| "learning_rate": 4.952387888372979e-05, |
| "loss": 2.1102, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.158, |
| "grad_norm": 1.0168547630310059, |
| "learning_rate": 4.94893812399836e-05, |
| "loss": 1.3355, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.6084606647491455, |
| "learning_rate": 4.9453690018345144e-05, |
| "loss": 1.7654, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.162, |
| "grad_norm": 0.0, |
| "learning_rate": 4.94168069583542e-05, |
| "loss": 2.7706, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.164, |
| "grad_norm": 1.6526423692703247, |
| "learning_rate": 4.937873385763908e-05, |
| "loss": 1.1375, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.166, |
| "grad_norm": 0.6791658401489258, |
| "learning_rate": 4.933947257182901e-05, |
| "loss": 2.3548, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.168, |
| "grad_norm": 0.8403846025466919, |
| "learning_rate": 4.929902501446366e-05, |
| "loss": 1.6854, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 9.99562931060791, |
| "learning_rate": 4.925739315689991e-05, |
| "loss": 1.7509, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.172, |
| "grad_norm": 5.381862163543701, |
| "learning_rate": 4.9214579028215776e-05, |
| "loss": 1.382, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.174, |
| "grad_norm": 1.614503026008606, |
| "learning_rate": 4.917058471511149e-05, |
| "loss": 2.0779, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 1.4542772769927979, |
| "learning_rate": 4.912541236180779e-05, |
| "loss": 4.77, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.178, |
| "grad_norm": 12.149937629699707, |
| "learning_rate": 4.907906416994146e-05, |
| "loss": 1.5973, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 0.8259605169296265, |
| "learning_rate": 4.9031542398457974e-05, |
| "loss": 1.5671, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.182, |
| "grad_norm": 0.0, |
| "learning_rate": 4.898284936350144e-05, |
| "loss": 0.7493, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.184, |
| "grad_norm": 1.1340575218200684, |
| "learning_rate": 4.893298743830168e-05, |
| "loss": 0.9516, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.186, |
| "grad_norm": 0.0, |
| "learning_rate": 4.888195905305859e-05, |
| "loss": 1.2972, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.188, |
| "grad_norm": 0.9685637950897217, |
| "learning_rate": 4.882976669482367e-05, |
| "loss": 0.561, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 0.0, |
| "learning_rate": 4.877641290737884e-05, |
| "loss": 0.4192, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 0.41207355260849, |
| "learning_rate": 4.8721900291112415e-05, |
| "loss": 0.8683, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.194, |
| "grad_norm": 1.2403852939605713, |
| "learning_rate": 4.8666231502892415e-05, |
| "loss": 0.8114, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.196, |
| "grad_norm": 3.257436990737915, |
| "learning_rate": 4.860940925593703e-05, |
| "loss": 0.8031, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.198, |
| "grad_norm": 0.0, |
| "learning_rate": 4.855143631968242e-05, |
| "loss": 1.1436, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 3.756723642349243, |
| "learning_rate": 4.849231551964771e-05, |
| "loss": 1.4053, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.202, |
| "grad_norm": 2.7545039653778076, |
| "learning_rate": 4.843204973729729e-05, |
| "loss": 1.2775, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.204, |
| "grad_norm": 15.873637199401855, |
| "learning_rate": 4.837064190990036e-05, |
| "loss": 1.4629, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.206, |
| "grad_norm": 1.0889697074890137, |
| "learning_rate": 4.830809503038781e-05, |
| "loss": 2.1119, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 3.773542881011963, |
| "learning_rate": 4.8244412147206284e-05, |
| "loss": 1.5775, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 0.398959219455719, |
| "learning_rate": 4.817959636416969e-05, |
| "loss": 1.2064, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.212, |
| "grad_norm": 3.363379955291748, |
| "learning_rate": 4.8113650840307834e-05, |
| "loss": 1.1176, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.214, |
| "grad_norm": 7.889254093170166, |
| "learning_rate": 4.8046578789712515e-05, |
| "loss": 1.2473, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.216, |
| "grad_norm": 0.2269558608531952, |
| "learning_rate": 4.797838348138086e-05, |
| "loss": 1.6337, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.218, |
| "grad_norm": 14.9801607131958, |
| "learning_rate": 4.790906823905599e-05, |
| "loss": 2.1006, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 5.640057563781738, |
| "learning_rate": 4.783863644106502e-05, |
| "loss": 1.538, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.222, |
| "grad_norm": 1.0203982591629028, |
| "learning_rate": 4.776709152015443e-05, |
| "loss": 0.9892, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 0.0, |
| "learning_rate": 4.769443696332272e-05, |
| "loss": 1.1776, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.226, |
| "grad_norm": 10.02802562713623, |
| "learning_rate": 4.762067631165049e-05, |
| "loss": 1.1521, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.228, |
| "grad_norm": 7.899293899536133, |
| "learning_rate": 4.754581316012785e-05, |
| "loss": 1.8144, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 2.8550174236297607, |
| "learning_rate": 4.7469851157479177e-05, |
| "loss": 1.8925, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.232, |
| "grad_norm": 1.191503643989563, |
| "learning_rate": 4.7392794005985326e-05, |
| "loss": 1.9765, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.234, |
| "grad_norm": 0.0, |
| "learning_rate": 4.731464546130314e-05, |
| "loss": 1.3849, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.236, |
| "grad_norm": 7.479146480560303, |
| "learning_rate": 4.723540933228244e-05, |
| "loss": 1.0749, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.238, |
| "grad_norm": 3.3298346996307373, |
| "learning_rate": 4.715508948078037e-05, |
| "loss": 1.2765, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 2.338376998901367, |
| "learning_rate": 4.707368982147318e-05, |
| "loss": 1.3458, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.242, |
| "grad_norm": 4.633726596832275, |
| "learning_rate": 4.6991214321665414e-05, |
| "loss": 2.4307, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.244, |
| "grad_norm": 3.250617027282715, |
| "learning_rate": 4.690766700109659e-05, |
| "loss": 1.2513, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.246, |
| "grad_norm": 4.539650917053223, |
| "learning_rate": 4.682305193174524e-05, |
| "loss": 1.5237, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.248, |
| "grad_norm": 0.0, |
| "learning_rate": 4.6737373237630476e-05, |
| "loss": 0.5587, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.0, |
| "learning_rate": 4.665063509461097e-05, |
| "loss": 1.2764, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.252, |
| "grad_norm": 2.6641793251037598, |
| "learning_rate": 4.656284173018144e-05, |
| "loss": 1.3583, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.254, |
| "grad_norm": 0.6401162147521973, |
| "learning_rate": 4.6473997423266614e-05, |
| "loss": 1.0476, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 17.269447326660156, |
| "learning_rate": 4.638410650401267e-05, |
| "loss": 1.2224, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.258, |
| "grad_norm": 5.4362993240356445, |
| "learning_rate": 4.629317335357619e-05, |
| "loss": 1.2511, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 9.284676551818848, |
| "learning_rate": 4.620120240391065e-05, |
| "loss": 0.9118, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.262, |
| "grad_norm": 5.196897029876709, |
| "learning_rate": 4.610819813755038e-05, |
| "loss": 1.6257, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.264, |
| "grad_norm": 3.060595750808716, |
| "learning_rate": 4.601416508739211e-05, |
| "loss": 1.3779, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.266, |
| "grad_norm": 4.4460673332214355, |
| "learning_rate": 4.591910783647404e-05, |
| "loss": 0.7513, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.268, |
| "grad_norm": 3.9478137493133545, |
| "learning_rate": 4.5823031017752485e-05, |
| "loss": 0.9175, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 0.0, |
| "learning_rate": 4.572593931387604e-05, |
| "loss": 0.5732, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 0.0, |
| "learning_rate": 4.562783745695738e-05, |
| "loss": 2.0532, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.274, |
| "grad_norm": 12.004858016967773, |
| "learning_rate": 4.5528730228342605e-05, |
| "loss": 1.1836, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.276, |
| "grad_norm": 1.7193065881729126, |
| "learning_rate": 4.542862245837821e-05, |
| "loss": 0.876, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.278, |
| "grad_norm": 8.586918830871582, |
| "learning_rate": 4.532751902617569e-05, |
| "loss": 1.036, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 7.1235198974609375, |
| "learning_rate": 4.522542485937369e-05, |
| "loss": 1.2111, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.282, |
| "grad_norm": 4.701413631439209, |
| "learning_rate": 4.512234493389785e-05, |
| "loss": 1.6001, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.284, |
| "grad_norm": 11.681370735168457, |
| "learning_rate": 4.5018284273718336e-05, |
| "loss": 1.4541, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.286, |
| "grad_norm": 1.9920321702957153, |
| "learning_rate": 4.491324795060491e-05, |
| "loss": 1.9069, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 17.279251098632812, |
| "learning_rate": 4.480724108387977e-05, |
| "loss": 1.6968, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 23.974655151367188, |
| "learning_rate": 4.4700268840168045e-05, |
| "loss": 2.8442, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.292, |
| "grad_norm": 3.4535789489746094, |
| "learning_rate": 4.4592336433146e-05, |
| "loss": 1.5812, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.294, |
| "grad_norm": 14.773252487182617, |
| "learning_rate": 4.448344912328686e-05, |
| "loss": 2.3539, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.296, |
| "grad_norm": 1.3638978004455566, |
| "learning_rate": 4.4373612217604496e-05, |
| "loss": 1.0708, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.298, |
| "grad_norm": 1.5786101818084717, |
| "learning_rate": 4.426283106939474e-05, |
| "loss": 1.464, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.6620932817459106, |
| "learning_rate": 4.415111107797445e-05, |
| "loss": 0.9687, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.302, |
| "grad_norm": 4.670433521270752, |
| "learning_rate": 4.403845768841842e-05, |
| "loss": 1.822, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3924876391293915e-05, |
| "loss": 1.1844, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.306, |
| "grad_norm": 13.988329887390137, |
| "learning_rate": 4.381037272239311e-05, |
| "loss": 1.8112, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.308, |
| "grad_norm": 0.3172394037246704, |
| "learning_rate": 4.36949522624633e-05, |
| "loss": 0.8056, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 0.0, |
| "learning_rate": 4.357862063693486e-05, |
| "loss": 2.317, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.312, |
| "grad_norm": 5.500033855438232, |
| "learning_rate": 4.3461383515647106e-05, |
| "loss": 1.1789, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.314, |
| "grad_norm": 3.911036729812622, |
| "learning_rate": 4.334324661257191e-05, |
| "loss": 1.6664, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.316, |
| "grad_norm": 3.9234087467193604, |
| "learning_rate": 4.3224215685535294e-05, |
| "loss": 0.8933, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.318, |
| "grad_norm": 0.0, |
| "learning_rate": 4.3104296535936695e-05, |
| "loss": 2.5625, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 2.117269515991211, |
| "learning_rate": 4.2983495008466276e-05, |
| "loss": 1.6555, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.322, |
| "grad_norm": 9.193464279174805, |
| "learning_rate": 4.2861816990820084e-05, |
| "loss": 1.1192, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.324, |
| "grad_norm": 4.3306565284729, |
| "learning_rate": 4.273926841341302e-05, |
| "loss": 1.3019, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.326, |
| "grad_norm": 7.020684242248535, |
| "learning_rate": 4.261585524908987e-05, |
| "loss": 1.3233, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.328, |
| "grad_norm": 2.99652099609375, |
| "learning_rate": 4.249158351283414e-05, |
| "loss": 0.8989, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 19.782791137695312, |
| "learning_rate": 4.2366459261474933e-05, |
| "loss": 1.1397, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.332, |
| "grad_norm": 2.0220930576324463, |
| "learning_rate": 4.224048859339175e-05, |
| "loss": 1.099, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.334, |
| "grad_norm": 0.0, |
| "learning_rate": 4.211367764821722e-05, |
| "loss": 0.7078, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.336, |
| "grad_norm": 9.233697891235352, |
| "learning_rate": 4.198603260653792e-05, |
| "loss": 1.958, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.338, |
| "grad_norm": 14.23305606842041, |
| "learning_rate": 4.185755968959308e-05, |
| "loss": 4.0017, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 1.316114068031311, |
| "learning_rate": 4.172826515897146e-05, |
| "loss": 0.9028, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.342, |
| "grad_norm": 2.208270311355591, |
| "learning_rate": 4.1598155316306044e-05, |
| "loss": 1.3906, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.344, |
| "grad_norm": 0.7679595947265625, |
| "learning_rate": 4.146723650296701e-05, |
| "loss": 1.1884, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.346, |
| "grad_norm": 0.0, |
| "learning_rate": 4.133551509975264e-05, |
| "loss": 0.7797, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.348, |
| "grad_norm": 0.0, |
| "learning_rate": 4.1202997526578276e-05, |
| "loss": 1.9353, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.4529210925102234, |
| "learning_rate": 4.1069690242163484e-05, |
| "loss": 1.5543, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 1.7549803256988525, |
| "learning_rate": 4.093559974371725e-05, |
| "loss": 1.1649, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.354, |
| "grad_norm": 0.4439053237438202, |
| "learning_rate": 4.080073256662127e-05, |
| "loss": 1.1743, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.356, |
| "grad_norm": 1.59803307056427, |
| "learning_rate": 4.066509528411152e-05, |
| "loss": 1.5422, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.358, |
| "grad_norm": 1.4424983263015747, |
| "learning_rate": 4.052869450695776e-05, |
| "loss": 0.9694, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 3.5582339763641357, |
| "learning_rate": 4.039153688314145e-05, |
| "loss": 1.37, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.362, |
| "grad_norm": 1.2062020301818848, |
| "learning_rate": 4.02536290975317e-05, |
| "loss": 1.0036, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.364, |
| "grad_norm": 0.0, |
| "learning_rate": 4.011497787155938e-05, |
| "loss": 1.8302, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.366, |
| "grad_norm": 0.0, |
| "learning_rate": 3.997558996288965e-05, |
| "loss": 1.5304, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.368, |
| "grad_norm": 0.0, |
| "learning_rate": 3.983547216509254e-05, |
| "loss": 0.8427, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 0.9621694087982178, |
| "learning_rate": 3.969463130731183e-05, |
| "loss": 0.9384, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.372, |
| "grad_norm": 0.9074615240097046, |
| "learning_rate": 3.955307425393224e-05, |
| "loss": 0.956, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.374, |
| "grad_norm": 94.73406219482422, |
| "learning_rate": 3.941080790424484e-05, |
| "loss": 2.5897, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.376, |
| "grad_norm": 1.4673329591751099, |
| "learning_rate": 3.92678391921108e-05, |
| "loss": 1.0552, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.378, |
| "grad_norm": 2.7422103881835938, |
| "learning_rate": 3.912417508562345e-05, |
| "loss": 1.314, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 8.64188003540039, |
| "learning_rate": 3.897982258676867e-05, |
| "loss": 2.4508, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.382, |
| "grad_norm": 3.8510308265686035, |
| "learning_rate": 3.883478873108361e-05, |
| "loss": 1.5485, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 0.0, |
| "learning_rate": 3.868908058731376e-05, |
| "loss": 0.6966, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.386, |
| "grad_norm": 0.0, |
| "learning_rate": 3.85427052570685e-05, |
| "loss": 0.8407, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.388, |
| "grad_norm": 1.5290547609329224, |
| "learning_rate": 3.8395669874474915e-05, |
| "loss": 1.1045, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 0.0, |
| "learning_rate": 3.824798160583012e-05, |
| "loss": 0.7849, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.392, |
| "grad_norm": 2.1444456577301025, |
| "learning_rate": 3.8099647649251986e-05, |
| "loss": 1.8512, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.394, |
| "grad_norm": 1.4146127700805664, |
| "learning_rate": 3.795067523432826e-05, |
| "loss": 0.9294, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.396, |
| "grad_norm": 8.307282447814941, |
| "learning_rate": 3.780107162176429e-05, |
| "loss": 1.0097, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.398, |
| "grad_norm": 17.269071578979492, |
| "learning_rate": 3.765084410302909e-05, |
| "loss": 2.4489, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 7.6245198249816895, |
| "learning_rate": 3.7500000000000003e-05, |
| "loss": 1.441, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.402, |
| "grad_norm": 0.9176368713378906, |
| "learning_rate": 3.7348546664605777e-05, |
| "loss": 1.1639, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.404, |
| "grad_norm": 1.319966435432434, |
| "learning_rate": 3.719649147846832e-05, |
| "loss": 1.0539, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.406, |
| "grad_norm": 3.70522403717041, |
| "learning_rate": 3.704384185254288e-05, |
| "loss": 0.9333, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.408, |
| "grad_norm": 45.268592834472656, |
| "learning_rate": 3.689060522675689e-05, |
| "loss": 1.8345, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 3.782158136367798, |
| "learning_rate": 3.673678906964727e-05, |
| "loss": 2.0458, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.412, |
| "grad_norm": 7.8290791511535645, |
| "learning_rate": 3.6582400877996546e-05, |
| "loss": 1.3346, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.414, |
| "grad_norm": 2.4729154109954834, |
| "learning_rate": 3.642744817646736e-05, |
| "loss": 1.5323, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 0.838581383228302, |
| "learning_rate": 3.627193851723577e-05, |
| "loss": 0.8685, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.418, |
| "grad_norm": 16.806278228759766, |
| "learning_rate": 3.611587947962319e-05, |
| "loss": 1.3805, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5959278669726935e-05, |
| "loss": 3.2155, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.422, |
| "grad_norm": 3.4624030590057373, |
| "learning_rate": 3.580214372004956e-05, |
| "loss": 1.3376, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.424, |
| "grad_norm": 3.215987205505371, |
| "learning_rate": 3.564448228912682e-05, |
| "loss": 2.3062, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.426, |
| "grad_norm": 24.706693649291992, |
| "learning_rate": 3.548630206115443e-05, |
| "loss": 0.8054, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.428, |
| "grad_norm": 28.45231056213379, |
| "learning_rate": 3.532761074561355e-05, |
| "loss": 1.1863, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 3.422032356262207, |
| "learning_rate": 3.516841607689501e-05, |
| "loss": 1.2644, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.432, |
| "grad_norm": 3.279686450958252, |
| "learning_rate": 3.5008725813922386e-05, |
| "loss": 1.1396, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.434, |
| "grad_norm": 6.026493549346924, |
| "learning_rate": 3.484854773977378e-05, |
| "loss": 1.6754, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.436, |
| "grad_norm": 1.911526083946228, |
| "learning_rate": 3.4687889661302576e-05, |
| "loss": 0.7079, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.438, |
| "grad_norm": 14.799755096435547, |
| "learning_rate": 3.452675940875686e-05, |
| "loss": 1.4206, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 6.455738544464111, |
| "learning_rate": 3.436516483539781e-05, |
| "loss": 1.3386, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.442, |
| "grad_norm": 1.276485800743103, |
| "learning_rate": 3.4203113817116957e-05, |
| "loss": 0.8194, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.444, |
| "grad_norm": 6.4939799308776855, |
| "learning_rate": 3.4040614252052305e-05, |
| "loss": 1.8051, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.446, |
| "grad_norm": 1.6123688220977783, |
| "learning_rate": 3.387767406020343e-05, |
| "loss": 1.8429, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 3.2084081172943115, |
| "learning_rate": 3.3714301183045385e-05, |
| "loss": 1.7987, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 2.2829909324645996, |
| "learning_rate": 3.355050358314172e-05, |
| "loss": 1.9843, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.452, |
| "grad_norm": 4.479115009307861, |
| "learning_rate": 3.338628924375638e-05, |
| "loss": 0.6176, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.454, |
| "grad_norm": 1.7939817905426025, |
| "learning_rate": 3.322166616846458e-05, |
| "loss": 1.2847, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.456, |
| "grad_norm": 2.055887460708618, |
| "learning_rate": 3.305664238076278e-05, |
| "loss": 1.7299, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.458, |
| "grad_norm": 0.5422760248184204, |
| "learning_rate": 3.289122592367757e-05, |
| "loss": 0.7908, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 0.0, |
| "learning_rate": 3.272542485937369e-05, |
| "loss": 1.0017, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.462, |
| "grad_norm": 0.32280176877975464, |
| "learning_rate": 3.2559247268761115e-05, |
| "loss": 0.8984, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.464, |
| "grad_norm": 18.99048614501953, |
| "learning_rate": 3.239270125110117e-05, |
| "loss": 1.7084, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.466, |
| "grad_norm": 5.556630611419678, |
| "learning_rate": 3.222579492361179e-05, |
| "loss": 0.6562, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.468, |
| "grad_norm": 21.25049591064453, |
| "learning_rate": 3.205853642107192e-05, |
| "loss": 1.1915, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 0.0, |
| "learning_rate": 3.1890933895424976e-05, |
| "loss": 1.3534, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.472, |
| "grad_norm": 1.8797976970672607, |
| "learning_rate": 3.172299551538164e-05, |
| "loss": 1.4973, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.474, |
| "grad_norm": 3.0614185333251953, |
| "learning_rate": 3.155472946602162e-05, |
| "loss": 2.4774, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.476, |
| "grad_norm": 6.993570804595947, |
| "learning_rate": 3.138614394839476e-05, |
| "loss": 1.5159, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.478, |
| "grad_norm": 0.0, |
| "learning_rate": 3.121724717912138e-05, |
| "loss": 1.0125, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 2.762420892715454, |
| "learning_rate": 3.104804738999169e-05, |
| "loss": 0.5848, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.482, |
| "grad_norm": 24.377647399902344, |
| "learning_rate": 3.087855282756475e-05, |
| "loss": 1.3007, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.484, |
| "grad_norm": 15.386418342590332, |
| "learning_rate": 3.0708771752766394e-05, |
| "loss": 1.6124, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.486, |
| "grad_norm": 4.332566261291504, |
| "learning_rate": 3.053871244048669e-05, |
| "loss": 1.0171, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.488, |
| "grad_norm": 1.804961919784546, |
| "learning_rate": 3.0368383179176585e-05, |
| "loss": 0.5442, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.1589062213897705, |
| "learning_rate": 3.0197792270443982e-05, |
| "loss": 0.824, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.492, |
| "grad_norm": 1.190585732460022, |
| "learning_rate": 3.002694802864912e-05, |
| "loss": 1.5214, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.494, |
| "grad_norm": 7.825082302093506, |
| "learning_rate": 2.98558587804993e-05, |
| "loss": 1.9628, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.496, |
| "grad_norm": 7.856967449188232, |
| "learning_rate": 2.9684532864643122e-05, |
| "loss": 1.6987, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.498, |
| "grad_norm": 11.930267333984375, |
| "learning_rate": 2.9512978631264006e-05, |
| "loss": 1.3293, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.154645562171936, |
| "learning_rate": 2.9341204441673266e-05, |
| "loss": 1.4358, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.502, |
| "grad_norm": 0.0, |
| "learning_rate": 2.916921866790256e-05, |
| "loss": 0.9238, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.504, |
| "grad_norm": 2.808257579803467, |
| "learning_rate": 2.8997029692295874e-05, |
| "loss": 0.6767, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.506, |
| "grad_norm": 7.462977886199951, |
| "learning_rate": 2.8824645907100954e-05, |
| "loss": 1.5141, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.508, |
| "grad_norm": 4.376765727996826, |
| "learning_rate": 2.8652075714060295e-05, |
| "loss": 0.7271, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 0.0, |
| "learning_rate": 2.8479327524001636e-05, |
| "loss": 1.1601, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 2.4635355472564697, |
| "learning_rate": 2.8306409756428064e-05, |
| "loss": 1.1031, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.514, |
| "grad_norm": 16.775001525878906, |
| "learning_rate": 2.8133330839107608e-05, |
| "loss": 1.9696, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.516, |
| "grad_norm": 0.4682444632053375, |
| "learning_rate": 2.7960099207662532e-05, |
| "loss": 1.1183, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.518, |
| "grad_norm": 4.674540042877197, |
| "learning_rate": 2.7786723305158136e-05, |
| "loss": 0.8266, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 3.9463658332824707, |
| "learning_rate": 2.761321158169134e-05, |
| "loss": 1.5703, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.522, |
| "grad_norm": 9.975082397460938, |
| "learning_rate": 2.7439572493978736e-05, |
| "loss": 1.6023, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.524, |
| "grad_norm": 5.6594462394714355, |
| "learning_rate": 2.726581450494451e-05, |
| "loss": 0.9304, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.526, |
| "grad_norm": 9.62289047241211, |
| "learning_rate": 2.7091946083307896e-05, |
| "loss": 1.5715, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.528, |
| "grad_norm": 15.356546401977539, |
| "learning_rate": 2.6917975703170466e-05, |
| "loss": 1.8024, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.7599657773971558, |
| "learning_rate": 2.674391184360313e-05, |
| "loss": 0.8039, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.532, |
| "grad_norm": 1.521369218826294, |
| "learning_rate": 2.656976298823284e-05, |
| "loss": 0.6528, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.534, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6395537624829096e-05, |
| "loss": 1.7035, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.536, |
| "grad_norm": 2.2401411533355713, |
| "learning_rate": 2.6221244244890336e-05, |
| "loss": 1.2272, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.538, |
| "grad_norm": 1.1615192890167236, |
| "learning_rate": 2.604689134322999e-05, |
| "loss": 0.5353, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 15.130184173583984, |
| "learning_rate": 2.587248741756253e-05, |
| "loss": 1.1314, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.542, |
| "grad_norm": 0.6911365389823914, |
| "learning_rate": 2.5698040968089225e-05, |
| "loss": 1.8837, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 29.572574615478516, |
| "learning_rate": 2.5523560497083926e-05, |
| "loss": 1.7309, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.546, |
| "grad_norm": 2.712412118911743, |
| "learning_rate": 2.5349054508478637e-05, |
| "loss": 1.5027, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.548, |
| "grad_norm": 2.607976198196411, |
| "learning_rate": 2.517453150744904e-05, |
| "loss": 0.7212, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.0, |
| "learning_rate": 2.5e-05, |
| "loss": 0.9073, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.552, |
| "grad_norm": 4.585177898406982, |
| "learning_rate": 2.4825468492550964e-05, |
| "loss": 1.3612, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.554, |
| "grad_norm": 38.40209197998047, |
| "learning_rate": 2.4650945491521372e-05, |
| "loss": 2.5574, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.556, |
| "grad_norm": 0.7733972668647766, |
| "learning_rate": 2.447643950291608e-05, |
| "loss": 1.5628, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.558, |
| "grad_norm": 10.307479858398438, |
| "learning_rate": 2.4301959031910784e-05, |
| "loss": 1.4349, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4127512582437485e-05, |
| "loss": 1.2743, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.562, |
| "grad_norm": 17.315427780151367, |
| "learning_rate": 2.3953108656770016e-05, |
| "loss": 1.4333, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.564, |
| "grad_norm": 0.3390060365200043, |
| "learning_rate": 2.377875575510967e-05, |
| "loss": 0.7756, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.566, |
| "grad_norm": 0.7559804916381836, |
| "learning_rate": 2.3604462375170906e-05, |
| "loss": 1.8001, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.568, |
| "grad_norm": 6.139041423797607, |
| "learning_rate": 2.3430237011767167e-05, |
| "loss": 1.5661, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 0.0, |
| "learning_rate": 2.3256088156396868e-05, |
| "loss": 0.801, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.572, |
| "grad_norm": 0.6441702246665955, |
| "learning_rate": 2.3082024296829536e-05, |
| "loss": 1.4609, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.574, |
| "grad_norm": 11.885534286499023, |
| "learning_rate": 2.2908053916692117e-05, |
| "loss": 1.174, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 0.4849330186843872, |
| "learning_rate": 2.2734185495055503e-05, |
| "loss": 1.5587, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.578, |
| "grad_norm": 2.4908790588378906, |
| "learning_rate": 2.2560427506021266e-05, |
| "loss": 1.1499, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 11.29605484008789, |
| "learning_rate": 2.238678841830867e-05, |
| "loss": 1.0868, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.582, |
| "grad_norm": 8.866591453552246, |
| "learning_rate": 2.2213276694841866e-05, |
| "loss": 1.2244, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.584, |
| "grad_norm": 0.6604860424995422, |
| "learning_rate": 2.2039900792337474e-05, |
| "loss": 1.1256, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.586, |
| "grad_norm": 6.171046733856201, |
| "learning_rate": 2.186666916089239e-05, |
| "loss": 0.4934, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.588, |
| "grad_norm": 12.340161323547363, |
| "learning_rate": 2.1693590243571938e-05, |
| "loss": 1.223, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.2349804639816284, |
| "learning_rate": 2.1520672475998373e-05, |
| "loss": 0.6751, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.592, |
| "grad_norm": 22.765419006347656, |
| "learning_rate": 2.1347924285939714e-05, |
| "loss": 2.4113, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.594, |
| "grad_norm": 7.209038257598877, |
| "learning_rate": 2.117535409289905e-05, |
| "loss": 1.3446, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.596, |
| "grad_norm": 1.229001522064209, |
| "learning_rate": 2.1002970307704132e-05, |
| "loss": 1.0472, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.598, |
| "grad_norm": 2.125377893447876, |
| "learning_rate": 2.0830781332097446e-05, |
| "loss": 0.9695, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 3.410578489303589, |
| "learning_rate": 2.0658795558326743e-05, |
| "loss": 1.1868, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.602, |
| "grad_norm": 1.8903169631958008, |
| "learning_rate": 2.0487021368736003e-05, |
| "loss": 1.3204, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.604, |
| "grad_norm": 7.726503849029541, |
| "learning_rate": 2.031546713535688e-05, |
| "loss": 0.8136, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.606, |
| "grad_norm": 2.2459115982055664, |
| "learning_rate": 2.0144141219500705e-05, |
| "loss": 1.2284, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 18.302974700927734, |
| "learning_rate": 1.9973051971350888e-05, |
| "loss": 2.5085, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 0.5879172682762146, |
| "learning_rate": 1.980220772955602e-05, |
| "loss": 0.8985, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.612, |
| "grad_norm": 1.7284860610961914, |
| "learning_rate": 1.963161682082342e-05, |
| "loss": 1.7433, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.614, |
| "grad_norm": 12.673727035522461, |
| "learning_rate": 1.946128755951332e-05, |
| "loss": 1.393, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.616, |
| "grad_norm": 1.9405444860458374, |
| "learning_rate": 1.9291228247233605e-05, |
| "loss": 1.4175, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.618, |
| "grad_norm": 10.212315559387207, |
| "learning_rate": 1.912144717243525e-05, |
| "loss": 1.9178, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.4168413877487183, |
| "learning_rate": 1.895195261000831e-05, |
| "loss": 0.9371, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.622, |
| "grad_norm": 0.0, |
| "learning_rate": 1.8782752820878634e-05, |
| "loss": 0.7683, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.624, |
| "grad_norm": 8.363224983215332, |
| "learning_rate": 1.8613856051605243e-05, |
| "loss": 1.173, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.626, |
| "grad_norm": 4.870674133300781, |
| "learning_rate": 1.8445270533978388e-05, |
| "loss": 1.2886, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.628, |
| "grad_norm": 1.6593077182769775, |
| "learning_rate": 1.827700448461836e-05, |
| "loss": 0.7759, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 1.97660231590271, |
| "learning_rate": 1.8109066104575023e-05, |
| "loss": 1.0199, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.632, |
| "grad_norm": 10.020951271057129, |
| "learning_rate": 1.7941463578928086e-05, |
| "loss": 1.3115, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.634, |
| "grad_norm": 2.821995973587036, |
| "learning_rate": 1.7774205076388206e-05, |
| "loss": 1.3926, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.636, |
| "grad_norm": 24.37348747253418, |
| "learning_rate": 1.7607298748898842e-05, |
| "loss": 2.0498, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.638, |
| "grad_norm": 5.457019329071045, |
| "learning_rate": 1.744075273123889e-05, |
| "loss": 0.5073, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.0, |
| "learning_rate": 1.7274575140626318e-05, |
| "loss": 1.159, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.642, |
| "grad_norm": 0.691412091255188, |
| "learning_rate": 1.7108774076322443e-05, |
| "loss": 0.5308, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.644, |
| "grad_norm": 1.4707207679748535, |
| "learning_rate": 1.6943357619237226e-05, |
| "loss": 0.8804, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.646, |
| "grad_norm": 1.5302475690841675, |
| "learning_rate": 1.677833383153542e-05, |
| "loss": 0.562, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.648, |
| "grad_norm": 1.94536292552948, |
| "learning_rate": 1.6613710756243626e-05, |
| "loss": 1.184, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 3.658015251159668, |
| "learning_rate": 1.6449496416858284e-05, |
| "loss": 1.5939, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.652, |
| "grad_norm": 13.352316856384277, |
| "learning_rate": 1.6285698816954624e-05, |
| "loss": 0.6112, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.654, |
| "grad_norm": 9.652981758117676, |
| "learning_rate": 1.612232593979658e-05, |
| "loss": 1.3099, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.656, |
| "grad_norm": 0.0, |
| "learning_rate": 1.5959385747947698e-05, |
| "loss": 2.5607, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.658, |
| "grad_norm": 0.0, |
| "learning_rate": 1.5796886182883053e-05, |
| "loss": 1.0217, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 3.3009743690490723, |
| "learning_rate": 1.56348351646022e-05, |
| "loss": 0.9401, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.662, |
| "grad_norm": 0.0, |
| "learning_rate": 1.547324059124315e-05, |
| "loss": 1.0247, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.664, |
| "grad_norm": 0.5582336783409119, |
| "learning_rate": 1.5312110338697426e-05, |
| "loss": 1.9519, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.666, |
| "grad_norm": 5.420542240142822, |
| "learning_rate": 1.5151452260226224e-05, |
| "loss": 0.7734, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.668, |
| "grad_norm": 3.886523962020874, |
| "learning_rate": 1.4991274186077632e-05, |
| "loss": 1.0947, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.0134683847427368, |
| "learning_rate": 1.4831583923104999e-05, |
| "loss": 0.5691, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 2.0535247325897217, |
| "learning_rate": 1.467238925438646e-05, |
| "loss": 1.966, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.674, |
| "grad_norm": 1.5086647272109985, |
| "learning_rate": 1.4513697938845572e-05, |
| "loss": 0.8153, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.676, |
| "grad_norm": 0.9438655972480774, |
| "learning_rate": 1.4355517710873184e-05, |
| "loss": 0.4211, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.678, |
| "grad_norm": 9.679282188415527, |
| "learning_rate": 1.4197856279950438e-05, |
| "loss": 1.4362, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 3.571068048477173, |
| "learning_rate": 1.4040721330273062e-05, |
| "loss": 0.7938, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.682, |
| "grad_norm": 0.3649686872959137, |
| "learning_rate": 1.388412052037682e-05, |
| "loss": 0.7605, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.684, |
| "grad_norm": 6.745936870574951, |
| "learning_rate": 1.3728061482764238e-05, |
| "loss": 2.1024, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.686, |
| "grad_norm": 0.0, |
| "learning_rate": 1.3572551823532654e-05, |
| "loss": 1.7508, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.688, |
| "grad_norm": 10.104940414428711, |
| "learning_rate": 1.3417599122003464e-05, |
| "loss": 1.4949, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 2.5721874237060547, |
| "learning_rate": 1.3263210930352737e-05, |
| "loss": 1.3493, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.692, |
| "grad_norm": 1.289473533630371, |
| "learning_rate": 1.3109394773243117e-05, |
| "loss": 1.7492, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.694, |
| "grad_norm": 0.9646878242492676, |
| "learning_rate": 1.2956158147457115e-05, |
| "loss": 1.2414, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.696, |
| "grad_norm": 1.2052338123321533, |
| "learning_rate": 1.280350852153168e-05, |
| "loss": 0.978, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.698, |
| "grad_norm": 1.5356535911560059, |
| "learning_rate": 1.2651453335394231e-05, |
| "loss": 0.6575, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.6956253051757812, |
| "learning_rate": 1.2500000000000006e-05, |
| "loss": 1.6792, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.702, |
| "grad_norm": 2.102616548538208, |
| "learning_rate": 1.234915589697091e-05, |
| "loss": 0.6078, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 0.0, |
| "learning_rate": 1.2198928378235716e-05, |
| "loss": 1.0625, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.706, |
| "grad_norm": 7.8556294441223145, |
| "learning_rate": 1.2049324765671749e-05, |
| "loss": 1.7064, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.708, |
| "grad_norm": 2.802849292755127, |
| "learning_rate": 1.1900352350748026e-05, |
| "loss": 0.8152, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 0.0, |
| "learning_rate": 1.175201839416988e-05, |
| "loss": 1.1932, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.712, |
| "grad_norm": 18.025968551635742, |
| "learning_rate": 1.1604330125525079e-05, |
| "loss": 1.2326, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.714, |
| "grad_norm": 1.438110113143921, |
| "learning_rate": 1.1457294742931507e-05, |
| "loss": 1.2993, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.716, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1310919412686247e-05, |
| "loss": 1.4532, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.718, |
| "grad_norm": 0.0, |
| "learning_rate": 1.11652112689164e-05, |
| "loss": 1.545, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1020177413231334e-05, |
| "loss": 1.3031, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.722, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0875824914376553e-05, |
| "loss": 1.0103, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.724, |
| "grad_norm": 7.254970073699951, |
| "learning_rate": 1.0732160807889211e-05, |
| "loss": 2.8567, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.726, |
| "grad_norm": 1.416352391242981, |
| "learning_rate": 1.058919209575517e-05, |
| "loss": 1.3896, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.728, |
| "grad_norm": 3.275563955307007, |
| "learning_rate": 1.0446925746067768e-05, |
| "loss": 1.186, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0305368692688174e-05, |
| "loss": 1.9734, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.732, |
| "grad_norm": 5.804834365844727, |
| "learning_rate": 1.0164527834907467e-05, |
| "loss": 1.3322, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.734, |
| "grad_norm": 0.6750529408454895, |
| "learning_rate": 1.0024410037110357e-05, |
| "loss": 1.4241, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 0.5938382148742676, |
| "learning_rate": 9.88502212844063e-06, |
| "loss": 1.1161, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.738, |
| "grad_norm": 8.857674598693848, |
| "learning_rate": 9.746370902468311e-06, |
| "loss": 1.4002, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 0.0, |
| "learning_rate": 9.608463116858542e-06, |
| "loss": 0.6686, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.742, |
| "grad_norm": 6.373303413391113, |
| "learning_rate": 9.471305493042243e-06, |
| "loss": 1.1473, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.744, |
| "grad_norm": 1.3617981672286987, |
| "learning_rate": 9.334904715888495e-06, |
| "loss": 1.3623, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.746, |
| "grad_norm": 9.023974418640137, |
| "learning_rate": 9.199267433378727e-06, |
| "loss": 2.5043, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.748, |
| "grad_norm": 2.228018045425415, |
| "learning_rate": 9.064400256282757e-06, |
| "loss": 0.807, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 4.2638630867004395, |
| "learning_rate": 8.930309757836517e-06, |
| "loss": 1.078, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.752, |
| "grad_norm": 3.281846284866333, |
| "learning_rate": 8.797002473421728e-06, |
| "loss": 0.8329, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.754, |
| "grad_norm": 5.123702049255371, |
| "learning_rate": 8.664484900247363e-06, |
| "loss": 1.9266, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.756, |
| "grad_norm": 10.131644248962402, |
| "learning_rate": 8.532763497032987e-06, |
| "loss": 1.7022, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.758, |
| "grad_norm": 1.5231475830078125, |
| "learning_rate": 8.40184468369396e-06, |
| "loss": 0.8765, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 9.655771255493164, |
| "learning_rate": 8.271734841028553e-06, |
| "loss": 1.8413, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.762, |
| "grad_norm": 5.206886291503906, |
| "learning_rate": 8.142440310406924e-06, |
| "loss": 1.2464, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.764, |
| "grad_norm": 0.4430970549583435, |
| "learning_rate": 8.013967393462094e-06, |
| "loss": 1.1392, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.766, |
| "grad_norm": 0.3004146218299866, |
| "learning_rate": 7.886322351782783e-06, |
| "loss": 1.8922, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 0.0, |
| "learning_rate": 7.759511406608255e-06, |
| "loss": 1.3631, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 2.2798960208892822, |
| "learning_rate": 7.633540738525066e-06, |
| "loss": 2.1561, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.772, |
| "grad_norm": 6.353856563568115, |
| "learning_rate": 7.508416487165862e-06, |
| "loss": 1.3865, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.774, |
| "grad_norm": 23.976675033569336, |
| "learning_rate": 7.384144750910133e-06, |
| "loss": 1.3, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.776, |
| "grad_norm": 5.609853744506836, |
| "learning_rate": 7.260731586586983e-06, |
| "loss": 1.4537, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.778, |
| "grad_norm": 0.0, |
| "learning_rate": 7.138183009179922e-06, |
| "loss": 1.0312, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 5.4580230712890625, |
| "learning_rate": 7.016504991533726e-06, |
| "loss": 2.1389, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.782, |
| "grad_norm": 2.1210005283355713, |
| "learning_rate": 6.895703464063319e-06, |
| "loss": 0.5286, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.784, |
| "grad_norm": 4.246682643890381, |
| "learning_rate": 6.775784314464717e-06, |
| "loss": 1.1388, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.786, |
| "grad_norm": 0.31312233209609985, |
| "learning_rate": 6.656753387428089e-06, |
| "loss": 1.7807, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.788, |
| "grad_norm": 5.389893054962158, |
| "learning_rate": 6.538616484352902e-06, |
| "loss": 0.8653, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 0.0, |
| "learning_rate": 6.421379363065142e-06, |
| "loss": 2.2613, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.792, |
| "grad_norm": 2.296661376953125, |
| "learning_rate": 6.305047737536707e-06, |
| "loss": 0.7061, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.794, |
| "grad_norm": 4.505575656890869, |
| "learning_rate": 6.189627277606894e-06, |
| "loss": 1.1075, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.796, |
| "grad_norm": 1.5126315355300903, |
| "learning_rate": 6.075123608706093e-06, |
| "loss": 0.9158, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.798, |
| "grad_norm": 9.576681137084961, |
| "learning_rate": 5.961542311581586e-06, |
| "loss": 1.5019, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 5.753732204437256, |
| "learning_rate": 5.848888922025553e-06, |
| "loss": 0.8174, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.802, |
| "grad_norm": 1.5566200017929077, |
| "learning_rate": 5.737168930605272e-06, |
| "loss": 0.6246, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.804, |
| "grad_norm": 0.31750229001045227, |
| "learning_rate": 5.626387782395512e-06, |
| "loss": 1.1496, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.806, |
| "grad_norm": 21.146928787231445, |
| "learning_rate": 5.5165508767131415e-06, |
| "loss": 1.4641, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.808, |
| "grad_norm": 20.838895797729492, |
| "learning_rate": 5.4076635668540075e-06, |
| "loss": 2.6414, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 0.0, |
| "learning_rate": 5.299731159831953e-06, |
| "loss": 1.5888, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.812, |
| "grad_norm": 3.5689587593078613, |
| "learning_rate": 5.192758916120236e-06, |
| "loss": 0.755, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.814, |
| "grad_norm": 0.0, |
| "learning_rate": 5.086752049395094e-06, |
| "loss": 1.1083, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.816, |
| "grad_norm": 2.862039566040039, |
| "learning_rate": 4.981715726281666e-06, |
| "loss": 0.5798, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.818, |
| "grad_norm": 21.366016387939453, |
| "learning_rate": 4.877655066102149e-06, |
| "loss": 1.061, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.1818464994430542, |
| "learning_rate": 4.7745751406263165e-06, |
| "loss": 0.4177, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.822, |
| "grad_norm": 17.175460815429688, |
| "learning_rate": 4.672480973824311e-06, |
| "loss": 1.3882, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.824, |
| "grad_norm": 7.882750511169434, |
| "learning_rate": 4.571377541621788e-06, |
| "loss": 1.16, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.826, |
| "grad_norm": 0.0, |
| "learning_rate": 4.4712697716574e-06, |
| "loss": 1.0392, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.828, |
| "grad_norm": 3.8057103157043457, |
| "learning_rate": 4.372162543042624e-06, |
| "loss": 1.1697, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 3.2752530574798584, |
| "learning_rate": 4.274060686123959e-06, |
| "loss": 1.3669, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 2.768428325653076, |
| "learning_rate": 4.176968982247514e-06, |
| "loss": 0.6651, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.834, |
| "grad_norm": 37.53615188598633, |
| "learning_rate": 4.08089216352596e-06, |
| "loss": 1.4382, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.836, |
| "grad_norm": 20.771350860595703, |
| "learning_rate": 3.985834912607894e-06, |
| "loss": 1.1561, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.838, |
| "grad_norm": 9.721131324768066, |
| "learning_rate": 3.891801862449629e-06, |
| "loss": 0.8151, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 70.80477142333984, |
| "learning_rate": 3.798797596089351e-06, |
| "loss": 2.6738, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.842, |
| "grad_norm": 5.896388053894043, |
| "learning_rate": 3.7068266464238084e-06, |
| "loss": 1.4829, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.844, |
| "grad_norm": 0.883860170841217, |
| "learning_rate": 3.6158934959873353e-06, |
| "loss": 0.7147, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.846, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5260025767333893e-06, |
| "loss": 1.6543, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.848, |
| "grad_norm": 3.300741672515869, |
| "learning_rate": 3.4371582698185633e-06, |
| "loss": 0.3801, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.0, |
| "learning_rate": 3.3493649053890326e-06, |
| "loss": 0.6843, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.852, |
| "grad_norm": 5.147795677185059, |
| "learning_rate": 3.262626762369525e-06, |
| "loss": 2.1468, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.854, |
| "grad_norm": 1.0076605081558228, |
| "learning_rate": 3.176948068254762e-06, |
| "loss": 0.6189, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.856, |
| "grad_norm": 10.264873504638672, |
| "learning_rate": 3.092332998903416e-06, |
| "loss": 1.1855, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.858, |
| "grad_norm": 0.3832724392414093, |
| "learning_rate": 3.0087856783345914e-06, |
| "loss": 0.5795, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 2.3567116260528564, |
| "learning_rate": 2.9263101785268254e-06, |
| "loss": 1.6466, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.862, |
| "grad_norm": 22.869237899780273, |
| "learning_rate": 2.8449105192196316e-06, |
| "loss": 1.3837, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 0.0, |
| "learning_rate": 2.764590667717562e-06, |
| "loss": 0.9491, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.866, |
| "grad_norm": 5.939896106719971, |
| "learning_rate": 2.6853545386968606e-06, |
| "loss": 1.1473, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.868, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6072059940146775e-06, |
| "loss": 1.9689, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 0.42899826169013977, |
| "learning_rate": 2.5301488425208296e-06, |
| "loss": 0.8104, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.872, |
| "grad_norm": 3.8288755416870117, |
| "learning_rate": 2.454186839872158e-06, |
| "loss": 1.2391, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.874, |
| "grad_norm": 14.07198715209961, |
| "learning_rate": 2.379323688349516e-06, |
| "loss": 2.2037, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.876, |
| "grad_norm": 10.503342628479004, |
| "learning_rate": 2.3055630366772856e-06, |
| "loss": 1.78, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.878, |
| "grad_norm": 0.36506208777427673, |
| "learning_rate": 2.2329084798455746e-06, |
| "loss": 1.4213, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 4.561337947845459, |
| "learning_rate": 2.1613635589349756e-06, |
| "loss": 1.4604, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.882, |
| "grad_norm": 3.658083915710449, |
| "learning_rate": 2.0909317609440095e-06, |
| "loss": 1.631, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.884, |
| "grad_norm": 4.377072811126709, |
| "learning_rate": 2.0216165186191407e-06, |
| "loss": 1.7125, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.886, |
| "grad_norm": 9.698302268981934, |
| "learning_rate": 1.95342121028749e-06, |
| "loss": 2.2037, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.888, |
| "grad_norm": 4.783137798309326, |
| "learning_rate": 1.8863491596921745e-06, |
| "loss": 1.4982, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 5.755715370178223, |
| "learning_rate": 1.8204036358303173e-06, |
| "loss": 1.1612, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.892, |
| "grad_norm": 12.483977317810059, |
| "learning_rate": 1.7555878527937164e-06, |
| "loss": 1.2837, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.894, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6919049696121958e-06, |
| "loss": 1.6072, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 6.6550774574279785, |
| "learning_rate": 1.629358090099639e-06, |
| "loss": 0.8136, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.898, |
| "grad_norm": 3.452754020690918, |
| "learning_rate": 1.5679502627027136e-06, |
| "loss": 0.9143, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 12.866599082946777, |
| "learning_rate": 1.5076844803522922e-06, |
| "loss": 0.568, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.902, |
| "grad_norm": 4.006110668182373, |
| "learning_rate": 1.4485636803175829e-06, |
| "loss": 1.1768, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.904, |
| "grad_norm": 1.1515779495239258, |
| "learning_rate": 1.3905907440629752e-06, |
| "loss": 0.9012, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.906, |
| "grad_norm": 11.245774269104004, |
| "learning_rate": 1.333768497107593e-06, |
| "loss": 1.1081, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.908, |
| "grad_norm": 6.475898742675781, |
| "learning_rate": 1.2780997088875869e-06, |
| "loss": 0.9983, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 2.4155514240264893, |
| "learning_rate": 1.2235870926211619e-06, |
| "loss": 0.9166, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.912, |
| "grad_norm": 8.179412841796875, |
| "learning_rate": 1.170233305176327e-06, |
| "loss": 1.4171, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.914, |
| "grad_norm": 4.91797399520874, |
| "learning_rate": 1.1180409469414094e-06, |
| "loss": 2.5297, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.916, |
| "grad_norm": 0.5031004548072815, |
| "learning_rate": 1.067012561698319e-06, |
| "loss": 1.2909, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.918, |
| "grad_norm": 20.431604385375977, |
| "learning_rate": 1.0171506364985622e-06, |
| "loss": 3.3333, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 18.30546760559082, |
| "learning_rate": 9.684576015420278e-07, |
| "loss": 1.3298, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.922, |
| "grad_norm": 1.9837167263031006, |
| "learning_rate": 9.209358300585474e-07, |
| "loss": 1.202, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.924, |
| "grad_norm": 0.7103928327560425, |
| "learning_rate": 8.745876381922147e-07, |
| "loss": 2.3325, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.926, |
| "grad_norm": 4.550110816955566, |
| "learning_rate": 8.294152848885157e-07, |
| "loss": 1.1146, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.928, |
| "grad_norm": 0.0, |
| "learning_rate": 7.854209717842231e-07, |
| "loss": 0.5371, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.3056379556655884, |
| "learning_rate": 7.426068431000882e-07, |
| "loss": 1.3357, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.932, |
| "grad_norm": 46.47900390625, |
| "learning_rate": 7.009749855363456e-07, |
| "loss": 1.3991, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.934, |
| "grad_norm": 0.2128896415233612, |
| "learning_rate": 6.605274281709928e-07, |
| "loss": 0.6493, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.936, |
| "grad_norm": 0.0, |
| "learning_rate": 6.212661423609184e-07, |
| "loss": 1.5364, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.938, |
| "grad_norm": 1.3817780017852783, |
| "learning_rate": 5.83193041645802e-07, |
| "loss": 0.7224, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 0.0, |
| "learning_rate": 5.463099816548579e-07, |
| "loss": 0.6421, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.942, |
| "grad_norm": 2.6664462089538574, |
| "learning_rate": 5.106187600163987e-07, |
| "loss": 1.108, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.944, |
| "grad_norm": 4.678840160369873, |
| "learning_rate": 4.7612111627021175e-07, |
| "loss": 1.6731, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.946, |
| "grad_norm": 1.585053563117981, |
| "learning_rate": 4.4281873178278475e-07, |
| "loss": 0.9536, |
| "step": 4730 |
| }, |
| { |
| "epoch": 0.948, |
| "grad_norm": 0.0, |
| "learning_rate": 4.107132296653549e-07, |
| "loss": 0.811, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 3.8119912147521973, |
| "learning_rate": 3.7980617469479953e-07, |
| "loss": 1.1257, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.952, |
| "grad_norm": 0.0, |
| "learning_rate": 3.5009907323737825e-07, |
| "loss": 1.5451, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.954, |
| "grad_norm": 13.099092483520508, |
| "learning_rate": 3.215933731753024e-07, |
| "loss": 1.2267, |
| "step": 4770 |
| }, |
| { |
| "epoch": 0.956, |
| "grad_norm": 0.0, |
| "learning_rate": 2.942904638361804e-07, |
| "loss": 0.7739, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.958, |
| "grad_norm": 4.4148335456848145, |
| "learning_rate": 2.681916759252917e-07, |
| "loss": 1.1737, |
| "step": 4790 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4329828146074095e-07, |
| "loss": 2.5418, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.962, |
| "grad_norm": 1.5763373374938965, |
| "learning_rate": 2.1961149371145795e-07, |
| "loss": 1.3623, |
| "step": 4810 |
| }, |
| { |
| "epoch": 0.964, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9713246713805588e-07, |
| "loss": 1.2778, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.966, |
| "grad_norm": 5.108182907104492, |
| "learning_rate": 1.7586229733657644e-07, |
| "loss": 1.3646, |
| "step": 4830 |
| }, |
| { |
| "epoch": 0.968, |
| "grad_norm": 21.954763412475586, |
| "learning_rate": 1.5580202098509077e-07, |
| "loss": 2.2615, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.4642102718353271, |
| "learning_rate": 1.3695261579316777e-07, |
| "loss": 1.3617, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.972, |
| "grad_norm": 1.9794245958328247, |
| "learning_rate": 1.193150004542204e-07, |
| "loss": 1.1849, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.974, |
| "grad_norm": 3.215317487716675, |
| "learning_rate": 1.0289003460074165e-07, |
| "loss": 0.8638, |
| "step": 4870 |
| }, |
| { |
| "epoch": 0.976, |
| "grad_norm": 2.1437785625457764, |
| "learning_rate": 8.767851876239074e-08, |
| "loss": 1.9723, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.978, |
| "grad_norm": 0.0, |
| "learning_rate": 7.368119432699383e-08, |
| "loss": 0.862, |
| "step": 4890 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 2.7813355922698975, |
| "learning_rate": 6.089874350439506e-08, |
| "loss": 1.0444, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.982, |
| "grad_norm": 2.3938515186309814, |
| "learning_rate": 4.9331789293211026e-08, |
| "loss": 1.3459, |
| "step": 4910 |
| }, |
| { |
| "epoch": 0.984, |
| "grad_norm": 1.8486565351486206, |
| "learning_rate": 3.8980895450474455e-08, |
| "loss": 0.8181, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.986, |
| "grad_norm": 5.9725494384765625, |
| "learning_rate": 2.9846566464150626e-08, |
| "loss": 0.8222, |
| "step": 4930 |
| }, |
| { |
| "epoch": 0.988, |
| "grad_norm": 0.0, |
| "learning_rate": 2.192924752854042e-08, |
| "loss": 1.6087, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.0317845344543457, |
| "learning_rate": 1.522932452260595e-08, |
| "loss": 0.8004, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.992, |
| "grad_norm": 14.324555397033691, |
| "learning_rate": 9.747123991141194e-09, |
| "loss": 0.9372, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.994, |
| "grad_norm": 2.1189212799072266, |
| "learning_rate": 5.48291312886251e-09, |
| "loss": 1.4132, |
| "step": 4970 |
| }, |
| { |
| "epoch": 0.996, |
| "grad_norm": 2.8049211502075195, |
| "learning_rate": 2.4368997673940297e-09, |
| "loss": 1.172, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.998, |
| "grad_norm": 1.8685941696166992, |
| "learning_rate": 6.092323651313292e-10, |
| "loss": 1.6231, |
| "step": 4990 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.8898677229881287, |
| "learning_rate": 0.0, |
| "loss": 0.94, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 5000, |
| "total_flos": 6806517818228736.0, |
| "train_loss": 1.3851989124298096, |
| "train_runtime": 652.6593, |
| "train_samples_per_second": 7.661, |
| "train_steps_per_second": 7.661 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 5000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 4000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6806517818228736.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
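
The JSON above is a standard Hugging Face `Trainer` state file: `log_history` holds one entry per `logging_steps` (10) interval, and its final entry carries the run-level aggregates (`train_loss`, `train_runtime`, and so on) instead of a per-step `loss`. As a minimal sketch of how such a file can be consumed, the snippet below loads it, separates the per-step entries from the summary entry, and smooths the noisy per-10-step losses with a moving average. The filename `trainer_state.json` and the window size of 20 are illustrative assumptions, not part of the log itself.

```python
import json

# Load the trainer state (the path is an assumption for illustration).
with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step entries have "loss" and "learning_rate"; the final summary
# entry ("train_loss", "train_runtime", ...) does not, so it is skipped.
logs = [e for e in state["log_history"] if "loss" in e and "learning_rate" in e]
losses = [e["loss"] for e in logs]

# Moving average over the raw losses (window of 20 logged points,
# i.e. 200 optimizer steps here, is an arbitrary illustrative choice).
window = 20
smoothed, running = [], 0.0
for i, v in enumerate(losses):
    running += v
    if i >= window:
        running -= losses[i - window]
    smoothed.append(running / min(i + 1, window))

best_i = min(range(len(losses)), key=losses.__getitem__)
print(f"logged points: {len(logs)} (every {state['logging_steps']} steps)")
print(f"lowest raw loss {losses[best_i]:.4f} at step {logs[best_i]['step']}")
print(f"final {window}-point moving average: {smoothed[-1]:.4f}")
print(f"reported train_loss: {state['log_history'][-1]['train_loss']:.4f}")
```

The smoothing matters here because the raw per-10-step losses swing widely (roughly 0.38 to 3.33 in the closing stretch of the run); the averaged curve and the reported `train_loss` of about 1.385 give a more faithful picture of convergence than any single logged value.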