| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 2658, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.007524454477050414, |
| "grad_norm": 63.752017974853516, |
| "learning_rate": 1e-05, |
| "loss": 3.3312, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.015048908954100828, |
| "grad_norm": 50.676944732666016, |
| "learning_rate": 1e-05, |
| "loss": 1.525, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.022573363431151242, |
| "grad_norm": 38.975914001464844, |
| "learning_rate": 1e-05, |
| "loss": 1.2213, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.030097817908201655, |
| "grad_norm": 30.093769073486328, |
| "learning_rate": 1e-05, |
| "loss": 0.8578, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.03762227238525207, |
| "grad_norm": 27.47739028930664, |
| "learning_rate": 1e-05, |
| "loss": 0.8364, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.045146726862302484, |
| "grad_norm": 26.15501594543457, |
| "learning_rate": 1e-05, |
| "loss": 0.8731, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0526711813393529, |
| "grad_norm": 30.096651077270508, |
| "learning_rate": 1e-05, |
| "loss": 0.5887, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.06019563581640331, |
| "grad_norm": 21.479469299316406, |
| "learning_rate": 1e-05, |
| "loss": 0.6146, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.06772009029345373, |
| "grad_norm": 25.00172996520996, |
| "learning_rate": 1e-05, |
| "loss": 0.7224, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.07524454477050414, |
| "grad_norm": 19.167516708374023, |
| "learning_rate": 1e-05, |
| "loss": 0.6965, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.08276899924755456, |
| "grad_norm": 25.692691802978516, |
| "learning_rate": 1e-05, |
| "loss": 0.5539, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.09029345372460497, |
| "grad_norm": 19.874868392944336, |
| "learning_rate": 1e-05, |
| "loss": 0.5842, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.09781790820165538, |
| "grad_norm": 22.329219818115234, |
| "learning_rate": 1e-05, |
| "loss": 0.6003, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.1053423626787058, |
| "grad_norm": 14.971156120300293, |
| "learning_rate": 1e-05, |
| "loss": 0.5671, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.11286681715575621, |
| "grad_norm": 14.223251342773438, |
| "learning_rate": 1e-05, |
| "loss": 0.5096, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.12039127163280662, |
| "grad_norm": 19.30224609375, |
| "learning_rate": 1e-05, |
| "loss": 0.5067, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.12791572610985705, |
| "grad_norm": 14.27910327911377, |
| "learning_rate": 1e-05, |
| "loss": 0.5281, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.13544018058690746, |
| "grad_norm": 19.874217987060547, |
| "learning_rate": 1e-05, |
| "loss": 0.4569, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.14296463506395787, |
| "grad_norm": 16.16669273376465, |
| "learning_rate": 1e-05, |
| "loss": 0.5304, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.1504890895410083, |
| "grad_norm": 13.952829360961914, |
| "learning_rate": 1e-05, |
| "loss": 0.4415, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.1580135440180587, |
| "grad_norm": 16.3791446685791, |
| "learning_rate": 1e-05, |
| "loss": 0.5214, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.1655379984951091, |
| "grad_norm": 11.849374771118164, |
| "learning_rate": 1e-05, |
| "loss": 0.4079, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.17306245297215953, |
| "grad_norm": 17.892818450927734, |
| "learning_rate": 1e-05, |
| "loss": 0.562, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.18058690744920994, |
| "grad_norm": 11.97033977508545, |
| "learning_rate": 1e-05, |
| "loss": 0.3959, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.18811136192626035, |
| "grad_norm": 10.604959487915039, |
| "learning_rate": 1e-05, |
| "loss": 0.4519, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.19563581640331076, |
| "grad_norm": 10.842782974243164, |
| "learning_rate": 1e-05, |
| "loss": 0.4617, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.20316027088036118, |
| "grad_norm": 10.904434204101562, |
| "learning_rate": 1e-05, |
| "loss": 0.4858, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.2106847253574116, |
| "grad_norm": 9.698153495788574, |
| "learning_rate": 1e-05, |
| "loss": 0.4126, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.218209179834462, |
| "grad_norm": 12.699883460998535, |
| "learning_rate": 1e-05, |
| "loss": 0.4756, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.22573363431151242, |
| "grad_norm": 14.62389850616455, |
| "learning_rate": 1e-05, |
| "loss": 0.3576, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.23325808878856283, |
| "grad_norm": 12.436488151550293, |
| "learning_rate": 1e-05, |
| "loss": 0.3493, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.24078254326561324, |
| "grad_norm": 17.488454818725586, |
| "learning_rate": 1e-05, |
| "loss": 0.4812, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.24830699774266365, |
| "grad_norm": 15.149370193481445, |
| "learning_rate": 1e-05, |
| "loss": 0.4218, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.2558314522197141, |
| "grad_norm": 11.76059341430664, |
| "learning_rate": 1e-05, |
| "loss": 0.3729, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.2633559066967645, |
| "grad_norm": 15.72620964050293, |
| "learning_rate": 1e-05, |
| "loss": 0.2586, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.2708803611738149, |
| "grad_norm": 16.726228713989258, |
| "learning_rate": 1e-05, |
| "loss": 0.4295, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.27840481565086533, |
| "grad_norm": 12.156024932861328, |
| "learning_rate": 1e-05, |
| "loss": 0.3179, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.28592927012791575, |
| "grad_norm": 12.32470417022705, |
| "learning_rate": 1e-05, |
| "loss": 0.3523, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.29345372460496616, |
| "grad_norm": 17.34354591369629, |
| "learning_rate": 1e-05, |
| "loss": 0.3724, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.3009781790820166, |
| "grad_norm": 9.95320987701416, |
| "learning_rate": 1e-05, |
| "loss": 0.4028, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.308502633559067, |
| "grad_norm": 10.40683650970459, |
| "learning_rate": 1e-05, |
| "loss": 0.5469, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.3160270880361174, |
| "grad_norm": 12.613582611083984, |
| "learning_rate": 1e-05, |
| "loss": 0.3672, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.3235515425131678, |
| "grad_norm": 13.326891899108887, |
| "learning_rate": 1e-05, |
| "loss": 0.3514, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.3310759969902182, |
| "grad_norm": 8.356232643127441, |
| "learning_rate": 1e-05, |
| "loss": 0.292, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.33860045146726864, |
| "grad_norm": 14.835829734802246, |
| "learning_rate": 1e-05, |
| "loss": 0.3765, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.34612490594431905, |
| "grad_norm": 7.975886821746826, |
| "learning_rate": 1e-05, |
| "loss": 0.3667, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.35364936042136946, |
| "grad_norm": 10.029479026794434, |
| "learning_rate": 1e-05, |
| "loss": 0.3788, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.3611738148984199, |
| "grad_norm": 11.4894437789917, |
| "learning_rate": 1e-05, |
| "loss": 0.309, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.3686982693754703, |
| "grad_norm": 12.190320014953613, |
| "learning_rate": 1e-05, |
| "loss": 0.3398, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.3762227238525207, |
| "grad_norm": 12.104024887084961, |
| "learning_rate": 1e-05, |
| "loss": 0.3907, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.3837471783295711, |
| "grad_norm": 11.915987014770508, |
| "learning_rate": 1e-05, |
| "loss": 0.3278, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.3912716328066215, |
| "grad_norm": 16.552160263061523, |
| "learning_rate": 1e-05, |
| "loss": 0.3745, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.39879608728367194, |
| "grad_norm": 11.056331634521484, |
| "learning_rate": 1e-05, |
| "loss": 0.4694, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.40632054176072235, |
| "grad_norm": 10.76766586303711, |
| "learning_rate": 1e-05, |
| "loss": 0.3631, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.41384499623777277, |
| "grad_norm": 10.77774715423584, |
| "learning_rate": 1e-05, |
| "loss": 0.3448, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.4213694507148232, |
| "grad_norm": 11.11598014831543, |
| "learning_rate": 1e-05, |
| "loss": 0.339, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.4288939051918736, |
| "grad_norm": 8.696084976196289, |
| "learning_rate": 1e-05, |
| "loss": 0.4023, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.436418359668924, |
| "grad_norm": 15.626012802124023, |
| "learning_rate": 1e-05, |
| "loss": 0.4638, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.4439428141459744, |
| "grad_norm": 14.812833786010742, |
| "learning_rate": 1e-05, |
| "loss": 0.468, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.45146726862302483, |
| "grad_norm": 11.22861385345459, |
| "learning_rate": 1e-05, |
| "loss": 0.3772, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.45899172310007524, |
| "grad_norm": 14.62263011932373, |
| "learning_rate": 1e-05, |
| "loss": 0.3682, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.46651617757712566, |
| "grad_norm": 10.826017379760742, |
| "learning_rate": 1e-05, |
| "loss": 0.3671, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.47404063205417607, |
| "grad_norm": 9.838117599487305, |
| "learning_rate": 1e-05, |
| "loss": 0.3459, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.4815650865312265, |
| "grad_norm": 7.919167518615723, |
| "learning_rate": 1e-05, |
| "loss": 0.2914, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.4890895410082769, |
| "grad_norm": 4.093368053436279, |
| "learning_rate": 1e-05, |
| "loss": 0.3329, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.4966139954853273, |
| "grad_norm": 12.66010856628418, |
| "learning_rate": 1e-05, |
| "loss": 0.4115, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.5041384499623778, |
| "grad_norm": 11.424004554748535, |
| "learning_rate": 1e-05, |
| "loss": 0.4033, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.5116629044394282, |
| "grad_norm": 9.730168342590332, |
| "learning_rate": 1e-05, |
| "loss": 0.3893, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.5191873589164786, |
| "grad_norm": 9.054938316345215, |
| "learning_rate": 1e-05, |
| "loss": 0.3922, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.526711813393529, |
| "grad_norm": 10.94675350189209, |
| "learning_rate": 1e-05, |
| "loss": 0.3291, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.5342362678705794, |
| "grad_norm": 12.961570739746094, |
| "learning_rate": 1e-05, |
| "loss": 0.3225, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.5417607223476298, |
| "grad_norm": 8.719619750976562, |
| "learning_rate": 1e-05, |
| "loss": 0.3267, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.5492851768246803, |
| "grad_norm": 10.847646713256836, |
| "learning_rate": 1e-05, |
| "loss": 0.4268, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.5568096313017307, |
| "grad_norm": 11.188985824584961, |
| "learning_rate": 1e-05, |
| "loss": 0.4236, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.5643340857787811, |
| "grad_norm": 13.59192943572998, |
| "learning_rate": 1e-05, |
| "loss": 0.4197, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.5718585402558315, |
| "grad_norm": 10.489006042480469, |
| "learning_rate": 1e-05, |
| "loss": 0.3314, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.5793829947328819, |
| "grad_norm": 11.065324783325195, |
| "learning_rate": 1e-05, |
| "loss": 0.3667, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.5869074492099323, |
| "grad_norm": 12.28297233581543, |
| "learning_rate": 1e-05, |
| "loss": 0.3189, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.5944319036869827, |
| "grad_norm": 9.553642272949219, |
| "learning_rate": 1e-05, |
| "loss": 0.3991, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.6019563581640331, |
| "grad_norm": 11.755203247070312, |
| "learning_rate": 1e-05, |
| "loss": 0.3528, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.6094808126410836, |
| "grad_norm": 7.8607306480407715, |
| "learning_rate": 1e-05, |
| "loss": 0.3292, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.617005267118134, |
| "grad_norm": 10.472386360168457, |
| "learning_rate": 1e-05, |
| "loss": 0.3549, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.6245297215951844, |
| "grad_norm": 9.280732154846191, |
| "learning_rate": 1e-05, |
| "loss": 0.3183, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.6320541760722348, |
| "grad_norm": 9.160599708557129, |
| "learning_rate": 1e-05, |
| "loss": 0.3686, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.6395786305492852, |
| "grad_norm": 10.545658111572266, |
| "learning_rate": 1e-05, |
| "loss": 0.3517, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.6471030850263356, |
| "grad_norm": 11.327434539794922, |
| "learning_rate": 1e-05, |
| "loss": 0.3465, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.654627539503386, |
| "grad_norm": 12.003908157348633, |
| "learning_rate": 1e-05, |
| "loss": 0.2845, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.6621519939804364, |
| "grad_norm": 9.960043907165527, |
| "learning_rate": 1e-05, |
| "loss": 0.3255, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.6696764484574869, |
| "grad_norm": 11.36705207824707, |
| "learning_rate": 1e-05, |
| "loss": 0.3724, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.6772009029345373, |
| "grad_norm": 9.673847198486328, |
| "learning_rate": 1e-05, |
| "loss": 0.3524, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.6847253574115877, |
| "grad_norm": 10.644118309020996, |
| "learning_rate": 1e-05, |
| "loss": 0.3988, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.6922498118886381, |
| "grad_norm": 11.484865188598633, |
| "learning_rate": 1e-05, |
| "loss": 0.3601, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.6997742663656885, |
| "grad_norm": 7.940932750701904, |
| "learning_rate": 1e-05, |
| "loss": 0.2722, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.7072987208427389, |
| "grad_norm": 9.51900863647461, |
| "learning_rate": 1e-05, |
| "loss": 0.2814, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.7148231753197893, |
| "grad_norm": 14.423086166381836, |
| "learning_rate": 1e-05, |
| "loss": 0.4054, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.7223476297968398, |
| "grad_norm": 12.655383110046387, |
| "learning_rate": 1e-05, |
| "loss": 0.3125, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.7298720842738902, |
| "grad_norm": 13.050726890563965, |
| "learning_rate": 1e-05, |
| "loss": 0.3368, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.7373965387509406, |
| "grad_norm": 8.44699764251709, |
| "learning_rate": 1e-05, |
| "loss": 0.3162, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.744920993227991, |
| "grad_norm": 9.112492561340332, |
| "learning_rate": 1e-05, |
| "loss": 0.3428, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.7524454477050414, |
| "grad_norm": 7.576210975646973, |
| "learning_rate": 1e-05, |
| "loss": 0.2674, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.7599699021820918, |
| "grad_norm": 10.723271369934082, |
| "learning_rate": 1e-05, |
| "loss": 0.313, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.7674943566591422, |
| "grad_norm": 11.943977355957031, |
| "learning_rate": 1e-05, |
| "loss": 0.321, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.7750188111361926, |
| "grad_norm": 9.523961067199707, |
| "learning_rate": 1e-05, |
| "loss": 0.3475, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.782543265613243, |
| "grad_norm": 10.895538330078125, |
| "learning_rate": 1e-05, |
| "loss": 0.3763, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.7900677200902935, |
| "grad_norm": 6.124391078948975, |
| "learning_rate": 1e-05, |
| "loss": 0.251, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.7975921745673439, |
| "grad_norm": 9.036330223083496, |
| "learning_rate": 1e-05, |
| "loss": 0.3976, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.8051166290443943, |
| "grad_norm": 9.179152488708496, |
| "learning_rate": 1e-05, |
| "loss": 0.3052, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.8126410835214447, |
| "grad_norm": 6.6380157470703125, |
| "learning_rate": 1e-05, |
| "loss": 0.3397, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.8201655379984951, |
| "grad_norm": 9.615362167358398, |
| "learning_rate": 1e-05, |
| "loss": 0.3091, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.8276899924755455, |
| "grad_norm": 11.812026977539062, |
| "learning_rate": 1e-05, |
| "loss": 0.3156, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.835214446952596, |
| "grad_norm": 12.034916877746582, |
| "learning_rate": 1e-05, |
| "loss": 0.2479, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.8427389014296464, |
| "grad_norm": 13.908053398132324, |
| "learning_rate": 1e-05, |
| "loss": 0.256, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.8502633559066968, |
| "grad_norm": 11.581817626953125, |
| "learning_rate": 1e-05, |
| "loss": 0.3175, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.8577878103837472, |
| "grad_norm": 11.642997741699219, |
| "learning_rate": 1e-05, |
| "loss": 0.25, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.8653122648607976, |
| "grad_norm": 10.813202857971191, |
| "learning_rate": 1e-05, |
| "loss": 0.3413, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.872836719337848, |
| "grad_norm": 12.767478942871094, |
| "learning_rate": 1e-05, |
| "loss": 0.3094, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.8803611738148984, |
| "grad_norm": 9.270513534545898, |
| "learning_rate": 1e-05, |
| "loss": 0.2807, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.8878856282919488, |
| "grad_norm": 7.739561557769775, |
| "learning_rate": 1e-05, |
| "loss": 0.2789, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.8954100827689992, |
| "grad_norm": 7.118448734283447, |
| "learning_rate": 1e-05, |
| "loss": 0.2752, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.9029345372460497, |
| "grad_norm": 11.153009414672852, |
| "learning_rate": 1e-05, |
| "loss": 0.32, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.9104589917231001, |
| "grad_norm": 10.667895317077637, |
| "learning_rate": 1e-05, |
| "loss": 0.2285, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.9179834462001505, |
| "grad_norm": 12.081469535827637, |
| "learning_rate": 1e-05, |
| "loss": 0.3278, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.9255079006772009, |
| "grad_norm": 8.599586486816406, |
| "learning_rate": 1e-05, |
| "loss": 0.3049, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.9330323551542513, |
| "grad_norm": 10.062015533447266, |
| "learning_rate": 1e-05, |
| "loss": 0.3583, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.9405568096313017, |
| "grad_norm": 8.24731159210205, |
| "learning_rate": 1e-05, |
| "loss": 0.3162, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.9480812641083521, |
| "grad_norm": 8.026958465576172, |
| "learning_rate": 1e-05, |
| "loss": 0.3082, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.9556057185854026, |
| "grad_norm": 8.996962547302246, |
| "learning_rate": 1e-05, |
| "loss": 0.3969, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.963130173062453, |
| "grad_norm": 7.346575736999512, |
| "learning_rate": 1e-05, |
| "loss": 0.2905, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.9706546275395034, |
| "grad_norm": 7.704085826873779, |
| "learning_rate": 1e-05, |
| "loss": 0.2655, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.9781790820165538, |
| "grad_norm": 9.016671180725098, |
| "learning_rate": 1e-05, |
| "loss": 0.3071, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.9857035364936042, |
| "grad_norm": 10.028645515441895, |
| "learning_rate": 1e-05, |
| "loss": 0.3077, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.9932279909706546, |
| "grad_norm": 9.28890609741211, |
| "learning_rate": 1e-05, |
| "loss": 0.3317, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.000752445447705, |
| "grad_norm": 9.905320167541504, |
| "learning_rate": 1e-05, |
| "loss": 0.31, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.0082768999247556, |
| "grad_norm": 8.012602806091309, |
| "learning_rate": 1e-05, |
| "loss": 0.2343, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.0158013544018059, |
| "grad_norm": 6.548900127410889, |
| "learning_rate": 1e-05, |
| "loss": 0.2686, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.0233258088788564, |
| "grad_norm": 9.645492553710938, |
| "learning_rate": 1e-05, |
| "loss": 0.3046, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.0308502633559067, |
| "grad_norm": 10.740938186645508, |
| "learning_rate": 1e-05, |
| "loss": 0.3873, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.0383747178329572, |
| "grad_norm": 11.067444801330566, |
| "learning_rate": 1e-05, |
| "loss": 0.3229, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.0458991723100075, |
| "grad_norm": 7.437419891357422, |
| "learning_rate": 1e-05, |
| "loss": 0.2907, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.053423626787058, |
| "grad_norm": 5.831209659576416, |
| "learning_rate": 1e-05, |
| "loss": 0.3107, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.0609480812641083, |
| "grad_norm": 8.783834457397461, |
| "learning_rate": 1e-05, |
| "loss": 0.3269, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.0684725357411589, |
| "grad_norm": 12.042133331298828, |
| "learning_rate": 1e-05, |
| "loss": 0.2861, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.0759969902182092, |
| "grad_norm": 10.743906021118164, |
| "learning_rate": 1e-05, |
| "loss": 0.3, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.0835214446952597, |
| "grad_norm": 10.540002822875977, |
| "learning_rate": 1e-05, |
| "loss": 0.2708, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.09104589917231, |
| "grad_norm": 7.265504837036133, |
| "learning_rate": 1e-05, |
| "loss": 0.2723, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.0985703536493605, |
| "grad_norm": 5.650593280792236, |
| "learning_rate": 1e-05, |
| "loss": 0.3101, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.1060948081264108, |
| "grad_norm": 10.168730735778809, |
| "learning_rate": 1e-05, |
| "loss": 0.291, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.1136192626034613, |
| "grad_norm": 6.533019542694092, |
| "learning_rate": 1e-05, |
| "loss": 0.2925, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.1211437170805116, |
| "grad_norm": 9.97232437133789, |
| "learning_rate": 1e-05, |
| "loss": 0.2983, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.1286681715575622, |
| "grad_norm": 8.263399124145508, |
| "learning_rate": 1e-05, |
| "loss": 0.2818, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.1361926260346125, |
| "grad_norm": 8.396636962890625, |
| "learning_rate": 1e-05, |
| "loss": 0.3436, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.143717080511663, |
| "grad_norm": 13.860685348510742, |
| "learning_rate": 1e-05, |
| "loss": 0.4241, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.1512415349887133, |
| "grad_norm": 8.995695114135742, |
| "learning_rate": 1e-05, |
| "loss": 0.2742, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.1587659894657638, |
| "grad_norm": 12.496316909790039, |
| "learning_rate": 1e-05, |
| "loss": 0.3052, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.1662904439428141, |
| "grad_norm": 7.071567535400391, |
| "learning_rate": 1e-05, |
| "loss": 0.3227, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.1738148984198646, |
| "grad_norm": 9.216208457946777, |
| "learning_rate": 1e-05, |
| "loss": 0.2476, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.181339352896915, |
| "grad_norm": 7.963762283325195, |
| "learning_rate": 1e-05, |
| "loss": 0.2883, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.1888638073739655, |
| "grad_norm": 11.962204933166504, |
| "learning_rate": 1e-05, |
| "loss": 0.2792, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.1963882618510158, |
| "grad_norm": 11.451403617858887, |
| "learning_rate": 1e-05, |
| "loss": 0.3362, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.2039127163280663, |
| "grad_norm": 11.551766395568848, |
| "learning_rate": 1e-05, |
| "loss": 0.2431, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.2114371708051166, |
| "grad_norm": 5.913654327392578, |
| "learning_rate": 1e-05, |
| "loss": 0.277, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.2189616252821671, |
| "grad_norm": 13.468070030212402, |
| "learning_rate": 1e-05, |
| "loss": 0.3292, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.2264860797592174, |
| "grad_norm": 8.323406219482422, |
| "learning_rate": 1e-05, |
| "loss": 0.3182, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.234010534236268, |
| "grad_norm": 9.116568565368652, |
| "learning_rate": 1e-05, |
| "loss": 0.2328, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.2415349887133182, |
| "grad_norm": 8.88713264465332, |
| "learning_rate": 1e-05, |
| "loss": 0.2538, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.2490594431903688, |
| "grad_norm": 10.781469345092773, |
| "learning_rate": 1e-05, |
| "loss": 0.2389, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.256583897667419, |
| "grad_norm": 12.144160270690918, |
| "learning_rate": 1e-05, |
| "loss": 0.3006, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.2641083521444696, |
| "grad_norm": 7.866734027862549, |
| "learning_rate": 1e-05, |
| "loss": 0.345, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.27163280662152, |
| "grad_norm": 7.459820747375488, |
| "learning_rate": 1e-05, |
| "loss": 0.2482, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.2791572610985704, |
| "grad_norm": 7.7605109214782715, |
| "learning_rate": 1e-05, |
| "loss": 0.214, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.2866817155756207, |
| "grad_norm": 9.145365715026855, |
| "learning_rate": 1e-05, |
| "loss": 0.3057, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.2942061700526712, |
| "grad_norm": 10.521879196166992, |
| "learning_rate": 1e-05, |
| "loss": 0.3376, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.3017306245297215, |
| "grad_norm": 5.132536888122559, |
| "learning_rate": 1e-05, |
| "loss": 0.2723, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.309255079006772, |
| "grad_norm": 10.422348022460938, |
| "learning_rate": 1e-05, |
| "loss": 0.2769, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.3167795334838224, |
| "grad_norm": 9.99517822265625, |
| "learning_rate": 1e-05, |
| "loss": 0.3455, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.324303987960873, |
| "grad_norm": 6.903396129608154, |
| "learning_rate": 1e-05, |
| "loss": 0.2813, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.3318284424379232, |
| "grad_norm": 5.721127986907959, |
| "learning_rate": 1e-05, |
| "loss": 0.2824, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.3393528969149737, |
| "grad_norm": 9.914773941040039, |
| "learning_rate": 1e-05, |
| "loss": 0.3353, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.346877351392024, |
| "grad_norm": 7.985681056976318, |
| "learning_rate": 1e-05, |
| "loss": 0.3295, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.3544018058690745, |
| "grad_norm": 10.242146492004395, |
| "learning_rate": 1e-05, |
| "loss": 0.3519, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.3619262603461249, |
| "grad_norm": 8.590888977050781, |
| "learning_rate": 1e-05, |
| "loss": 0.3171, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.3694507148231754, |
| "grad_norm": 9.34271240234375, |
| "learning_rate": 1e-05, |
| "loss": 0.3385, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.3769751693002257, |
| "grad_norm": 8.391048431396484, |
| "learning_rate": 1e-05, |
| "loss": 0.2975, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.3844996237772762, |
| "grad_norm": 11.190972328186035, |
| "learning_rate": 1e-05, |
| "loss": 0.3412, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.3920240782543265, |
| "grad_norm": 8.990412712097168, |
| "learning_rate": 1e-05, |
| "loss": 0.2985, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.399548532731377, |
| "grad_norm": 6.625011920928955, |
| "learning_rate": 1e-05, |
| "loss": 0.2485, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.4070729872084273, |
| "grad_norm": 8.91481876373291, |
| "learning_rate": 1e-05, |
| "loss": 0.3527, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.4145974416854779, |
| "grad_norm": 7.358391761779785, |
| "learning_rate": 1e-05, |
| "loss": 0.3243, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.4221218961625282, |
| "grad_norm": 6.641557216644287, |
| "learning_rate": 1e-05, |
| "loss": 0.242, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.4296463506395787, |
| "grad_norm": 8.88590145111084, |
| "learning_rate": 1e-05, |
| "loss": 0.2933, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.437170805116629, |
| "grad_norm": 9.170287132263184, |
| "learning_rate": 1e-05, |
| "loss": 0.2567, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.4446952595936795, |
| "grad_norm": 9.2968168258667, |
| "learning_rate": 1e-05, |
| "loss": 0.2649, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.4522197140707298, |
| "grad_norm": 8.246125221252441, |
| "learning_rate": 1e-05, |
| "loss": 0.2668, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.4597441685477803, |
| "grad_norm": 10.747627258300781, |
| "learning_rate": 1e-05, |
| "loss": 0.2905, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.4672686230248306, |
| "grad_norm": 7.501006603240967, |
| "learning_rate": 1e-05, |
| "loss": 0.3359, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.4747930775018812, |
| "grad_norm": 9.075075149536133, |
| "learning_rate": 1e-05, |
| "loss": 0.3039, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.4823175319789315, |
| "grad_norm": 9.139381408691406, |
| "learning_rate": 1e-05, |
| "loss": 0.3024, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.489841986455982, |
| "grad_norm": 10.483285903930664, |
| "learning_rate": 1e-05, |
| "loss": 0.2997, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.4973664409330323, |
| "grad_norm": 10.016528129577637, |
| "learning_rate": 1e-05, |
| "loss": 0.3105, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.5048908954100828, |
| "grad_norm": 7.966375827789307, |
| "learning_rate": 1e-05, |
| "loss": 0.3121, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.5124153498871333, |
| "grad_norm": 6.316532135009766, |
| "learning_rate": 1e-05, |
| "loss": 0.3148, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.5199398043641836, |
| "grad_norm": 9.16601276397705, |
| "learning_rate": 1e-05, |
| "loss": 0.2695, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.527464258841234, |
| "grad_norm": 4.997910499572754, |
| "learning_rate": 1e-05, |
| "loss": 0.2746, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.5349887133182845, |
| "grad_norm": 9.328558921813965, |
| "learning_rate": 1e-05, |
| "loss": 0.2873, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.542513167795335, |
| "grad_norm": 7.824413299560547, |
| "learning_rate": 1e-05, |
| "loss": 0.2925, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.5500376222723853, |
| "grad_norm": 8.306281089782715, |
| "learning_rate": 1e-05, |
| "loss": 0.3059, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.5575620767494356, |
| "grad_norm": 11.394743919372559, |
| "learning_rate": 1e-05, |
| "loss": 0.2995, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.565086531226486, |
| "grad_norm": 8.414088249206543, |
| "learning_rate": 1e-05, |
| "loss": 0.2914, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.5726109857035366, |
| "grad_norm": 10.023848533630371, |
| "learning_rate": 1e-05, |
| "loss": 0.239, |
| "step": 2090 |
| }, |
| { |
| "epoch": 1.580135440180587, |
| "grad_norm": 8.992568016052246, |
| "learning_rate": 1e-05, |
| "loss": 0.2613, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.5876598946576372, |
| "grad_norm": 9.712190628051758, |
| "learning_rate": 1e-05, |
| "loss": 0.2558, |
| "step": 2110 |
| }, |
| { |
| "epoch": 1.5951843491346878, |
| "grad_norm": 7.813495635986328, |
| "learning_rate": 1e-05, |
| "loss": 0.218, |
| "step": 2120 |
| }, |
| { |
| "epoch": 1.6027088036117383, |
| "grad_norm": 6.5997443199157715, |
| "learning_rate": 1e-05, |
| "loss": 0.285, |
| "step": 2130 |
| }, |
| { |
| "epoch": 1.6102332580887886, |
| "grad_norm": 11.630515098571777, |
| "learning_rate": 1e-05, |
| "loss": 0.2677, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.617757712565839, |
| "grad_norm": 7.1673359870910645, |
| "learning_rate": 1e-05, |
| "loss": 0.2845, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.6252821670428894, |
| "grad_norm": 10.230573654174805, |
| "learning_rate": 1e-05, |
| "loss": 0.2695, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.63280662151994, |
| "grad_norm": 7.908997535705566, |
| "learning_rate": 1e-05, |
| "loss": 0.2732, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.6403310759969902, |
| "grad_norm": 6.134283542633057, |
| "learning_rate": 1e-05, |
| "loss": 0.2613, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.6478555304740405, |
| "grad_norm": 9.178680419921875, |
| "learning_rate": 1e-05, |
| "loss": 0.3006, |
| "step": 2190 |
| }, |
| { |
| "epoch": 1.655379984951091, |
| "grad_norm": 8.385205268859863, |
| "learning_rate": 1e-05, |
| "loss": 0.2391, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.6629044394281416, |
| "grad_norm": 8.024893760681152, |
| "learning_rate": 1e-05, |
| "loss": 0.3149, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.670428893905192, |
| "grad_norm": 7.160342216491699, |
| "learning_rate": 1e-05, |
| "loss": 0.2321, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.6779533483822422, |
| "grad_norm": 9.467365264892578, |
| "learning_rate": 1e-05, |
| "loss": 0.3183, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.6854778028592927, |
| "grad_norm": 8.656280517578125, |
| "learning_rate": 1e-05, |
| "loss": 0.3128, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.6930022573363432, |
| "grad_norm": 5.692852973937988, |
| "learning_rate": 1e-05, |
| "loss": 0.3139, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.7005267118133935, |
| "grad_norm": 7.600724220275879, |
| "learning_rate": 1e-05, |
| "loss": 0.249, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.7080511662904438, |
| "grad_norm": 7.849809646606445, |
| "learning_rate": 1e-05, |
| "loss": 0.2587, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.7155756207674944, |
| "grad_norm": 8.293899536132812, |
| "learning_rate": 1e-05, |
| "loss": 0.2805, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.723100075244545, |
| "grad_norm": 5.557303428649902, |
| "learning_rate": 1e-05, |
| "loss": 0.2789, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.7306245297215952, |
| "grad_norm": 10.24751091003418, |
| "learning_rate": 1e-05, |
| "loss": 0.3244, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.7381489841986455, |
| "grad_norm": 7.778582572937012, |
| "learning_rate": 1e-05, |
| "loss": 0.3005, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.745673438675696, |
| "grad_norm": 7.266477584838867, |
| "learning_rate": 1e-05, |
| "loss": 0.2207, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.7531978931527465, |
| "grad_norm": 10.221525192260742, |
| "learning_rate": 1e-05, |
| "loss": 0.2742, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.7607223476297968, |
| "grad_norm": 8.85750961303711, |
| "learning_rate": 1e-05, |
| "loss": 0.265, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.7682468021068471, |
| "grad_norm": 8.104692459106445, |
| "learning_rate": 1e-05, |
| "loss": 0.277, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.7757712565838977, |
| "grad_norm": 8.834745407104492, |
| "learning_rate": 1e-05, |
| "loss": 0.2315, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.7832957110609482, |
| "grad_norm": 10.258501052856445, |
| "learning_rate": 1e-05, |
| "loss": 0.2921, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.7908201655379985, |
| "grad_norm": 8.523322105407715, |
| "learning_rate": 1e-05, |
| "loss": 0.2043, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.7983446200150488, |
| "grad_norm": 9.75324821472168, |
| "learning_rate": 1e-05, |
| "loss": 0.2187, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.8058690744920993, |
| "grad_norm": 6.369287490844727, |
| "learning_rate": 1e-05, |
| "loss": 0.2365, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.8133935289691498, |
| "grad_norm": 6.650455951690674, |
| "learning_rate": 1e-05, |
| "loss": 0.3027, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.8209179834462002, |
| "grad_norm": 7.7705397605896, |
| "learning_rate": 1e-05, |
| "loss": 0.2942, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.8284424379232505, |
| "grad_norm": 8.137877464294434, |
| "learning_rate": 1e-05, |
| "loss": 0.3317, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.835966892400301, |
| "grad_norm": 8.068604469299316, |
| "learning_rate": 1e-05, |
| "loss": 0.2657, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.8434913468773515, |
| "grad_norm": 5.773308753967285, |
| "learning_rate": 1e-05, |
| "loss": 0.2706, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.8510158013544018, |
| "grad_norm": 6.239875793457031, |
| "learning_rate": 1e-05, |
| "loss": 0.2696, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.858540255831452, |
| "grad_norm": 11.373642921447754, |
| "learning_rate": 1e-05, |
| "loss": 0.2964, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.8660647103085026, |
| "grad_norm": 10.659271240234375, |
| "learning_rate": 1e-05, |
| "loss": 0.2839, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.8735891647855532, |
| "grad_norm": 7.298862934112549, |
| "learning_rate": 1e-05, |
| "loss": 0.2748, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.8811136192626035, |
| "grad_norm": 12.347573280334473, |
| "learning_rate": 1e-05, |
| "loss": 0.2654, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.8886380737396538, |
| "grad_norm": 6.894641399383545, |
| "learning_rate": 1e-05, |
| "loss": 0.2522, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.8961625282167043, |
| "grad_norm": 7.589210510253906, |
| "learning_rate": 1e-05, |
| "loss": 0.3053, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.9036869826937548, |
| "grad_norm": 6.738051891326904, |
| "learning_rate": 1e-05, |
| "loss": 0.224, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.911211437170805, |
| "grad_norm": 7.8747239112854, |
| "learning_rate": 1e-05, |
| "loss": 0.2751, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.9187358916478554, |
| "grad_norm": 6.456340789794922, |
| "learning_rate": 1e-05, |
| "loss": 0.1926, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.926260346124906, |
| "grad_norm": 9.186247825622559, |
| "learning_rate": 1e-05, |
| "loss": 0.3004, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.9337848006019565, |
| "grad_norm": 11.75734806060791, |
| "learning_rate": 1e-05, |
| "loss": 0.2749, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.9413092550790068, |
| "grad_norm": 10.06619644165039, |
| "learning_rate": 1e-05, |
| "loss": 0.3011, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.948833709556057, |
| "grad_norm": 11.47065544128418, |
| "learning_rate": 1e-05, |
| "loss": 0.3124, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.9563581640331076, |
| "grad_norm": 9.72806167602539, |
| "learning_rate": 1e-05, |
| "loss": 0.298, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.963882618510158, |
| "grad_norm": 9.43126106262207, |
| "learning_rate": 1e-05, |
| "loss": 0.2617, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.9714070729872084, |
| "grad_norm": 12.323174476623535, |
| "learning_rate": 1e-05, |
| "loss": 0.3397, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.9789315274642587, |
| "grad_norm": 7.464193820953369, |
| "learning_rate": 1e-05, |
| "loss": 0.2187, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.9864559819413092, |
| "grad_norm": 6.475297927856445, |
| "learning_rate": 1e-05, |
| "loss": 0.2879, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.9939804364183598, |
| "grad_norm": 8.84854793548584, |
| "learning_rate": 1e-05, |
| "loss": 0.2828, |
| "step": 2650 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 39870, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 30, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|