| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.30020422055820284, |
| "eval_steps": 500, |
| "global_step": 882, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0034036759700476512, |
| "grad_norm": 0.3817383944988251, |
| "learning_rate": 1.1235955056179776e-05, |
| "loss": 1.5188, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0068073519400953025, |
| "grad_norm": 0.4666265845298767, |
| "learning_rate": 2.2471910112359552e-05, |
| "loss": 1.5003, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.010211027910142955, |
| "grad_norm": 0.41875213384628296, |
| "learning_rate": 3.370786516853933e-05, |
| "loss": 1.4805, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.013614703880190605, |
| "grad_norm": 0.37079161405563354, |
| "learning_rate": 4.4943820224719104e-05, |
| "loss": 1.3878, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01701837985023826, |
| "grad_norm": 0.5143834352493286, |
| "learning_rate": 5.6179775280898885e-05, |
| "loss": 1.4275, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.02042205582028591, |
| "grad_norm": 0.36570975184440613, |
| "learning_rate": 6.741573033707866e-05, |
| "loss": 1.3778, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.023825731790333562, |
| "grad_norm": 1.1076630353927612, |
| "learning_rate": 7.865168539325843e-05, |
| "loss": 1.3833, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.02722940776038121, |
| "grad_norm": 0.43584316968917847, |
| "learning_rate": 8.988764044943821e-05, |
| "loss": 1.3653, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.03063308373042886, |
| "grad_norm": 0.6449490189552307, |
| "learning_rate": 9.999960763269511e-05, |
| "loss": 1.3612, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.03403675970047652, |
| "grad_norm": 0.408315509557724, |
| "learning_rate": 9.9952531006933e-05, |
| "loss": 1.363, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.037440435670524165, |
| "grad_norm": 0.4980515241622925, |
| "learning_rate": 9.982706557199723e-05, |
| "loss": 1.3148, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.04084411164057182, |
| "grad_norm": 0.620286762714386, |
| "learning_rate": 9.962340821653064e-05, |
| "loss": 1.3419, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.04424778761061947, |
| "grad_norm": 0.5976988077163696, |
| "learning_rate": 9.934187853309946e-05, |
| "loss": 1.3251, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.047651463580667124, |
| "grad_norm": 0.5392516851425171, |
| "learning_rate": 9.898291831666755e-05, |
| "loss": 1.2998, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.05105513955071477, |
| "grad_norm": 0.7126803398132324, |
| "learning_rate": 9.85470908713026e-05, |
| "loss": 1.3652, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.05445881552076242, |
| "grad_norm": 0.591543436050415, |
| "learning_rate": 9.803508012620217e-05, |
| "loss": 1.3153, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.057862491490810075, |
| "grad_norm": 0.4493406116962433, |
| "learning_rate": 9.744768956242683e-05, |
| "loss": 1.3256, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.06126616746085772, |
| "grad_norm": 0.814771294593811, |
| "learning_rate": 9.678584095202468e-05, |
| "loss": 1.3489, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.06466984343090537, |
| "grad_norm": 0.5826136469841003, |
| "learning_rate": 9.605057291152581e-05, |
| "loss": 1.3149, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.06807351940095303, |
| "grad_norm": 0.5115235447883606, |
| "learning_rate": 9.524303927207663e-05, |
| "loss": 1.3108, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.07147719537100068, |
| "grad_norm": 0.5263652205467224, |
| "learning_rate": 9.43645072687719e-05, |
| "loss": 1.3354, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.07488087134104833, |
| "grad_norm": 0.6202068328857422, |
| "learning_rate": 9.341635555202577e-05, |
| "loss": 1.3317, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.07828454731109598, |
| "grad_norm": 0.4816318154335022, |
| "learning_rate": 9.240007202410249e-05, |
| "loss": 1.3592, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.08168822328114364, |
| "grad_norm": 0.6449723243713379, |
| "learning_rate": 9.131725150420205e-05, |
| "loss": 1.3266, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.08509189925119129, |
| "grad_norm": 0.5435076355934143, |
| "learning_rate": 9.016959322576453e-05, |
| "loss": 1.3268, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.08849557522123894, |
| "grad_norm": 0.5929358005523682, |
| "learning_rate": 8.895889816992084e-05, |
| "loss": 1.3539, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.09189925119128659, |
| "grad_norm": 0.5826624631881714, |
| "learning_rate": 8.768706623927427e-05, |
| "loss": 1.262, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.09530292716133425, |
| "grad_norm": 0.8038996458053589, |
| "learning_rate": 8.635609327644783e-05, |
| "loss": 1.3711, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.0987066031313819, |
| "grad_norm": 0.852136492729187, |
| "learning_rate": 8.496806793207635e-05, |
| "loss": 1.296, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.10211027910142954, |
| "grad_norm": 0.4777398705482483, |
| "learning_rate": 8.352516838715799e-05, |
| "loss": 1.3339, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.10551395507147719, |
| "grad_norm": 0.6921893954277039, |
| "learning_rate": 8.202965893490878e-05, |
| "loss": 1.3019, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.10891763104152484, |
| "grad_norm": 0.5298048853874207, |
| "learning_rate": 8.048388642748425e-05, |
| "loss": 1.278, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.1123213070115725, |
| "grad_norm": 0.6031792163848877, |
| "learning_rate": 7.889027659314423e-05, |
| "loss": 1.2982, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.11572498298162015, |
| "grad_norm": 0.767808198928833, |
| "learning_rate": 7.725133022963974e-05, |
| "loss": 1.3111, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.1191286589516678, |
| "grad_norm": 0.7626794576644897, |
| "learning_rate": 7.556961927979622e-05, |
| "loss": 1.2223, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.12253233492171545, |
| "grad_norm": 0.5305209159851074, |
| "learning_rate": 7.384778279545096e-05, |
| "loss": 1.3317, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.1259360108917631, |
| "grad_norm": 0.9789229035377502, |
| "learning_rate": 7.208852279607883e-05, |
| "loss": 1.352, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.12933968686181074, |
| "grad_norm": 0.5160291790962219, |
| "learning_rate": 7.029460002860492e-05, |
| "loss": 1.2979, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.13274336283185842, |
| "grad_norm": 0.47278067469596863, |
| "learning_rate": 6.846882963505821e-05, |
| "loss": 1.2898, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.13614703880190607, |
| "grad_norm": 0.5707348585128784, |
| "learning_rate": 6.661407673486489e-05, |
| "loss": 1.3688, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.13955071477195372, |
| "grad_norm": 0.623561441898346, |
| "learning_rate": 6.473325192871382e-05, |
| "loss": 1.3058, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.14295439074200136, |
| "grad_norm": 0.7172777056694031, |
| "learning_rate": 6.282930673104965e-05, |
| "loss": 1.2926, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.146358066712049, |
| "grad_norm": 0.5847612619400024, |
| "learning_rate": 6.090522893836183e-05, |
| "loss": 1.3026, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.14976174268209666, |
| "grad_norm": 0.6956146955490112, |
| "learning_rate": 5.896403794053679e-05, |
| "loss": 1.3242, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.1531654186521443, |
| "grad_norm": 0.9769287109375, |
| "learning_rate": 5.700877998263221e-05, |
| "loss": 1.2992, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.15656909462219196, |
| "grad_norm": 1.0140455961227417, |
| "learning_rate": 5.5042523384508136e-05, |
| "loss": 1.2911, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.15997277059223963, |
| "grad_norm": 0.6622276902198792, |
| "learning_rate": 5.306835372581687e-05, |
| "loss": 1.2924, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.16337644656228728, |
| "grad_norm": 1.0274490118026733, |
| "learning_rate": 5.108936900390775e-05, |
| "loss": 1.3372, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.16678012253233493, |
| "grad_norm": 0.4924236536026001, |
| "learning_rate": 4.9108674772245144e-05, |
| "loss": 1.2659, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.17018379850238258, |
| "grad_norm": 0.5681174397468567, |
| "learning_rate": 4.712937926696903e-05, |
| "loss": 1.2672, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.17018379850238258, |
| "eval_loss": 1.492391586303711, |
| "eval_runtime": 136.4709, |
| "eval_samples_per_second": 76.522, |
| "eval_steps_per_second": 19.132, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.17358747447243023, |
| "grad_norm": 0.6138017177581787, |
| "learning_rate": 4.515458852924553e-05, |
| "loss": 1.3551, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.17699115044247787, |
| "grad_norm": 0.6779869198799133, |
| "learning_rate": 4.318740153106218e-05, |
| "loss": 1.3149, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.18039482641252552, |
| "grad_norm": 0.5932974815368652, |
| "learning_rate": 4.123090531211653e-05, |
| "loss": 1.3229, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.18379850238257317, |
| "grad_norm": 0.49759408831596375, |
| "learning_rate": 3.928817013542954e-05, |
| "loss": 1.3248, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.18720217835262082, |
| "grad_norm": 0.6779336333274841, |
| "learning_rate": 3.736224466928634e-05, |
| "loss": 1.2666, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.1906058543226685, |
| "grad_norm": 0.7689851522445679, |
| "learning_rate": 3.5456151203064515e-05, |
| "loss": 1.2989, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.19400953029271614, |
| "grad_norm": 0.7002193331718445, |
| "learning_rate": 3.357288090445827e-05, |
| "loss": 1.3551, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.1974132062627638, |
| "grad_norm": 0.7646905779838562, |
| "learning_rate": 3.171538912554054e-05, |
| "loss": 1.2812, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.20081688223281144, |
| "grad_norm": 1.0780543088912964, |
| "learning_rate": 2.988659076502946e-05, |
| "loss": 1.2798, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.2042205582028591, |
| "grad_norm": 0.7833569049835205, |
| "learning_rate": 2.808935569403688e-05, |
| "loss": 1.3266, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.20762423417290674, |
| "grad_norm": 0.5977985262870789, |
| "learning_rate": 2.6326504252477046e-05, |
| "loss": 1.2375, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.21102791014295438, |
| "grad_norm": 0.6137337684631348, |
| "learning_rate": 2.4600802823203273e-05, |
| "loss": 1.2488, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.21443158611300203, |
| "grad_norm": 0.7681064605712891, |
| "learning_rate": 2.2914959490817122e-05, |
| "loss": 1.3371, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.21783526208304968, |
| "grad_norm": 1.2434996366500854, |
| "learning_rate": 2.12716197919634e-05, |
| "loss": 1.3557, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.22123893805309736, |
| "grad_norm": 0.7922418713569641, |
| "learning_rate": 1.9673362563779356e-05, |
| "loss": 1.26, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.224642614023145, |
| "grad_norm": 0.6704869866371155, |
| "learning_rate": 1.812269589701326e-05, |
| "loss": 1.2262, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.22804628999319265, |
| "grad_norm": 0.6106792092323303, |
| "learning_rate": 1.662205320016279e-05, |
| "loss": 1.2958, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.2314499659632403, |
| "grad_norm": 0.7386729717254639, |
| "learning_rate": 1.517378938080979e-05, |
| "loss": 1.279, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.23485364193328795, |
| "grad_norm": 0.9197444319725037, |
| "learning_rate": 1.3780177150143908e-05, |
| "loss": 1.3203, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.2382573179033356, |
| "grad_norm": 0.5879797339439392, |
| "learning_rate": 1.2443403456474017e-05, |
| "loss": 1.2765, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.24166099387338325, |
| "grad_norm": 0.6365487575531006, |
| "learning_rate": 1.1165566053324699e-05, |
| "loss": 1.2499, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.2450646698434309, |
| "grad_norm": 0.5641332864761353, |
| "learning_rate": 9.948670207502907e-06, |
| "loss": 1.2574, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.24846834581347857, |
| "grad_norm": 1.0512096881866455, |
| "learning_rate": 8.794625552300878e-06, |
| "loss": 1.2396, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.2518720217835262, |
| "grad_norm": 0.8620232939720154, |
| "learning_rate": 7.705243090773522e-06, |
| "loss": 1.3172, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.25527569775357384, |
| "grad_norm": 0.8536260724067688, |
| "learning_rate": 6.682232353792894e-06, |
| "loss": 1.3053, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.2586793737236215, |
| "grad_norm": 0.7353459596633911, |
| "learning_rate": 5.727198717339511e-06, |
| "loss": 1.3267, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.2620830496936692, |
| "grad_norm": 0.7075720429420471, |
| "learning_rate": 4.8416408832403334e-06, |
| "loss": 1.2496, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.26548672566371684, |
| "grad_norm": 0.5608527660369873, |
| "learning_rate": 4.026948527306989e-06, |
| "loss": 1.2856, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.2688904016337645, |
| "grad_norm": 0.7906400561332703, |
| "learning_rate": 3.2844001185647288e-06, |
| "loss": 1.2576, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.27229407760381213, |
| "grad_norm": 0.9198495745658875, |
| "learning_rate": 2.6151609129943964e-06, |
| "loss": 1.3065, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2756977535738598, |
| "grad_norm": 0.5755515098571777, |
| "learning_rate": 2.02028112493588e-06, |
| "loss": 1.3112, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.27910142954390743, |
| "grad_norm": 0.5036645531654358, |
| "learning_rate": 1.5006942790224133e-06, |
| "loss": 1.2878, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.2825051055139551, |
| "grad_norm": 0.5611245036125183, |
| "learning_rate": 1.0572157452321097e-06, |
| "loss": 1.2717, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.2859087814840027, |
| "grad_norm": 0.7261125445365906, |
| "learning_rate": 6.905414593555482e-07, |
| "loss": 1.2907, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.2893124574540504, |
| "grad_norm": 0.5549591183662415, |
| "learning_rate": 4.0124683088740287e-07, |
| "loss": 1.3056, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.292716133424098, |
| "grad_norm": 1.1202582120895386, |
| "learning_rate": 1.897858400558783e-07, |
| "loss": 1.2864, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.29611980939414567, |
| "grad_norm": 0.5217841863632202, |
| "learning_rate": 5.6490325406971524e-08, |
| "loss": 1.2405, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.2995234853641933, |
| "grad_norm": 1.1227178573608398, |
| "learning_rate": 1.5694630615070704e-09, |
| "loss": 1.3007, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.30020422055820284, |
| "step": 882, |
| "total_flos": 2.4828724791109222e+17, |
| "train_loss": 1.3161652834237028, |
| "train_runtime": 1203.9175, |
| "train_samples_per_second": 23.42, |
| "train_steps_per_second": 0.733 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 882, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 50000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.4828724791109222e+17, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |