{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.7034949267192783,
  "eval_steps": 50,
  "global_step": 600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04509582863585118,
      "grad_norm": 4.735579490661621,
      "learning_rate": 6.716417910447762e-07,
      "loss": 1.6732,
      "step": 10
    },
    {
      "epoch": 0.09019165727170236,
      "grad_norm": 4.019947528839111,
      "learning_rate": 1.417910447761194e-06,
      "loss": 1.722,
      "step": 20
    },
    {
      "epoch": 0.13528748590755355,
      "grad_norm": 4.211146354675293,
      "learning_rate": 2.1641791044776118e-06,
      "loss": 1.6683,
      "step": 30
    },
    {
      "epoch": 0.18038331454340473,
      "grad_norm": 2.573216199874878,
      "learning_rate": 2.9104477611940303e-06,
      "loss": 1.7905,
      "step": 40
    },
    {
      "epoch": 0.2254791431792559,
      "grad_norm": 1.0940048694610596,
      "learning_rate": 3.656716417910448e-06,
      "loss": 1.3494,
      "step": 50
    },
    {
      "epoch": 0.2254791431792559,
      "eval_loss": 1.4587914943695068,
      "eval_runtime": 17.5637,
      "eval_samples_per_second": 10.647,
      "eval_steps_per_second": 5.352,
      "step": 50
    },
    {
      "epoch": 0.2705749718151071,
      "grad_norm": 0.7621564865112305,
      "learning_rate": 4.402985074626866e-06,
      "loss": 1.2658,
      "step": 60
    },
    {
      "epoch": 0.3156708004509583,
      "grad_norm": 0.7101795077323914,
      "learning_rate": 4.999862465351179e-06,
      "loss": 1.3226,
      "step": 70
    },
    {
      "epoch": 0.36076662908680945,
      "grad_norm": 0.7053520679473877,
      "learning_rate": 4.995050341381415e-06,
      "loss": 1.2356,
      "step": 80
    },
    {
      "epoch": 0.40586245772266066,
      "grad_norm": 0.6544761657714844,
      "learning_rate": 4.983376610111733e-06,
      "loss": 1.048,
      "step": 90
    },
    {
      "epoch": 0.4509582863585118,
      "grad_norm": 0.5926041603088379,
      "learning_rate": 4.964873375327125e-06,
      "loss": 1.1656,
      "step": 100
    },
    {
      "epoch": 0.4509582863585118,
      "eval_loss": 1.2103750705718994,
      "eval_runtime": 17.0601,
      "eval_samples_per_second": 10.961,
      "eval_steps_per_second": 5.51,
      "step": 100
    },
    {
      "epoch": 0.496054114994363,
      "grad_norm": 0.5631921887397766,
      "learning_rate": 4.939591522546314e-06,
      "loss": 1.0661,
      "step": 110
    },
    {
      "epoch": 0.5411499436302142,
      "grad_norm": 0.6035294532775879,
      "learning_rate": 4.9076005790821265e-06,
      "loss": 1.0835,
      "step": 120
    },
    {
      "epoch": 0.5862457722660653,
      "grad_norm": 0.37587037682533264,
      "learning_rate": 4.868988522835274e-06,
      "loss": 1.0164,
      "step": 130
    },
    {
      "epoch": 0.6313416009019166,
      "grad_norm": 0.5670269131660461,
      "learning_rate": 4.823861540347411e-06,
      "loss": 1.0161,
      "step": 140
    },
    {
      "epoch": 0.6764374295377678,
      "grad_norm": 0.5858781337738037,
      "learning_rate": 4.772343734778834e-06,
      "loss": 1.0703,
      "step": 150
    },
    {
      "epoch": 0.6764374295377678,
      "eval_loss": 1.086298942565918,
      "eval_runtime": 16.7515,
      "eval_samples_per_second": 11.163,
      "eval_steps_per_second": 5.611,
      "step": 150
    },
    {
      "epoch": 0.7215332581736189,
      "grad_norm": 0.6360714435577393,
      "learning_rate": 4.7145767846139e-06,
      "loss": 0.9194,
      "step": 160
    },
    {
      "epoch": 0.7666290868094702,
      "grad_norm": 0.563727617263794,
      "learning_rate": 4.650719554032773e-06,
      "loss": 0.967,
      "step": 170
    },
    {
      "epoch": 0.8117249154453213,
      "grad_norm": 0.5043988823890686,
      "learning_rate": 4.580947656020985e-06,
      "loss": 0.9907,
      "step": 180
    },
    {
      "epoch": 0.8568207440811725,
      "grad_norm": 0.5937032699584961,
      "learning_rate": 4.50545296941833e-06,
      "loss": 1.0275,
      "step": 190
    },
    {
      "epoch": 0.9019165727170236,
      "grad_norm": 0.5281458497047424,
      "learning_rate": 4.424443111235215e-06,
      "loss": 0.9382,
      "step": 200
    },
    {
      "epoch": 0.9019165727170236,
      "eval_loss": 0.9937817454338074,
      "eval_runtime": 17.0324,
      "eval_samples_per_second": 10.979,
      "eval_steps_per_second": 5.519,
      "step": 200
    },
    {
      "epoch": 0.9470124013528749,
      "grad_norm": 0.5680450797080994,
      "learning_rate": 4.338140865687678e-06,
      "loss": 0.8375,
      "step": 210
    },
    {
      "epoch": 0.992108229988726,
      "grad_norm": 0.7513061761856079,
      "learning_rate": 4.2467835715212424e-06,
      "loss": 0.9523,
      "step": 220
    },
    {
      "epoch": 1.0360766629086808,
      "grad_norm": 0.5776436924934387,
      "learning_rate": 4.150622469308559e-06,
      "loss": 0.8596,
      "step": 230
    },
    {
      "epoch": 1.0811724915445322,
      "grad_norm": 0.5697727799415588,
      "learning_rate": 4.0499220105157795e-06,
      "loss": 0.8675,
      "step": 240
    },
    {
      "epoch": 1.1262683201803834,
      "grad_norm": 0.5843157172203064,
      "learning_rate": 3.944959130237843e-06,
      "loss": 0.7677,
      "step": 250
    },
    {
      "epoch": 1.1262683201803834,
      "eval_loss": 0.9346119165420532,
      "eval_runtime": 17.1484,
      "eval_samples_per_second": 10.905,
      "eval_steps_per_second": 5.482,
      "step": 250
    },
    {
      "epoch": 1.1713641488162345,
      "grad_norm": 0.7570247054100037,
      "learning_rate": 3.8360224856026575e-06,
      "loss": 0.8989,
      "step": 260
    },
    {
      "epoch": 1.2164599774520857,
      "grad_norm": 0.6838065385818481,
      "learning_rate": 3.7234116619386875e-06,
      "loss": 0.8284,
      "step": 270
    },
    {
      "epoch": 1.2615558060879368,
      "grad_norm": 0.8344841003417969,
      "learning_rate": 3.6074363488890184e-06,
      "loss": 0.822,
      "step": 280
    },
    {
      "epoch": 1.306651634723788,
      "grad_norm": 0.6045500636100769,
      "learning_rate": 3.488415488737673e-06,
      "loss": 0.7663,
      "step": 290
    },
    {
      "epoch": 1.3517474633596391,
      "grad_norm": 0.6347020268440247,
      "learning_rate": 3.366676399290354e-06,
      "loss": 0.8671,
      "step": 300
    },
    {
      "epoch": 1.3517474633596391,
      "eval_loss": 0.9009976387023926,
      "eval_runtime": 17.1832,
      "eval_samples_per_second": 10.883,
      "eval_steps_per_second": 5.47,
      "step": 300
    },
    {
      "epoch": 1.3968432919954905,
      "grad_norm": 0.6355866193771362,
      "learning_rate": 3.2425538737217643e-06,
      "loss": 0.8322,
      "step": 310
    },
    {
      "epoch": 1.4419391206313417,
      "grad_norm": 0.5566846132278442,
      "learning_rate": 3.116389259865012e-06,
      "loss": 0.9351,
      "step": 320
    },
    {
      "epoch": 1.4870349492671928,
      "grad_norm": 0.5988627672195435,
      "learning_rate": 2.988529521475126e-06,
      "loss": 0.7539,
      "step": 330
    },
    {
      "epoch": 1.532130777903044,
      "grad_norm": 0.526286244392395,
      "learning_rate": 2.8593262840482927e-06,
      "loss": 0.8275,
      "step": 340
    },
    {
      "epoch": 1.5772266065388951,
      "grad_norm": 0.5774685740470886,
      "learning_rate": 2.7291348678209018e-06,
      "loss": 0.7815,
      "step": 350
    },
    {
      "epoch": 1.5772266065388951,
      "eval_loss": 0.8790203332901001,
      "eval_runtime": 17.1244,
      "eval_samples_per_second": 10.92,
      "eval_steps_per_second": 5.489,
      "step": 350
    },
    {
      "epoch": 1.6223224351747465,
      "grad_norm": 0.7504833340644836,
      "learning_rate": 2.5983133106077245e-06,
      "loss": 0.8097,
      "step": 360
    },
    {
      "epoch": 1.6674182638105974,
      "grad_norm": 0.5819371938705444,
      "learning_rate": 2.467221383166517e-06,
      "loss": 0.7285,
      "step": 370
    },
    {
      "epoch": 1.7125140924464488,
      "grad_norm": 0.9311485886573792,
      "learning_rate": 2.3362195997968804e-06,
      "loss": 0.8624,
      "step": 380
    },
    {
      "epoch": 1.7576099210822997,
      "grad_norm": 0.692903459072113,
      "learning_rate": 2.205668226894311e-06,
      "loss": 0.7785,
      "step": 390
    },
    {
      "epoch": 1.8027057497181511,
      "grad_norm": 0.9720048308372498,
      "learning_rate": 2.0759262921860105e-06,
      "loss": 0.7817,
      "step": 400
    },
    {
      "epoch": 1.8027057497181511,
      "eval_loss": 0.8658136129379272,
      "eval_runtime": 17.194,
      "eval_samples_per_second": 10.876,
      "eval_steps_per_second": 5.467,
      "step": 400
    },
    {
      "epoch": 1.8478015783540023,
      "grad_norm": 0.7635661363601685,
      "learning_rate": 1.9473505973731456e-06,
      "loss": 0.9323,
      "step": 410
    },
    {
      "epoch": 1.8928974069898534,
      "grad_norm": 0.5519320964813232,
      "learning_rate": 1.820294736894871e-06,
      "loss": 0.7905,
      "step": 420
    },
    {
      "epoch": 1.9379932356257046,
      "grad_norm": 0.7337349653244019,
      "learning_rate": 1.6951081255126e-06,
      "loss": 0.7766,
      "step": 430
    },
    {
      "epoch": 1.9830890642615557,
      "grad_norm": 0.738616943359375,
      "learning_rate": 1.5721350373887706e-06,
      "loss": 0.8027,
      "step": 440
    },
    {
      "epoch": 2.0270574971815107,
      "grad_norm": 0.6186122298240662,
      "learning_rate": 1.451713659302688e-06,
      "loss": 0.788,
      "step": 450
    },
    {
      "epoch": 2.0270574971815107,
      "eval_loss": 0.8563017249107361,
      "eval_runtime": 17.3119,
      "eval_samples_per_second": 10.802,
      "eval_steps_per_second": 5.43,
      "step": 450
    },
    {
      "epoch": 2.0721533258173617,
      "grad_norm": 0.737917959690094,
      "learning_rate": 1.3341751606072281e-06,
      "loss": 0.6885,
      "step": 460
    },
    {
      "epoch": 2.117249154453213,
      "grad_norm": 0.6596365571022034,
      "learning_rate": 1.2198427824840583e-06,
      "loss": 0.8228,
      "step": 470
    },
    {
      "epoch": 2.1623449830890644,
      "grad_norm": 0.6603862643241882,
      "learning_rate": 1.1090309490020374e-06,
      "loss": 0.829,
      "step": 480
    },
    {
      "epoch": 2.2074408117249154,
      "grad_norm": 0.6828354001045227,
      "learning_rate": 1.0020444024234477e-06,
      "loss": 0.6616,
      "step": 490
    },
    {
      "epoch": 2.2525366403607667,
      "grad_norm": 0.6725893616676331,
      "learning_rate": 8.991773651360386e-07,
      "loss": 0.6905,
      "step": 500
    },
    {
      "epoch": 2.2525366403607667,
      "eval_loss": 0.8513637185096741,
      "eval_runtime": 17.3036,
      "eval_samples_per_second": 10.807,
      "eval_steps_per_second": 5.432,
      "step": 500
    },
    {
      "epoch": 2.2976324689966177,
      "grad_norm": 0.7450459003448486,
      "learning_rate": 8.007127305156518e-07,
      "loss": 0.7426,
      "step": 510
    },
    {
      "epoch": 2.342728297632469,
      "grad_norm": 0.7487466335296631,
      "learning_rate": 7.069212849446277e-07,
      "loss": 0.8118,
      "step": 520
    },
    {
      "epoch": 2.3878241262683204,
      "grad_norm": 0.6942753791809082,
      "learning_rate": 6.180609631254941e-07,
      "loss": 0.7956,
      "step": 530
    },
    {
      "epoch": 2.4329199549041713,
      "grad_norm": 0.6365687847137451,
      "learning_rate": 5.343761387379159e-07,
      "loss": 0.8643,
      "step": 540
    },
    {
      "epoch": 2.4780157835400227,
      "grad_norm": 0.71890789270401,
      "learning_rate": 4.560969523896311e-07,
      "loss": 0.8426,
      "step": 550
    },
    {
      "epoch": 2.4780157835400227,
      "eval_loss": 0.8484782576560974,
      "eval_runtime": 17.1313,
      "eval_samples_per_second": 10.916,
      "eval_steps_per_second": 5.487,
      "step": 550
    },
    {
      "epoch": 2.5231116121758737,
      "grad_norm": 0.6147546768188477,
      "learning_rate": 3.83438678709602e-07,
      "loss": 0.8001,
      "step": 560
    },
    {
      "epoch": 2.568207440811725,
      "grad_norm": 0.6024377346038818,
      "learning_rate": 3.1660113432389645e-07,
      "loss": 0.7645,
      "step": 570
    },
    {
      "epoch": 2.613303269447576,
      "grad_norm": 0.5936167240142822,
      "learning_rate": 2.557681283424374e-07,
      "loss": 0.7751,
      "step": 580
    },
    {
      "epoch": 2.6583990980834273,
      "grad_norm": 0.6910776495933533,
      "learning_rate": 2.0110695686783115e-07,
      "loss": 0.8279,
      "step": 590
    },
    {
      "epoch": 2.7034949267192783,
      "grad_norm": 0.8321192860603333,
      "learning_rate": 1.5276794291640723e-07,
      "loss": 0.7276,
      "step": 600
    },
    {
      "epoch": 2.7034949267192783,
      "eval_loss": 0.847333550453186,
      "eval_runtime": 17.0494,
      "eval_samples_per_second": 10.968,
      "eval_steps_per_second": 5.513,
      "step": 600
    }
  ],
  "logging_steps": 10,
  "max_steps": 666,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 9.313955370885427e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}