{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 7908,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.012645422357106728,
      "grad_norm": 0.11559224128723145,
      "learning_rate": 2.5316455696202535e-06,
      "loss": 2.4624,
      "step": 100
    },
    {
      "epoch": 0.025290844714213456,
      "grad_norm": 0.13693679869174957,
      "learning_rate": 5.063291139240507e-06,
      "loss": 2.4073,
      "step": 200
    },
    {
      "epoch": 0.03793626707132018,
      "grad_norm": 0.18691261112689972,
      "learning_rate": 7.5949367088607605e-06,
      "loss": 2.4027,
      "step": 300
    },
    {
      "epoch": 0.05058168942842691,
      "grad_norm": 0.3437875509262085,
      "learning_rate": 1.0126582278481014e-05,
      "loss": 2.3508,
      "step": 400
    },
    {
      "epoch": 0.06322711178553364,
      "grad_norm": 0.3929010033607483,
      "learning_rate": 1.2658227848101268e-05,
      "loss": 2.3569,
      "step": 500
    },
    {
      "epoch": 0.07587253414264036,
      "grad_norm": 0.4519222676753998,
      "learning_rate": 1.5189873417721521e-05,
      "loss": 2.3237,
      "step": 600
    },
    {
      "epoch": 0.08851795649974709,
      "grad_norm": 0.42425811290740967,
      "learning_rate": 1.7721518987341772e-05,
      "loss": 2.2495,
      "step": 700
    },
    {
      "epoch": 0.10116337885685382,
      "grad_norm": 0.4894159436225891,
      "learning_rate": 1.9999902601318596e-05,
      "loss": 2.2785,
      "step": 800
    },
    {
      "epoch": 0.11380880121396054,
      "grad_norm": 0.5537558794021606,
      "learning_rate": 1.9988217055109233e-05,
      "loss": 2.2217,
      "step": 900
    },
    {
      "epoch": 0.12645422357106728,
      "grad_norm": 0.6574133038520813,
      "learning_rate": 1.9957077852147003e-05,
      "loss": 2.2403,
      "step": 1000
    },
    {
      "epoch": 0.139099645928174,
      "grad_norm": 0.7620156407356262,
      "learning_rate": 1.9906545641030418e-05,
      "loss": 2.2379,
      "step": 1100
    },
    {
      "epoch": 0.15174506828528073,
      "grad_norm": 0.7593836784362793,
      "learning_rate": 1.983671884135574e-05,
      "loss": 2.2384,
      "step": 1200
    },
    {
      "epoch": 0.16439049064238745,
      "grad_norm": 0.637991189956665,
      "learning_rate": 1.9747733452029044e-05,
      "loss": 2.1626,
      "step": 1300
    },
    {
      "epoch": 0.17703591299949417,
      "grad_norm": 0.7157115340232849,
      "learning_rate": 1.9639762786386466e-05,
      "loss": 2.2157,
      "step": 1400
    },
    {
      "epoch": 0.18968133535660092,
      "grad_norm": 0.8079652190208435,
      "learning_rate": 1.9513017134638686e-05,
      "loss": 2.1978,
      "step": 1500
    },
    {
      "epoch": 0.20232675771370764,
      "grad_norm": 0.7162272334098816,
      "learning_rate": 1.9367743354296953e-05,
      "loss": 2.1615,
      "step": 1600
    },
    {
      "epoch": 0.21497218007081437,
      "grad_norm": 0.7601909637451172,
      "learning_rate": 1.9204224389378434e-05,
      "loss": 2.1563,
      "step": 1700
    },
    {
      "epoch": 0.2276176024279211,
      "grad_norm": 0.8401700258255005,
      "learning_rate": 1.902277871932732e-05,
      "loss": 2.1535,
      "step": 1800
    },
    {
      "epoch": 0.2402630247850278,
      "grad_norm": 0.8605347275733948,
      "learning_rate": 1.882375973872494e-05,
      "loss": 2.1561,
      "step": 1900
    },
    {
      "epoch": 0.25290844714213456,
      "grad_norm": 0.7509819865226746,
      "learning_rate": 1.8607555068997083e-05,
      "loss": 2.1439,
      "step": 2000
    },
    {
      "epoch": 0.26555386949924126,
      "grad_norm": 0.7537213563919067,
      "learning_rate": 1.8374585803459005e-05,
      "loss": 2.1637,
      "step": 2100
    },
    {
      "epoch": 0.278199291856348,
      "grad_norm": 0.7172447443008423,
      "learning_rate": 1.8125305687168578e-05,
      "loss": 2.1378,
      "step": 2200
    },
    {
      "epoch": 0.2908447142134547,
      "grad_norm": 1.2094899415969849,
      "learning_rate": 1.7860200233184934e-05,
      "loss": 2.116,
      "step": 2300
    },
    {
      "epoch": 0.30349013657056145,
      "grad_norm": 0.6232509613037109,
      "learning_rate": 1.7579785776953788e-05,
      "loss": 2.1388,
      "step": 2400
    },
    {
      "epoch": 0.3161355589276682,
      "grad_norm": 0.7145382761955261,
      "learning_rate": 1.728460847066126e-05,
      "loss": 2.122,
      "step": 2500
    },
    {
      "epoch": 0.3287809812847749,
      "grad_norm": 0.7406628727912903,
      "learning_rate": 1.6975243219514772e-05,
      "loss": 2.0997,
      "step": 2600
    },
    {
      "epoch": 0.34142640364188165,
      "grad_norm": 0.7270051836967468,
      "learning_rate": 1.6652292562022838e-05,
      "loss": 2.1062,
      "step": 2700
    },
    {
      "epoch": 0.35407182599898834,
      "grad_norm": 0.7675819993019104,
      "learning_rate": 1.6316385496454543e-05,
      "loss": 2.0954,
      "step": 2800
    },
    {
      "epoch": 0.3667172483560951,
      "grad_norm": 1.0367164611816406,
      "learning_rate": 1.596817625576442e-05,
      "loss": 2.0791,
      "step": 2900
    },
    {
      "epoch": 0.37936267071320184,
      "grad_norm": 0.9357011318206787,
      "learning_rate": 1.5608343033368685e-05,
      "loss": 2.0908,
      "step": 3000
    },
    {
      "epoch": 0.39200809307030854,
      "grad_norm": 1.0484192371368408,
      "learning_rate": 1.5237586662254664e-05,
      "loss": 2.114,
      "step": 3100
    },
    {
      "epoch": 0.4046535154274153,
      "grad_norm": 0.8791719079017639,
      "learning_rate": 1.4856629249995986e-05,
      "loss": 2.1022,
      "step": 3200
    },
    {
      "epoch": 0.417298937784522,
      "grad_norm": 0.8439493179321289,
      "learning_rate": 1.446621277233214e-05,
      "loss": 2.093,
      "step": 3300
    },
    {
      "epoch": 0.42994436014162873,
      "grad_norm": 0.9357572793960571,
      "learning_rate": 1.4067097628051532e-05,
      "loss": 2.0881,
      "step": 3400
    },
    {
      "epoch": 0.4425897824987355,
      "grad_norm": 0.7724614143371582,
      "learning_rate": 1.3660061157992763e-05,
      "loss": 2.09,
      "step": 3500
    },
    {
      "epoch": 0.4552352048558422,
      "grad_norm": 0.7469125986099243,
      "learning_rate": 1.3245896131048493e-05,
      "loss": 2.1347,
      "step": 3600
    },
    {
      "epoch": 0.46788062721294893,
      "grad_norm": 1.0939433574676514,
      "learning_rate": 1.282540920012071e-05,
      "loss": 2.0754,
      "step": 3700
    },
    {
      "epoch": 0.4805260495700556,
      "grad_norm": 0.8267479538917542,
      "learning_rate": 1.2399419331034666e-05,
      "loss": 2.0978,
      "step": 3800
    },
    {
      "epoch": 0.4931714719271624,
      "grad_norm": 0.9732351303100586,
      "learning_rate": 1.1968756207471413e-05,
      "loss": 2.0973,
      "step": 3900
    },
    {
      "epoch": 0.5058168942842691,
      "grad_norm": 0.7001142501831055,
      "learning_rate": 1.1534258615025584e-05,
      "loss": 2.0911,
      "step": 4000
    },
    {
      "epoch": 0.5184623166413759,
      "grad_norm": 0.7966179847717285,
      "learning_rate": 1.1096772807535755e-05,
      "loss": 2.0964,
      "step": 4100
    },
    {
      "epoch": 0.5311077389984825,
      "grad_norm": 0.8109586834907532,
      "learning_rate": 1.065715085886918e-05,
      "loss": 2.0704,
      "step": 4200
    },
    {
      "epoch": 0.5437531613555893,
      "grad_norm": 0.9050929546356201,
      "learning_rate": 1.0216249003371113e-05,
      "loss": 2.1378,
      "step": 4300
    },
    {
      "epoch": 0.556398583712696,
      "grad_norm": 0.9542393684387207,
      "learning_rate": 9.774925968210892e-06,
      "loss": 2.074,
      "step": 4400
    },
    {
      "epoch": 0.5690440060698028,
      "grad_norm": 0.8393438458442688,
      "learning_rate": 9.334041300872904e-06,
      "loss": 2.0886,
      "step": 4500
    },
    {
      "epoch": 0.5816894284269094,
      "grad_norm": 1.006131649017334,
      "learning_rate": 8.894453695049792e-06,
      "loss": 2.0545,
      "step": 4600
    },
    {
      "epoch": 0.5943348507840162,
      "grad_norm": 1.2745147943496704,
      "learning_rate": 8.4570193181986e-06,
      "loss": 2.0421,
      "step": 4700
    },
    {
      "epoch": 0.6069802731411229,
      "grad_norm": 0.7968528270721436,
      "learning_rate": 8.022590144017162e-06,
      "loss": 2.1115,
      "step": 4800
    },
    {
      "epoch": 0.6196256954982297,
      "grad_norm": 1.0543171167373657,
      "learning_rate": 7.592012293088485e-06,
      "loss": 2.1035,
      "step": 4900
    },
    {
      "epoch": 0.6322711178553364,
      "grad_norm": 1.2739633321762085,
      "learning_rate": 7.166124384925069e-06,
      "loss": 2.1336,
      "step": 5000
    },
    {
      "epoch": 0.644916540212443,
      "grad_norm": 1.05385422706604,
      "learning_rate": 6.745755904622678e-06,
      "loss": 2.1043,
      "step": 5100
    },
    {
      "epoch": 0.6575619625695498,
      "grad_norm": 0.854491651058197,
      "learning_rate": 6.3317255873049535e-06,
      "loss": 2.0562,
      "step": 5200
    },
    {
      "epoch": 0.6702073849266565,
      "grad_norm": 0.9591374397277832,
      "learning_rate": 5.9248398235052566e-06,
      "loss": 2.0872,
      "step": 5300
    },
    {
      "epoch": 0.6828528072837633,
      "grad_norm": 1.0265579223632812,
      "learning_rate": 5.525891088591604e-06,
      "loss": 2.0493,
      "step": 5400
    },
    {
      "epoch": 0.69549822964087,
      "grad_norm": 1.3940016031265259,
      "learning_rate": 5.135656399293624e-06,
      "loss": 2.046,
      "step": 5500
    },
    {
      "epoch": 0.7081436519979767,
      "grad_norm": 1.0076895952224731,
      "learning_rate": 4.754895800337698e-06,
      "loss": 2.1039,
      "step": 5600
    },
    {
      "epoch": 0.7207890743550834,
      "grad_norm": 1.1425988674163818,
      "learning_rate": 4.384350884137794e-06,
      "loss": 2.0724,
      "step": 5700
    },
    {
      "epoch": 0.7334344967121902,
      "grad_norm": 1.011376142501831,
      "learning_rate": 4.024743346425134e-06,
      "loss": 2.0797,
      "step": 5800
    },
    {
      "epoch": 0.7460799190692969,
      "grad_norm": 1.2228542566299438,
      "learning_rate": 3.6767735806298833e-06,
      "loss": 2.0745,
      "step": 5900
    },
    {
      "epoch": 0.7587253414264037,
      "grad_norm": 1.220448613166809,
      "learning_rate": 3.3411193137524458e-06,
      "loss": 2.0349,
      "step": 6000
    },
    {
      "epoch": 0.7713707637835103,
      "grad_norm": 0.8894066214561462,
      "learning_rate": 3.0184342863813044e-06,
      "loss": 2.0935,
      "step": 6100
    },
    {
      "epoch": 0.7840161861406171,
      "grad_norm": 0.8592280745506287,
      "learning_rate": 2.7093469794282246e-06,
      "loss": 2.0997,
      "step": 6200
    },
    {
      "epoch": 0.7966616084977238,
      "grad_norm": 1.1828413009643555,
      "learning_rate": 2.4144593900607706e-06,
      "loss": 2.0485,
      "step": 6300
    },
    {
      "epoch": 0.8093070308548306,
      "grad_norm": 0.8146592378616333,
      "learning_rate": 2.134345859216118e-06,
      "loss": 2.0963,
      "step": 6400
    },
    {
      "epoch": 0.8219524532119373,
      "grad_norm": 0.8691930174827576,
      "learning_rate": 1.8695519529798789e-06,
      "loss": 2.0619,
      "step": 6500
    },
    {
      "epoch": 0.834597875569044,
      "grad_norm": 0.8871398568153381,
      "learning_rate": 1.6205934000084966e-06,
      "loss": 2.0537,
      "step": 6600
    },
    {
      "epoch": 0.8472432979261507,
      "grad_norm": 0.9184776544570923,
      "learning_rate": 1.387955087064895e-06,
      "loss": 2.0731,
      "step": 6700
    },
    {
      "epoch": 0.8598887202832575,
      "grad_norm": 1.0037082433700562,
      "learning_rate": 1.1720901146236207e-06,
      "loss": 2.0834,
      "step": 6800
    },
    {
      "epoch": 0.8725341426403642,
      "grad_norm": 1.014647126197815,
      "learning_rate": 9.734189143849126e-07,
      "loss": 2.0622,
      "step": 6900
    },
    {
      "epoch": 0.885179564997471,
      "grad_norm": 0.8669779300689697,
      "learning_rate": 7.923284304164502e-07,
      "loss": 2.0733,
      "step": 7000
    },
    {
      "epoch": 0.8978249873545776,
      "grad_norm": 1.2026468515396118,
      "learning_rate": 6.291713655176257e-07,
      "loss": 2.0768,
      "step": 7100
    },
    {
      "epoch": 0.9104704097116844,
      "grad_norm": 0.8568145036697388,
      "learning_rate": 4.84265494274222e-07,
      "loss": 2.0677,
      "step": 7200
    },
    {
      "epoch": 0.9231158320687911,
      "grad_norm": 0.9718058705329895,
      "learning_rate": 3.578930441413542e-07,
      "loss": 2.08,
      "step": 7300
    },
    {
      "epoch": 0.9357612544258979,
      "grad_norm": 0.8524600267410278,
      "learning_rate": 2.503001457601928e-07,
      "loss": 2.0769,
      "step": 7400
    },
    {
      "epoch": 0.9484066767830045,
      "grad_norm": 0.7981248497962952,
      "learning_rate": 1.6169635357900505e-07,
      "loss": 2.0827,
      "step": 7500
    },
    {
      "epoch": 0.9610520991401112,
      "grad_norm": 0.9334998726844788,
      "learning_rate": 9.225423771221598e-08,
      "loss": 2.0611,
      "step": 7600
    },
    {
      "epoch": 0.973697521497218,
      "grad_norm": 0.7538848519325256,
      "learning_rate": 4.210904783239378e-08,
      "loss": 2.0664,
      "step": 7700
    },
    {
      "epoch": 0.9863429438543247,
      "grad_norm": 0.8826780915260315,
      "learning_rate": 1.1358449749798717e-08,
      "loss": 2.0641,
      "step": 7800
    },
    {
      "epoch": 0.9989883662114315,
      "grad_norm": 0.9787418246269226,
      "learning_rate": 6.233519252774045e-11,
      "loss": 2.0665,
      "step": 7900
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.9169105291366577,
      "eval_runtime": 98.8888,
      "eval_samples_per_second": 6.502,
      "eval_steps_per_second": 0.819,
      "step": 7908
    },
    {
      "epoch": 1.0,
      "step": 7908,
      "total_flos": 1.4404222844928e+17,
      "train_loss": 2.1297678035741874,
      "train_runtime": 3846.3945,
      "train_samples_per_second": 4.112,
      "train_steps_per_second": 2.056
    }
  ],
  "logging_steps": 100,
  "max_steps": 7908,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.4404222844928e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}