| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 645, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.046511627906976744, |
| "grad_norm": 1.7476660013198853, |
| "learning_rate": 4.1538461538461545e-06, |
| "loss": 0.4053, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.09302325581395349, |
| "grad_norm": 1.2797056436538696, |
| "learning_rate": 8.76923076923077e-06, |
| "loss": 0.2451, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.13953488372093023, |
| "grad_norm": 0.9715517163276672, |
| "learning_rate": 1.3384615384615386e-05, |
| "loss": 0.2026, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.18604651162790697, |
| "grad_norm": 0.8870955109596252, |
| "learning_rate": 1.8e-05, |
| "loss": 0.1818, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.23255813953488372, |
| "grad_norm": 0.706699550151825, |
| "learning_rate": 2.2615384615384615e-05, |
| "loss": 0.1687, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.27906976744186046, |
| "grad_norm": 0.6627687811851501, |
| "learning_rate": 2.7230769230769233e-05, |
| "loss": 0.1621, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.32558139534883723, |
| "grad_norm": 0.5161024928092957, |
| "learning_rate": 2.9996479470277262e-05, |
| "loss": 0.1597, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.37209302325581395, |
| "grad_norm": 0.45436057448387146, |
| "learning_rate": 2.9956892486957502e-05, |
| "loss": 0.1527, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.4186046511627907, |
| "grad_norm": 0.7213302254676819, |
| "learning_rate": 2.9873434360934543e-05, |
| "loss": 0.1536, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.46511627906976744, |
| "grad_norm": 0.41891512274742126, |
| "learning_rate": 2.9746349889271652e-05, |
| "loss": 0.1454, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5116279069767442, |
| "grad_norm": 0.40932586789131165, |
| "learning_rate": 2.9576011832620583e-05, |
| "loss": 0.1384, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.5581395348837209, |
| "grad_norm": 0.6326035857200623, |
| "learning_rate": 2.9362919821850365e-05, |
| "loss": 0.1401, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.6046511627906976, |
| "grad_norm": 0.47325533628463745, |
| "learning_rate": 2.9107698892543862e-05, |
| "loss": 0.1297, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.6511627906976745, |
| "grad_norm": 0.4126339852809906, |
| "learning_rate": 2.8811097651660716e-05, |
| "loss": 0.1288, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6976744186046512, |
| "grad_norm": 0.4939899742603302, |
| "learning_rate": 2.847398608174417e-05, |
| "loss": 0.1339, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.7441860465116279, |
| "grad_norm": 0.38328367471694946, |
| "learning_rate": 2.8097352989112345e-05, |
| "loss": 0.1293, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.7906976744186046, |
| "grad_norm": 0.36505013704299927, |
| "learning_rate": 2.768230310351898e-05, |
| "loss": 0.1272, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.8372093023255814, |
| "grad_norm": 0.40006181597709656, |
| "learning_rate": 2.7230053837790673e-05, |
| "loss": 0.1285, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.8837209302325582, |
| "grad_norm": 0.4462541341781616, |
| "learning_rate": 2.6741931716945336e-05, |
| "loss": 0.1236, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.9302325581395349, |
| "grad_norm": 0.34532684087753296, |
| "learning_rate": 2.6219368487265756e-05, |
| "loss": 0.1274, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.9767441860465116, |
| "grad_norm": 0.4481622576713562, |
| "learning_rate": 2.5663896916741064e-05, |
| "loss": 0.1213, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.0232558139534884, |
| "grad_norm": 0.3308081328868866, |
| "learning_rate": 2.5077146299194094e-05, |
| "loss": 0.124, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.069767441860465, |
| "grad_norm": 0.3247145116329193, |
| "learning_rate": 2.446083767528193e-05, |
| "loss": 0.1223, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.1162790697674418, |
| "grad_norm": 0.31839317083358765, |
| "learning_rate": 2.3816778784387097e-05, |
| "loss": 0.1162, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.1627906976744187, |
| "grad_norm": 0.36601680517196655, |
| "learning_rate": 2.3146858762206493e-05, |
| "loss": 0.1166, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.2093023255813953, |
| "grad_norm": 0.39110299944877625, |
| "learning_rate": 2.2453042599590884e-05, |
| "loss": 0.1147, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.255813953488372, |
| "grad_norm": 0.40319550037384033, |
| "learning_rate": 2.173736537888819e-05, |
| "loss": 0.1137, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.302325581395349, |
| "grad_norm": 0.35869157314300537, |
| "learning_rate": 2.10019263046963e-05, |
| "loss": 0.1174, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.3488372093023255, |
| "grad_norm": 0.39020806550979614, |
| "learning_rate": 2.0248882546534327e-05, |
| "loss": 0.1141, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.3953488372093024, |
| "grad_norm": 0.39524269104003906, |
| "learning_rate": 1.9480442911492706e-05, |
| "loss": 0.1132, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.441860465116279, |
| "grad_norm": 0.3699205219745636, |
| "learning_rate": 1.8698861365421433e-05, |
| "loss": 0.1119, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.4883720930232558, |
| "grad_norm": 0.33726540207862854, |
| "learning_rate": 1.7906430421659876e-05, |
| "loss": 0.1166, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.5348837209302326, |
| "grad_norm": 0.41265928745269775, |
| "learning_rate": 1.7105474416700165e-05, |
| "loss": 0.1127, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.5813953488372094, |
| "grad_norm": 0.3320264220237732, |
| "learning_rate": 1.6298342692507765e-05, |
| "loss": 0.1115, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.627906976744186, |
| "grad_norm": 0.2614348828792572, |
| "learning_rate": 1.548740270549671e-05, |
| "loss": 0.111, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.6744186046511627, |
| "grad_norm": 0.29234352707862854, |
| "learning_rate": 1.467503308237204e-05, |
| "loss": 0.1087, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.7209302325581395, |
| "grad_norm": 0.2900184094905853, |
| "learning_rate": 1.3863616643207844e-05, |
| "loss": 0.1074, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.7674418604651163, |
| "grad_norm": 0.29722127318382263, |
| "learning_rate": 1.3055533412225422e-05, |
| "loss": 0.1104, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.8139534883720931, |
| "grad_norm": 0.38949820399284363, |
| "learning_rate": 1.2253153636772158e-05, |
| "loss": 0.1091, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.8604651162790697, |
| "grad_norm": 0.2845311164855957, |
| "learning_rate": 1.1458830834977698e-05, |
| "loss": 0.1111, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.9069767441860463, |
| "grad_norm": 0.24525171518325806, |
| "learning_rate": 1.067489489247974e-05, |
| "loss": 0.107, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.9534883720930232, |
| "grad_norm": 0.29745063185691833, |
| "learning_rate": 9.903645228468024e-06, |
| "loss": 0.1071, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.2796909809112549, |
| "learning_rate": 9.147344051091682e-06, |
| "loss": 0.1059, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.046511627906977, |
| "grad_norm": 0.28045353293418884, |
| "learning_rate": 8.408209722012956e-06, |
| "loss": 0.1023, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.0930232558139537, |
| "grad_norm": 0.21314753592014313, |
| "learning_rate": 7.688410249570214e-06, |
| "loss": 0.1031, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.13953488372093, |
| "grad_norm": 0.3194475471973419, |
| "learning_rate": 6.990056929635958e-06, |
| "loss": 0.1023, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.186046511627907, |
| "grad_norm": 0.21530288457870483, |
| "learning_rate": 6.315198152822273e-06, |
| "loss": 0.0997, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.2325581395348837, |
| "grad_norm": 0.27216485142707825, |
| "learning_rate": 5.66581339619819e-06, |
| "loss": 0.0968, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.2790697674418605, |
| "grad_norm": 0.3025151193141937, |
| "learning_rate": 5.043807417142436e-06, |
| "loss": 0.0999, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.3255813953488373, |
| "grad_norm": 0.27557316422462463, |
| "learning_rate": 4.4510046663618e-06, |
| "loss": 0.0991, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.3720930232558137, |
| "grad_norm": 0.2208959013223648, |
| "learning_rate": 3.889143936462915e-06, |
| "loss": 0.0985, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.4186046511627906, |
| "grad_norm": 0.31590473651885986, |
| "learning_rate": 3.359873261773904e-06, |
| "loss": 0.0994, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.4651162790697674, |
| "grad_norm": 0.28370803594589233, |
| "learning_rate": 2.86474508437579e-06, |
| "loss": 0.0989, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.511627906976744, |
| "grad_norm": 0.2663387358188629, |
| "learning_rate": 2.4052117005223457e-06, |
| "loss": 0.1022, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.558139534883721, |
| "grad_norm": 0.2275928556919098, |
| "learning_rate": 1.982621000804979e-06, |
| "loss": 0.0926, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.604651162790698, |
| "grad_norm": 0.2721380889415741, |
| "learning_rate": 1.5982125165573941e-06, |
| "loss": 0.0991, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.6511627906976747, |
| "grad_norm": 0.24267996847629547, |
| "learning_rate": 1.25311378409661e-06, |
| "loss": 0.0991, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.697674418604651, |
| "grad_norm": 0.22184228897094727, |
| "learning_rate": 9.483370374646661e-07, |
| "loss": 0.0975, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.744186046511628, |
| "grad_norm": 0.28250575065612793, |
| "learning_rate": 6.847762393717782e-07, |
| "loss": 0.0926, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.7906976744186047, |
| "grad_norm": 0.28555381298065186, |
| "learning_rate": 4.632044590496948e-07, |
| "loss": 0.0956, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.8372093023255816, |
| "grad_norm": 0.29760462045669556, |
| "learning_rate": 2.8427160470641255e-07, |
| "loss": 0.0966, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.883720930232558, |
| "grad_norm": 0.28198060393333435, |
| "learning_rate": 1.4850251723345198e-07, |
| "loss": 0.0981, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.9302325581395348, |
| "grad_norm": 0.27139362692832947, |
| "learning_rate": 5.629543075708177e-08, |
| "loss": 0.0919, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.9767441860465116, |
| "grad_norm": 0.2770179808139801, |
| "learning_rate": 7.920804549007011e-09, |
| "loss": 0.0927, |
| "step": 640 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 645, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 475511974002688.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|