| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 645, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.046511627906976744, |
| "grad_norm": 2.4341416358947754, |
| "learning_rate": 4.1538461538461545e-06, |
| "loss": 0.3952, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.09302325581395349, |
| "grad_norm": 1.8208298683166504, |
| "learning_rate": 8.76923076923077e-06, |
| "loss": 0.2555, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.13953488372093023, |
| "grad_norm": 1.736371636390686, |
| "learning_rate": 1.3384615384615386e-05, |
| "loss": 0.2123, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.18604651162790697, |
| "grad_norm": 1.3780733346939087, |
| "learning_rate": 1.8e-05, |
| "loss": 0.1893, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.23255813953488372, |
| "grad_norm": 1.1913007497787476, |
| "learning_rate": 2.2615384615384615e-05, |
| "loss": 0.1759, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.27906976744186046, |
| "grad_norm": 0.8563091158866882, |
| "learning_rate": 2.7230769230769233e-05, |
| "loss": 0.1704, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.32558139534883723, |
| "grad_norm": 1.2382513284683228, |
| "learning_rate": 2.9996479470277262e-05, |
| "loss": 0.1648, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.37209302325581395, |
| "grad_norm": 0.8481675982475281, |
| "learning_rate": 2.9956892486957502e-05, |
| "loss": 0.1654, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.4186046511627907, |
| "grad_norm": 1.4965696334838867, |
| "learning_rate": 2.9873434360934543e-05, |
| "loss": 0.1613, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.46511627906976744, |
| "grad_norm": 0.8099003434181213, |
| "learning_rate": 2.9746349889271652e-05, |
| "loss": 0.1554, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5116279069767442, |
| "grad_norm": 0.8638372421264648, |
| "learning_rate": 2.9576011832620583e-05, |
| "loss": 0.1464, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.5581395348837209, |
| "grad_norm": 1.090198040008545, |
| "learning_rate": 2.9362919821850365e-05, |
| "loss": 0.1484, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.6046511627906976, |
| "grad_norm": 0.7675595283508301, |
| "learning_rate": 2.9107698892543862e-05, |
| "loss": 0.1359, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.6511627906976745, |
| "grad_norm": 0.6692689657211304, |
| "learning_rate": 2.8811097651660716e-05, |
| "loss": 0.1334, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6976744186046512, |
| "grad_norm": 0.7122989892959595, |
| "learning_rate": 2.847398608174417e-05, |
| "loss": 0.1408, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.7441860465116279, |
| "grad_norm": 0.7892124056816101, |
| "learning_rate": 2.8097352989112345e-05, |
| "loss": 0.1348, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.7906976744186046, |
| "grad_norm": 0.6389254927635193, |
| "learning_rate": 2.768230310351898e-05, |
| "loss": 0.133, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.8372093023255814, |
| "grad_norm": 0.9744543433189392, |
| "learning_rate": 2.7230053837790673e-05, |
| "loss": 0.1416, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.8837209302325582, |
| "grad_norm": 0.638029932975769, |
| "learning_rate": 2.6741931716945336e-05, |
| "loss": 0.1281, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.9302325581395349, |
| "grad_norm": 0.6276944279670715, |
| "learning_rate": 2.6219368487265756e-05, |
| "loss": 0.1322, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.9767441860465116, |
| "grad_norm": 0.5903041958808899, |
| "learning_rate": 2.5663896916741064e-05, |
| "loss": 0.1242, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.0232558139534884, |
| "grad_norm": 0.47011125087738037, |
| "learning_rate": 2.5077146299194094e-05, |
| "loss": 0.1253, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.069767441860465, |
| "grad_norm": 0.6500623226165771, |
| "learning_rate": 2.446083767528193e-05, |
| "loss": 0.1245, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.1162790697674418, |
| "grad_norm": 0.5090748071670532, |
| "learning_rate": 2.3816778784387097e-05, |
| "loss": 0.1218, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.1627906976744187, |
| "grad_norm": 0.547989010810852, |
| "learning_rate": 2.3146858762206493e-05, |
| "loss": 0.1201, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.2093023255813953, |
| "grad_norm": 0.6994646787643433, |
| "learning_rate": 2.2453042599590884e-05, |
| "loss": 0.118, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.255813953488372, |
| "grad_norm": 0.6814610362052917, |
| "learning_rate": 2.173736537888819e-05, |
| "loss": 0.121, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.302325581395349, |
| "grad_norm": 0.5295448303222656, |
| "learning_rate": 2.10019263046963e-05, |
| "loss": 0.1194, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.3488372093023255, |
| "grad_norm": 0.6496727466583252, |
| "learning_rate": 2.0248882546534327e-05, |
| "loss": 0.1142, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.3953488372093024, |
| "grad_norm": 0.7958609461784363, |
| "learning_rate": 1.9480442911492706e-05, |
| "loss": 0.1137, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.441860465116279, |
| "grad_norm": 0.5419607758522034, |
| "learning_rate": 1.8698861365421433e-05, |
| "loss": 0.1148, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.4883720930232558, |
| "grad_norm": 0.5299695134162903, |
| "learning_rate": 1.7906430421659876e-05, |
| "loss": 0.119, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.5348837209302326, |
| "grad_norm": 0.5715431571006775, |
| "learning_rate": 1.7105474416700165e-05, |
| "loss": 0.114, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.5813953488372094, |
| "grad_norm": 0.5162650346755981, |
| "learning_rate": 1.6298342692507765e-05, |
| "loss": 0.1132, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.627906976744186, |
| "grad_norm": 0.42877525091171265, |
| "learning_rate": 1.548740270549671e-05, |
| "loss": 0.1131, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.6744186046511627, |
| "grad_norm": 0.41223767399787903, |
| "learning_rate": 1.467503308237204e-05, |
| "loss": 0.1104, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.7209302325581395, |
| "grad_norm": 0.44639158248901367, |
| "learning_rate": 1.3863616643207844e-05, |
| "loss": 0.1091, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.7674418604651163, |
| "grad_norm": 0.4975012242794037, |
| "learning_rate": 1.3055533412225422e-05, |
| "loss": 0.1123, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.8139534883720931, |
| "grad_norm": 0.550603985786438, |
| "learning_rate": 1.2253153636772158e-05, |
| "loss": 0.1109, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.8604651162790697, |
| "grad_norm": 0.4819272458553314, |
| "learning_rate": 1.1458830834977698e-05, |
| "loss": 0.1129, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.9069767441860463, |
| "grad_norm": 0.34267139434814453, |
| "learning_rate": 1.067489489247974e-05, |
| "loss": 0.1085, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.9534883720930232, |
| "grad_norm": 0.48774823546409607, |
| "learning_rate": 9.903645228468024e-06, |
| "loss": 0.1098, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.5222486257553101, |
| "learning_rate": 9.147344051091682e-06, |
| "loss": 0.1076, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.046511627906977, |
| "grad_norm": 0.45477646589279175, |
| "learning_rate": 8.408209722012956e-06, |
| "loss": 0.1044, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.0930232558139537, |
| "grad_norm": 0.4191150963306427, |
| "learning_rate": 7.688410249570214e-06, |
| "loss": 0.1051, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.13953488372093, |
| "grad_norm": 0.5323087573051453, |
| "learning_rate": 6.990056929635958e-06, |
| "loss": 0.1043, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.186046511627907, |
| "grad_norm": 0.3530571162700653, |
| "learning_rate": 6.315198152822273e-06, |
| "loss": 0.1017, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.2325581395348837, |
| "grad_norm": 0.40887731313705444, |
| "learning_rate": 5.66581339619819e-06, |
| "loss": 0.0984, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.2790697674418605, |
| "grad_norm": 0.51576828956604, |
| "learning_rate": 5.043807417142436e-06, |
| "loss": 0.1015, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.3255813953488373, |
| "grad_norm": 0.47971853613853455, |
| "learning_rate": 4.4510046663618e-06, |
| "loss": 0.1008, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.3720930232558137, |
| "grad_norm": 0.38621294498443604, |
| "learning_rate": 3.889143936462915e-06, |
| "loss": 0.1008, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.4186046511627906, |
| "grad_norm": 0.48758363723754883, |
| "learning_rate": 3.359873261773904e-06, |
| "loss": 0.1018, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.4651162790697674, |
| "grad_norm": 0.47174710035324097, |
| "learning_rate": 2.86474508437579e-06, |
| "loss": 0.101, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.511627906976744, |
| "grad_norm": 0.43842917680740356, |
| "learning_rate": 2.4052117005223457e-06, |
| "loss": 0.1045, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.558139534883721, |
| "grad_norm": 0.3704339265823364, |
| "learning_rate": 1.982621000804979e-06, |
| "loss": 0.0947, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.604651162790698, |
| "grad_norm": 0.44818997383117676, |
| "learning_rate": 1.5982125165573941e-06, |
| "loss": 0.1011, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.6511627906976747, |
| "grad_norm": 0.3882347345352173, |
| "learning_rate": 1.25311378409661e-06, |
| "loss": 0.1008, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.697674418604651, |
| "grad_norm": 0.3568360507488251, |
| "learning_rate": 9.483370374646661e-07, |
| "loss": 0.0998, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.744186046511628, |
| "grad_norm": 0.45614829659461975, |
| "learning_rate": 6.847762393717782e-07, |
| "loss": 0.0946, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.7906976744186047, |
| "grad_norm": 0.40172144770622253, |
| "learning_rate": 4.632044590496948e-07, |
| "loss": 0.0971, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.8372093023255816, |
| "grad_norm": 0.5011858344078064, |
| "learning_rate": 2.8427160470641255e-07, |
| "loss": 0.0982, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.883720930232558, |
| "grad_norm": 0.4626616835594177, |
| "learning_rate": 1.4850251723345198e-07, |
| "loss": 0.1001, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.9302325581395348, |
| "grad_norm": 0.4319652020931244, |
| "learning_rate": 5.629543075708177e-08, |
| "loss": 0.0938, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.9767441860465116, |
| "grad_norm": 0.4647758901119232, |
| "learning_rate": 7.920804549007011e-09, |
| "loss": 0.0945, |
| "step": 640 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 645, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 188525150076928.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|