{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.06883101984627739,
  "eval_steps": 10,
  "global_step": 600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.001147183664104623,
      "eval_loss": 1.873344898223877,
      "eval_runtime": 12.7647,
      "eval_samples_per_second": 515.797,
      "eval_steps_per_second": 8.069,
      "step": 10
    },
    {
      "epoch": 0.002294367328209246,
      "eval_loss": 1.8726389408111572,
      "eval_runtime": 12.8667,
      "eval_samples_per_second": 511.709,
      "eval_steps_per_second": 8.005,
      "step": 20
    },
    {
      "epoch": 0.0034415509923138693,
      "eval_loss": 1.8714078664779663,
      "eval_runtime": 12.9103,
      "eval_samples_per_second": 509.979,
      "eval_steps_per_second": 7.978,
      "step": 30
    },
    {
      "epoch": 0.004588734656418492,
      "eval_loss": 1.8696790933609009,
      "eval_runtime": 12.947,
      "eval_samples_per_second": 508.534,
      "eval_steps_per_second": 7.955,
      "step": 40
    },
    {
      "epoch": 0.0057359183205231154,
      "eval_loss": 1.8675329685211182,
      "eval_runtime": 12.9458,
      "eval_samples_per_second": 508.582,
      "eval_steps_per_second": 7.956,
      "step": 50
    },
    {
      "epoch": 0.006883101984627739,
      "eval_loss": 1.8649154901504517,
      "eval_runtime": 13.0432,
      "eval_samples_per_second": 504.785,
      "eval_steps_per_second": 7.897,
      "step": 60
    },
    {
      "epoch": 0.008030285648732363,
      "eval_loss": 1.8619294166564941,
      "eval_runtime": 13.0638,
      "eval_samples_per_second": 503.988,
      "eval_steps_per_second": 7.884,
      "step": 70
    },
    {
      "epoch": 0.009177469312836984,
      "eval_loss": 1.8583979606628418,
      "eval_runtime": 13.0482,
      "eval_samples_per_second": 504.592,
      "eval_steps_per_second": 7.894,
      "step": 80
    },
    {
      "epoch": 0.010324652976941608,
      "eval_loss": 1.85438871383667,
      "eval_runtime": 13.0615,
      "eval_samples_per_second": 504.075,
      "eval_steps_per_second": 7.886,
      "step": 90
    },
    {
      "epoch": 0.011471836641046231,
      "grad_norm": 9.938580513000488,
      "learning_rate": 3.8226299694189603e-07,
      "loss": 3.1046,
      "step": 100
    },
    {
      "epoch": 0.011471836641046231,
      "eval_loss": 1.849947214126587,
      "eval_runtime": 13.0663,
      "eval_samples_per_second": 503.89,
      "eval_steps_per_second": 7.883,
      "step": 100
    },
    {
      "epoch": 0.012619020305150854,
      "eval_loss": 1.8451412916183472,
      "eval_runtime": 12.9771,
      "eval_samples_per_second": 507.357,
      "eval_steps_per_second": 7.937,
      "step": 110
    },
    {
      "epoch": 0.013766203969255477,
      "eval_loss": 1.8399487733840942,
      "eval_runtime": 13.0209,
      "eval_samples_per_second": 505.648,
      "eval_steps_per_second": 7.91,
      "step": 120
    },
    {
      "epoch": 0.0149133876333601,
      "eval_loss": 1.8342881202697754,
      "eval_runtime": 13.0369,
      "eval_samples_per_second": 505.028,
      "eval_steps_per_second": 7.901,
      "step": 130
    },
    {
      "epoch": 0.016060571297464726,
      "eval_loss": 1.8283486366271973,
      "eval_runtime": 13.0149,
      "eval_samples_per_second": 505.88,
      "eval_steps_per_second": 7.914,
      "step": 140
    },
    {
      "epoch": 0.017207754961569347,
      "eval_loss": 1.822334885597229,
      "eval_runtime": 13.0213,
      "eval_samples_per_second": 505.632,
      "eval_steps_per_second": 7.91,
      "step": 150
    },
    {
      "epoch": 0.01835493862567397,
      "eval_loss": 1.8158738613128662,
      "eval_runtime": 13.0599,
      "eval_samples_per_second": 504.14,
      "eval_steps_per_second": 7.887,
      "step": 160
    },
    {
      "epoch": 0.019502122289778594,
      "eval_loss": 1.8090614080429077,
      "eval_runtime": 13.034,
      "eval_samples_per_second": 505.14,
      "eval_steps_per_second": 7.902,
      "step": 170
    },
    {
      "epoch": 0.020649305953883215,
      "eval_loss": 1.8015782833099365,
      "eval_runtime": 13.0665,
      "eval_samples_per_second": 503.885,
      "eval_steps_per_second": 7.883,
      "step": 180
    },
    {
      "epoch": 0.02179648961798784,
      "eval_loss": 1.793796420097351,
      "eval_runtime": 13.0555,
      "eval_samples_per_second": 504.31,
      "eval_steps_per_second": 7.889,
      "step": 190
    },
    {
      "epoch": 0.022943673282092462,
      "grad_norm": 4.906337738037109,
      "learning_rate": 7.645259938837921e-07,
      "loss": 3.0303,
      "step": 200
    },
    {
      "epoch": 0.022943673282092462,
      "eval_loss": 1.785815715789795,
      "eval_runtime": 12.9925,
      "eval_samples_per_second": 506.754,
      "eval_steps_per_second": 7.928,
      "step": 200
    },
    {
      "epoch": 0.024090856946197087,
      "eval_loss": 1.7775053977966309,
      "eval_runtime": 13.0639,
      "eval_samples_per_second": 503.986,
      "eval_steps_per_second": 7.884,
      "step": 210
    },
    {
      "epoch": 0.025238040610301708,
      "eval_loss": 1.7692992687225342,
      "eval_runtime": 13.0129,
      "eval_samples_per_second": 505.96,
      "eval_steps_per_second": 7.915,
      "step": 220
    },
    {
      "epoch": 0.026385224274406333,
      "eval_loss": 1.760453224182129,
      "eval_runtime": 13.0078,
      "eval_samples_per_second": 506.158,
      "eval_steps_per_second": 7.918,
      "step": 230
    },
    {
      "epoch": 0.027532407938510955,
      "eval_loss": 1.751396656036377,
      "eval_runtime": 12.9957,
      "eval_samples_per_second": 506.628,
      "eval_steps_per_second": 7.926,
      "step": 240
    },
    {
      "epoch": 0.02867959160261558,
      "eval_loss": 1.7417218685150146,
      "eval_runtime": 12.9774,
      "eval_samples_per_second": 507.344,
      "eval_steps_per_second": 7.937,
      "step": 250
    },
    {
      "epoch": 0.0298267752667202,
      "eval_loss": 1.7319914102554321,
      "eval_runtime": 13.0219,
      "eval_samples_per_second": 505.611,
      "eval_steps_per_second": 7.91,
      "step": 260
    },
    {
      "epoch": 0.030973958930824826,
      "eval_loss": 1.7227253913879395,
      "eval_runtime": 13.0026,
      "eval_samples_per_second": 506.361,
      "eval_steps_per_second": 7.922,
      "step": 270
    },
    {
      "epoch": 0.03212114259492945,
      "eval_loss": 1.7133797407150269,
      "eval_runtime": 12.9757,
      "eval_samples_per_second": 507.409,
      "eval_steps_per_second": 7.938,
      "step": 280
    },
    {
      "epoch": 0.03326832625903407,
      "eval_loss": 1.704041600227356,
      "eval_runtime": 12.9845,
      "eval_samples_per_second": 507.065,
      "eval_steps_per_second": 7.933,
      "step": 290
    },
    {
      "epoch": 0.034415509923138694,
      "grad_norm": 4.665822505950928,
      "learning_rate": 1.1467889908256882e-06,
      "loss": 2.9459,
      "step": 300
    },
    {
      "epoch": 0.034415509923138694,
      "eval_loss": 1.6940686702728271,
      "eval_runtime": 13.0019,
      "eval_samples_per_second": 506.387,
      "eval_steps_per_second": 7.922,
      "step": 300
    },
    {
      "epoch": 0.035562693587243316,
      "eval_loss": 1.683342695236206,
      "eval_runtime": 13.0065,
      "eval_samples_per_second": 506.209,
      "eval_steps_per_second": 7.919,
      "step": 310
    },
    {
      "epoch": 0.03670987725134794,
      "eval_loss": 1.6724653244018555,
      "eval_runtime": 13.0129,
      "eval_samples_per_second": 505.96,
      "eval_steps_per_second": 7.915,
      "step": 320
    },
    {
      "epoch": 0.037857060915452566,
      "eval_loss": 1.6614341735839844,
      "eval_runtime": 12.9921,
      "eval_samples_per_second": 506.769,
      "eval_steps_per_second": 7.928,
      "step": 330
    },
    {
      "epoch": 0.03900424457955719,
      "eval_loss": 1.6510112285614014,
      "eval_runtime": 13.0242,
      "eval_samples_per_second": 505.52,
      "eval_steps_per_second": 7.908,
      "step": 340
    },
    {
      "epoch": 0.04015142824366181,
      "eval_loss": 1.6401513814926147,
      "eval_runtime": 12.9214,
      "eval_samples_per_second": 509.542,
      "eval_steps_per_second": 7.971,
      "step": 350
    },
    {
      "epoch": 0.04129861190776643,
      "eval_loss": 1.6295816898345947,
      "eval_runtime": 12.9563,
      "eval_samples_per_second": 508.171,
      "eval_steps_per_second": 7.95,
      "step": 360
    },
    {
      "epoch": 0.04244579557187106,
      "eval_loss": 1.6187150478363037,
      "eval_runtime": 12.9758,
      "eval_samples_per_second": 507.405,
      "eval_steps_per_second": 7.938,
      "step": 370
    },
    {
      "epoch": 0.04359297923597568,
      "eval_loss": 1.607272982597351,
      "eval_runtime": 12.9876,
      "eval_samples_per_second": 506.947,
      "eval_steps_per_second": 7.931,
      "step": 380
    },
    {
      "epoch": 0.0447401629000803,
      "eval_loss": 1.5961676836013794,
      "eval_runtime": 12.9782,
      "eval_samples_per_second": 507.313,
      "eval_steps_per_second": 7.936,
      "step": 390
    },
    {
      "epoch": 0.045887346564184923,
      "grad_norm": 4.870114326477051,
      "learning_rate": 1.5290519877675841e-06,
      "loss": 2.7813,
      "step": 400
    },
    {
      "epoch": 0.045887346564184923,
      "eval_loss": 1.5848218202590942,
      "eval_runtime": 12.9783,
      "eval_samples_per_second": 507.309,
      "eval_steps_per_second": 7.936,
      "step": 400
    },
    {
      "epoch": 0.04703453022828955,
      "eval_loss": 1.5734797716140747,
      "eval_runtime": 12.9739,
      "eval_samples_per_second": 507.482,
      "eval_steps_per_second": 7.939,
      "step": 410
    },
    {
      "epoch": 0.04818171389239417,
      "eval_loss": 1.562021255493164,
      "eval_runtime": 12.9388,
      "eval_samples_per_second": 508.855,
      "eval_steps_per_second": 7.961,
      "step": 420
    },
    {
      "epoch": 0.049328897556498795,
      "eval_loss": 1.5495364665985107,
      "eval_runtime": 12.9412,
      "eval_samples_per_second": 508.764,
      "eval_steps_per_second": 7.959,
      "step": 430
    },
    {
      "epoch": 0.050476081220603417,
      "eval_loss": 1.5375314950942993,
      "eval_runtime": 12.9686,
      "eval_samples_per_second": 507.687,
      "eval_steps_per_second": 7.942,
      "step": 440
    },
    {
      "epoch": 0.051623264884708045,
      "eval_loss": 1.525598168373108,
      "eval_runtime": 12.9695,
      "eval_samples_per_second": 507.651,
      "eval_steps_per_second": 7.942,
      "step": 450
    },
    {
      "epoch": 0.052770448548812667,
      "eval_loss": 1.5132672786712646,
      "eval_runtime": 12.8961,
      "eval_samples_per_second": 510.543,
      "eval_steps_per_second": 7.987,
      "step": 460
    },
    {
      "epoch": 0.05391763221291729,
      "eval_loss": 1.5012215375900269,
      "eval_runtime": 12.9428,
      "eval_samples_per_second": 508.7,
      "eval_steps_per_second": 7.958,
      "step": 470
    },
    {
      "epoch": 0.05506481587702191,
      "eval_loss": 1.4892219305038452,
      "eval_runtime": 12.9208,
      "eval_samples_per_second": 509.567,
      "eval_steps_per_second": 7.972,
      "step": 480
    },
    {
      "epoch": 0.05621199954112653,
      "eval_loss": 1.4768636226654053,
      "eval_runtime": 12.9423,
      "eval_samples_per_second": 508.721,
      "eval_steps_per_second": 7.958,
      "step": 490
    },
    {
      "epoch": 0.05735918320523116,
      "grad_norm": 4.155641555786133,
      "learning_rate": 1.9113149847094803e-06,
      "loss": 2.6308,
      "step": 500
    },
    {
      "epoch": 0.05735918320523116,
      "eval_loss": 1.4640088081359863,
      "eval_runtime": 12.8729,
      "eval_samples_per_second": 511.462,
      "eval_steps_per_second": 8.001,
      "step": 500
    },
    {
      "epoch": 0.05850636686933578,
      "eval_loss": 1.4513096809387207,
      "eval_runtime": 12.9653,
      "eval_samples_per_second": 507.817,
      "eval_steps_per_second": 7.944,
      "step": 510
    },
    {
      "epoch": 0.0596535505334404,
      "eval_loss": 1.439149260520935,
      "eval_runtime": 12.9443,
      "eval_samples_per_second": 508.639,
      "eval_steps_per_second": 7.957,
      "step": 520
    },
    {
      "epoch": 0.060800734197545024,
      "eval_loss": 1.426237940788269,
      "eval_runtime": 12.9496,
      "eval_samples_per_second": 508.433,
      "eval_steps_per_second": 7.954,
      "step": 530
    },
    {
      "epoch": 0.06194791786164965,
      "eval_loss": 1.4129557609558105,
      "eval_runtime": 12.9822,
      "eval_samples_per_second": 507.157,
      "eval_steps_per_second": 7.934,
      "step": 540
    },
    {
      "epoch": 0.06309510152575427,
      "eval_loss": 1.3997886180877686,
      "eval_runtime": 12.9979,
      "eval_samples_per_second": 506.542,
      "eval_steps_per_second": 7.924,
      "step": 550
    },
    {
      "epoch": 0.0642422851898589,
      "eval_loss": 1.3873906135559082,
      "eval_runtime": 12.9378,
      "eval_samples_per_second": 508.895,
      "eval_steps_per_second": 7.961,
      "step": 560
    },
    {
      "epoch": 0.06538946885396352,
      "eval_loss": 1.3751789331436157,
      "eval_runtime": 12.9624,
      "eval_samples_per_second": 507.932,
      "eval_steps_per_second": 7.946,
      "step": 570
    },
    {
      "epoch": 0.06653665251806815,
      "eval_loss": 1.3620370626449585,
      "eval_runtime": 12.9498,
      "eval_samples_per_second": 508.426,
      "eval_steps_per_second": 7.954,
      "step": 580
    },
    {
      "epoch": 0.06768383618217276,
      "eval_loss": 1.3485124111175537,
      "eval_runtime": 12.9759,
      "eval_samples_per_second": 507.403,
      "eval_steps_per_second": 7.938,
      "step": 590
    },
    {
      "epoch": 0.06883101984627739,
      "grad_norm": 5.262124061584473,
      "learning_rate": 2.2935779816513764e-06,
      "loss": 2.4452,
      "step": 600
    },
    {
      "epoch": 0.06883101984627739,
      "eval_loss": 1.3349775075912476,
      "eval_runtime": 12.9631,
      "eval_samples_per_second": 507.902,
      "eval_steps_per_second": 7.946,
      "step": 600
    }
  ],
  "logging_steps": 100,
  "max_steps": 26151,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 10,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}