{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9974025974025973,
  "eval_steps": 500,
  "global_step": 576,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05194805194805195,
      "grad_norm": 6.558798236067014,
      "learning_rate": 7.758620689655173e-07,
      "loss": 1.1038,
      "step": 10
    },
    {
      "epoch": 0.1038961038961039,
      "grad_norm": 2.674816595181998,
      "learning_rate": 1.6379310344827587e-06,
      "loss": 0.992,
      "step": 20
    },
    {
      "epoch": 0.15584415584415584,
      "grad_norm": 1.8226313744055744,
      "learning_rate": 2.5e-06,
      "loss": 0.84,
      "step": 30
    },
    {
      "epoch": 0.2077922077922078,
      "grad_norm": 1.5584327305344476,
      "learning_rate": 3.362068965517242e-06,
      "loss": 0.7842,
      "step": 40
    },
    {
      "epoch": 0.2597402597402597,
      "grad_norm": 1.5690300333908958,
      "learning_rate": 4.224137931034483e-06,
      "loss": 0.7462,
      "step": 50
    },
    {
      "epoch": 0.3116883116883117,
      "grad_norm": 1.3587459505022679,
      "learning_rate": 4.999954022123679e-06,
      "loss": 0.7281,
      "step": 60
    },
    {
      "epoch": 0.36363636363636365,
      "grad_norm": 1.5619125769090973,
      "learning_rate": 4.994438722989841e-06,
      "loss": 0.7182,
      "step": 70
    },
    {
      "epoch": 0.4155844155844156,
      "grad_norm": 1.3120284796636292,
      "learning_rate": 4.979751088147192e-06,
      "loss": 0.71,
      "step": 80
    },
    {
      "epoch": 0.4675324675324675,
      "grad_norm": 1.4094824038705405,
      "learning_rate": 4.955945125704375e-06,
      "loss": 0.7064,
      "step": 90
    },
    {
      "epoch": 0.5194805194805194,
      "grad_norm": 1.274095106150924,
      "learning_rate": 4.923108372900683e-06,
      "loss": 0.677,
      "step": 100
    },
    {
      "epoch": 0.5714285714285714,
      "grad_norm": 1.4208094097198067,
      "learning_rate": 4.881361574221648e-06,
      "loss": 0.6915,
      "step": 110
    },
    {
      "epoch": 0.6233766233766234,
      "grad_norm": 1.374753133285299,
      "learning_rate": 4.830858237407799e-06,
      "loss": 0.6861,
      "step": 120
    },
    {
      "epoch": 0.6753246753246753,
      "grad_norm": 1.293455293244359,
      "learning_rate": 4.771784068989186e-06,
      "loss": 0.6975,
      "step": 130
    },
    {
      "epoch": 0.7272727272727273,
      "grad_norm": 1.3020469143910023,
      "learning_rate": 4.7043562914212915e-06,
      "loss": 0.6799,
      "step": 140
    },
    {
      "epoch": 0.7792207792207793,
      "grad_norm": 1.3651429159451645,
      "learning_rate": 4.6288228443332786e-06,
      "loss": 0.6913,
      "step": 150
    },
    {
      "epoch": 0.8311688311688312,
      "grad_norm": 1.5410204906658607,
      "learning_rate": 4.5454614728256995e-06,
      "loss": 0.6805,
      "step": 160
    },
    {
      "epoch": 0.8831168831168831,
      "grad_norm": 1.3733140100266965,
      "learning_rate": 4.454578706170075e-06,
      "loss": 0.6643,
      "step": 170
    },
    {
      "epoch": 0.935064935064935,
      "grad_norm": 1.2861021481912802,
      "learning_rate": 4.356508730665804e-06,
      "loss": 0.658,
      "step": 180
    },
    {
      "epoch": 0.987012987012987,
      "grad_norm": 1.288753522105602,
      "learning_rate": 4.251612160799017e-06,
      "loss": 0.6886,
      "step": 190
    },
    {
      "epoch": 1.0415584415584416,
      "grad_norm": 1.2471057575436326,
      "learning_rate": 4.140274713221985e-06,
      "loss": 0.6175,
      "step": 200
    },
    {
      "epoch": 1.0935064935064935,
      "grad_norm": 1.187620072173425,
      "learning_rate": 4.022905788428984e-06,
      "loss": 0.5921,
      "step": 210
    },
    {
      "epoch": 1.1454545454545455,
      "grad_norm": 1.206352433048565,
      "learning_rate": 3.899936965343989e-06,
      "loss": 0.6058,
      "step": 220
    },
    {
      "epoch": 1.1974025974025975,
      "grad_norm": 1.410833521608183,
      "learning_rate": 3.7718204143557337e-06,
      "loss": 0.6233,
      "step": 230
    },
    {
      "epoch": 1.2493506493506494,
      "grad_norm": 1.3100928026008483,
      "learning_rate": 3.6390272346356225e-06,
      "loss": 0.6183,
      "step": 240
    },
    {
      "epoch": 1.3012987012987014,
      "grad_norm": 1.2235762612191803,
      "learning_rate": 3.5020457218523407e-06,
      "loss": 0.5841,
      "step": 250
    },
    {
      "epoch": 1.3532467532467534,
      "grad_norm": 1.3015881612011941,
      "learning_rate": 3.3613795726529795e-06,
      "loss": 0.6015,
      "step": 260
    },
    {
      "epoch": 1.4051948051948053,
      "grad_norm": 1.2270855673243282,
      "learning_rate": 3.2175460325130176e-06,
      "loss": 0.5974,
      "step": 270
    },
    {
      "epoch": 1.457142857142857,
      "grad_norm": 1.299855240564656,
      "learning_rate": 3.0710739937657035e-06,
      "loss": 0.586,
      "step": 280
    },
    {
      "epoch": 1.509090909090909,
      "grad_norm": 1.2487072210315888,
      "learning_rate": 2.9225020508046233e-06,
      "loss": 0.6151,
      "step": 290
    },
    {
      "epoch": 1.561038961038961,
      "grad_norm": 1.2285734897854397,
      "learning_rate": 2.7723765196106773e-06,
      "loss": 0.5882,
      "step": 300
    },
    {
      "epoch": 1.612987012987013,
      "grad_norm": 1.3395108447307076,
      "learning_rate": 2.621249428885908e-06,
      "loss": 0.6164,
      "step": 310
    },
    {
      "epoch": 1.664935064935065,
      "grad_norm": 1.1702020402228575,
      "learning_rate": 2.4696764901809926e-06,
      "loss": 0.5999,
      "step": 320
    },
    {
      "epoch": 1.716883116883117,
      "grad_norm": 1.217176969741526,
      "learning_rate": 2.3182150544804878e-06,
      "loss": 0.588,
      "step": 330
    },
    {
      "epoch": 1.7688311688311689,
      "grad_norm": 1.2406628193012295,
      "learning_rate": 2.1674220627596814e-06,
      "loss": 0.5786,
      "step": 340
    },
    {
      "epoch": 1.8207792207792208,
      "grad_norm": 1.1903958246809216,
      "learning_rate": 2.017851998049107e-06,
      "loss": 0.5822,
      "step": 350
    },
    {
      "epoch": 1.8727272727272726,
      "grad_norm": 1.2667654877861587,
      "learning_rate": 1.8700548465371877e-06,
      "loss": 0.5837,
      "step": 360
    },
    {
      "epoch": 1.9246753246753245,
      "grad_norm": 1.2920373716993399,
      "learning_rate": 1.7245740752082901e-06,
      "loss": 0.5871,
      "step": 370
    },
    {
      "epoch": 1.9766233766233765,
      "grad_norm": 1.275603929027079,
      "learning_rate": 1.5819446334526363e-06,
      "loss": 0.5838,
      "step": 380
    },
    {
      "epoch": 2.031168831168831,
      "grad_norm": 1.133575867354361,
      "learning_rate": 1.4426909859963716e-06,
      "loss": 0.5697,
      "step": 390
    },
    {
      "epoch": 2.083116883116883,
      "grad_norm": 1.2321064809517404,
      "learning_rate": 1.3073251843849503e-06,
      "loss": 0.5431,
      "step": 400
    },
    {
      "epoch": 2.135064935064935,
      "grad_norm": 1.2519940232258298,
      "learning_rate": 1.1763449841111906e-06,
      "loss": 0.5016,
      "step": 410
    },
    {
      "epoch": 2.187012987012987,
      "grad_norm": 1.2387163111275854,
      "learning_rate": 1.05023201431156e-06,
      "loss": 0.5098,
      "step": 420
    },
    {
      "epoch": 2.238961038961039,
      "grad_norm": 1.19694862951574,
      "learning_rate": 9.294500067608941e-07,
      "loss": 0.5287,
      "step": 430
    },
    {
      "epoch": 2.290909090909091,
      "grad_norm": 1.2777524766342223,
      "learning_rate": 8.144430906777756e-07,
      "loss": 0.516,
      "step": 440
    },
    {
      "epoch": 2.342857142857143,
      "grad_norm": 1.1637303035803468,
      "learning_rate": 7.056341596107299e-07,
      "loss": 0.5143,
      "step": 450
    },
    {
      "epoch": 2.394805194805195,
      "grad_norm": 1.1933528178678503,
      "learning_rate": 6.034233164104184e-07,
      "loss": 0.5239,
      "step": 460
    },
    {
      "epoch": 2.446753246753247,
      "grad_norm": 1.2098669627294527,
      "learning_rate": 5.081864020058125e-07,
      "loss": 0.5196,
      "step": 470
    },
    {
      "epoch": 2.498701298701299,
      "grad_norm": 1.168439112620554,
      "learning_rate": 4.20273613394232e-07,
      "loss": 0.5272,
      "step": 480
    },
    {
      "epoch": 2.5506493506493504,
      "grad_norm": 1.1297680535945829,
      "learning_rate": 3.400082159270418e-07,
      "loss": 0.5169,
      "step": 490
    },
    {
      "epoch": 2.602597402597403,
      "grad_norm": 1.0857638524201654,
      "learning_rate": 2.676853546260791e-07,
      "loss": 0.5253,
      "step": 500
    },
    {
      "epoch": 2.6545454545454543,
      "grad_norm": 1.1477687512423902,
      "learning_rate": 2.0357096890174482e-07,
      "loss": 0.5277,
      "step": 510
    },
    {
      "epoch": 2.7064935064935067,
      "grad_norm": 1.1757736817804982,
      "learning_rate": 1.4790081466345863e-07,
      "loss": 0.5381,
      "step": 520
    },
    {
      "epoch": 2.7584415584415583,
      "grad_norm": 1.1528599201248226,
      "learning_rate": 1.0087959741828607e-07,
      "loss": 0.5028,
      "step": 530
    },
    {
      "epoch": 2.8103896103896107,
      "grad_norm": 1.241278074991346,
      "learning_rate": 6.268021954544095e-08,
      "loss": 0.525,
      "step": 540
    },
    {
      "epoch": 2.862337662337662,
      "grad_norm": 1.178930136102725,
      "learning_rate": 3.3443144514516965e-08,
      "loss": 0.5231,
      "step": 550
    },
    {
      "epoch": 2.914285714285714,
      "grad_norm": 1.1659672855787477,
      "learning_rate": 1.3275880385284767e-08,
      "loss": 0.5317,
      "step": 560
    },
    {
      "epoch": 2.966233766233766,
      "grad_norm": 1.2174438194181065,
      "learning_rate": 2.252584488296461e-09,
      "loss": 0.5093,
      "step": 570
    },
    {
      "epoch": 2.9974025974025973,
      "step": 576,
      "total_flos": 465586638684160.0,
      "train_loss": 0.6205948731965489,
      "train_runtime": 15697.9454,
      "train_samples_per_second": 1.177,
      "train_steps_per_second": 0.037
    }
  ],
  "logging_steps": 10,
  "max_steps": 576,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 465586638684160.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}