{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9974025974025973, "eval_steps": 500, "global_step": 576, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05194805194805195, "grad_norm": 6.558798236067014, "learning_rate": 7.758620689655173e-07, "loss": 1.1038, "step": 10 }, { "epoch": 0.1038961038961039, "grad_norm": 2.674816595181998, "learning_rate": 1.6379310344827587e-06, "loss": 0.992, "step": 20 }, { "epoch": 0.15584415584415584, "grad_norm": 1.8226313744055744, "learning_rate": 2.5e-06, "loss": 0.84, "step": 30 }, { "epoch": 0.2077922077922078, "grad_norm": 1.5584327305344476, "learning_rate": 3.362068965517242e-06, "loss": 0.7842, "step": 40 }, { "epoch": 0.2597402597402597, "grad_norm": 1.5690300333908958, "learning_rate": 4.224137931034483e-06, "loss": 0.7462, "step": 50 }, { "epoch": 0.3116883116883117, "grad_norm": 1.3587459505022679, "learning_rate": 4.999954022123679e-06, "loss": 0.7281, "step": 60 }, { "epoch": 0.36363636363636365, "grad_norm": 1.5619125769090973, "learning_rate": 4.994438722989841e-06, "loss": 0.7182, "step": 70 }, { "epoch": 0.4155844155844156, "grad_norm": 1.3120284796636292, "learning_rate": 4.979751088147192e-06, "loss": 0.71, "step": 80 }, { "epoch": 0.4675324675324675, "grad_norm": 1.4094824038705405, "learning_rate": 4.955945125704375e-06, "loss": 0.7064, "step": 90 }, { "epoch": 0.5194805194805194, "grad_norm": 1.274095106150924, "learning_rate": 4.923108372900683e-06, "loss": 0.677, "step": 100 }, { "epoch": 0.5714285714285714, "grad_norm": 1.4208094097198067, "learning_rate": 4.881361574221648e-06, "loss": 0.6915, "step": 110 }, { "epoch": 0.6233766233766234, "grad_norm": 1.374753133285299, "learning_rate": 4.830858237407799e-06, "loss": 0.6861, "step": 120 }, { "epoch": 0.6753246753246753, "grad_norm": 1.293455293244359, "learning_rate": 4.771784068989186e-06, "loss": 0.6975, "step": 130 }, { "epoch": 0.7272727272727273, "grad_norm": 1.3020469143910023, "learning_rate": 4.7043562914212915e-06, "loss": 0.6799, "step": 140 }, { "epoch": 0.7792207792207793, "grad_norm": 1.3651429159451645, "learning_rate": 4.6288228443332786e-06, "loss": 0.6913, "step": 150 }, { "epoch": 0.8311688311688312, "grad_norm": 1.5410204906658607, "learning_rate": 4.5454614728256995e-06, "loss": 0.6805, "step": 160 }, { "epoch": 0.8831168831168831, "grad_norm": 1.3733140100266965, "learning_rate": 4.454578706170075e-06, "loss": 0.6643, "step": 170 }, { "epoch": 0.935064935064935, "grad_norm": 1.2861021481912802, "learning_rate": 4.356508730665804e-06, "loss": 0.658, "step": 180 }, { "epoch": 0.987012987012987, "grad_norm": 1.288753522105602, "learning_rate": 4.251612160799017e-06, "loss": 0.6886, "step": 190 }, { "epoch": 1.0415584415584416, "grad_norm": 1.2471057575436326, "learning_rate": 4.140274713221985e-06, "loss": 0.6175, "step": 200 }, { "epoch": 1.0935064935064935, "grad_norm": 1.187620072173425, "learning_rate": 4.022905788428984e-06, "loss": 0.5921, "step": 210 }, { "epoch": 1.1454545454545455, "grad_norm": 1.206352433048565, "learning_rate": 3.899936965343989e-06, "loss": 0.6058, "step": 220 }, { "epoch": 1.1974025974025975, "grad_norm": 1.410833521608183, "learning_rate": 3.7718204143557337e-06, "loss": 0.6233, "step": 230 }, { "epoch": 1.2493506493506494, "grad_norm": 1.3100928026008483, "learning_rate": 3.6390272346356225e-06, "loss": 0.6183, "step": 240 }, { "epoch": 1.3012987012987014, "grad_norm": 1.2235762612191803, "learning_rate": 3.5020457218523407e-06, "loss": 0.5841, "step": 250 }, { "epoch": 1.3532467532467534, "grad_norm": 1.3015881612011941, "learning_rate": 3.3613795726529795e-06, "loss": 0.6015, "step": 260 }, { "epoch": 1.4051948051948053, "grad_norm": 1.2270855673243282, "learning_rate": 3.2175460325130176e-06, "loss": 0.5974, "step": 270 }, { "epoch": 1.457142857142857, "grad_norm": 1.299855240564656, "learning_rate": 3.0710739937657035e-06, "loss": 0.586, "step": 280 }, { "epoch": 1.509090909090909, "grad_norm": 1.2487072210315888, "learning_rate": 2.9225020508046233e-06, "loss": 0.6151, "step": 290 }, { "epoch": 1.561038961038961, "grad_norm": 1.2285734897854397, "learning_rate": 2.7723765196106773e-06, "loss": 0.5882, "step": 300 }, { "epoch": 1.612987012987013, "grad_norm": 1.3395108447307076, "learning_rate": 2.621249428885908e-06, "loss": 0.6164, "step": 310 }, { "epoch": 1.664935064935065, "grad_norm": 1.1702020402228575, "learning_rate": 2.4696764901809926e-06, "loss": 0.5999, "step": 320 }, { "epoch": 1.716883116883117, "grad_norm": 1.217176969741526, "learning_rate": 2.3182150544804878e-06, "loss": 0.588, "step": 330 }, { "epoch": 1.7688311688311689, "grad_norm": 1.2406628193012295, "learning_rate": 2.1674220627596814e-06, "loss": 0.5786, "step": 340 }, { "epoch": 1.8207792207792208, "grad_norm": 1.1903958246809216, "learning_rate": 2.017851998049107e-06, "loss": 0.5822, "step": 350 }, { "epoch": 1.8727272727272726, "grad_norm": 1.2667654877861587, "learning_rate": 1.8700548465371877e-06, "loss": 0.5837, "step": 360 }, { "epoch": 1.9246753246753245, "grad_norm": 1.2920373716993399, "learning_rate": 1.7245740752082901e-06, "loss": 0.5871, "step": 370 }, { "epoch": 1.9766233766233765, "grad_norm": 1.275603929027079, "learning_rate": 1.5819446334526363e-06, "loss": 0.5838, "step": 380 }, { "epoch": 2.031168831168831, "grad_norm": 1.133575867354361, "learning_rate": 1.4426909859963716e-06, "loss": 0.5697, "step": 390 }, { "epoch": 2.083116883116883, "grad_norm": 1.2321064809517404, "learning_rate": 1.3073251843849503e-06, "loss": 0.5431, "step": 400 }, { "epoch": 2.135064935064935, "grad_norm": 1.2519940232258298, "learning_rate": 1.1763449841111906e-06, "loss": 0.5016, "step": 410 }, { "epoch": 2.187012987012987, "grad_norm": 1.2387163111275854, "learning_rate": 1.05023201431156e-06, "loss": 0.5098, "step": 420 }, { "epoch": 2.238961038961039, "grad_norm": 1.19694862951574, "learning_rate": 9.294500067608941e-07, "loss": 0.5287, "step": 430 }, { "epoch": 2.290909090909091, "grad_norm": 1.2777524766342223, "learning_rate": 8.144430906777756e-07, "loss": 0.516, "step": 440 }, { "epoch": 2.342857142857143, "grad_norm": 1.1637303035803468, "learning_rate": 7.056341596107299e-07, "loss": 0.5143, "step": 450 }, { "epoch": 2.394805194805195, "grad_norm": 1.1933528178678503, "learning_rate": 6.034233164104184e-07, "loss": 0.5239, "step": 460 }, { "epoch": 2.446753246753247, "grad_norm": 1.2098669627294527, "learning_rate": 5.081864020058125e-07, "loss": 0.5196, "step": 470 }, { "epoch": 2.498701298701299, "grad_norm": 1.168439112620554, "learning_rate": 4.20273613394232e-07, "loss": 0.5272, "step": 480 }, { "epoch": 2.5506493506493504, "grad_norm": 1.1297680535945829, "learning_rate": 3.400082159270418e-07, "loss": 0.5169, "step": 490 }, { "epoch": 2.602597402597403, "grad_norm": 1.0857638524201654, "learning_rate": 2.676853546260791e-07, "loss": 0.5253, "step": 500 }, { "epoch": 2.6545454545454543, "grad_norm": 1.1477687512423902, "learning_rate": 2.0357096890174482e-07, "loss": 0.5277, "step": 510 }, { "epoch": 2.7064935064935067, "grad_norm": 1.1757736817804982, "learning_rate": 1.4790081466345863e-07, "loss": 0.5381, "step": 520 }, { "epoch": 2.7584415584415583, "grad_norm": 1.1528599201248226, "learning_rate": 1.0087959741828607e-07, "loss": 0.5028, "step": 530 }, { "epoch": 2.8103896103896107, "grad_norm": 1.241278074991346, "learning_rate": 6.268021954544095e-08, "loss": 0.525, "step": 540 }, { "epoch": 2.862337662337662, "grad_norm": 1.178930136102725, "learning_rate": 3.3443144514516965e-08, "loss": 0.5231, "step": 550 }, { "epoch": 2.914285714285714, "grad_norm": 1.1659672855787477, "learning_rate": 1.3275880385284767e-08, "loss": 0.5317, "step": 560 }, { "epoch": 2.966233766233766, "grad_norm": 1.2174438194181065, "learning_rate": 2.252584488296461e-09, "loss": 0.5093, "step": 570 }, { "epoch": 2.9974025974025973, "step": 576, "total_flos": 465586638684160.0, "train_loss": 0.6205948731965489, "train_runtime": 15697.9454, "train_samples_per_second": 1.177, "train_steps_per_second": 0.037 } ], "logging_steps": 10, "max_steps": 576, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 465586638684160.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }