| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 532, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.037638202775817454, |
| "grad_norm": 20.371968284259324, |
| "learning_rate": 8.333333333333333e-07, |
| "loss": 0.5923, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.07527640555163491, |
| "grad_norm": 0.8786546808697457, |
| "learning_rate": 1.7592592592592594e-06, |
| "loss": 0.5849, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.11291460832745237, |
| "grad_norm": 0.6200515032845644, |
| "learning_rate": 2.6851851851851856e-06, |
| "loss": 0.5754, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.15055281110326982, |
| "grad_norm": 0.604344938574343, |
| "learning_rate": 3.6111111111111115e-06, |
| "loss": 0.5628, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.18819101387908727, |
| "grad_norm": 8.233110803533657, |
| "learning_rate": 4.537037037037038e-06, |
| "loss": 0.5897, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.22582921665490474, |
| "grad_norm": 0.5824369986909103, |
| "learning_rate": 4.998650245168965e-06, |
| "loss": 0.5834, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.26346741943072216, |
| "grad_norm": 0.5709451199777168, |
| "learning_rate": 4.987860949769804e-06, |
| "loss": 0.58, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.30110562220653964, |
| "grad_norm": 0.557443125157301, |
| "learning_rate": 4.9663289476829e-06, |
| "loss": 0.5686, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.3387438249823571, |
| "grad_norm": 0.49885715553958243, |
| "learning_rate": 4.934147215158732e-06, |
| "loss": 0.5584, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.37638202775817453, |
| "grad_norm": 0.41661708295752625, |
| "learning_rate": 4.891454714510784e-06, |
| "loss": 0.5994, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.414020230533992, |
| "grad_norm": 0.41407258641803263, |
| "learning_rate": 4.838435794069406e-06, |
| "loss": 0.5661, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.4516584333098095, |
| "grad_norm": 0.43186345082298655, |
| "learning_rate": 4.775319392156593e-06, |
| "loss": 0.589, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4892966360856269, |
| "grad_norm": 0.4096882144739654, |
| "learning_rate": 4.70237804851899e-06, |
| "loss": 0.5423, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.5269348388614443, |
| "grad_norm": 0.3998323798211624, |
| "learning_rate": 4.619926727487774e-06, |
| "loss": 0.5741, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.5645730416372619, |
| "grad_norm": 0.42564571493679665, |
| "learning_rate": 4.528321457947091e-06, |
| "loss": 0.5942, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.6022112444130793, |
| "grad_norm": 0.3912736147474385, |
| "learning_rate": 4.427957795983715e-06, |
| "loss": 0.5735, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.6398494471888967, |
| "grad_norm": 0.40658685656666593, |
| "learning_rate": 4.319269116856291e-06, |
| "loss": 0.5667, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.6774876499647142, |
| "grad_norm": 0.408062593581521, |
| "learning_rate": 4.2027247436595245e-06, |
| "loss": 0.5578, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.7151258527405316, |
| "grad_norm": 0.47342188328224316, |
| "learning_rate": 4.078827920763835e-06, |
| "loss": 0.5784, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.7527640555163491, |
| "grad_norm": 0.3807306896086036, |
| "learning_rate": 3.948113640781265e-06, |
| "loss": 0.571, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.7904022582921666, |
| "grad_norm": 0.40139763329295447, |
| "learning_rate": 3.8111463344409026e-06, |
| "loss": 0.56, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.828040461067984, |
| "grad_norm": 0.46636837900138484, |
| "learning_rate": 3.668517433349069e-06, |
| "loss": 0.5752, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.8656786638438014, |
| "grad_norm": 0.4250007472811382, |
| "learning_rate": 3.520842816158374e-06, |
| "loss": 0.5845, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.903316866619619, |
| "grad_norm": 0.41635021722275656, |
| "learning_rate": 3.368760149173219e-06, |
| "loss": 0.5928, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.9409550693954364, |
| "grad_norm": 0.43894853146688345, |
| "learning_rate": 3.212926132875141e-06, |
| "loss": 0.5614, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.9785932721712538, |
| "grad_norm": 0.42161850151896035, |
| "learning_rate": 3.054013666257638e-06, |
| "loss": 0.58, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.015055281110327, |
| "grad_norm": 0.45211808401795345, |
| "learning_rate": 2.8927089412150176e-06, |
| "loss": 0.5852, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.0526934838861444, |
| "grad_norm": 0.40722510696568537, |
| "learning_rate": 2.729708479531844e-06, |
| "loss": 0.532, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.090331686661962, |
| "grad_norm": 0.3964696274251261, |
| "learning_rate": 2.5657161252674047e-06, |
| "loss": 0.5312, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.1279698894377794, |
| "grad_norm": 0.3941081603173965, |
| "learning_rate": 2.4014400055222337e-06, |
| "loss": 0.5181, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.1656080922135967, |
| "grad_norm": 0.43576234646890405, |
| "learning_rate": 2.2375894727102552e-06, |
| "loss": 0.5504, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.2032462949894143, |
| "grad_norm": 0.39797299710552336, |
| "learning_rate": 2.0748720415399542e-06, |
| "loss": 0.5295, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.2408844977652318, |
| "grad_norm": 0.3685397052283085, |
| "learning_rate": 1.913990333930858e-06, |
| "loss": 0.5276, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.278522700541049, |
| "grad_norm": 0.385020492453289, |
| "learning_rate": 1.7556390450573213e-06, |
| "loss": 0.5329, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.3161609033168666, |
| "grad_norm": 0.358339166503952, |
| "learning_rate": 1.600501943620384e-06, |
| "loss": 0.5282, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.353799106092684, |
| "grad_norm": 0.3959823586817411, |
| "learning_rate": 1.4492489193006884e-06, |
| "loss": 0.5233, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.3914373088685015, |
| "grad_norm": 0.37714168383937835, |
| "learning_rate": 1.302533090141689e-06, |
| "loss": 0.5148, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.429075511644319, |
| "grad_norm": 0.3839744134040006, |
| "learning_rate": 1.1609879823536233e-06, |
| "loss": 0.5369, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.4667137144201363, |
| "grad_norm": 0.4084228402216514, |
| "learning_rate": 1.0252247947159846e-06, |
| "loss": 0.5451, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.5043519171959538, |
| "grad_norm": 0.3722310775666982, |
| "learning_rate": 8.95829759390954e-07, |
| "loss": 0.5418, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.5419901199717714, |
| "grad_norm": 0.3723091238383956, |
| "learning_rate": 7.733616105439077e-07, |
| "loss": 0.5178, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.5796283227475887, |
| "grad_norm": 0.36274010086702985, |
| "learning_rate": 6.58349171701651e-07, |
| "loss": 0.5241, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.6172665255234062, |
| "grad_norm": 0.3827469898084054, |
| "learning_rate": 5.51289072266255e-07, |
| "loss": 0.5551, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.6549047282992237, |
| "grad_norm": 0.4042469036956372, |
| "learning_rate": 4.5264360304473065e-07, |
| "loss": 0.5557, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.692542931075041, |
| "grad_norm": 0.36511925658271227, |
| "learning_rate": 3.6283872005444087e-07, |
| "loss": 0.5268, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.7301811338508586, |
| "grad_norm": 0.4026080916542273, |
| "learning_rate": 2.8226220522394735e-07, |
| "loss": 0.5353, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.7678193366266761, |
| "grad_norm": 0.3791323800107557, |
| "learning_rate": 2.1126199193144904e-07, |
| "loss": 0.5543, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.8054575394024934, |
| "grad_norm": 0.3975590953752723, |
| "learning_rate": 1.5014466261124128e-07, |
| "loss": 0.5107, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.843095742178311, |
| "grad_norm": 0.3845716392065491, |
| "learning_rate": 9.917412491559337e-08, |
| "loss": 0.5371, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.8807339449541285, |
| "grad_norm": 0.351629905127873, |
| "learning_rate": 5.8570472148445633e-08, |
| "loss": 0.5313, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.9183721477299458, |
| "grad_norm": 0.3890690665902021, |
| "learning_rate": 2.8509032891635146e-08, |
| "loss": 0.5378, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.9560103505057633, |
| "grad_norm": 0.38113169843995587, |
| "learning_rate": 9.119613927399684e-09, |
| "loss": 0.5291, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.9936485532815809, |
| "grad_norm": 0.35623501505342836, |
| "learning_rate": 4.859397262726995e-10, |
| "loss": 0.5559, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.0, |
| "step": 532, |
| "total_flos": 7.869097163274322e+17, |
| "train_loss": 0.5555472550983716, |
| "train_runtime": 14309.8006, |
| "train_samples_per_second": 19.012, |
| "train_steps_per_second": 0.037 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 532, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": false, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.869097163274322e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|