| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9989725756737533, |
| "eval_steps": 500, |
| "global_step": 790, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.025290444953765905, |
| "grad_norm": 47.23542022705078, |
| "learning_rate": 2.4050632911392408e-06, |
| "loss": 10.1452, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.05058088990753181, |
| "grad_norm": 3.6803674697875977, |
| "learning_rate": 4.936708860759495e-06, |
| "loss": 2.1447, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.07587133486129771, |
| "grad_norm": 2.6639301776885986, |
| "learning_rate": 7.468354430379747e-06, |
| "loss": 1.3165, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.10116177981506362, |
| "grad_norm": 2.2562990188598633, |
| "learning_rate": 1e-05, |
| "loss": 1.1064, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.12645222476882953, |
| "grad_norm": 1.5557793378829956, |
| "learning_rate": 9.980489073537183e-06, |
| "loss": 0.987, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.15174266972259542, |
| "grad_norm": 1.6355516910552979, |
| "learning_rate": 9.922108564649308e-06, |
| "loss": 0.9422, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.17703311467636135, |
| "grad_norm": 1.6417213678359985, |
| "learning_rate": 9.825314096462686e-06, |
| "loss": 0.9139, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.20232355963012724, |
| "grad_norm": 1.639435052871704, |
| "learning_rate": 9.690861088877633e-06, |
| "loss": 0.8812, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.22761400458389314, |
| "grad_norm": 2.0012362003326416, |
| "learning_rate": 9.519798862991627e-06, |
| "loss": 0.869, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.25290444953765906, |
| "grad_norm": 1.8415316343307495, |
| "learning_rate": 9.3134624518086e-06, |
| "loss": 0.8223, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.278194894491425, |
| "grad_norm": 2.344317674636841, |
| "learning_rate": 9.07346218114663e-06, |
| "loss": 0.8001, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.30348533944519085, |
| "grad_norm": 5.4799699783325195, |
| "learning_rate": 8.801671102058495e-06, |
| "loss": 0.7411, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.32877578439895677, |
| "grad_norm": 1.493455410003662, |
| "learning_rate": 8.500210372847128e-06, |
| "loss": 0.7192, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.3540662293527227, |
| "grad_norm": 1.5643197298049927, |
| "learning_rate": 8.171432704760159e-06, |
| "loss": 0.7094, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.37935667430648856, |
| "grad_norm": 1.616524338722229, |
| "learning_rate": 7.81790400055945e-06, |
| "loss": 0.7045, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.4046471192602545, |
| "grad_norm": 1.5532481670379639, |
| "learning_rate": 7.442383329265063e-06, |
| "loss": 0.7048, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.4299375642140204, |
| "grad_norm": 1.4283138513565063, |
| "learning_rate": 7.047801393358156e-06, |
| "loss": 0.695, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.4552280091677863, |
| "grad_norm": 1.4667181968688965, |
| "learning_rate": 6.637237656492745e-06, |
| "loss": 0.6906, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.4805184541215522, |
| "grad_norm": 1.43437659740448, |
| "learning_rate": 6.21389631022014e-06, |
| "loss": 0.6876, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.5058088990753181, |
| "grad_norm": 1.5381193161010742, |
| "learning_rate": 5.781081267290659e-06, |
| "loss": 0.6776, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.531099344029084, |
| "grad_norm": 1.4790972471237183, |
| "learning_rate": 5.342170376694141e-06, |
| "loss": 0.6765, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.55638978898285, |
| "grad_norm": 1.681736946105957, |
| "learning_rate": 4.900589061674649e-06, |
| "loss": 0.6826, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.5816802339366158, |
| "grad_norm": 1.4353394508361816, |
| "learning_rate": 4.459783586458065e-06, |
| "loss": 0.6738, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.6069706788903817, |
| "grad_norm": 1.4407438039779663, |
| "learning_rate": 4.023194160328931e-06, |
| "loss": 0.6723, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.6322611238441477, |
| "grad_norm": 1.417815089225769, |
| "learning_rate": 3.5942280889623028e-06, |
| "loss": 0.6561, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.6575515687979135, |
| "grad_norm": 1.5496110916137695, |
| "learning_rate": 3.1762331825475755e-06, |
| "loss": 0.6619, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.6828420137516794, |
| "grad_norm": 1.4604257345199585, |
| "learning_rate": 2.7724716282371037e-06, |
| "loss": 0.6515, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.7081324587054454, |
| "grad_norm": 1.4951623678207397, |
| "learning_rate": 2.3860945308287554e-06, |
| "loss": 0.6505, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.7334229036592113, |
| "grad_norm": 1.3503330945968628, |
| "learning_rate": 2.0201173203763068e-06, |
| "loss": 0.6523, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.7587133486129771, |
| "grad_norm": 1.4306488037109375, |
| "learning_rate": 1.6773962186558418e-06, |
| "loss": 0.6583, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.7840037935667431, |
| "grad_norm": 1.5080723762512207, |
| "learning_rate": 1.3606059481525296e-06, |
| "loss": 0.6469, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.809294238520509, |
| "grad_norm": 1.3163236379623413, |
| "learning_rate": 1.0722188575351423e-06, |
| "loss": 0.6509, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.8345846834742748, |
| "grad_norm": 1.3945035934448242, |
| "learning_rate": 8.14485626530821e-07, |
| "loss": 0.6481, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.8598751284280408, |
| "grad_norm": 1.3138415813446045, |
| "learning_rate": 5.894177007864272e-07, |
| "loss": 0.6441, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.8851655733818067, |
| "grad_norm": 1.331908941268921, |
| "learning_rate": 3.987715938012965e-07, |
| "loss": 0.6455, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.9104560183355725, |
| "grad_norm": 1.5567970275878906, |
| "learning_rate": 2.440351784449524e-07, |
| "loss": 0.6376, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.9357464632893385, |
| "grad_norm": 1.4572831392288208, |
| "learning_rate": 1.264160750458493e-07, |
| "loss": 0.6373, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.9610369082431044, |
| "grad_norm": 1.3331170082092285, |
| "learning_rate": 4.6832226674803916e-08, |
| "loss": 0.642, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.9863273531968703, |
| "grad_norm": 1.4231302738189697, |
| "learning_rate": 5.904735177095222e-09, |
| "loss": 0.6394, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.9989725756737533, |
| "step": 790, |
| "total_flos": 1.0543839820541592e+19, |
| "train_loss": 1.01295283172704, |
| "train_runtime": 50765.246, |
| "train_samples_per_second": 1.994, |
| "train_steps_per_second": 0.016 |
| } |
| ], |
| "logging_steps": 20, |
| "max_steps": 790, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.0543839820541592e+19, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|