{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9848908785674314,
  "eval_steps": 500,
  "global_step": 55,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01790710688304421,
      "grad_norm": 9.154049089173217,
      "learning_rate": 0.0,
      "loss": 1.8388,
      "step": 1
    },
    {
      "epoch": 0.03581421376608842,
      "grad_norm": 9.137176514972408,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 1.7621,
      "step": 2
    },
    {
      "epoch": 0.053721320649132626,
      "grad_norm": 8.5343147738079,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 1.7087,
      "step": 3
    },
    {
      "epoch": 0.07162842753217684,
      "grad_norm": 6.3146031385642205,
      "learning_rate": 5e-06,
      "loss": 1.707,
      "step": 4
    },
    {
      "epoch": 0.08953553441522104,
      "grad_norm": 4.390788179209801,
      "learning_rate": 6.666666666666667e-06,
      "loss": 1.5855,
      "step": 5
    },
    {
      "epoch": 0.10744264129826525,
      "grad_norm": 4.103715813570343,
      "learning_rate": 8.333333333333334e-06,
      "loss": 1.4669,
      "step": 6
    },
    {
      "epoch": 0.12534974818130945,
      "grad_norm": 4.258081836604545,
      "learning_rate": 1e-05,
      "loss": 1.3811,
      "step": 7
    },
    {
      "epoch": 0.14325685506435368,
      "grad_norm": 4.394719037154525,
      "learning_rate": 9.989726963751683e-06,
      "loss": 1.196,
      "step": 8
    },
    {
      "epoch": 0.16116396194739788,
      "grad_norm": 4.387678851831427,
      "learning_rate": 9.95895006911623e-06,
      "loss": 1.192,
      "step": 9
    },
    {
      "epoch": 0.17907106883044208,
      "grad_norm": 3.266903233723893,
      "learning_rate": 9.907795784955327e-06,
      "loss": 1.1589,
      "step": 10
    },
    {
      "epoch": 0.1969781757134863,
      "grad_norm": 2.9600105157057004,
      "learning_rate": 9.836474315195148e-06,
      "loss": 1.1848,
      "step": 11
    },
    {
      "epoch": 0.2148852825965305,
      "grad_norm": 2.794372521189197,
      "learning_rate": 9.745278735053345e-06,
      "loss": 1.2283,
      "step": 12
    },
    {
      "epoch": 0.2327923894795747,
      "grad_norm": 2.3393492159358313,
      "learning_rate": 9.63458378673011e-06,
      "loss": 1.035,
      "step": 13
    },
    {
      "epoch": 0.2506994963626189,
      "grad_norm": 2.9175786704630813,
      "learning_rate": 9.504844339512096e-06,
      "loss": 1.1845,
      "step": 14
    },
    {
      "epoch": 0.2686066032456631,
      "grad_norm": 2.730923033140476,
      "learning_rate": 9.356593520616948e-06,
      "loss": 1.1604,
      "step": 15
    },
    {
      "epoch": 0.28651371012870736,
      "grad_norm": 2.3983376743034657,
      "learning_rate": 9.190440524459203e-06,
      "loss": 1.0337,
      "step": 16
    },
    {
      "epoch": 0.30442081701175155,
      "grad_norm": 2.2413641873757024,
      "learning_rate": 9.007068109339783e-06,
      "loss": 1.0432,
      "step": 17
    },
    {
      "epoch": 0.32232792389479575,
      "grad_norm": 1.7600228823774702,
      "learning_rate": 8.807229791845673e-06,
      "loss": 1.0112,
      "step": 18
    },
    {
      "epoch": 0.34023503077783995,
      "grad_norm": 2.0117426133601835,
      "learning_rate": 8.591746750488639e-06,
      "loss": 1.1059,
      "step": 19
    },
    {
      "epoch": 0.35814213766088415,
      "grad_norm": 2.060473679270181,
      "learning_rate": 8.361504451306585e-06,
      "loss": 0.9571,
      "step": 20
    },
    {
      "epoch": 0.37604924454392835,
      "grad_norm": 2.0723918915090516,
      "learning_rate": 8.117449009293668e-06,
      "loss": 1.049,
      "step": 21
    },
    {
      "epoch": 0.3939563514269726,
      "grad_norm": 2.0555716704589826,
      "learning_rate": 7.860583300610849e-06,
      "loss": 1.071,
      "step": 22
    },
    {
      "epoch": 0.4118634583100168,
      "grad_norm": 1.901749073039597,
      "learning_rate": 7.591962841552627e-06,
      "loss": 0.993,
      "step": 23
    },
    {
      "epoch": 0.429770565193061,
      "grad_norm": 2.2773682705145792,
      "learning_rate": 7.312691451204178e-06,
      "loss": 1.0638,
      "step": 24
    },
    {
      "epoch": 0.4476776720761052,
      "grad_norm": 1.8553489638041873,
      "learning_rate": 7.023916715611969e-06,
      "loss": 1.0701,
      "step": 25
    },
    {
      "epoch": 0.4655847789591494,
      "grad_norm": 1.8296309651772127,
      "learning_rate": 6.726825272106539e-06,
      "loss": 1.0457,
      "step": 26
    },
    {
      "epoch": 0.4834918858421936,
      "grad_norm": 1.951377844511212,
      "learning_rate": 6.4226379331551625e-06,
      "loss": 0.979,
      "step": 27
    },
    {
      "epoch": 0.5013989927252378,
      "grad_norm": 2.131073321825185,
      "learning_rate": 6.112604669781572e-06,
      "loss": 1.0516,
      "step": 28
    },
    {
      "epoch": 0.519306099608282,
      "grad_norm": 1.9236940939367049,
      "learning_rate": 5.797999475166897e-06,
      "loss": 0.9174,
      "step": 29
    },
    {
      "epoch": 0.5372132064913262,
      "grad_norm": 1.8594780822897603,
      "learning_rate": 5.480115129538409e-06,
      "loss": 1.0256,
      "step": 30
    },
    {
      "epoch": 0.5551203133743704,
      "grad_norm": 1.9789880146462748,
      "learning_rate": 5.160257887858278e-06,
      "loss": 1.0618,
      "step": 31
    },
    {
      "epoch": 0.5730274202574147,
      "grad_norm": 1.7424889796134264,
      "learning_rate": 4.839742112141725e-06,
      "loss": 0.947,
      "step": 32
    },
    {
      "epoch": 0.5909345271404589,
      "grad_norm": 1.5718805251413062,
      "learning_rate": 4.5198848704615915e-06,
      "loss": 0.9008,
      "step": 33
    },
    {
      "epoch": 0.6088416340235031,
      "grad_norm": 1.8168721829454335,
      "learning_rate": 4.2020005248331056e-06,
      "loss": 1.0493,
      "step": 34
    },
    {
      "epoch": 0.6267487409065473,
      "grad_norm": 1.5926497298103737,
      "learning_rate": 3.887395330218429e-06,
      "loss": 0.9912,
      "step": 35
    },
    {
      "epoch": 0.6446558477895915,
      "grad_norm": 1.6787356670246831,
      "learning_rate": 3.5773620668448384e-06,
      "loss": 0.9277,
      "step": 36
    },
    {
      "epoch": 0.6625629546726357,
      "grad_norm": 1.7504857895226833,
      "learning_rate": 3.273174727893463e-06,
      "loss": 0.9374,
      "step": 37
    },
    {
      "epoch": 0.6804700615556799,
      "grad_norm": 1.8855358371917814,
      "learning_rate": 2.976083284388031e-06,
      "loss": 1.0638,
      "step": 38
    },
    {
      "epoch": 0.6983771684387241,
      "grad_norm": 2.079981422104126,
      "learning_rate": 2.687308548795825e-06,
      "loss": 1.0418,
      "step": 39
    },
    {
      "epoch": 0.7162842753217683,
      "grad_norm": 1.825444504742724,
      "learning_rate": 2.408037158447375e-06,
      "loss": 0.9922,
      "step": 40
    },
    {
      "epoch": 0.7341913822048125,
      "grad_norm": 1.5244894733584424,
      "learning_rate": 2.139416699389153e-06,
      "loss": 0.8983,
      "step": 41
    },
    {
      "epoch": 0.7520984890878567,
      "grad_norm": 1.5240784478870684,
      "learning_rate": 1.8825509907063328e-06,
      "loss": 0.9511,
      "step": 42
    },
    {
      "epoch": 0.7700055959709009,
      "grad_norm": 1.7893289750375283,
      "learning_rate": 1.6384955486934157e-06,
      "loss": 1.0381,
      "step": 43
    },
    {
      "epoch": 0.7879127028539452,
      "grad_norm": 1.6071325493161637,
      "learning_rate": 1.4082532495113627e-06,
      "loss": 0.9741,
      "step": 44
    },
    {
      "epoch": 0.8058198097369894,
      "grad_norm": 1.5427190294804947,
      "learning_rate": 1.1927702081543279e-06,
      "loss": 0.8816,
      "step": 45
    },
    {
      "epoch": 0.8237269166200336,
      "grad_norm": 1.6398520351491825,
      "learning_rate": 9.929318906602176e-07,
      "loss": 0.9024,
      "step": 46
    },
    {
      "epoch": 0.8416340235030778,
      "grad_norm": 1.7080546088158608,
      "learning_rate": 8.095594755407971e-07,
      "loss": 1.0991,
      "step": 47
    },
    {
      "epoch": 0.859541130386122,
      "grad_norm": 1.676808577271218,
      "learning_rate": 6.43406479383053e-07,
      "loss": 1.0762,
      "step": 48
    },
    {
      "epoch": 0.8774482372691662,
      "grad_norm": 1.7910684633127532,
      "learning_rate": 4.951556604879049e-07,
      "loss": 1.0304,
      "step": 49
    },
    {
      "epoch": 0.8953553441522104,
      "grad_norm": 1.6360492284728316,
      "learning_rate": 3.6541621326989183e-07,
      "loss": 0.9294,
      "step": 50
    },
    {
      "epoch": 0.9132624510352546,
      "grad_norm": 1.4569649164177891,
      "learning_rate": 2.547212649466568e-07,
      "loss": 0.9019,
      "step": 51
    },
    {
      "epoch": 0.9311695579182988,
      "grad_norm": 1.6323528961043134,
      "learning_rate": 1.6352568480485277e-07,
      "loss": 0.963,
      "step": 52
    },
    {
      "epoch": 0.949076664801343,
      "grad_norm": 1.6005257490464857,
      "learning_rate": 9.22042150446728e-08,
      "loss": 0.8996,
      "step": 53
    },
    {
      "epoch": 0.9669837716843872,
      "grad_norm": 1.5664108489842672,
      "learning_rate": 4.104993088376974e-08,
      "loss": 0.9621,
      "step": 54
    },
    {
      "epoch": 0.9848908785674314,
      "grad_norm": 1.4809268609667965,
      "learning_rate": 1.0273036248318325e-08,
      "loss": 0.9323,
      "step": 55
    },
    {
      "epoch": 0.9848908785674314,
      "step": 55,
      "total_flos": 6936212070400.0,
      "train_loss": 1.1010965683243492,
      "train_runtime": 842.7134,
      "train_samples_per_second": 2.121,
      "train_steps_per_second": 0.065
    }
  ],
  "logging_steps": 1,
  "max_steps": 55,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 6936212070400.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}