| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.08502024291497975, |
| "eval_steps": 500, |
| "global_step": 42, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0020242914979757085, |
| "grad_norm": 13.587785720825195, |
| "learning_rate": 2e-05, |
| "loss": 1.903, |
| "mean_token_accuracy": 0.5894419699907303, |
| "num_tokens": 23998.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.004048582995951417, |
| "grad_norm": 10.837627410888672, |
| "learning_rate": 1.998650472334683e-05, |
| "loss": 1.7773, |
| "mean_token_accuracy": 0.6223782598972321, |
| "num_tokens": 46596.0, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.006072874493927126, |
| "grad_norm": 5.157018184661865, |
| "learning_rate": 1.9973009446693658e-05, |
| "loss": 1.9274, |
| "mean_token_accuracy": 0.5814870595932007, |
| "num_tokens": 71542.0, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.008097165991902834, |
| "grad_norm": 5.115257263183594, |
| "learning_rate": 1.9959514170040488e-05, |
| "loss": 1.6163, |
| "mean_token_accuracy": 0.6391739100217819, |
| "num_tokens": 90870.0, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.010121457489878543, |
| "grad_norm": 4.290038585662842, |
| "learning_rate": 1.9946018893387314e-05, |
| "loss": 1.5241, |
| "mean_token_accuracy": 0.6456017643213272, |
| "num_tokens": 115219.0, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.012145748987854251, |
| "grad_norm": 3.599583864212036, |
| "learning_rate": 1.9932523616734144e-05, |
| "loss": 1.363, |
| "mean_token_accuracy": 0.6773215681314468, |
| "num_tokens": 136447.0, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.01417004048582996, |
| "grad_norm": 3.737048625946045, |
| "learning_rate": 1.9919028340080974e-05, |
| "loss": 1.6063, |
| "mean_token_accuracy": 0.6296929270029068, |
| "num_tokens": 159775.0, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.016194331983805668, |
| "grad_norm": 3.6092047691345215, |
| "learning_rate": 1.9905533063427804e-05, |
| "loss": 1.5649, |
| "mean_token_accuracy": 0.639984205365181, |
| "num_tokens": 184127.0, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.018218623481781375, |
| "grad_norm": 3.934776782989502, |
| "learning_rate": 1.989203778677463e-05, |
| "loss": 1.8535, |
| "mean_token_accuracy": 0.594915583729744, |
| "num_tokens": 206169.0, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.020242914979757085, |
| "grad_norm": 3.7700464725494385, |
| "learning_rate": 1.987854251012146e-05, |
| "loss": 1.4936, |
| "mean_token_accuracy": 0.6473288685083389, |
| "num_tokens": 226067.0, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.022267206477732792, |
| "grad_norm": 3.7429916858673096, |
| "learning_rate": 1.986504723346829e-05, |
| "loss": 1.5662, |
| "mean_token_accuracy": 0.6233761757612228, |
| "num_tokens": 248812.0, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.024291497975708502, |
| "grad_norm": 3.451914072036743, |
| "learning_rate": 1.9851551956815116e-05, |
| "loss": 1.7484, |
| "mean_token_accuracy": 0.6058520972728729, |
| "num_tokens": 272961.0, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.02631578947368421, |
| "grad_norm": 3.706216335296631, |
| "learning_rate": 1.9838056680161946e-05, |
| "loss": 1.4828, |
| "mean_token_accuracy": 0.6424361318349838, |
| "num_tokens": 293964.0, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.02834008097165992, |
| "grad_norm": 3.402036428451538, |
| "learning_rate": 1.9824561403508773e-05, |
| "loss": 1.6059, |
| "mean_token_accuracy": 0.6380074322223663, |
| "num_tokens": 314414.0, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.030364372469635626, |
| "grad_norm": 3.5934112071990967, |
| "learning_rate": 1.9811066126855602e-05, |
| "loss": 1.6123, |
| "mean_token_accuracy": 0.6258052587509155, |
| "num_tokens": 338761.0, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.032388663967611336, |
| "grad_norm": 3.2826409339904785, |
| "learning_rate": 1.979757085020243e-05, |
| "loss": 1.5093, |
| "mean_token_accuracy": 0.647676095366478, |
| "num_tokens": 364154.0, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.03441295546558704, |
| "grad_norm": 3.411837339401245, |
| "learning_rate": 1.978407557354926e-05, |
| "loss": 1.5875, |
| "mean_token_accuracy": 0.6337466537952423, |
| "num_tokens": 387522.0, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.03643724696356275, |
| "grad_norm": 3.537415027618408, |
| "learning_rate": 1.977058029689609e-05, |
| "loss": 1.6839, |
| "mean_token_accuracy": 0.6119341999292374, |
| "num_tokens": 410636.0, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.038461538461538464, |
| "grad_norm": 3.6170666217803955, |
| "learning_rate": 1.9757085020242915e-05, |
| "loss": 1.3929, |
| "mean_token_accuracy": 0.6720004975795746, |
| "num_tokens": 430858.0, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.04048582995951417, |
| "grad_norm": 3.725717782974243, |
| "learning_rate": 1.9743589743589745e-05, |
| "loss": 1.3478, |
| "mean_token_accuracy": 0.6779870688915253, |
| "num_tokens": 451526.0, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.04251012145748988, |
| "grad_norm": 3.7410740852355957, |
| "learning_rate": 1.9730094466936575e-05, |
| "loss": 1.5027, |
| "mean_token_accuracy": 0.6415430754423141, |
| "num_tokens": 472555.0, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.044534412955465584, |
| "grad_norm": 3.8375744819641113, |
| "learning_rate": 1.9716599190283405e-05, |
| "loss": 1.3805, |
| "mean_token_accuracy": 0.6554747521877289, |
| "num_tokens": 492412.0, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.0465587044534413, |
| "grad_norm": 3.4310216903686523, |
| "learning_rate": 1.970310391363023e-05, |
| "loss": 1.3993, |
| "mean_token_accuracy": 0.6516353040933609, |
| "num_tokens": 517688.0, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.048582995951417005, |
| "grad_norm": 3.4065134525299072, |
| "learning_rate": 1.968960863697706e-05, |
| "loss": 1.4144, |
| "mean_token_accuracy": 0.6639417558908463, |
| "num_tokens": 539605.0, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.05060728744939271, |
| "grad_norm": 3.4423940181732178, |
| "learning_rate": 1.9676113360323887e-05, |
| "loss": 1.6237, |
| "mean_token_accuracy": 0.6199875771999359, |
| "num_tokens": 563276.0, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.05263157894736842, |
| "grad_norm": 3.211747407913208, |
| "learning_rate": 1.9662618083670717e-05, |
| "loss": 1.4059, |
| "mean_token_accuracy": 0.6552923172712326, |
| "num_tokens": 586603.0, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.05465587044534413, |
| "grad_norm": 3.1153526306152344, |
| "learning_rate": 1.9649122807017544e-05, |
| "loss": 1.2644, |
| "mean_token_accuracy": 0.6816990375518799, |
| "num_tokens": 612691.0, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.05668016194331984, |
| "grad_norm": 3.2474708557128906, |
| "learning_rate": 1.9635627530364373e-05, |
| "loss": 1.4524, |
| "mean_token_accuracy": 0.6650048345327377, |
| "num_tokens": 636325.0, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.058704453441295545, |
| "grad_norm": 3.521009683609009, |
| "learning_rate": 1.9622132253711203e-05, |
| "loss": 1.3588, |
| "mean_token_accuracy": 0.6608386486768723, |
| "num_tokens": 657410.0, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.06072874493927125, |
| "grad_norm": 3.240419387817383, |
| "learning_rate": 1.960863697705803e-05, |
| "loss": 1.6196, |
| "mean_token_accuracy": 0.634381040930748, |
| "num_tokens": 678587.0, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.06275303643724696, |
| "grad_norm": 3.0680091381073, |
| "learning_rate": 1.959514170040486e-05, |
| "loss": 1.537, |
| "mean_token_accuracy": 0.6444396674633026, |
| "num_tokens": 700383.0, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.06477732793522267, |
| "grad_norm": 3.087522029876709, |
| "learning_rate": 1.958164642375169e-05, |
| "loss": 1.6414, |
| "mean_token_accuracy": 0.6257035434246063, |
| "num_tokens": 723769.0, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.06680161943319839, |
| "grad_norm": 3.2430222034454346, |
| "learning_rate": 1.9568151147098516e-05, |
| "loss": 1.5166, |
| "mean_token_accuracy": 0.6486149281263351, |
| "num_tokens": 747387.0, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.06882591093117409, |
| "grad_norm": 3.1888442039489746, |
| "learning_rate": 1.9554655870445346e-05, |
| "loss": 1.4071, |
| "mean_token_accuracy": 0.6582628488540649, |
| "num_tokens": 771537.0, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.0708502024291498, |
| "grad_norm": 2.9818553924560547, |
| "learning_rate": 1.9541160593792176e-05, |
| "loss": 1.4723, |
| "mean_token_accuracy": 0.6373147964477539, |
| "num_tokens": 794954.0, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.0728744939271255, |
| "grad_norm": 2.8076112270355225, |
| "learning_rate": 1.9527665317139005e-05, |
| "loss": 1.5494, |
| "mean_token_accuracy": 0.6327401697635651, |
| "num_tokens": 820451.0, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.07489878542510121, |
| "grad_norm": 3.305832862854004, |
| "learning_rate": 1.9514170040485832e-05, |
| "loss": 1.5983, |
| "mean_token_accuracy": 0.6266501545906067, |
| "num_tokens": 840241.0, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.07692307692307693, |
| "grad_norm": 2.9532933235168457, |
| "learning_rate": 1.9500674763832662e-05, |
| "loss": 1.4127, |
| "mean_token_accuracy": 0.6481295526027679, |
| "num_tokens": 862831.0, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.07894736842105263, |
| "grad_norm": 2.7358744144439697, |
| "learning_rate": 1.9487179487179488e-05, |
| "loss": 1.4085, |
| "mean_token_accuracy": 0.6563303023576736, |
| "num_tokens": 887709.0, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.08097165991902834, |
| "grad_norm": 2.550145149230957, |
| "learning_rate": 1.9473684210526318e-05, |
| "loss": 1.398, |
| "mean_token_accuracy": 0.6520788222551346, |
| "num_tokens": 914680.0, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.08299595141700405, |
| "grad_norm": 2.6927826404571533, |
| "learning_rate": 1.9460188933873144e-05, |
| "loss": 1.618, |
| "mean_token_accuracy": 0.624066486954689, |
| "num_tokens": 939210.0, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.08502024291497975, |
| "grad_norm": 2.7264392375946045, |
| "learning_rate": 1.9446693657219974e-05, |
| "loss": 1.3058, |
| "mean_token_accuracy": 0.6675033718347549, |
| "num_tokens": 960279.0, |
| "step": 42 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1482, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 42, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2947625944952832.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|