| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9872122762148337, |
| "eval_steps": 100, |
| "global_step": 585, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.05115089514066496, |
| "grad_norm": 12.8125, |
| "learning_rate": 9.846153846153848e-05, |
| "loss": 0.9959, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.10230179028132992, |
| "grad_norm": 4.6875, |
| "learning_rate": 9.675213675213675e-05, |
| "loss": 0.5833, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.1534526854219949, |
| "grad_norm": 3.9375, |
| "learning_rate": 9.504273504273504e-05, |
| "loss": 0.4739, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.20460358056265984, |
| "grad_norm": 3.734375, |
| "learning_rate": 9.333333333333334e-05, |
| "loss": 0.4993, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.2557544757033248, |
| "grad_norm": 3.65625, |
| "learning_rate": 9.162393162393162e-05, |
| "loss": 0.4797, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.3069053708439898, |
| "grad_norm": 2.875, |
| "learning_rate": 8.991452991452992e-05, |
| "loss": 0.389, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.35805626598465473, |
| "grad_norm": 2.8125, |
| "learning_rate": 8.820512820512821e-05, |
| "loss": 0.4691, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.4092071611253197, |
| "grad_norm": 2.46875, |
| "learning_rate": 8.64957264957265e-05, |
| "loss": 0.3734, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.46035805626598464, |
| "grad_norm": 2.28125, |
| "learning_rate": 8.478632478632479e-05, |
| "loss": 0.4401, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.5115089514066496, |
| "grad_norm": 2.734375, |
| "learning_rate": 8.307692307692309e-05, |
| "loss": 0.4304, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5115089514066496, |
| "eval_loss": 0.8407434821128845, |
| "eval_runtime": 35.229, |
| "eval_samples_per_second": 11.099, |
| "eval_steps_per_second": 5.564, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5626598465473146, |
| "grad_norm": 2.03125, |
| "learning_rate": 8.136752136752138e-05, |
| "loss": 0.4492, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.6138107416879796, |
| "grad_norm": 3.53125, |
| "learning_rate": 7.965811965811965e-05, |
| "loss": 0.3949, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.6649616368286445, |
| "grad_norm": 2.03125, |
| "learning_rate": 7.794871794871795e-05, |
| "loss": 0.3737, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.7161125319693095, |
| "grad_norm": 2.375, |
| "learning_rate": 7.623931623931624e-05, |
| "loss": 0.4031, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.7672634271099744, |
| "grad_norm": 2.375, |
| "learning_rate": 7.452991452991453e-05, |
| "loss": 0.3672, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.8184143222506394, |
| "grad_norm": 2.6875, |
| "learning_rate": 7.282051282051282e-05, |
| "loss": 0.3944, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.8695652173913043, |
| "grad_norm": 2.625, |
| "learning_rate": 7.111111111111112e-05, |
| "loss": 0.4473, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.9207161125319693, |
| "grad_norm": 2.21875, |
| "learning_rate": 6.940170940170941e-05, |
| "loss": 0.3476, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.9718670076726342, |
| "grad_norm": 3.578125, |
| "learning_rate": 6.76923076923077e-05, |
| "loss": 0.3961, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.020460358056266, |
| "grad_norm": 3.34375, |
| "learning_rate": 6.598290598290599e-05, |
| "loss": 0.3859, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.020460358056266, |
| "eval_loss": 0.7718821167945862, |
| "eval_runtime": 33.9574, |
| "eval_samples_per_second": 11.514, |
| "eval_steps_per_second": 5.772, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.0716112531969308, |
| "grad_norm": 2.578125, |
| "learning_rate": 6.427350427350429e-05, |
| "loss": 0.3484, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.1227621483375958, |
| "grad_norm": 3.296875, |
| "learning_rate": 6.256410256410256e-05, |
| "loss": 0.3425, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.1739130434782608, |
| "grad_norm": 2.09375, |
| "learning_rate": 6.085470085470085e-05, |
| "loss": 0.3622, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.2250639386189257, |
| "grad_norm": 2.796875, |
| "learning_rate": 5.9145299145299146e-05, |
| "loss": 0.3369, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.2762148337595907, |
| "grad_norm": 4.59375, |
| "learning_rate": 5.7435897435897434e-05, |
| "loss": 0.3215, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.3273657289002558, |
| "grad_norm": 3.734375, |
| "learning_rate": 5.572649572649573e-05, |
| "loss": 0.328, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.3785166240409208, |
| "grad_norm": 2.71875, |
| "learning_rate": 5.401709401709402e-05, |
| "loss": 0.3117, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.4296675191815857, |
| "grad_norm": 2.328125, |
| "learning_rate": 5.230769230769231e-05, |
| "loss": 0.2767, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.4808184143222507, |
| "grad_norm": 2.125, |
| "learning_rate": 5.05982905982906e-05, |
| "loss": 0.2631, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.5319693094629157, |
| "grad_norm": 2.375, |
| "learning_rate": 4.888888888888889e-05, |
| "loss": 0.3234, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.5319693094629157, |
| "eval_loss": 0.7180720567703247, |
| "eval_runtime": 34.1345, |
| "eval_samples_per_second": 11.455, |
| "eval_steps_per_second": 5.742, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.5831202046035806, |
| "grad_norm": 2.328125, |
| "learning_rate": 4.717948717948718e-05, |
| "loss": 0.2915, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.6342710997442456, |
| "grad_norm": 2.640625, |
| "learning_rate": 4.5470085470085474e-05, |
| "loss": 0.3123, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.6854219948849105, |
| "grad_norm": 2.09375, |
| "learning_rate": 4.376068376068376e-05, |
| "loss": 0.3252, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.7365728900255755, |
| "grad_norm": 1.7734375, |
| "learning_rate": 4.205128205128206e-05, |
| "loss": 0.3095, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.7877237851662404, |
| "grad_norm": 2.28125, |
| "learning_rate": 4.0341880341880346e-05, |
| "loss": 0.2539, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.8388746803069054, |
| "grad_norm": 2.5625, |
| "learning_rate": 3.8632478632478634e-05, |
| "loss": 0.2742, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.8900255754475703, |
| "grad_norm": 3.078125, |
| "learning_rate": 3.692307692307693e-05, |
| "loss": 0.3169, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.9411764705882353, |
| "grad_norm": 1.84375, |
| "learning_rate": 3.521367521367522e-05, |
| "loss": 0.2767, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.9923273657289002, |
| "grad_norm": 1.859375, |
| "learning_rate": 3.3504273504273506e-05, |
| "loss": 0.3239, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.040920716112532, |
| "grad_norm": 1.8203125, |
| "learning_rate": 3.1794871794871795e-05, |
| "loss": 0.269, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.040920716112532, |
| "eval_loss": 0.6828166842460632, |
| "eval_runtime": 33.8312, |
| "eval_samples_per_second": 11.557, |
| "eval_steps_per_second": 5.793, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.0920716112531967, |
| "grad_norm": 1.578125, |
| "learning_rate": 3.0085470085470086e-05, |
| "loss": 0.2818, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.1432225063938617, |
| "grad_norm": 2.125, |
| "learning_rate": 2.8376068376068378e-05, |
| "loss": 0.2597, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.1943734015345266, |
| "grad_norm": 2.21875, |
| "learning_rate": 2.6666666666666667e-05, |
| "loss": 0.2343, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.2455242966751916, |
| "grad_norm": 3.203125, |
| "learning_rate": 2.495726495726496e-05, |
| "loss": 0.2483, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.296675191815857, |
| "grad_norm": 2.015625, |
| "learning_rate": 2.324786324786325e-05, |
| "loss": 0.2751, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.3478260869565215, |
| "grad_norm": 1.546875, |
| "learning_rate": 2.1538461538461542e-05, |
| "loss": 0.2468, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.398976982097187, |
| "grad_norm": 2.09375, |
| "learning_rate": 1.982905982905983e-05, |
| "loss": 0.2586, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.4501278772378514, |
| "grad_norm": 2.546875, |
| "learning_rate": 1.811965811965812e-05, |
| "loss": 0.249, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.501278772378517, |
| "grad_norm": 1.7265625, |
| "learning_rate": 1.641025641025641e-05, |
| "loss": 0.2664, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.5524296675191813, |
| "grad_norm": 1.9921875, |
| "learning_rate": 1.4700854700854703e-05, |
| "loss": 0.2642, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.5524296675191813, |
| "eval_loss": 0.6801092028617859, |
| "eval_runtime": 33.1113, |
| "eval_samples_per_second": 11.809, |
| "eval_steps_per_second": 5.919, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.6035805626598467, |
| "grad_norm": 2.8125, |
| "learning_rate": 1.2991452991452993e-05, |
| "loss": 0.2889, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.6547314578005117, |
| "grad_norm": 2.171875, |
| "learning_rate": 1.1282051282051283e-05, |
| "loss": 0.2604, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.7058823529411766, |
| "grad_norm": 2.5625, |
| "learning_rate": 9.572649572649575e-06, |
| "loss": 0.2438, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.7570332480818416, |
| "grad_norm": 1.703125, |
| "learning_rate": 7.863247863247863e-06, |
| "loss": 0.2579, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.8081841432225065, |
| "grad_norm": 5.09375, |
| "learning_rate": 6.153846153846155e-06, |
| "loss": 0.2926, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.8593350383631715, |
| "grad_norm": 1.84375, |
| "learning_rate": 4.444444444444445e-06, |
| "loss": 0.2576, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.9104859335038364, |
| "grad_norm": 1.8046875, |
| "learning_rate": 2.735042735042735e-06, |
| "loss": 0.2082, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.9616368286445014, |
| "grad_norm": 1.6875, |
| "learning_rate": 1.0256410256410257e-06, |
| "loss": 0.2314, |
| "step": 580 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 585, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 1500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1654075985362944.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|