{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.987551867219917,
  "eval_steps": 500,
  "global_step": 360,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08298755186721991,
      "grad_norm": 0.4138890994781238,
      "learning_rate": 5e-06,
      "loss": 0.6819,
      "step": 10
    },
    {
      "epoch": 0.16597510373443983,
      "grad_norm": 0.29279799441362253,
      "learning_rate": 5e-06,
      "loss": 0.6152,
      "step": 20
    },
    {
      "epoch": 0.24896265560165975,
      "grad_norm": 0.22155490022279595,
      "learning_rate": 5e-06,
      "loss": 0.5911,
      "step": 30
    },
    {
      "epoch": 0.33195020746887965,
      "grad_norm": 0.21703792973144043,
      "learning_rate": 5e-06,
      "loss": 0.5758,
      "step": 40
    },
    {
      "epoch": 0.4149377593360996,
      "grad_norm": 0.19782125949582666,
      "learning_rate": 5e-06,
      "loss": 0.5658,
      "step": 50
    },
    {
      "epoch": 0.4979253112033195,
      "grad_norm": 0.20429620764864578,
      "learning_rate": 5e-06,
      "loss": 0.5621,
      "step": 60
    },
    {
      "epoch": 0.5809128630705395,
      "grad_norm": 0.1947179711144,
      "learning_rate": 5e-06,
      "loss": 0.5481,
      "step": 70
    },
    {
      "epoch": 0.6639004149377593,
      "grad_norm": 0.20952965041956714,
      "learning_rate": 5e-06,
      "loss": 0.5491,
      "step": 80
    },
    {
      "epoch": 0.7468879668049793,
      "grad_norm": 0.2092027679734135,
      "learning_rate": 5e-06,
      "loss": 0.5473,
      "step": 90
    },
    {
      "epoch": 0.8298755186721992,
      "grad_norm": 0.1973704614234666,
      "learning_rate": 5e-06,
      "loss": 0.5412,
      "step": 100
    },
    {
      "epoch": 0.9128630705394191,
      "grad_norm": 0.22215520376065145,
      "learning_rate": 5e-06,
      "loss": 0.5361,
      "step": 110
    },
    {
      "epoch": 0.995850622406639,
      "grad_norm": 0.20002555613598916,
      "learning_rate": 5e-06,
      "loss": 0.5354,
      "step": 120
    },
    {
      "epoch": 0.995850622406639,
      "eval_loss": 0.5283368229866028,
      "eval_runtime": 121.5237,
      "eval_samples_per_second": 26.703,
      "eval_steps_per_second": 0.42,
      "step": 120
    },
    {
      "epoch": 1.0788381742738589,
      "grad_norm": 0.23151869944663353,
      "learning_rate": 5e-06,
      "loss": 0.5334,
      "step": 130
    },
    {
      "epoch": 1.161825726141079,
      "grad_norm": 0.20416069004838694,
      "learning_rate": 5e-06,
      "loss": 0.5122,
      "step": 140
    },
    {
      "epoch": 1.2448132780082988,
      "grad_norm": 0.21256654137396935,
      "learning_rate": 5e-06,
      "loss": 0.509,
      "step": 150
    },
    {
      "epoch": 1.3278008298755186,
      "grad_norm": 0.21018667523519946,
      "learning_rate": 5e-06,
      "loss": 0.5041,
      "step": 160
    },
    {
      "epoch": 1.4107883817427385,
      "grad_norm": 0.219240042940767,
      "learning_rate": 5e-06,
      "loss": 0.4998,
      "step": 170
    },
    {
      "epoch": 1.4937759336099585,
      "grad_norm": 0.22681455392212077,
      "learning_rate": 5e-06,
      "loss": 0.5037,
      "step": 180
    },
    {
      "epoch": 1.5767634854771784,
      "grad_norm": 0.227133839723048,
      "learning_rate": 5e-06,
      "loss": 0.4977,
      "step": 190
    },
    {
      "epoch": 1.6597510373443982,
      "grad_norm": 0.21040711904959797,
      "learning_rate": 5e-06,
      "loss": 0.4941,
      "step": 200
    },
    {
      "epoch": 1.7427385892116183,
      "grad_norm": 0.23482785666403702,
      "learning_rate": 5e-06,
      "loss": 0.4945,
      "step": 210
    },
    {
      "epoch": 1.8257261410788381,
      "grad_norm": 0.2035179907011211,
      "learning_rate": 5e-06,
      "loss": 0.4904,
      "step": 220
    },
    {
      "epoch": 1.908713692946058,
      "grad_norm": 0.21720290177963564,
      "learning_rate": 5e-06,
      "loss": 0.491,
      "step": 230
    },
    {
      "epoch": 1.991701244813278,
      "grad_norm": 0.2214820393037949,
      "learning_rate": 5e-06,
      "loss": 0.4901,
      "step": 240
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.5045989155769348,
      "eval_runtime": 122.3281,
      "eval_samples_per_second": 26.527,
      "eval_steps_per_second": 0.417,
      "step": 241
    },
    {
      "epoch": 2.074688796680498,
      "grad_norm": 0.25601226331965665,
      "learning_rate": 5e-06,
      "loss": 0.4945,
      "step": 250
    },
    {
      "epoch": 2.1576763485477177,
      "grad_norm": 0.2537099080076595,
      "learning_rate": 5e-06,
      "loss": 0.4617,
      "step": 260
    },
    {
      "epoch": 2.240663900414938,
      "grad_norm": 0.2445352596834903,
      "learning_rate": 5e-06,
      "loss": 0.4648,
      "step": 270
    },
    {
      "epoch": 2.323651452282158,
      "grad_norm": 0.24195048816699535,
      "learning_rate": 5e-06,
      "loss": 0.4688,
      "step": 280
    },
    {
      "epoch": 2.4066390041493775,
      "grad_norm": 0.3297443855710949,
      "learning_rate": 5e-06,
      "loss": 0.46,
      "step": 290
    },
    {
      "epoch": 2.4896265560165975,
      "grad_norm": 0.2227067008121754,
      "learning_rate": 5e-06,
      "loss": 0.4679,
      "step": 300
    },
    {
      "epoch": 2.572614107883817,
      "grad_norm": 0.24268677689146825,
      "learning_rate": 5e-06,
      "loss": 0.4642,
      "step": 310
    },
    {
      "epoch": 2.6556016597510372,
      "grad_norm": 0.24131530500929413,
      "learning_rate": 5e-06,
      "loss": 0.4597,
      "step": 320
    },
    {
      "epoch": 2.7385892116182573,
      "grad_norm": 0.22997089130920098,
      "learning_rate": 5e-06,
      "loss": 0.4617,
      "step": 330
    },
    {
      "epoch": 2.821576763485477,
      "grad_norm": 0.23994756278793414,
      "learning_rate": 5e-06,
      "loss": 0.4597,
      "step": 340
    },
    {
      "epoch": 2.904564315352697,
      "grad_norm": 0.23257285232469585,
      "learning_rate": 5e-06,
      "loss": 0.4545,
      "step": 350
    },
    {
      "epoch": 2.987551867219917,
      "grad_norm": 0.22525776234601527,
      "learning_rate": 5e-06,
      "loss": 0.4618,
      "step": 360
    },
    {
      "epoch": 2.987551867219917,
      "eval_loss": 0.49428611993789673,
      "eval_runtime": 121.9138,
      "eval_samples_per_second": 26.617,
      "eval_steps_per_second": 0.418,
      "step": 360
    },
    {
      "epoch": 2.987551867219917,
      "step": 360,
      "total_flos": 602804028702720.0,
      "train_loss": 0.5124640332327949,
      "train_runtime": 20041.158,
      "train_samples_per_second": 9.227,
      "train_steps_per_second": 0.018
    }
  ],
  "logging_steps": 10,
  "max_steps": 360,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 602804028702720.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}