| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.998642226748133, |
| "eval_steps": 500, |
| "global_step": 368, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.05431093007467753, |
| "grad_norm": 1.0013175010681152, |
| "learning_rate": 2e-05, |
| "loss": 1.8305, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.10862186014935506, |
| "grad_norm": 0.6650476455688477, |
| "learning_rate": 4e-05, |
| "loss": 1.6895, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.1629327902240326, |
| "grad_norm": 0.2262110561132431, |
| "learning_rate": 6e-05, |
| "loss": 1.4921, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.2172437202987101, |
| "grad_norm": 0.22021710872650146, |
| "learning_rate": 8e-05, |
| "loss": 1.4074, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.27155465037338766, |
| "grad_norm": 0.18622460961341858, |
| "learning_rate": 0.0001, |
| "loss": 1.2593, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.3258655804480652, |
| "grad_norm": 0.18928822875022888, |
| "learning_rate": 0.00012, |
| "loss": 1.1828, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.3801765105227427, |
| "grad_norm": 4.061775207519531, |
| "learning_rate": 0.00014, |
| "loss": 1.0697, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.4344874405974202, |
| "grad_norm": 0.20349960029125214, |
| "learning_rate": 0.00016, |
| "loss": 1.0102, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.48879837067209775, |
| "grad_norm": 0.23872722685337067, |
| "learning_rate": 0.00018, |
| "loss": 0.9941, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.5431093007467753, |
| "grad_norm": 0.20515283942222595, |
| "learning_rate": 0.0002, |
| "loss": 0.9608, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5974202308214528, |
| "grad_norm": 0.22473494708538055, |
| "learning_rate": 0.00019931371771625544, |
| "loss": 0.9308, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.6517311608961304, |
| "grad_norm": 0.24142144620418549, |
| "learning_rate": 0.0001972642905324813, |
| "loss": 0.9381, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.7060420909708078, |
| "grad_norm": 0.260145902633667, |
| "learning_rate": 0.00019387984816003867, |
| "loss": 0.8955, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.7603530210454854, |
| "grad_norm": 0.22235779464244843, |
| "learning_rate": 0.00018920684425573865, |
| "loss": 0.8667, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.814663951120163, |
| "grad_norm": 0.23329713940620422, |
| "learning_rate": 0.00018330941881540915, |
| "loss": 0.873, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.8689748811948405, |
| "grad_norm": 0.2553715109825134, |
| "learning_rate": 0.0001762685178110382, |
| "loss": 0.8651, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.923285811269518, |
| "grad_norm": 0.22536128759384155, |
| "learning_rate": 0.0001681807821550438, |
| "loss": 0.8504, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.9775967413441955, |
| "grad_norm": 0.22558774054050446, |
| "learning_rate": 0.00015915722124135227, |
| "loss": 0.8414, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.031907671418873, |
| "grad_norm": 0.2400912493467331, |
| "learning_rate": 0.00014932168926979074, |
| "loss": 0.8389, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.0862186014935507, |
| "grad_norm": 0.23116886615753174, |
| "learning_rate": 0.00013880918526722497, |
| "loss": 0.8289, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.140529531568228, |
| "grad_norm": 0.2643767297267914, |
| "learning_rate": 0.00012776400013875006, |
| "loss": 0.8037, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.1948404616429056, |
| "grad_norm": 0.23654422163963318, |
| "learning_rate": 0.00011633773618185302, |
| "loss": 0.8209, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.2491513917175832, |
| "grad_norm": 0.25414180755615234, |
| "learning_rate": 0.00010468722624699401, |
| "loss": 0.8327, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.3034623217922607, |
| "grad_norm": 0.24420738220214844, |
| "learning_rate": 9.297238110547074e-05, |
| "loss": 0.8056, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.3577732518669383, |
| "grad_norm": 0.23167012631893158, |
| "learning_rate": 8.13539945708319e-05, |
| "loss": 0.8294, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.4120841819416157, |
| "grad_norm": 0.2467813491821289, |
| "learning_rate": 6.999153649996595e-05, |
| "loss": 0.7809, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.4663951120162932, |
| "grad_norm": 0.2599235475063324, |
| "learning_rate": 5.904096396634935e-05, |
| "loss": 0.7995, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.5207060420909708, |
| "grad_norm": 0.2679561674594879, |
| "learning_rate": 4.865258064851579e-05, |
| "loss": 0.7845, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.5750169721656482, |
| "grad_norm": 0.2540304958820343, |
| "learning_rate": 3.8968973815020806e-05, |
| "loss": 0.7868, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.629327902240326, |
| "grad_norm": 0.26017382740974426, |
| "learning_rate": 3.0123057222115836e-05, |
| "loss": 0.7665, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.6836388323150033, |
| "grad_norm": 0.2591923177242279, |
| "learning_rate": 2.2236246786624792e-05, |
| "loss": 0.7936, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.737949762389681, |
| "grad_norm": 0.2649495601654053, |
| "learning_rate": 1.5416794074090258e-05, |
| "loss": 0.7597, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.7922606924643585, |
| "grad_norm": 0.25946423411369324, |
| "learning_rate": 9.75830047614117e-06, |
| "loss": 0.7954, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.8465716225390358, |
| "grad_norm": 0.24905863404273987, |
| "learning_rate": 5.338432470956589e-06, |
| "loss": 0.7648, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.9008825526137136, |
| "grad_norm": 0.2536468803882599, |
| "learning_rate": 2.2178556007054872e-06, |
| "loss": 0.8026, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.955193482688391, |
| "grad_norm": 0.27082565426826477, |
| "learning_rate": 4.3940179781019055e-07, |
| "loss": 0.7768, |
| "step": 360 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 368, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.122148310240461e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|