| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9998005186515061, |
| "eval_steps": 10000, |
| "global_step": 3759, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.026597513132522108, |
| "grad_norm": 0.14929383993148804, |
| "learning_rate": 2.6595744680851064e-05, |
| "loss": 0.7258, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.053195026265044215, |
| "grad_norm": 0.22324731945991516, |
| "learning_rate": 5.319148936170213e-05, |
| "loss": 0.477, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.07979253939756632, |
| "grad_norm": 0.1394360512495041, |
| "learning_rate": 7.978723404255319e-05, |
| "loss": 0.4484, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.10639005253008843, |
| "grad_norm": 0.1933247148990631, |
| "learning_rate": 9.99875823256999e-05, |
| "loss": 0.4301, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.13298756566261055, |
| "grad_norm": 0.1871040314435959, |
| "learning_rate": 9.966886949974127e-05, |
| "loss": 0.4265, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.15958507879513265, |
| "grad_norm": 0.12056238204240799, |
| "learning_rate": 9.892213289286789e-05, |
| "loss": 0.4169, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.18618259192765477, |
| "grad_norm": 0.23017819225788116, |
| "learning_rate": 9.775380754233831e-05, |
| "loss": 0.4149, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.21278010506017686, |
| "grad_norm": 0.22511781752109528, |
| "learning_rate": 9.617396154591494e-05, |
| "loss": 0.4127, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.23937761819269898, |
| "grad_norm": 0.15255825221538544, |
| "learning_rate": 9.41962092995658e-05, |
| "loss": 0.4059, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.2659751313252211, |
| "grad_norm": 0.1079307496547699, |
| "learning_rate": 9.183759417477731e-05, |
| "loss": 0.4018, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2925726444577432, |
| "grad_norm": 0.1743098944425583, |
| "learning_rate": 8.9118441646512e-05, |
| "loss": 0.3919, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.3191701575902653, |
| "grad_norm": 0.19822756946086884, |
| "learning_rate": 8.606218413748768e-05, |
| "loss": 0.3909, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3457676707227874, |
| "grad_norm": 0.1920367330312729, |
| "learning_rate": 8.26951590881904e-05, |
| "loss": 0.3855, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.37236518385530953, |
| "grad_norm": 0.1861521601676941, |
| "learning_rate": 7.904638199276271e-05, |
| "loss": 0.3907, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.39896269698783166, |
| "grad_norm": 0.10632487386465073, |
| "learning_rate": 7.514729635664032e-05, |
| "loss": 0.3846, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.4255602101203537, |
| "grad_norm": 0.2400396317243576, |
| "learning_rate": 7.103150273068921e-05, |
| "loss": 0.3904, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.45215772325287584, |
| "grad_norm": 0.1306409388780594, |
| "learning_rate": 6.673446915690408e-05, |
| "loss": 0.3912, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.47875523638539796, |
| "grad_norm": 0.16852012276649475, |
| "learning_rate": 6.229322552091536e-05, |
| "loss": 0.3806, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.5053527495179201, |
| "grad_norm": 0.23730169236660004, |
| "learning_rate": 5.774604444523663e-05, |
| "loss": 0.3812, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.5319502626504422, |
| "grad_norm": 0.17537447810173035, |
| "learning_rate": 5.313211147316933e-05, |
| "loss": 0.3767, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5585477757829643, |
| "grad_norm": 0.13570892810821533, |
| "learning_rate": 4.849118738557042e-05, |
| "loss": 0.3782, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.5851452889154865, |
| "grad_norm": 0.21518413722515106, |
| "learning_rate": 4.386326556048369e-05, |
| "loss": 0.3706, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6117428020480085, |
| "grad_norm": 0.28089213371276855, |
| "learning_rate": 3.9288227328354234e-05, |
| "loss": 0.3805, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.6383403151805306, |
| "grad_norm": 0.19185791909694672, |
| "learning_rate": 3.4805498292818055e-05, |
| "loss": 0.3683, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6649378283130527, |
| "grad_norm": 0.2617637515068054, |
| "learning_rate": 3.045370857873868e-05, |
| "loss": 0.3825, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.6915353414455748, |
| "grad_norm": 0.11534757167100906, |
| "learning_rate": 2.6270359935318967e-05, |
| "loss": 0.3721, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.718132854578097, |
| "grad_norm": 0.18955928087234497, |
| "learning_rate": 2.22915025630421e-05, |
| "loss": 0.3682, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.7447303677106191, |
| "grad_norm": 0.22698479890823364, |
| "learning_rate": 1.8551424449401173e-05, |
| "loss": 0.3675, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7713278808431412, |
| "grad_norm": 0.18734973669052124, |
| "learning_rate": 1.5082355890580507e-05, |
| "loss": 0.3719, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.7979253939756633, |
| "grad_norm": 0.22010599076747894, |
| "learning_rate": 1.1914191745387143e-05, |
| "loss": 0.363, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8245229071081854, |
| "grad_norm": 0.25703734159469604, |
| "learning_rate": 9.074233814921846e-06, |
| "loss": 0.3711, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.8511204202407074, |
| "grad_norm": 0.15355700254440308, |
| "learning_rate": 6.586955568045134e-06, |
| "loss": 0.362, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8777179333732296, |
| "grad_norm": 0.20059217512607574, |
| "learning_rate": 4.47379124012689e-06, |
| "loss": 0.3422, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.9043154465057517, |
| "grad_norm": 0.16272881627082825, |
| "learning_rate": 2.7529511225315162e-06, |
| "loss": 0.3667, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.9309129596382738, |
| "grad_norm": 0.24089215695858002, |
| "learning_rate": 1.4392646345894934e-06, |
| "loss": 0.3883, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.9575104727707959, |
| "grad_norm": 0.14056609570980072, |
| "learning_rate": 5.440525303902377e-07, |
| "loss": 0.3723, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.984107985903318, |
| "grad_norm": 0.18514706194400787, |
| "learning_rate": 7.502934165993791e-08, |
| "loss": 0.366, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.9998005186515061, |
| "step": 3759, |
| "total_flos": 1.8198794606166934e+19, |
| "train_loss": 0.39680684224885016, |
| "train_runtime": 142849.6117, |
| "train_samples_per_second": 0.421, |
| "train_steps_per_second": 0.026 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 3759, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 2500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.8198794606166934e+19, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|