{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 90.0,
  "eval_steps": 500,
  "global_step": 360,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 2.5,
      "grad_norm": 0.5360991358757019,
      "learning_rate": 0.0009980973490458728,
      "loss": 2.5493,
      "step": 10
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.2548345923423767,
      "learning_rate": 0.000992403876506104,
      "loss": 1.6144,
      "step": 20
    },
    {
      "epoch": 7.5,
      "grad_norm": 0.24080035090446472,
      "learning_rate": 0.0009829629131445341,
      "loss": 1.3919,
      "step": 30
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.2729661166667938,
      "learning_rate": 0.0009698463103929542,
      "loss": 1.254,
      "step": 40
    },
    {
      "epoch": 12.5,
      "grad_norm": 0.30077192187309265,
      "learning_rate": 0.0009531538935183251,
      "loss": 1.138,
      "step": 50
    },
    {
      "epoch": 15.0,
      "grad_norm": 0.3787655234336853,
      "learning_rate": 0.0009330127018922195,
      "loss": 1.0206,
      "step": 60
    },
    {
      "epoch": 17.5,
      "grad_norm": 0.416939377784729,
      "learning_rate": 0.0009095760221444959,
      "loss": 0.91,
      "step": 70
    },
    {
      "epoch": 20.0,
      "grad_norm": 0.5407611727714539,
      "learning_rate": 0.000883022221559489,
      "loss": 0.8329,
      "step": 80
    },
    {
      "epoch": 22.5,
      "grad_norm": 0.5476299524307251,
      "learning_rate": 0.0008535533905932737,
      "loss": 0.7483,
      "step": 90
    },
    {
      "epoch": 25.0,
      "grad_norm": 0.5024413466453552,
      "learning_rate": 0.0008213938048432696,
      "loss": 0.6778,
      "step": 100
    },
    {
      "epoch": 27.5,
      "grad_norm": 0.5071346759796143,
      "learning_rate": 0.0007867882181755231,
      "loss": 0.6175,
      "step": 110
    },
    {
      "epoch": 30.0,
      "grad_norm": 0.5480501055717468,
      "learning_rate": 0.00075,
      "loss": 0.5726,
      "step": 120
    },
    {
      "epoch": 32.5,
      "grad_norm": 0.5539716482162476,
      "learning_rate": 0.0007113091308703497,
      "loss": 0.5276,
      "step": 130
    },
    {
      "epoch": 35.0,
      "grad_norm": 0.5367885231971741,
      "learning_rate": 0.0006710100716628344,
      "loss": 0.4889,
      "step": 140
    },
    {
      "epoch": 37.5,
      "grad_norm": 0.5145406126976013,
      "learning_rate": 0.0006294095225512603,
      "loss": 0.4513,
      "step": 150
    },
    {
      "epoch": 40.0,
      "grad_norm": 0.5838665962219238,
      "learning_rate": 0.0005868240888334653,
      "loss": 0.4196,
      "step": 160
    },
    {
      "epoch": 42.5,
      "grad_norm": 0.49232539534568787,
      "learning_rate": 0.0005435778713738292,
      "loss": 0.3972,
      "step": 170
    },
    {
      "epoch": 45.0,
      "grad_norm": 0.4959801137447357,
      "learning_rate": 0.0005,
      "loss": 0.372,
      "step": 180
    },
    {
      "epoch": 47.5,
      "grad_norm": 0.5067233443260193,
      "learning_rate": 0.00045642212862617086,
      "loss": 0.3527,
      "step": 190
    },
    {
      "epoch": 50.0,
      "grad_norm": 0.5176546573638916,
      "learning_rate": 0.00041317591116653486,
      "loss": 0.3385,
      "step": 200
    },
    {
      "epoch": 52.5,
      "grad_norm": 0.4937039911746979,
      "learning_rate": 0.0003705904774487396,
      "loss": 0.3191,
      "step": 210
    },
    {
      "epoch": 55.0,
      "grad_norm": 0.4794902205467224,
      "learning_rate": 0.0003289899283371657,
      "loss": 0.3082,
      "step": 220
    },
    {
      "epoch": 57.5,
      "grad_norm": 0.4285900294780731,
      "learning_rate": 0.0002886908691296504,
      "loss": 0.295,
      "step": 230
    },
    {
      "epoch": 60.0,
      "grad_norm": 0.45302724838256836,
      "learning_rate": 0.0002500000000000001,
      "loss": 0.2889,
      "step": 240
    },
    {
      "epoch": 62.5,
      "grad_norm": 0.42409127950668335,
      "learning_rate": 0.00021321178182447708,
      "loss": 0.2781,
      "step": 250
    },
    {
      "epoch": 65.0,
      "grad_norm": 0.4453699588775635,
      "learning_rate": 0.0001786061951567303,
      "loss": 0.273,
      "step": 260
    },
    {
      "epoch": 67.5,
      "grad_norm": 0.4217115044593811,
      "learning_rate": 0.00014644660940672628,
      "loss": 0.2646,
      "step": 270
    },
    {
      "epoch": 70.0,
      "grad_norm": 0.43468865752220154,
      "learning_rate": 0.00011697777844051105,
      "loss": 0.2611,
      "step": 280
    },
    {
      "epoch": 72.5,
      "grad_norm": 0.41657349467277527,
      "learning_rate": 9.042397785550405e-05,
      "loss": 0.2534,
      "step": 290
    },
    {
      "epoch": 75.0,
      "grad_norm": 0.40352940559387207,
      "learning_rate": 6.698729810778065e-05,
      "loss": 0.2509,
      "step": 300
    },
    {
      "epoch": 77.5,
      "grad_norm": 0.3772071301937103,
      "learning_rate": 4.684610648167503e-05,
      "loss": 0.2493,
      "step": 310
    },
    {
      "epoch": 80.0,
      "grad_norm": 0.3778958022594452,
      "learning_rate": 3.0153689607045842e-05,
      "loss": 0.249,
      "step": 320
    },
    {
      "epoch": 82.5,
      "grad_norm": 0.38997748494148254,
      "learning_rate": 1.70370868554659e-05,
      "loss": 0.2441,
      "step": 330
    },
    {
      "epoch": 85.0,
      "grad_norm": 0.3619975447654724,
      "learning_rate": 7.59612349389599e-06,
      "loss": 0.2472,
      "step": 340
    },
    {
      "epoch": 87.5,
      "grad_norm": 0.4022546708583832,
      "learning_rate": 1.9026509541272275e-06,
      "loss": 0.2493,
      "step": 350
    },
    {
      "epoch": 90.0,
      "grad_norm": 0.36714521050453186,
      "learning_rate": 0.0,
      "loss": 0.2444,
      "step": 360
    }
  ],
  "logging_steps": 10,
  "max_steps": 360,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 90,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.02434468200448e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}