{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08,
      "grad_norm": 114.14533233642578,
      "learning_rate": 9.996447363202946e-05,
      "loss": 16.6271,
      "step": 10
    },
    {
      "epoch": 0.16,
      "grad_norm": 19.26224136352539,
      "learning_rate": 9.977809823015401e-05,
      "loss": 13.6217,
      "step": 20
    },
    {
      "epoch": 0.24,
      "grad_norm": 16.628232955932617,
      "learning_rate": 9.938441702975689e-05,
      "loss": 12.3656,
      "step": 30
    },
    {
      "epoch": 0.32,
      "grad_norm": 13.531852722167969,
      "learning_rate": 9.879583809693738e-05,
      "loss": 12.2646,
      "step": 40
    },
    {
      "epoch": 0.4,
      "grad_norm": 14.354416847229004,
      "learning_rate": 9.801468428384716e-05,
      "loss": 12.1808,
      "step": 50
    },
    {
      "epoch": 0.48,
      "grad_norm": 13.167389869689941,
      "learning_rate": 9.704403844771128e-05,
      "loss": 12.1502,
      "step": 60
    },
    {
      "epoch": 0.56,
      "grad_norm": 12.674458503723145,
      "learning_rate": 9.588773128419906e-05,
      "loss": 11.9683,
      "step": 70
    },
    {
      "epoch": 0.64,
      "grad_norm": 13.783093452453613,
      "learning_rate": 9.45503262094184e-05,
      "loss": 11.8564,
      "step": 80
    },
    {
      "epoch": 0.72,
      "grad_norm": 17.22150993347168,
      "learning_rate": 9.30371013501972e-05,
      "loss": 12.0212,
      "step": 90
    },
    {
      "epoch": 0.8,
      "grad_norm": 13.618257522583008,
      "learning_rate": 9.135402871372808e-05,
      "loss": 11.8631,
      "step": 100
    },
    {
      "epoch": 0.88,
      "grad_norm": 13.423781394958496,
      "learning_rate": 8.950775061878453e-05,
      "loss": 11.6954,
      "step": 110
    },
    {
      "epoch": 0.96,
      "grad_norm": 12.201665878295898,
      "learning_rate": 8.750555348152298e-05,
      "loss": 11.7669,
      "step": 120
    },
    {
      "epoch": 1.04,
      "grad_norm": 14.449649810791016,
      "learning_rate": 8.535533905932738e-05,
      "loss": 10.3003,
      "step": 130
    },
    {
      "epoch": 1.12,
      "grad_norm": 14.11440658569336,
      "learning_rate": 8.306559326618259e-05,
      "loss": 8.5731,
      "step": 140
    },
    {
      "epoch": 1.2,
      "grad_norm": 12.817862510681152,
      "learning_rate": 8.064535268264883e-05,
      "loss": 8.5948,
      "step": 150
    },
    {
      "epoch": 1.28,
      "grad_norm": 12.568441390991211,
      "learning_rate": 7.810416889260653e-05,
      "loss": 8.656,
      "step": 160
    },
    {
      "epoch": 1.3599999999999999,
      "grad_norm": 13.24677848815918,
      "learning_rate": 7.545207078751857e-05,
      "loss": 8.7269,
      "step": 170
    },
    {
      "epoch": 1.44,
      "grad_norm": 12.547300338745117,
      "learning_rate": 7.269952498697734e-05,
      "loss": 8.6868,
      "step": 180
    },
    {
      "epoch": 1.52,
      "grad_norm": 12.896221160888672,
      "learning_rate": 6.985739453173903e-05,
      "loss": 8.7702,
      "step": 190
    },
    {
      "epoch": 1.6,
      "grad_norm": 12.103998184204102,
      "learning_rate": 6.693689601226458e-05,
      "loss": 8.7521,
      "step": 200
    },
    {
      "epoch": 1.6800000000000002,
      "grad_norm": 12.651740074157715,
      "learning_rate": 6.394955530196147e-05,
      "loss": 8.8742,
      "step": 210
    },
    {
      "epoch": 1.76,
      "grad_norm": 12.18869400024414,
      "learning_rate": 6.090716206982714e-05,
      "loss": 8.7707,
      "step": 220
    },
    {
      "epoch": 1.8399999999999999,
      "grad_norm": 12.143824577331543,
      "learning_rate": 5.782172325201155e-05,
      "loss": 8.7433,
      "step": 230
    },
    {
      "epoch": 1.92,
      "grad_norm": 11.785919189453125,
      "learning_rate": 5.470541566592573e-05,
      "loss": 8.7179,
      "step": 240
    },
    {
      "epoch": 2.0,
      "grad_norm": 12.037582397460938,
      "learning_rate": 5.157053795390642e-05,
      "loss": 8.6486,
      "step": 250
    },
    {
      "epoch": 2.08,
      "grad_norm": 12.652670860290527,
      "learning_rate": 4.8429462046093585e-05,
      "loss": 5.591,
      "step": 260
    },
    {
      "epoch": 2.16,
      "grad_norm": 14.214653015136719,
      "learning_rate": 4.529458433407429e-05,
      "loss": 5.4084,
      "step": 270
    },
    {
      "epoch": 2.24,
      "grad_norm": 12.59438419342041,
      "learning_rate": 4.2178276747988446e-05,
      "loss": 5.3605,
      "step": 280
    },
    {
      "epoch": 2.32,
      "grad_norm": 12.09847354888916,
      "learning_rate": 3.9092837930172884e-05,
      "loss": 5.3401,
      "step": 290
    },
    {
      "epoch": 2.4,
      "grad_norm": 13.492688179016113,
      "learning_rate": 3.605044469803854e-05,
      "loss": 5.3064,
      "step": 300
    },
    {
      "epoch": 2.48,
      "grad_norm": 12.142609596252441,
      "learning_rate": 3.3063103987735433e-05,
      "loss": 5.3187,
      "step": 310
    },
    {
      "epoch": 2.56,
      "grad_norm": 12.719853401184082,
      "learning_rate": 3.0142605468260978e-05,
      "loss": 5.413,
      "step": 320
    },
    {
      "epoch": 2.64,
      "grad_norm": 12.052017211914062,
      "learning_rate": 2.7300475013022663e-05,
      "loss": 5.2867,
      "step": 330
    },
    {
      "epoch": 2.7199999999999998,
      "grad_norm": 12.2785062789917,
      "learning_rate": 2.4547929212481435e-05,
      "loss": 5.3684,
      "step": 340
    },
    {
      "epoch": 2.8,
      "grad_norm": 11.52890396118164,
      "learning_rate": 2.1895831107393484e-05,
      "loss": 5.3484,
      "step": 350
    },
    {
      "epoch": 2.88,
      "grad_norm": 12.041983604431152,
      "learning_rate": 1.9354647317351188e-05,
      "loss": 5.2971,
      "step": 360
    },
    {
      "epoch": 2.96,
      "grad_norm": 12.474526405334473,
      "learning_rate": 1.6934406733817414e-05,
      "loss": 5.2595,
      "step": 370
    },
    {
      "epoch": 3.04,
      "grad_norm": 10.717528343200684,
      "learning_rate": 1.4644660940672627e-05,
      "loss": 4.2182,
      "step": 380
    },
    {
      "epoch": 3.12,
      "grad_norm": 11.499459266662598,
      "learning_rate": 1.2494446518477022e-05,
      "loss": 3.1071,
      "step": 390
    },
    {
      "epoch": 3.2,
      "grad_norm": 11.054661750793457,
      "learning_rate": 1.049224938121548e-05,
      "loss": 3.068,
      "step": 400
    },
    {
      "epoch": 3.2800000000000002,
      "grad_norm": 11.394171714782715,
      "learning_rate": 8.645971286271904e-06,
      "loss": 2.9727,
      "step": 410
    },
    {
      "epoch": 3.36,
      "grad_norm": 10.573503494262695,
      "learning_rate": 6.962898649802823e-06,
      "loss": 2.9938,
      "step": 420
    },
    {
      "epoch": 3.44,
      "grad_norm": 11.186671257019043,
      "learning_rate": 5.449673790581611e-06,
      "loss": 3.0744,
      "step": 430
    },
    {
      "epoch": 3.52,
      "grad_norm": 10.377039909362793,
      "learning_rate": 4.112268715800943e-06,
      "loss": 2.9339,
      "step": 440
    },
    {
      "epoch": 3.6,
      "grad_norm": 10.85782527923584,
      "learning_rate": 2.9559615522887273e-06,
      "loss": 2.9912,
      "step": 450
    },
    {
      "epoch": 3.68,
      "grad_norm": 11.210704803466797,
      "learning_rate": 1.985315716152847e-06,
      "loss": 2.9679,
      "step": 460
    },
    {
      "epoch": 3.76,
      "grad_norm": 11.051790237426758,
      "learning_rate": 1.2041619030626284e-06,
      "loss": 3.0297,
      "step": 470
    },
    {
      "epoch": 3.84,
      "grad_norm": 10.918357849121094,
      "learning_rate": 6.15582970243117e-07,
      "loss": 2.9479,
      "step": 480
    },
    {
      "epoch": 3.92,
      "grad_norm": 10.786859512329102,
      "learning_rate": 2.219017698460002e-07,
      "loss": 3.0567,
      "step": 490
    },
    {
      "epoch": 4.0,
      "grad_norm": 10.881739616394043,
      "learning_rate": 2.467198171342e-08,
      "loss": 3.0452,
      "step": 500
    },
    {
      "epoch": 4.0,
      "step": 500,
      "total_flos": 4.036279852125389e+16,
      "train_loss": 7.3980213584899905,
      "train_runtime": 1227.9067,
      "train_samples_per_second": 19.542,
      "train_steps_per_second": 0.407
    }
  ],
  "logging_steps": 10,
  "max_steps": 500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.036279852125389e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}