| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 50.0, |
| "global_step": 86, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0237741456166419, |
| "grad_norm": 3.34375, |
| "learning_rate": 2e-05, |
| "loss": 1.1976238489151, |
| "step": 1, |
| "token_acc": 0.7184428746928747 |
| }, |
| { |
| "epoch": 0.1188707280832095, |
| "grad_norm": 3.765625, |
| "learning_rate": 0.0001, |
| "loss": 1.2833234071731567, |
| "step": 5, |
| "token_acc": 0.6957437157113635 |
| }, |
| { |
| "epoch": 0.237741456166419, |
| "grad_norm": 300.0, |
| "learning_rate": 9.906276553136923e-05, |
| "loss": 2.3091596603393554, |
| "step": 10, |
| "token_acc": 0.6008980821950969 |
| }, |
| { |
| "epoch": 0.35661218424962854, |
| "grad_norm": 3.28125, |
| "learning_rate": 9.628619846344454e-05, |
| "loss": 1.3249249458312988, |
| "step": 15, |
| "token_acc": 0.7083573751573646 |
| }, |
| { |
| "epoch": 0.475482912332838, |
| "grad_norm": 2.203125, |
| "learning_rate": 9.177439057064683e-05, |
| "loss": 0.9449030876159668, |
| "step": 20, |
| "token_acc": 0.7733868243825437 |
| }, |
| { |
| "epoch": 0.5943536404160475, |
| "grad_norm": 1.4375, |
| "learning_rate": 8.569648672789497e-05, |
| "loss": 0.804572582244873, |
| "step": 25, |
| "token_acc": 0.8030009224918576 |
| }, |
| { |
| "epoch": 0.7132243684992571, |
| "grad_norm": 1.1953125, |
| "learning_rate": 7.828034377432693e-05, |
| "loss": 0.6343977928161622, |
| "step": 30, |
| "token_acc": 0.8435477802859292 |
| }, |
| { |
| "epoch": 0.8320950965824666, |
| "grad_norm": 1.0546875, |
| "learning_rate": 6.980398830195785e-05, |
| "loss": 0.3958749771118164, |
| "step": 35, |
| "token_acc": 0.8998997800202961 |
| }, |
| { |
| "epoch": 0.950965824665676, |
| "grad_norm": 0.74609375, |
| "learning_rate": 6.058519361147055e-05, |
| "loss": 0.38391425609588625, |
| "step": 40, |
| "token_acc": 0.9056173526140155 |
| }, |
| { |
| "epoch": 1.0475482912332839, |
| "grad_norm": 0.82421875, |
| "learning_rate": 5.096956658859122e-05, |
| "loss": 0.29045734405517576, |
| "step": 45, |
| "token_acc": 0.9264699310283186 |
| }, |
| { |
| "epoch": 1.1664190193164934, |
| "grad_norm": 0.87109375, |
| "learning_rate": 4.131759111665349e-05, |
| "loss": 0.13265597820281982, |
| "step": 50, |
| "token_acc": 0.9676710929519918 |
| }, |
| { |
| "epoch": 1.2852897473997027, |
| "grad_norm": 0.48046875, |
| "learning_rate": 3.199111375976449e-05, |
| "loss": 0.11603926420211792, |
| "step": 55, |
| "token_acc": 0.9712261616975557 |
| }, |
| { |
| "epoch": 1.4041604754829122, |
| "grad_norm": 0.5703125, |
| "learning_rate": 2.333977835991545e-05, |
| "loss": 0.09179887771606446, |
| "step": 60, |
| "token_acc": 0.9757825780724563 |
| }, |
| { |
| "epoch": 1.5230312035661218, |
| "grad_norm": 0.59765625, |
| "learning_rate": 1.5687918106563326e-05, |
| "loss": 0.11050317287445069, |
| "step": 65, |
| "token_acc": 0.9738332162768583 |
| }, |
| { |
| "epoch": 1.6419019316493313, |
| "grad_norm": 0.69140625, |
| "learning_rate": 9.322396486851626e-06, |
| "loss": 0.0894925355911255, |
| "step": 70, |
| "token_acc": 0.9766682632919051 |
| }, |
| { |
| "epoch": 1.7607726597325408, |
| "grad_norm": 0.58984375, |
| "learning_rate": 4.4818529516926726e-06, |
| "loss": 0.0985899806022644, |
| "step": 75, |
| "token_acc": 0.9736953912894402 |
| }, |
| { |
| "epoch": 1.8796433878157504, |
| "grad_norm": 0.6640625, |
| "learning_rate": 1.3477564710088098e-06, |
| "loss": 0.11772974729537963, |
| "step": 80, |
| "token_acc": 0.970642493007756 |
| }, |
| { |
| "epoch": 1.9985141158989599, |
| "grad_norm": 0.72265625, |
| "learning_rate": 3.760237478849793e-08, |
| "loss": 0.10874439477920532, |
| "step": 85, |
| "token_acc": 0.9721623048495361 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 86, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.3391554666248192e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|