| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 10.0, |
| "eval_steps": 100, |
| "global_step": 300, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.16806722689075632, |
| "grad_norm": 5.1821064949035645, |
| "learning_rate": 1.3333333333333333e-05, |
| "loss": 1.6929, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.33613445378151263, |
| "grad_norm": 1.7028719186782837, |
| "learning_rate": 3e-05, |
| "loss": 1.317, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.5042016806722689, |
| "grad_norm": 0.8325338959693909, |
| "learning_rate": 4.666666666666667e-05, |
| "loss": 0.9858, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.6722689075630253, |
| "grad_norm": 0.563953161239624, |
| "learning_rate": 6.333333333333333e-05, |
| "loss": 0.8006, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.8403361344537815, |
| "grad_norm": 0.6012160778045654, |
| "learning_rate": 8e-05, |
| "loss": 0.7177, |
| "step": 25 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.6171104311943054, |
| "learning_rate": 9.666666666666667e-05, |
| "loss": 0.6072, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.1680672268907564, |
| "grad_norm": 0.5041235685348511, |
| "learning_rate": 9.994585556692624e-05, |
| "loss": 0.526, |
| "step": 35 |
| }, |
| { |
| "epoch": 1.3361344537815127, |
| "grad_norm": 0.5351182222366333, |
| "learning_rate": 9.972609476841367e-05, |
| "loss": 0.4891, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.504201680672269, |
| "grad_norm": 0.5860567688941956, |
| "learning_rate": 9.933807660562898e-05, |
| "loss": 0.4424, |
| "step": 45 |
| }, |
| { |
| "epoch": 1.6722689075630253, |
| "grad_norm": 0.6627805829048157, |
| "learning_rate": 9.878311400921072e-05, |
| "loss": 0.3993, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.8403361344537816, |
| "grad_norm": 0.6919167637825012, |
| "learning_rate": 9.806308479691595e-05, |
| "loss": 0.3435, |
| "step": 55 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.7827972769737244, |
| "learning_rate": 9.718042531967918e-05, |
| "loss": 0.2951, |
| "step": 60 |
| }, |
| { |
| "epoch": 2.168067226890756, |
| "grad_norm": 0.6349595785140991, |
| "learning_rate": 9.613812221777212e-05, |
| "loss": 0.2419, |
| "step": 65 |
| }, |
| { |
| "epoch": 2.3361344537815127, |
| "grad_norm": 0.763509213924408, |
| "learning_rate": 9.493970231495835e-05, |
| "loss": 0.1926, |
| "step": 70 |
| }, |
| { |
| "epoch": 2.504201680672269, |
| "grad_norm": 0.706917405128479, |
| "learning_rate": 9.358922068483812e-05, |
| "loss": 0.1715, |
| "step": 75 |
| }, |
| { |
| "epoch": 2.6722689075630255, |
| "grad_norm": 0.6611737608909607, |
| "learning_rate": 9.209124692976287e-05, |
| "loss": 0.1689, |
| "step": 80 |
| }, |
| { |
| "epoch": 2.8403361344537816, |
| "grad_norm": 0.6016265749931335, |
| "learning_rate": 9.045084971874738e-05, |
| "loss": 0.148, |
| "step": 85 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.7554067373275757, |
| "learning_rate": 8.86735796366982e-05, |
| "loss": 0.1496, |
| "step": 90 |
| }, |
| { |
| "epoch": 3.168067226890756, |
| "grad_norm": 0.7190783023834229, |
| "learning_rate": 8.676545040299145e-05, |
| "loss": 0.0929, |
| "step": 95 |
| }, |
| { |
| "epoch": 3.3361344537815127, |
| "grad_norm": 0.5446023941040039, |
| "learning_rate": 8.473291852294987e-05, |
| "loss": 0.0848, |
| "step": 100 |
| }, |
| { |
| "epoch": 3.3361344537815127, |
| "eval_loss": 0.09926361590623856, |
| "eval_runtime": 1.6896, |
| "eval_samples_per_second": 11.837, |
| "eval_steps_per_second": 11.837, |
| "step": 100 |
| }, |
| { |
| "epoch": 3.504201680672269, |
| "grad_norm": 0.5175163149833679, |
| "learning_rate": 8.258286144107276e-05, |
| "loss": 0.0849, |
| "step": 105 |
| }, |
| { |
| "epoch": 3.6722689075630255, |
| "grad_norm": 0.6044480800628662, |
| "learning_rate": 8.032255426994069e-05, |
| "loss": 0.0927, |
| "step": 110 |
| }, |
| { |
| "epoch": 3.8403361344537816, |
| "grad_norm": 0.5574583411216736, |
| "learning_rate": 7.795964517353735e-05, |
| "loss": 0.0761, |
| "step": 115 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.5631000995635986, |
| "learning_rate": 7.550212948828377e-05, |
| "loss": 0.0711, |
| "step": 120 |
| }, |
| { |
| "epoch": 4.168067226890757, |
| "grad_norm": 0.49856850504875183, |
| "learning_rate": 7.295832266935059e-05, |
| "loss": 0.0512, |
| "step": 125 |
| }, |
| { |
| "epoch": 4.336134453781512, |
| "grad_norm": 0.49295711517333984, |
| "learning_rate": 7.033683215379002e-05, |
| "loss": 0.0482, |
| "step": 130 |
| }, |
| { |
| "epoch": 4.504201680672269, |
| "grad_norm": 0.4027378559112549, |
| "learning_rate": 6.764652823569344e-05, |
| "loss": 0.0448, |
| "step": 135 |
| }, |
| { |
| "epoch": 4.6722689075630255, |
| "grad_norm": 0.328042209148407, |
| "learning_rate": 6.48965140519241e-05, |
| "loss": 0.0433, |
| "step": 140 |
| }, |
| { |
| "epoch": 4.840336134453781, |
| "grad_norm": 0.500453770160675, |
| "learning_rate": 6.209609477998338e-05, |
| "loss": 0.0524, |
| "step": 145 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.4435368776321411, |
| "learning_rate": 5.925474615223573e-05, |
| "loss": 0.0431, |
| "step": 150 |
| }, |
| { |
| "epoch": 5.168067226890757, |
| "grad_norm": 0.4105546772480011, |
| "learning_rate": 5.6382082393029746e-05, |
| "loss": 0.0273, |
| "step": 155 |
| }, |
| { |
| "epoch": 5.336134453781512, |
| "grad_norm": 0.4708898067474365, |
| "learning_rate": 5.348782368720626e-05, |
| "loss": 0.0299, |
| "step": 160 |
| }, |
| { |
| "epoch": 5.504201680672269, |
| "grad_norm": 0.32217878103256226, |
| "learning_rate": 5.0581763290069865e-05, |
| "loss": 0.0252, |
| "step": 165 |
| }, |
| { |
| "epoch": 5.6722689075630255, |
| "grad_norm": 0.33504194021224976, |
| "learning_rate": 4.767373439011267e-05, |
| "loss": 0.0273, |
| "step": 170 |
| }, |
| { |
| "epoch": 5.840336134453781, |
| "grad_norm": 0.453778475522995, |
| "learning_rate": 4.477357683661734e-05, |
| "loss": 0.0324, |
| "step": 175 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 0.42859596014022827, |
| "learning_rate": 4.1891103844721636e-05, |
| "loss": 0.0315, |
| "step": 180 |
| }, |
| { |
| "epoch": 6.168067226890757, |
| "grad_norm": 0.21872906386852264, |
| "learning_rate": 3.903606879060483e-05, |
| "loss": 0.0178, |
| "step": 185 |
| }, |
| { |
| "epoch": 6.336134453781512, |
| "grad_norm": 0.2727561295032501, |
| "learning_rate": 3.6218132209150045e-05, |
| "loss": 0.0172, |
| "step": 190 |
| }, |
| { |
| "epoch": 6.504201680672269, |
| "grad_norm": 0.259501188993454, |
| "learning_rate": 3.34468291057521e-05, |
| "loss": 0.0191, |
| "step": 195 |
| }, |
| { |
| "epoch": 6.6722689075630255, |
| "grad_norm": 0.2755616307258606, |
| "learning_rate": 3.073153669287759e-05, |
| "loss": 0.0182, |
| "step": 200 |
| }, |
| { |
| "epoch": 6.6722689075630255, |
| "eval_loss": 0.09819571673870087, |
| "eval_runtime": 1.6764, |
| "eval_samples_per_second": 11.93, |
| "eval_steps_per_second": 11.93, |
| "step": 200 |
| }, |
| { |
| "epoch": 6.840336134453781, |
| "grad_norm": 0.30396485328674316, |
| "learning_rate": 2.8081442660546125e-05, |
| "loss": 0.0185, |
| "step": 205 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 0.3928929269313812, |
| "learning_rate": 2.5505514088095655e-05, |
| "loss": 0.0184, |
| "step": 210 |
| }, |
| { |
| "epoch": 7.168067226890757, |
| "grad_norm": 0.22996097803115845, |
| "learning_rate": 2.3012467102424373e-05, |
| "loss": 0.0121, |
| "step": 215 |
| }, |
| { |
| "epoch": 7.336134453781512, |
| "grad_norm": 0.19302956759929657, |
| "learning_rate": 2.061073738537635e-05, |
| "loss": 0.0122, |
| "step": 220 |
| }, |
| { |
| "epoch": 7.504201680672269, |
| "grad_norm": 0.1985304355621338, |
| "learning_rate": 1.8308451630064484e-05, |
| "loss": 0.0134, |
| "step": 225 |
| }, |
| { |
| "epoch": 7.6722689075630255, |
| "grad_norm": 0.13862721621990204, |
| "learning_rate": 1.611340004271339e-05, |
| "loss": 0.0123, |
| "step": 230 |
| }, |
| { |
| "epoch": 7.840336134453781, |
| "grad_norm": 0.1474885195493698, |
| "learning_rate": 1.4033009983067452e-05, |
| "loss": 0.0117, |
| "step": 235 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.28437599539756775, |
| "learning_rate": 1.2074320832556556e-05, |
| "loss": 0.0113, |
| "step": 240 |
| }, |
| { |
| "epoch": 8.168067226890756, |
| "grad_norm": 0.12733525037765503, |
| "learning_rate": 1.0243960175257606e-05, |
| "loss": 0.0096, |
| "step": 245 |
| }, |
| { |
| "epoch": 8.336134453781513, |
| "grad_norm": 0.17934471368789673, |
| "learning_rate": 8.548121372247918e-06, |
| "loss": 0.0098, |
| "step": 250 |
| }, |
| { |
| "epoch": 8.504201680672269, |
| "grad_norm": 0.20203900337219238, |
| "learning_rate": 6.992542605231739e-06, |
| "loss": 0.0097, |
| "step": 255 |
| }, |
| { |
| "epoch": 8.672268907563025, |
| "grad_norm": 0.14116302132606506, |
| "learning_rate": 5.582487460349805e-06, |
| "loss": 0.0099, |
| "step": 260 |
| }, |
| { |
| "epoch": 8.840336134453782, |
| "grad_norm": 0.17721185088157654, |
| "learning_rate": 4.322727117869951e-06, |
| "loss": 0.0095, |
| "step": 265 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 0.14386144280433655, |
| "learning_rate": 3.2175242080234313e-06, |
| "loss": 0.0092, |
| "step": 270 |
| }, |
| { |
| "epoch": 9.168067226890756, |
| "grad_norm": 0.13240846991539001, |
| "learning_rate": 2.2706183876134045e-06, |
| "loss": 0.0083, |
| "step": 275 |
| }, |
| { |
| "epoch": 9.336134453781513, |
| "grad_norm": 0.12592285871505737, |
| "learning_rate": 1.4852136862001764e-06, |
| "loss": 0.0092, |
| "step": 280 |
| }, |
| { |
| "epoch": 9.504201680672269, |
| "grad_norm": 0.10958818346261978, |
| "learning_rate": 8.639676646793382e-07, |
| "loss": 0.0076, |
| "step": 285 |
| }, |
| { |
| "epoch": 9.672268907563025, |
| "grad_norm": 0.11822710186243057, |
| "learning_rate": 4.089824229369155e-07, |
| "loss": 0.0093, |
| "step": 290 |
| }, |
| { |
| "epoch": 9.840336134453782, |
| "grad_norm": 0.14177332818508148, |
| "learning_rate": 1.2179748700879012e-07, |
| "loss": 0.0087, |
| "step": 295 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.15116795897483826, |
| "learning_rate": 3.384599811889766e-09, |
| "loss": 0.0095, |
| "step": 300 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_loss": 0.1007455587387085, |
| "eval_runtime": 1.6871, |
| "eval_samples_per_second": 11.855, |
| "eval_steps_per_second": 11.855, |
| "step": 300 |
| }, |
| { |
| "epoch": 10.0, |
| "step": 300, |
| "total_flos": 3.884520534294528e+16, |
| "train_loss": 0.18234332187722127, |
| "train_runtime": 611.2552, |
| "train_samples_per_second": 31.084, |
| "train_steps_per_second": 0.491 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 300, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.884520534294528e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|