| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.32362459546925565, |
| "eval_steps": 500, |
| "global_step": 500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.006472491909385114, |
| "grad_norm": 2.0135042667388916, |
| "learning_rate": 1.8e-05, |
| "loss": 1.5311, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.012944983818770227, |
| "grad_norm": 1.0551478862762451, |
| "learning_rate": 3.8e-05, |
| "loss": 1.2229, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.019417475728155338, |
| "grad_norm": 1.101570725440979, |
| "learning_rate": 5.8e-05, |
| "loss": 0.9845, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.025889967637540454, |
| "grad_norm": 0.9952124357223511, |
| "learning_rate": 7.800000000000001e-05, |
| "loss": 0.8599, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.032362459546925564, |
| "grad_norm": 0.8940704464912415, |
| "learning_rate": 9.8e-05, |
| "loss": 0.8401, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.038834951456310676, |
| "grad_norm": 0.8487522602081299, |
| "learning_rate": 9.93979933110368e-05, |
| "loss": 0.776, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.045307443365695796, |
| "grad_norm": 0.7864758968353271, |
| "learning_rate": 9.872909698996656e-05, |
| "loss": 0.8153, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.05177993527508091, |
| "grad_norm": 0.7295967936515808, |
| "learning_rate": 9.806020066889633e-05, |
| "loss": 0.8251, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.05825242718446602, |
| "grad_norm": 0.7075174450874329, |
| "learning_rate": 9.739130434782609e-05, |
| "loss": 0.749, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.06472491909385113, |
| "grad_norm": 0.6422586441040039, |
| "learning_rate": 9.672240802675586e-05, |
| "loss": 0.7428, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.07119741100323625, |
| "grad_norm": 0.6745761632919312, |
| "learning_rate": 9.605351170568563e-05, |
| "loss": 0.7806, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.07766990291262135, |
| "grad_norm": 0.6769530177116394, |
| "learning_rate": 9.53846153846154e-05, |
| "loss": 0.796, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.08414239482200647, |
| "grad_norm": 0.5150454044342041, |
| "learning_rate": 9.471571906354515e-05, |
| "loss": 0.7486, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.09061488673139159, |
| "grad_norm": 0.6074182391166687, |
| "learning_rate": 9.404682274247491e-05, |
| "loss": 0.7783, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.0970873786407767, |
| "grad_norm": 0.6745702624320984, |
| "learning_rate": 9.337792642140469e-05, |
| "loss": 0.7503, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.10355987055016182, |
| "grad_norm": 0.6088436841964722, |
| "learning_rate": 9.270903010033445e-05, |
| "loss": 0.7771, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.11003236245954692, |
| "grad_norm": 0.6351744532585144, |
| "learning_rate": 9.204013377926422e-05, |
| "loss": 0.7554, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.11650485436893204, |
| "grad_norm": 0.5281522274017334, |
| "learning_rate": 9.137123745819398e-05, |
| "loss": 0.7635, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.12297734627831715, |
| "grad_norm": 0.6833154559135437, |
| "learning_rate": 9.070234113712374e-05, |
| "loss": 0.753, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.12944983818770225, |
| "grad_norm": 0.5272049307823181, |
| "learning_rate": 9.003344481605351e-05, |
| "loss": 0.7384, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.13592233009708737, |
| "grad_norm": 0.6461851596832275, |
| "learning_rate": 8.936454849498329e-05, |
| "loss": 0.7548, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.1423948220064725, |
| "grad_norm": 0.5979950428009033, |
| "learning_rate": 8.869565217391305e-05, |
| "loss": 0.7534, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1488673139158576, |
| "grad_norm": 0.6396514773368835, |
| "learning_rate": 8.802675585284281e-05, |
| "loss": 0.7436, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.1553398058252427, |
| "grad_norm": 0.6104384064674377, |
| "learning_rate": 8.735785953177258e-05, |
| "loss": 0.7345, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.16181229773462782, |
| "grad_norm": 0.5535375475883484, |
| "learning_rate": 8.668896321070234e-05, |
| "loss": 0.7293, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.16828478964401294, |
| "grad_norm": 0.5984886884689331, |
| "learning_rate": 8.602006688963212e-05, |
| "loss": 0.7173, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.17475728155339806, |
| "grad_norm": 0.517716646194458, |
| "learning_rate": 8.535117056856188e-05, |
| "loss": 0.7694, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.18122977346278318, |
| "grad_norm": 0.5802426934242249, |
| "learning_rate": 8.468227424749165e-05, |
| "loss": 0.7505, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.18770226537216828, |
| "grad_norm": 0.5289272665977478, |
| "learning_rate": 8.401337792642141e-05, |
| "loss": 0.7678, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.1941747572815534, |
| "grad_norm": 0.5293733477592468, |
| "learning_rate": 8.334448160535117e-05, |
| "loss": 0.7242, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.20064724919093851, |
| "grad_norm": 0.566146969795227, |
| "learning_rate": 8.267558528428094e-05, |
| "loss": 0.7248, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.20711974110032363, |
| "grad_norm": 0.5496853590011597, |
| "learning_rate": 8.200668896321071e-05, |
| "loss": 0.7588, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.21359223300970873, |
| "grad_norm": 0.5420507192611694, |
| "learning_rate": 8.133779264214048e-05, |
| "loss": 0.7673, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.22006472491909385, |
| "grad_norm": 0.582582950592041, |
| "learning_rate": 8.066889632107024e-05, |
| "loss": 0.7341, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.22653721682847897, |
| "grad_norm": 0.5332103371620178, |
| "learning_rate": 8e-05, |
| "loss": 0.7518, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.23300970873786409, |
| "grad_norm": 0.5132858157157898, |
| "learning_rate": 7.933110367892977e-05, |
| "loss": 0.752, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.23948220064724918, |
| "grad_norm": 0.5356752276420593, |
| "learning_rate": 7.866220735785953e-05, |
| "loss": 0.7432, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.2459546925566343, |
| "grad_norm": 0.5592623353004456, |
| "learning_rate": 7.79933110367893e-05, |
| "loss": 0.7396, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.2524271844660194, |
| "grad_norm": 0.5103404521942139, |
| "learning_rate": 7.732441471571906e-05, |
| "loss": 0.7253, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.2588996763754045, |
| "grad_norm": 0.5380160212516785, |
| "learning_rate": 7.665551839464883e-05, |
| "loss": 0.7464, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.26537216828478966, |
| "grad_norm": 0.5373779535293579, |
| "learning_rate": 7.598662207357859e-05, |
| "loss": 0.7377, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.27184466019417475, |
| "grad_norm": 0.523170530796051, |
| "learning_rate": 7.531772575250837e-05, |
| "loss": 0.7413, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.2783171521035599, |
| "grad_norm": 0.5368985533714294, |
| "learning_rate": 7.464882943143813e-05, |
| "loss": 0.7227, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.284789644012945, |
| "grad_norm": 0.587778627872467, |
| "learning_rate": 7.39799331103679e-05, |
| "loss": 0.7289, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.2912621359223301, |
| "grad_norm": 0.5471896529197693, |
| "learning_rate": 7.331103678929766e-05, |
| "loss": 0.7301, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2977346278317152, |
| "grad_norm": 0.5446394085884094, |
| "learning_rate": 7.264214046822742e-05, |
| "loss": 0.7325, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.3042071197411003, |
| "grad_norm": 0.5761464238166809, |
| "learning_rate": 7.197324414715719e-05, |
| "loss": 0.7583, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.3106796116504854, |
| "grad_norm": 0.5082632899284363, |
| "learning_rate": 7.130434782608696e-05, |
| "loss": 0.7382, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.31715210355987056, |
| "grad_norm": 0.5441333651542664, |
| "learning_rate": 7.063545150501673e-05, |
| "loss": 0.7411, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.32362459546925565, |
| "grad_norm": 0.5039440393447876, |
| "learning_rate": 6.996655518394649e-05, |
| "loss": 0.7332, |
| "step": 500 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1545, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.803063236717445e+17, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|