{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9865871833084947,
  "eval_steps": 500,
  "global_step": 501,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05961251862891207,
      "grad_norm": 9.858894874210412,
      "learning_rate": 5e-06,
      "loss": 0.9953,
      "step": 10
    },
    {
      "epoch": 0.11922503725782414,
      "grad_norm": 3.200951587553443,
      "learning_rate": 5e-06,
      "loss": 0.8872,
      "step": 20
    },
    {
      "epoch": 0.17883755588673622,
      "grad_norm": 0.8709047390143161,
      "learning_rate": 5e-06,
      "loss": 0.8521,
      "step": 30
    },
    {
      "epoch": 0.23845007451564829,
      "grad_norm": 0.7917099907436183,
      "learning_rate": 5e-06,
      "loss": 0.8247,
      "step": 40
    },
    {
      "epoch": 0.29806259314456035,
      "grad_norm": 0.7435704731717787,
      "learning_rate": 5e-06,
      "loss": 0.8138,
      "step": 50
    },
    {
      "epoch": 0.35767511177347244,
      "grad_norm": 0.6069787155672357,
      "learning_rate": 5e-06,
      "loss": 0.8091,
      "step": 60
    },
    {
      "epoch": 0.4172876304023845,
      "grad_norm": 0.7240817518375863,
      "learning_rate": 5e-06,
      "loss": 0.7952,
      "step": 70
    },
    {
      "epoch": 0.47690014903129657,
      "grad_norm": 0.7043281338985534,
      "learning_rate": 5e-06,
      "loss": 0.7931,
      "step": 80
    },
    {
      "epoch": 0.5365126676602087,
      "grad_norm": 0.586854693193699,
      "learning_rate": 5e-06,
      "loss": 0.7883,
      "step": 90
    },
    {
      "epoch": 0.5961251862891207,
      "grad_norm": 0.5823407135149267,
      "learning_rate": 5e-06,
      "loss": 0.7853,
      "step": 100
    },
    {
      "epoch": 0.6557377049180327,
      "grad_norm": 0.6283773137272283,
      "learning_rate": 5e-06,
      "loss": 0.7847,
      "step": 110
    },
    {
      "epoch": 0.7153502235469449,
      "grad_norm": 0.6299087806837431,
      "learning_rate": 5e-06,
      "loss": 0.7847,
      "step": 120
    },
    {
      "epoch": 0.7749627421758569,
      "grad_norm": 0.6905339934914414,
      "learning_rate": 5e-06,
      "loss": 0.7716,
      "step": 130
    },
    {
      "epoch": 0.834575260804769,
      "grad_norm": 0.5764098315055344,
      "learning_rate": 5e-06,
      "loss": 0.7722,
      "step": 140
    },
    {
      "epoch": 0.8941877794336811,
      "grad_norm": 0.596630510361194,
      "learning_rate": 5e-06,
      "loss": 0.7723,
      "step": 150
    },
    {
      "epoch": 0.9538002980625931,
      "grad_norm": 0.6393363259269436,
      "learning_rate": 5e-06,
      "loss": 0.7727,
      "step": 160
    },
    {
      "epoch": 0.9955290611028316,
      "eval_loss": 0.7674793601036072,
      "eval_runtime": 178.4077,
      "eval_samples_per_second": 25.33,
      "eval_steps_per_second": 0.398,
      "step": 167
    },
    {
      "epoch": 1.0134128166915053,
      "grad_norm": 1.028243455897246,
      "learning_rate": 5e-06,
      "loss": 0.7832,
      "step": 170
    },
    {
      "epoch": 1.0730253353204173,
      "grad_norm": 0.6463791732368608,
      "learning_rate": 5e-06,
      "loss": 0.7342,
      "step": 180
    },
    {
      "epoch": 1.1326378539493294,
      "grad_norm": 0.7099545246983331,
      "learning_rate": 5e-06,
      "loss": 0.7264,
      "step": 190
    },
    {
      "epoch": 1.1922503725782414,
      "grad_norm": 0.6643420655401983,
      "learning_rate": 5e-06,
      "loss": 0.7249,
      "step": 200
    },
    {
      "epoch": 1.2518628912071534,
      "grad_norm": 0.7310409955228447,
      "learning_rate": 5e-06,
      "loss": 0.733,
      "step": 210
    },
    {
      "epoch": 1.3114754098360657,
      "grad_norm": 0.6511777722980082,
      "learning_rate": 5e-06,
      "loss": 0.7219,
      "step": 220
    },
    {
      "epoch": 1.3710879284649775,
      "grad_norm": 0.7827849824805997,
      "learning_rate": 5e-06,
      "loss": 0.7208,
      "step": 230
    },
    {
      "epoch": 1.4307004470938898,
      "grad_norm": 0.6318650254279009,
      "learning_rate": 5e-06,
      "loss": 0.7188,
      "step": 240
    },
    {
      "epoch": 1.4903129657228018,
      "grad_norm": 0.6677002292570327,
      "learning_rate": 5e-06,
      "loss": 0.7192,
      "step": 250
    },
    {
      "epoch": 1.5499254843517138,
      "grad_norm": 0.564078834227423,
      "learning_rate": 5e-06,
      "loss": 0.7245,
      "step": 260
    },
    {
      "epoch": 1.6095380029806259,
      "grad_norm": 0.5237353037646948,
      "learning_rate": 5e-06,
      "loss": 0.7263,
      "step": 270
    },
    {
      "epoch": 1.669150521609538,
      "grad_norm": 0.6003861550477204,
      "learning_rate": 5e-06,
      "loss": 0.7203,
      "step": 280
    },
    {
      "epoch": 1.7287630402384502,
      "grad_norm": 0.7033677185903798,
      "learning_rate": 5e-06,
      "loss": 0.7217,
      "step": 290
    },
    {
      "epoch": 1.788375558867362,
      "grad_norm": 0.6820739163112765,
      "learning_rate": 5e-06,
      "loss": 0.7267,
      "step": 300
    },
    {
      "epoch": 1.8479880774962743,
      "grad_norm": 0.6594996515164985,
      "learning_rate": 5e-06,
      "loss": 0.7179,
      "step": 310
    },
    {
      "epoch": 1.9076005961251863,
      "grad_norm": 0.6309878288237989,
      "learning_rate": 5e-06,
      "loss": 0.7196,
      "step": 320
    },
    {
      "epoch": 1.9672131147540983,
      "grad_norm": 0.5108818281911819,
      "learning_rate": 5e-06,
      "loss": 0.7274,
      "step": 330
    },
    {
      "epoch": 1.9970193740685542,
      "eval_loss": 0.7537275552749634,
      "eval_runtime": 178.6454,
      "eval_samples_per_second": 25.296,
      "eval_steps_per_second": 0.397,
      "step": 335
    },
    {
      "epoch": 2.0268256333830106,
      "grad_norm": 0.919905069790943,
      "learning_rate": 5e-06,
      "loss": 0.7213,
      "step": 340
    },
    {
      "epoch": 2.0864381520119224,
      "grad_norm": 0.5627127586056083,
      "learning_rate": 5e-06,
      "loss": 0.6748,
      "step": 350
    },
    {
      "epoch": 2.1460506706408347,
      "grad_norm": 0.698070822337305,
      "learning_rate": 5e-06,
      "loss": 0.6786,
      "step": 360
    },
    {
      "epoch": 2.2056631892697465,
      "grad_norm": 0.7188866615898121,
      "learning_rate": 5e-06,
      "loss": 0.6768,
      "step": 370
    },
    {
      "epoch": 2.2652757078986587,
      "grad_norm": 0.7308617732721838,
      "learning_rate": 5e-06,
      "loss": 0.6779,
      "step": 380
    },
    {
      "epoch": 2.3248882265275705,
      "grad_norm": 0.6553661010694801,
      "learning_rate": 5e-06,
      "loss": 0.6764,
      "step": 390
    },
    {
      "epoch": 2.384500745156483,
      "grad_norm": 0.5825389739879382,
      "learning_rate": 5e-06,
      "loss": 0.6782,
      "step": 400
    },
    {
      "epoch": 2.444113263785395,
      "grad_norm": 0.5934721290598023,
      "learning_rate": 5e-06,
      "loss": 0.6723,
      "step": 410
    },
    {
      "epoch": 2.503725782414307,
      "grad_norm": 0.6213580858455807,
      "learning_rate": 5e-06,
      "loss": 0.6771,
      "step": 420
    },
    {
      "epoch": 2.563338301043219,
      "grad_norm": 0.5958772497068647,
      "learning_rate": 5e-06,
      "loss": 0.6772,
      "step": 430
    },
    {
      "epoch": 2.6229508196721314,
      "grad_norm": 0.6075566821845071,
      "learning_rate": 5e-06,
      "loss": 0.6771,
      "step": 440
    },
    {
      "epoch": 2.682563338301043,
      "grad_norm": 0.6500795583419717,
      "learning_rate": 5e-06,
      "loss": 0.6777,
      "step": 450
    },
    {
      "epoch": 2.742175856929955,
      "grad_norm": 0.5645816490580728,
      "learning_rate": 5e-06,
      "loss": 0.6745,
      "step": 460
    },
    {
      "epoch": 2.8017883755588673,
      "grad_norm": 0.593606888596975,
      "learning_rate": 5e-06,
      "loss": 0.6781,
      "step": 470
    },
    {
      "epoch": 2.8614008941877795,
      "grad_norm": 0.7862396669753284,
      "learning_rate": 5e-06,
      "loss": 0.6794,
      "step": 480
    },
    {
      "epoch": 2.9210134128166914,
      "grad_norm": 0.5741353341999686,
      "learning_rate": 5e-06,
      "loss": 0.6742,
      "step": 490
    },
    {
      "epoch": 2.9806259314456036,
      "grad_norm": 0.6882181948210808,
      "learning_rate": 5e-06,
      "loss": 0.6854,
      "step": 500
    },
    {
      "epoch": 2.9865871833084947,
      "eval_loss": 0.7531510591506958,
      "eval_runtime": 177.5218,
      "eval_samples_per_second": 25.456,
      "eval_steps_per_second": 0.4,
      "step": 501
    },
    {
      "epoch": 2.9865871833084947,
      "step": 501,
      "total_flos": 838984280309760.0,
      "train_loss": 0.7383992823060164,
      "train_runtime": 29820.4569,
      "train_samples_per_second": 8.636,
      "train_steps_per_second": 0.017
    }
  ],
  "logging_steps": 10,
  "max_steps": 501,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 838984280309760.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}