| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 470, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.10743061772605192, | |
| "grad_norm": 20.089375743938827, | |
| "learning_rate": 3.8297872340425535e-06, | |
| "loss": 2.2361, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.21486123545210384, | |
| "grad_norm": 1.8136781205400963, | |
| "learning_rate": 8.085106382978723e-06, | |
| "loss": 0.5542, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.3222918531781558, | |
| "grad_norm": 5.833809524442147, | |
| "learning_rate": 1.2340425531914895e-05, | |
| "loss": 0.3915, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.4297224709042077, | |
| "grad_norm": 1.3551663864951546, | |
| "learning_rate": 1.6595744680851064e-05, | |
| "loss": 0.3464, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.5371530886302597, | |
| "grad_norm": 1.2219998499437525, | |
| "learning_rate": 1.9998896833611603e-05, | |
| "loss": 0.3284, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.6445837063563116, | |
| "grad_norm": 3.325568321975458, | |
| "learning_rate": 1.9960311560501457e-05, | |
| "loss": 0.3098, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.7520143240823635, | |
| "grad_norm": 1.5025152149862429, | |
| "learning_rate": 1.986681112191161e-05, | |
| "loss": 0.2947, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.8594449418084154, | |
| "grad_norm": 1.1978088609682094, | |
| "learning_rate": 1.9718911023007382e-05, | |
| "loss": 0.2772, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.9668755595344674, | |
| "grad_norm": 3.7209154133323414, | |
| "learning_rate": 1.9517426695952358e-05, | |
| "loss": 0.2637, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.0644583706356312, | |
| "grad_norm": 0.9321825728768071, | |
| "learning_rate": 1.926346900410604e-05, | |
| "loss": 0.2477, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.1718889883616832, | |
| "grad_norm": 1.1579496686187436, | |
| "learning_rate": 1.895843811739162e-05, | |
| "loss": 0.2384, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.279319606087735, | |
| "grad_norm": 1.4252514462516224, | |
| "learning_rate": 1.8604015792601395e-05, | |
| "loss": 0.2313, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.386750223813787, | |
| "grad_norm": 1.158427909482145, | |
| "learning_rate": 1.8202156101201646e-05, | |
| "loss": 0.2259, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.4941808415398388, | |
| "grad_norm": 0.8524241535709315, | |
| "learning_rate": 1.7755074655758174e-05, | |
| "loss": 0.2207, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.6016114592658908, | |
| "grad_norm": 0.6875946112635646, | |
| "learning_rate": 1.7265236394381634e-05, | |
| "loss": 0.2117, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.7090420769919428, | |
| "grad_norm": 0.8721265188917838, | |
| "learning_rate": 1.6735341990541766e-05, | |
| "loss": 0.207, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.8164726947179948, | |
| "grad_norm": 0.800843100380685, | |
| "learning_rate": 1.61683129631787e-05, | |
| "loss": 0.1962, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.9239033124440466, | |
| "grad_norm": 0.7963931429193268, | |
| "learning_rate": 1.5567275569205216e-05, | |
| "loss": 0.1938, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 2.0214861235452104, | |
| "grad_norm": 0.659012311906312, | |
| "learning_rate": 1.4935543567206984e-05, | |
| "loss": 0.1954, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 2.1289167412712624, | |
| "grad_norm": 0.7791125669343928, | |
| "learning_rate": 1.4276599947371388e-05, | |
| "loss": 0.17, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.2363473589973144, | |
| "grad_norm": 0.7255530063668101, | |
| "learning_rate": 1.3594077728375129e-05, | |
| "loss": 0.1676, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 2.3437779767233664, | |
| "grad_norm": 1.0972658675565765, | |
| "learning_rate": 1.2891739927104992e-05, | |
| "loss": 0.1634, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 2.451208594449418, | |
| "grad_norm": 0.8782737880409247, | |
| "learning_rate": 1.217345881164667e-05, | |
| "loss": 0.1619, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 2.55863921217547, | |
| "grad_norm": 0.8377800753430866, | |
| "learning_rate": 1.1443194551928267e-05, | |
| "loss": 0.16, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 2.666069829901522, | |
| "grad_norm": 0.8148515538842358, | |
| "learning_rate": 1.0704973385725853e-05, | |
| "loss": 0.1545, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.773500447627574, | |
| "grad_norm": 0.8382605438092161, | |
| "learning_rate": 9.962865420410702e-06, | |
| "loss": 0.157, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 2.880931065353626, | |
| "grad_norm": 0.8418234560769987, | |
| "learning_rate": 9.22096219282597e-06, | |
| "loss": 0.1516, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 2.9883616830796775, | |
| "grad_norm": 1.2348680954607516, | |
| "learning_rate": 8.483354111014142e-06, | |
| "loss": 0.1507, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 3.0859444941808416, | |
| "grad_norm": 1.4296779451123045, | |
| "learning_rate": 7.75410790216802e-06, | |
| "loss": 0.1338, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 3.1933751119068936, | |
| "grad_norm": 0.936837659064759, | |
| "learning_rate": 7.037244191143662e-06, | |
| "loss": 0.1158, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 3.3008057296329456, | |
| "grad_norm": 0.7518008256356895, | |
| "learning_rate": 6.336715333153869e-06, | |
| "loss": 0.1141, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 3.408236347358997, | |
| "grad_norm": 0.9067706982590781, | |
| "learning_rate": 5.656383622859418e-06, | |
| "loss": 0.1123, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 3.515666965085049, | |
| "grad_norm": 0.8868213545464397, | |
| "learning_rate": 5.000000000000003e-06, | |
| "loss": 0.1123, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 3.623097582811101, | |
| "grad_norm": 1.0556337500861324, | |
| "learning_rate": 4.371183368969165e-06, | |
| "loss": 0.1067, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 3.730528200537153, | |
| "grad_norm": 0.9611508609707269, | |
| "learning_rate": 3.7734006463527695e-06, | |
| "loss": 0.1101, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 3.837958818263205, | |
| "grad_norm": 0.7820816197402637, | |
| "learning_rate": 3.209947646436752e-06, | |
| "loss": 0.1095, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 3.9453894359892567, | |
| "grad_norm": 1.0504063992235715, | |
| "learning_rate": 2.6839309100699975e-06, | |
| "loss": 0.1052, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 4.042972247090421, | |
| "grad_norm": 0.6865272264261028, | |
| "learning_rate": 2.1982505770671303e-06, | |
| "loss": 0.0921, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 4.150402864816472, | |
| "grad_norm": 0.8796386863699859, | |
| "learning_rate": 1.7555843965823992e-06, | |
| "loss": 0.076, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 4.257833482542525, | |
| "grad_norm": 0.887963511828224, | |
| "learning_rate": 1.3583729636118359e-06, | |
| "loss": 0.0733, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 4.365264100268576, | |
| "grad_norm": 0.9590088128005394, | |
| "learning_rate": 1.0088062630208272e-06, | |
| "loss": 0.0731, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 4.472694717994629, | |
| "grad_norm": 1.2910374680962302, | |
| "learning_rate": 7.088115952851238e-07, | |
| "loss": 0.0718, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 4.58012533572068, | |
| "grad_norm": 0.9177315234691451, | |
| "learning_rate": 4.6004295051554236e-07, | |
| "loss": 0.0715, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 4.687555953446733, | |
| "grad_norm": 0.8450380472855318, | |
| "learning_rate": 2.638718893515946e-07, | |
| "loss": 0.0725, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 4.794986571172784, | |
| "grad_norm": 0.8170638735343974, | |
| "learning_rate": 1.21379981001305e-07, | |
| "loss": 0.0701, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 4.902417188898836, | |
| "grad_norm": 0.9592988275261085, | |
| "learning_rate": 3.335284011929951e-08, | |
| "loss": 0.0701, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 2.4307818442462943, | |
| "learning_rate": 2.75795400255241e-10, | |
| "loss": 0.0699, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "step": 470, | |
| "total_flos": 1189607016235008.0, | |
| "train_loss": 0.2205800841463373, | |
| "train_runtime": 29859.2322, | |
| "train_samples_per_second": 11.969, | |
| "train_steps_per_second": 0.016 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 470, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1189607016235008.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |