{ "best_global_step": 600, "best_metric": 72.28, "best_model_checkpoint": "/content/drive/MyDrive/Summarization/checkpoints/cur_cp/checkpoint-600", "epoch": 8.835341365461847, "eval_steps": 100, "global_step": 2200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.20080321285140562, "grad_norm": 10.723053932189941, "learning_rate": 4.995027130745321e-05, "loss": 0.8179, "step": 50 }, { "epoch": 0.40160642570281124, "grad_norm": 13.537382125854492, "learning_rate": 4.980128306524183e-05, "loss": 0.7085, "step": 100 }, { "epoch": 0.40160642570281124, "eval_f1": 70.31, "eval_gen_len": 42.92338709677419, "eval_loss": 0.6963455080986023, "eval_p": 74.73, "eval_r": 70.54, "eval_runtime": 46.3873, "eval_samples_per_second": 10.693, "eval_steps_per_second": 0.668, "step": 100 }, { "epoch": 0.6024096385542169, "grad_norm": 3.749103307723999, "learning_rate": 4.9553627992605066e-05, "loss": 0.6667, "step": 150 }, { "epoch": 0.8032128514056225, "grad_norm": 7.421478748321533, "learning_rate": 4.9208291334580104e-05, "loss": 0.5991, "step": 200 }, { "epoch": 0.8032128514056225, "eval_f1": 68.09, "eval_gen_len": 40.17741935483871, "eval_loss": 0.7214160561561584, "eval_p": 75.9, "eval_r": 66.2, "eval_runtime": 45.6263, "eval_samples_per_second": 10.871, "eval_steps_per_second": 0.679, "step": 200 }, { "epoch": 1.0040160642570282, "grad_norm": 9.242626190185547, "learning_rate": 4.87666469424063e-05, "loss": 0.752, "step": 250 }, { "epoch": 1.2048192771084336, "grad_norm": 4.659054756164551, "learning_rate": 4.8230451807939135e-05, "loss": 0.4472, "step": 300 }, { "epoch": 1.2048192771084336, "eval_f1": 67.64, "eval_gen_len": 39.01411290322581, "eval_loss": 0.7062053084373474, "eval_p": 75.6, "eval_r": 65.19, "eval_runtime": 42.4185, "eval_samples_per_second": 11.693, "eval_steps_per_second": 0.731, "step": 300 }, { "epoch": 1.4056224899598393, "grad_norm": 11.880289077758789, "learning_rate": 4.760183907381757e-05, "loss": 0.5069, "step": 350 }, { "epoch": 1.606425702811245, "grad_norm": 9.056407928466797, "learning_rate": 4.6883309547192476e-05, "loss": 0.519, "step": 400 }, { "epoch": 1.606425702811245, "eval_f1": 71.67, "eval_gen_len": 46.93346774193548, "eval_loss": 0.7584968209266663, "eval_p": 73.71, "eval_r": 74.1, "eval_runtime": 55.9551, "eval_samples_per_second": 8.864, "eval_steps_per_second": 0.554, "step": 400 }, { "epoch": 1.8072289156626506, "grad_norm": 7.793162822723389, "learning_rate": 4.607772175077711e-05, "loss": 0.4939, "step": 450 }, { "epoch": 2.0080321285140563, "grad_norm": 7.212235927581787, "learning_rate": 4.518828055079925e-05, "loss": 0.4646, "step": 500 }, { "epoch": 2.0080321285140563, "eval_f1": 69.01, "eval_gen_len": 42.16129032258065, "eval_loss": 0.7297435998916626, "eval_p": 73.93, "eval_r": 68.57, "eval_runtime": 48.8895, "eval_samples_per_second": 10.145, "eval_steps_per_second": 0.634, "step": 500 }, { "epoch": 2.208835341365462, "grad_norm": 5.176413536071777, "learning_rate": 4.421852440709666e-05, "loss": 0.2646, "step": 550 }, { "epoch": 2.4096385542168672, "grad_norm": 19.735258102416992, "learning_rate": 4.3172311296078595e-05, "loss": 0.3214, "step": 600 }, { "epoch": 2.4096385542168672, "eval_f1": 72.28, "eval_gen_len": 46.564516129032256, "eval_loss": 0.788872241973877, "eval_p": 74.17, "eval_r": 74.3, "eval_runtime": 54.3998, "eval_samples_per_second": 9.118, "eval_steps_per_second": 0.57, "step": 600 }, { "epoch": 2.610441767068273, "grad_norm": 9.665975570678711, "learning_rate": 4.205380336255594e-05, "loss": 0.4161, "step": 650 }, { "epoch": 2.8112449799196786, "grad_norm": 8.65356731414795, "learning_rate": 4.08674503614997e-05, "loss": 0.2973, "step": 700 }, { "epoch": 2.8112449799196786, "eval_f1": 68.05, "eval_gen_len": 41.256048387096776, "eval_loss": 0.770168662071228, "eval_p": 74.23, "eval_r": 66.93, "eval_runtime": 44.2971, "eval_samples_per_second": 11.197, "eval_steps_per_second": 0.7, "step": 700 }, { "epoch": 3.0120481927710845, "grad_norm": 4.7805867195129395, "learning_rate": 3.961797195560118e-05, "loss": 0.3477, "step": 750 }, { "epoch": 3.21285140562249, "grad_norm": 3.864029884338379, "learning_rate": 3.8310338939059644e-05, "loss": 0.2447, "step": 800 }, { "epoch": 3.21285140562249, "eval_f1": 69.3, "eval_gen_len": 42.681451612903224, "eval_loss": 0.8243445158004761, "eval_p": 74.09, "eval_r": 68.98, "eval_runtime": 47.774, "eval_samples_per_second": 10.382, "eval_steps_per_second": 0.649, "step": 800 }, { "epoch": 3.4136546184738954, "grad_norm": 1.5391823053359985, "learning_rate": 3.694975346229458e-05, "loss": 0.2272, "step": 850 }, { "epoch": 3.6144578313253013, "grad_norm": 12.502788543701172, "learning_rate": 3.55416283362546e-05, "loss": 0.2136, "step": 900 }, { "epoch": 3.6144578313253013, "eval_f1": 72.18, "eval_gen_len": 45.850806451612904, "eval_loss": 0.8359085917472839, "eval_p": 74.24, "eval_r": 73.77, "eval_runtime": 51.4887, "eval_samples_per_second": 9.633, "eval_steps_per_second": 0.602, "step": 900 }, { "epoch": 3.8152610441767068, "grad_norm": 6.718369960784912, "learning_rate": 3.409156549865654e-05, "loss": 0.1959, "step": 950 }, { "epoch": 4.016064257028113, "grad_norm": 32.431217193603516, "learning_rate": 3.260533372782234e-05, "loss": 0.2571, "step": 1000 }, { "epoch": 4.016064257028113, "eval_f1": 69.38, "eval_gen_len": 42.243951612903224, "eval_loss": 0.8177807927131653, "eval_p": 73.87, "eval_r": 69.22, "eval_runtime": 48.2955, "eval_samples_per_second": 10.27, "eval_steps_per_second": 0.642, "step": 1000 }, { "epoch": 4.216867469879518, "grad_norm": 5.5347065925598145, "learning_rate": 3.10888456927748e-05, "loss": 0.1427, "step": 1050 }, { "epoch": 4.417670682730924, "grad_norm": 4.728146076202393, "learning_rate": 2.9548134430893604e-05, "loss": 0.1356, "step": 1100 }, { "epoch": 4.417670682730924, "eval_f1": 70.43, "eval_gen_len": 44.792338709677416, "eval_loss": 0.8745766878128052, "eval_p": 73.6, "eval_r": 71.48, "eval_runtime": 49.7707, "eval_samples_per_second": 9.966, "eval_steps_per_second": 0.623, "step": 1100 }, { "epoch": 4.618473895582329, "grad_norm": 4.265488147735596, "learning_rate": 2.7989329346710375e-05, "loss": 0.1644, "step": 1150 }, { "epoch": 4.8192771084337345, "grad_norm": 5.412672996520996, "learning_rate": 2.6418631827326857e-05, "loss": 0.1564, "step": 1200 }, { "epoch": 4.8192771084337345, "eval_f1": 70.56, "eval_gen_len": 45.20967741935484, "eval_loss": 0.8563244342803955, "eval_p": 73.64, "eval_r": 71.76, "eval_runtime": 50.2355, "eval_samples_per_second": 9.873, "eval_steps_per_second": 0.617, "step": 1200 }, { "epoch": 5.020080321285141, "grad_norm": 5.465022087097168, "learning_rate": 2.484229057146507e-05, "loss": 0.1396, "step": 1250 }, { "epoch": 5.220883534136546, "grad_norm": 4.947805881500244, "learning_rate": 2.3266576730297956e-05, "loss": 0.0937, "step": 1300 }, { "epoch": 5.220883534136546, "eval_f1": 70.17, "eval_gen_len": 42.51411290322581, "eval_loss": 0.8945086002349854, "eval_p": 74.73, "eval_r": 69.95, "eval_runtime": 47.3409, "eval_samples_per_second": 10.477, "eval_steps_per_second": 0.655, "step": 1300 }, { "epoch": 5.421686746987952, "grad_norm": 2.938572883605957, "learning_rate": 2.1697758958957448e-05, "loss": 0.0842, "step": 1350 }, { "epoch": 5.622489959839357, "grad_norm": 7.102436542510986, "learning_rate": 2.014207847797256e-05, "loss": 0.1031, "step": 1400 }, { "epoch": 5.622489959839357, "eval_f1": 71.86, "eval_gen_len": 47.534274193548384, "eval_loss": 0.8877253532409668, "eval_p": 73.18, "eval_r": 74.71, "eval_runtime": 53.5518, "eval_samples_per_second": 9.262, "eval_steps_per_second": 0.579, "step": 1400 }, { "epoch": 5.823293172690763, "grad_norm": 2.2575900554656982, "learning_rate": 1.8605724243850502e-05, "loss": 0.1126, "step": 1450 }, { "epoch": 6.024096385542169, "grad_norm": 3.7854058742523193, "learning_rate": 1.70948083275794e-05, "loss": 0.095, "step": 1500 }, { "epoch": 6.024096385542169, "eval_f1": 70.87, "eval_gen_len": 45.21774193548387, "eval_loss": 0.9038512706756592, "eval_p": 73.83, "eval_r": 72.16, "eval_runtime": 52.1132, "eval_samples_per_second": 9.518, "eval_steps_per_second": 0.595, "step": 1500 }, { "epoch": 6.224899598393574, "grad_norm": 3.2422773838043213, "learning_rate": 1.561534159900441e-05, "loss": 0.0689, "step": 1550 }, { "epoch": 6.42570281124498, "grad_norm": 6.3022990226745605, "learning_rate": 1.4173209813811788e-05, "loss": 0.0764, "step": 1600 }, { "epoch": 6.42570281124498, "eval_f1": 71.12, "eval_gen_len": 46.310483870967744, "eval_loss": 0.9024052619934082, "eval_p": 73.55, "eval_r": 73.2, "eval_runtime": 51.8511, "eval_samples_per_second": 9.566, "eval_steps_per_second": 0.598, "step": 1600 }, { "epoch": 6.626506024096385, "grad_norm": 3.3996074199676514, "learning_rate": 1.277415019825417e-05, "loss": 0.0639, "step": 1650 }, { "epoch": 6.827309236947791, "grad_norm": 1.7633498907089233, "learning_rate": 1.1423728624769695e-05, "loss": 0.0499, "step": 1700 }, { "epoch": 6.827309236947791, "eval_f1": 71.1, "eval_gen_len": 45.52620967741935, "eval_loss": 0.9023270010948181, "eval_p": 73.73, "eval_r": 72.63, "eval_runtime": 48.5153, "eval_samples_per_second": 10.224, "eval_steps_per_second": 0.639, "step": 1700 }, { "epoch": 7.028112449799197, "grad_norm": 0.14504443109035492, "learning_rate": 1.0127317469297277e-05, "loss": 0.0707, "step": 1750 }, { "epoch": 7.228915662650603, "grad_norm": 0.9350752830505371, "learning_rate": 8.890074238378074e-06, "loss": 0.0422, "step": 1800 }, { "epoch": 7.228915662650603, "eval_f1": 71.74, "eval_gen_len": 46.16935483870968, "eval_loss": 0.9106847643852234, "eval_p": 73.84, "eval_r": 73.54, "eval_runtime": 53.7841, "eval_samples_per_second": 9.222, "eval_steps_per_second": 0.576, "step": 1800 }, { "epoch": 7.429718875502008, "grad_norm": 2.7118020057678223, "learning_rate": 7.71692105107098e-06, "loss": 0.0442, "step": 1850 }, { "epoch": 7.6305220883534135, "grad_norm": 3.540323495864868, "learning_rate": 6.612525057308949e-06, "loss": 0.0469, "step": 1900 }, { "epoch": 7.6305220883534135, "eval_f1": 71.51, "eval_gen_len": 45.40120967741935, "eval_loss": 0.9106153845787048, "eval_p": 74.15, "eval_r": 73.09, "eval_runtime": 51.3332, "eval_samples_per_second": 9.662, "eval_steps_per_second": 0.604, "step": 1900 }, { "epoch": 7.831325301204819, "grad_norm": 1.7734179496765137, "learning_rate": 5.581279870597867e-06, "loss": 0.0368, "step": 1950 }, { "epoch": 8.032128514056225, "grad_norm": 2.24417781829834, "learning_rate": 4.627288088924156e-06, "loss": 0.0379, "step": 2000 }, { "epoch": 8.032128514056225, "eval_f1": 70.84, "eval_gen_len": 45.127016129032256, "eval_loss": 0.9157423973083496, "eval_p": 73.79, "eval_r": 72.15, "eval_runtime": 51.0735, "eval_samples_per_second": 9.711, "eval_steps_per_second": 0.607, "step": 2000 }, { "epoch": 8.23293172690763, "grad_norm": 1.954770565032959, "learning_rate": 3.754344973408064e-06, "loss": 0.0335, "step": 2050 }, { "epoch": 8.433734939759036, "grad_norm": 1.5767686367034912, "learning_rate": 2.9659233496337786e-06, "loss": 0.0301, "step": 2100 }, { "epoch": 8.433734939759036, "eval_f1": 71.39, "eval_gen_len": 46.381048387096776, "eval_loss": 0.9240424633026123, "eval_p": 73.77, "eval_r": 73.37, "eval_runtime": 53.2399, "eval_samples_per_second": 9.316, "eval_steps_per_second": 0.582, "step": 2100 }, { "epoch": 8.634538152610443, "grad_norm": 0.2601850628852844, "learning_rate": 2.265159791723373e-06, "loss": 0.0358, "step": 2150 }, { "epoch": 8.835341365461847, "grad_norm": 5.757479667663574, "learning_rate": 1.6548421441183875e-06, "loss": 0.0254, "step": 2200 }, { "epoch": 8.835341365461847, "eval_f1": 71.82, "eval_gen_len": 46.47782258064516, "eval_loss": 0.9292237162590027, "eval_p": 73.8, "eval_r": 74.04, "eval_runtime": 54.2877, "eval_samples_per_second": 9.137, "eval_steps_per_second": 0.571, "step": 2200 } ], "logging_steps": 50, "max_steps": 2490, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 490953080733696.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }