{ "best_global_step": 5000, "best_metric": 0.9754972535633911, "best_model_checkpoint": "/home/skwon01/scratch/afroscope_may/fine_tuned_models/afrolid_mega/checkpoint-5000", "epoch": 25.0, "eval_steps": 5000, "global_step": 7025, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.35587188612099646, "grad_norm": 0.9373229742050171, "learning_rate": 1.9718149466192173e-05, "loss": 5.1026, "step": 100 }, { "epoch": 0.7117437722419929, "grad_norm": 0.9601100087165833, "learning_rate": 1.9433451957295375e-05, "loss": 2.8606, "step": 200 }, { "epoch": 1.0676156583629894, "grad_norm": 0.8142226338386536, "learning_rate": 1.9148754448398578e-05, "loss": 1.6358, "step": 300 }, { "epoch": 1.4234875444839858, "grad_norm": 0.6562248468399048, "learning_rate": 1.886405693950178e-05, "loss": 0.9659, "step": 400 }, { "epoch": 1.7793594306049823, "grad_norm": 0.807991087436676, "learning_rate": 1.8579359430604986e-05, "loss": 0.6199, "step": 500 }, { "epoch": 2.135231316725979, "grad_norm": 0.6347224116325378, "learning_rate": 1.8294661921708185e-05, "loss": 0.4351, "step": 600 }, { "epoch": 2.491103202846975, "grad_norm": 0.7022924423217773, "learning_rate": 1.800996441281139e-05, "loss": 0.3276, "step": 700 }, { "epoch": 2.8469750889679717, "grad_norm": 0.7419522404670715, "learning_rate": 1.7725266903914593e-05, "loss": 0.2658, "step": 800 }, { "epoch": 3.202846975088968, "grad_norm": 0.6704352498054504, "learning_rate": 1.7440569395017795e-05, "loss": 0.2184, "step": 900 }, { "epoch": 3.5587188612099645, "grad_norm": 0.7129318714141846, "learning_rate": 1.7155871886120997e-05, "loss": 0.187, "step": 1000 }, { "epoch": 3.914590747330961, "grad_norm": 0.7012542486190796, "learning_rate": 1.68711743772242e-05, "loss": 0.166, "step": 1100 }, { "epoch": 4.270462633451958, "grad_norm": 0.5381819009780884, "learning_rate": 1.6586476868327405e-05, "loss": 0.1438, "step": 1200 }, { "epoch": 4.6263345195729535, "grad_norm": 0.6322551965713501, "learning_rate": 1.6301779359430608e-05, "loss": 0.1307, "step": 1300 }, { "epoch": 4.98220640569395, "grad_norm": 0.598932147026062, "learning_rate": 1.601708185053381e-05, "loss": 0.1201, "step": 1400 }, { "epoch": 5.338078291814947, "grad_norm": 0.5444994568824768, "learning_rate": 1.5732384341637012e-05, "loss": 0.1057, "step": 1500 }, { "epoch": 5.693950177935943, "grad_norm": 0.5283308029174805, "learning_rate": 1.5447686832740214e-05, "loss": 0.1001, "step": 1600 }, { "epoch": 6.049822064056939, "grad_norm": 0.5698280930519104, "learning_rate": 1.5162989323843418e-05, "loss": 0.0933, "step": 1700 }, { "epoch": 6.405693950177936, "grad_norm": 0.5436670780181885, "learning_rate": 1.4878291814946619e-05, "loss": 0.0826, "step": 1800 }, { "epoch": 6.761565836298932, "grad_norm": 0.4918835461139679, "learning_rate": 1.4593594306049823e-05, "loss": 0.0808, "step": 1900 }, { "epoch": 7.117437722419929, "grad_norm": 0.425703763961792, "learning_rate": 1.4308896797153027e-05, "loss": 0.0757, "step": 2000 }, { "epoch": 7.473309608540926, "grad_norm": 0.5279386043548584, "learning_rate": 1.4024199288256228e-05, "loss": 0.0689, "step": 2100 }, { "epoch": 7.829181494661921, "grad_norm": 0.5598633289337158, "learning_rate": 1.3739501779359432e-05, "loss": 0.0666, "step": 2200 }, { "epoch": 8.185053380782918, "grad_norm": 0.4975087642669678, "learning_rate": 1.3454804270462634e-05, "loss": 0.0621, "step": 2300 }, { "epoch": 8.540925266903916, "grad_norm": 0.4781990051269531, "learning_rate": 1.3170106761565838e-05, "loss": 0.0588, "step": 2400 }, { "epoch": 8.896797153024911, "grad_norm": 0.5004540681838989, "learning_rate": 1.288540925266904e-05, "loss": 0.0569, "step": 2500 }, { "epoch": 9.252669039145907, "grad_norm": 0.494505912065506, "learning_rate": 1.2600711743772243e-05, "loss": 0.0519, "step": 2600 }, { "epoch": 9.608540925266905, "grad_norm": 0.47906264662742615, "learning_rate": 1.2316014234875447e-05, "loss": 0.0503, "step": 2700 }, { "epoch": 9.9644128113879, "grad_norm": 0.507278323173523, "learning_rate": 1.2031316725978647e-05, "loss": 0.0497, "step": 2800 }, { "epoch": 10.320284697508896, "grad_norm": 0.5136927366256714, "learning_rate": 1.1746619217081851e-05, "loss": 0.0456, "step": 2900 }, { "epoch": 10.676156583629894, "grad_norm": 0.479863703250885, "learning_rate": 1.1461921708185055e-05, "loss": 0.0436, "step": 3000 }, { "epoch": 11.03202846975089, "grad_norm": 0.4625159502029419, "learning_rate": 1.1177224199288256e-05, "loss": 0.0437, "step": 3100 }, { "epoch": 11.387900355871887, "grad_norm": 0.43024319410324097, "learning_rate": 1.089252669039146e-05, "loss": 0.0395, "step": 3200 }, { "epoch": 11.743772241992882, "grad_norm": 0.5366887450218201, "learning_rate": 1.0607829181494662e-05, "loss": 0.0388, "step": 3300 }, { "epoch": 12.099644128113878, "grad_norm": 0.41900748014450073, "learning_rate": 1.0323131672597866e-05, "loss": 0.0379, "step": 3400 }, { "epoch": 12.455516014234876, "grad_norm": 0.5012409090995789, "learning_rate": 1.0038434163701067e-05, "loss": 0.0357, "step": 3500 }, { "epoch": 12.811387900355871, "grad_norm": 0.4979284405708313, "learning_rate": 9.753736654804271e-06, "loss": 0.0347, "step": 3600 }, { "epoch": 13.167259786476869, "grad_norm": 0.39561697840690613, "learning_rate": 9.469039145907473e-06, "loss": 0.0332, "step": 3700 }, { "epoch": 13.523131672597865, "grad_norm": 0.46909043192863464, "learning_rate": 9.184341637010676e-06, "loss": 0.032, "step": 3800 }, { "epoch": 13.87900355871886, "grad_norm": 0.4095679521560669, "learning_rate": 8.89964412811388e-06, "loss": 0.0319, "step": 3900 }, { "epoch": 14.234875444839858, "grad_norm": 0.45537084341049194, "learning_rate": 8.614946619217082e-06, "loss": 0.0301, "step": 4000 }, { "epoch": 14.590747330960854, "grad_norm": 0.4314133822917938, "learning_rate": 8.330249110320286e-06, "loss": 0.0291, "step": 4100 }, { "epoch": 14.946619217081851, "grad_norm": 0.388823002576828, "learning_rate": 8.045551601423488e-06, "loss": 0.0286, "step": 4200 }, { "epoch": 15.302491103202847, "grad_norm": 0.45969030261039734, "learning_rate": 7.76085409252669e-06, "loss": 0.0268, "step": 4300 }, { "epoch": 15.658362989323843, "grad_norm": 0.383735328912735, "learning_rate": 7.476156583629895e-06, "loss": 0.0266, "step": 4400 }, { "epoch": 16.01423487544484, "grad_norm": 0.43427741527557373, "learning_rate": 7.191459074733097e-06, "loss": 0.0265, "step": 4500 }, { "epoch": 16.370106761565836, "grad_norm": 0.3857556879520416, "learning_rate": 6.906761565836299e-06, "loss": 0.0243, "step": 4600 }, { "epoch": 16.725978647686834, "grad_norm": 0.41817349195480347, "learning_rate": 6.622064056939502e-06, "loss": 0.0246, "step": 4700 }, { "epoch": 17.081850533807827, "grad_norm": 0.44656312465667725, "learning_rate": 6.337366548042705e-06, "loss": 0.024, "step": 4800 }, { "epoch": 17.437722419928825, "grad_norm": 0.38422083854675293, "learning_rate": 6.052669039145908e-06, "loss": 0.0227, "step": 4900 }, { "epoch": 17.793594306049823, "grad_norm": 0.36347660422325134, "learning_rate": 5.767971530249111e-06, "loss": 0.0227, "step": 5000 }, { "epoch": 17.793594306049823, "eval_f1": 0.9754972535633911, "eval_loss": 0.088624507188797, "eval_runtime": 53.192, "eval_samples_per_second": 4372.145, "eval_steps_per_second": 136.637, "step": 5000 }, { "epoch": 18.14946619217082, "grad_norm": 0.3471202254295349, "learning_rate": 5.483274021352314e-06, "loss": 0.0219, "step": 5100 }, { "epoch": 18.505338078291814, "grad_norm": 0.4116271436214447, "learning_rate": 5.1985765124555165e-06, "loss": 0.0215, "step": 5200 }, { "epoch": 18.86120996441281, "grad_norm": 0.3756101727485657, "learning_rate": 4.913879003558719e-06, "loss": 0.0216, "step": 5300 }, { "epoch": 19.21708185053381, "grad_norm": 0.3369296193122864, "learning_rate": 4.629181494661922e-06, "loss": 0.0206, "step": 5400 }, { "epoch": 19.572953736654803, "grad_norm": 0.41810908913612366, "learning_rate": 4.344483985765125e-06, "loss": 0.0201, "step": 5500 }, { "epoch": 19.9288256227758, "grad_norm": 0.45450907945632935, "learning_rate": 4.0597864768683275e-06, "loss": 0.0202, "step": 5600 }, { "epoch": 20.284697508896798, "grad_norm": 0.3766241669654846, "learning_rate": 3.7750889679715307e-06, "loss": 0.0194, "step": 5700 }, { "epoch": 20.640569395017792, "grad_norm": 0.3532281816005707, "learning_rate": 3.4903914590747334e-06, "loss": 0.0192, "step": 5800 }, { "epoch": 20.99644128113879, "grad_norm": 0.34376078844070435, "learning_rate": 3.205693950177936e-06, "loss": 0.0189, "step": 5900 }, { "epoch": 21.352313167259787, "grad_norm": 0.38847091794013977, "learning_rate": 2.9209964412811393e-06, "loss": 0.0184, "step": 6000 }, { "epoch": 21.708185053380785, "grad_norm": 0.33618295192718506, "learning_rate": 2.636298932384342e-06, "loss": 0.0185, "step": 6100 }, { "epoch": 22.06405693950178, "grad_norm": 0.34742456674575806, "learning_rate": 2.351601423487545e-06, "loss": 0.018, "step": 6200 }, { "epoch": 22.419928825622776, "grad_norm": 0.28954410552978516, "learning_rate": 2.0669039145907475e-06, "loss": 0.0177, "step": 6300 }, { "epoch": 22.775800711743774, "grad_norm": 0.3540429174900055, "learning_rate": 1.7822064056939503e-06, "loss": 0.0173, "step": 6400 }, { "epoch": 23.131672597864767, "grad_norm": 0.2977263033390045, "learning_rate": 1.4975088967971532e-06, "loss": 0.0175, "step": 6500 }, { "epoch": 23.487544483985765, "grad_norm": 0.3293995261192322, "learning_rate": 1.212811387900356e-06, "loss": 0.0168, "step": 6600 }, { "epoch": 23.843416370106763, "grad_norm": 0.33407387137413025, "learning_rate": 9.281138790035587e-07, "loss": 0.0169, "step": 6700 }, { "epoch": 24.199288256227756, "grad_norm": 0.27643245458602905, "learning_rate": 6.434163701067617e-07, "loss": 0.0166, "step": 6800 }, { "epoch": 24.555160142348754, "grad_norm": 0.3567065894603729, "learning_rate": 3.5871886120996446e-07, "loss": 0.0168, "step": 6900 }, { "epoch": 24.91103202846975, "grad_norm": 0.34183645248413086, "learning_rate": 7.402135231316726e-08, "loss": 0.0164, "step": 7000 }, { "epoch": 25.0, "step": 7025, "total_flos": 3.806537758910972e+18, "train_loss": 0.21727693550527308, "train_runtime": 4570.4626, "train_samples_per_second": 12590.799, "train_steps_per_second": 1.537 } ], "logging_steps": 100, "max_steps": 7025, "num_input_tokens_seen": 0, "num_train_epochs": 25, "save_steps": 5000, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.806537758910972e+18, "train_batch_size": 512, "trial_name": null, "trial_params": null }