{ "best_global_step": 900, "best_metric": 0.5792015790939331, "best_model_checkpoint": "/content/drive/MyDrive/Colab Notebooks/AITF_baseCPT_V1.2/checkpoint-900", "epoch": 1.0, "eval_steps": 100, "global_step": 929, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.021531422419593594, "grad_norm": 0.16254645586013794, "learning_rate": 7.6e-05, "loss": 0.8647, "step": 20 }, { "epoch": 0.04306284483918719, "grad_norm": 0.24263904988765717, "learning_rate": 0.00015600000000000002, "loss": 0.8267, "step": 40 }, { "epoch": 0.06459426725878079, "grad_norm": 0.25156837701797485, "learning_rate": 0.0001999482703462211, "loss": 0.7922, "step": 60 }, { "epoch": 0.08612568967837438, "grad_norm": 0.23544861376285553, "learning_rate": 0.00019946334007549978, "loss": 0.8013, "step": 80 }, { "epoch": 0.10765711209796797, "grad_norm": 0.26032447814941406, "learning_rate": 0.0001984704140331751, "loss": 0.7578, "step": 100 }, { "epoch": 0.10765711209796797, "eval_loss": 0.7207502126693726, "eval_runtime": 369.3192, "eval_samples_per_second": 10.075, "eval_steps_per_second": 2.521, "step": 100 }, { "epoch": 0.12918853451756157, "grad_norm": 0.2662807106971741, "learning_rate": 0.0001969745634568572, "loss": 0.7343, "step": 120 }, { "epoch": 0.15071995693715515, "grad_norm": 0.2714097499847412, "learning_rate": 0.00019498342820427794, "loss": 0.7366, "step": 140 }, { "epoch": 0.17225137935674875, "grad_norm": 0.2746363580226898, "learning_rate": 0.00019250717773373462, "loss": 0.7002, "step": 160 }, { "epoch": 0.19378280177634236, "grad_norm": 0.28092408180236816, "learning_rate": 0.0001895584591649349, "loss": 0.7018, "step": 180 }, { "epoch": 0.21531422419593593, "grad_norm": 0.2575222849845886, "learning_rate": 0.00018615233268551643, "loss": 0.6814, "step": 200 }, { "epoch": 0.21531422419593593, "eval_loss": 0.6637697219848633, "eval_runtime": 369.0562, "eval_samples_per_second": 10.082, "eval_steps_per_second": 2.523, "step": 200 }, { "epoch": 0.23684564661552954, "grad_norm": 0.2600899636745453, "learning_rate": 0.00018230619463314266, "loss": 0.6969, "step": 220 }, { "epoch": 0.25837706903512314, "grad_norm": 0.2799288332462311, "learning_rate": 0.0001780396886460237, "loss": 0.6711, "step": 240 }, { "epoch": 0.27990849145471675, "grad_norm": 0.29837536811828613, "learning_rate": 0.00017337460533564845, "loss": 0.6669, "step": 260 }, { "epoch": 0.3014399138743103, "grad_norm": 0.2809407114982605, "learning_rate": 0.0001683347709941367, "loss": 0.647, "step": 280 }, { "epoch": 0.3229713362939039, "grad_norm": 0.2516048550605774, "learning_rate": 0.00016294592590462316, "loss": 0.6595, "step": 300 }, { "epoch": 0.3229713362939039, "eval_loss": 0.636076033115387, "eval_runtime": 369.0513, "eval_samples_per_second": 10.083, "eval_steps_per_second": 2.523, "step": 300 }, { "epoch": 0.3445027587134975, "grad_norm": 0.27277883887290955, "learning_rate": 0.00015723559287618728, "loss": 0.6409, "step": 320 }, { "epoch": 0.3660341811330911, "grad_norm": 0.2745457887649536, "learning_rate": 0.00015123293667476887, "loss": 0.6455, "step": 340 }, { "epoch": 0.3875656035526847, "grad_norm": 0.245443657040596, "learning_rate": 0.00014496861506800758, "loss": 0.6478, "step": 360 }, { "epoch": 0.4090970259722783, "grad_norm": 0.27047595381736755, "learning_rate": 0.00013847462224477538, "loss": 0.6428, "step": 380 }, { "epoch": 0.43062844839187187, "grad_norm": 0.30521273612976074, "learning_rate": 0.00013178412540911457, "loss": 0.6405, "step": 400 }, { "epoch": 0.43062844839187187, "eval_loss": 0.6157485246658325, "eval_runtime": 369.0589, "eval_samples_per_second": 10.082, "eval_steps_per_second": 2.523, "step": 400 }, { "epoch": 0.45215987081146547, "grad_norm": 0.27309557795524597, "learning_rate": 0.00012493129538315788, "loss": 0.6339, "step": 420 }, { "epoch": 0.4736912932310591, "grad_norm": 0.2948486804962158, "learning_rate": 0.00011795113208420208, "loss": 0.6236, "step": 440 }, { "epoch": 0.4952227156506527, "grad_norm": 0.30423271656036377, "learning_rate": 0.00011087928576728865, "loss": 0.5999, "step": 460 }, { "epoch": 0.5167541380702463, "grad_norm": 0.31128746271133423, "learning_rate": 0.00010375187494627098, "loss": 0.6202, "step": 480 }, { "epoch": 0.5382855604898399, "grad_norm": 0.2759592533111572, "learning_rate": 9.660530192331191e-05, "loss": 0.6207, "step": 500 }, { "epoch": 0.5382855604898399, "eval_loss": 0.6023540496826172, "eval_runtime": 368.8563, "eval_samples_per_second": 10.088, "eval_steps_per_second": 2.524, "step": 500 }, { "epoch": 0.5598169829094335, "grad_norm": 0.2957970201969147, "learning_rate": 8.947606686897045e-05, "loss": 0.6128, "step": 520 }, { "epoch": 0.5813484053290271, "grad_norm": 0.285124808549881, "learning_rate": 8.240058140243834e-05, "loss": 0.6164, "step": 540 }, { "epoch": 0.6028798277486206, "grad_norm": 0.28131911158561707, "learning_rate": 7.541498262404125e-05, "loss": 0.6132, "step": 560 }, { "epoch": 0.6244112501682142, "grad_norm": 0.28777876496315, "learning_rate": 6.855494854980857e-05, "loss": 0.6137, "step": 580 }, { "epoch": 0.6459426725878078, "grad_norm": 0.2806089222431183, "learning_rate": 6.185551589075482e-05, "loss": 0.6047, "step": 600 }, { "epoch": 0.6459426725878078, "eval_loss": 0.5914610624313354, "eval_runtime": 369.201, "eval_samples_per_second": 10.079, "eval_steps_per_second": 2.522, "step": 600 }, { "epoch": 0.6674740950074014, "grad_norm": 0.2653355598449707, "learning_rate": 5.535090110754131e-05, "loss": 0.6166, "step": 620 }, { "epoch": 0.689005517426995, "grad_norm": 0.2984977960586548, "learning_rate": 4.9074325654457446e-05, "loss": 0.5918, "step": 640 }, { "epoch": 0.7105369398465886, "grad_norm": 0.3091660141944885, "learning_rate": 4.305784630526416e-05, "loss": 0.589, "step": 660 }, { "epoch": 0.7320683622661822, "grad_norm": 0.28470903635025024, "learning_rate": 3.7332191427488784e-05, "loss": 0.6055, "step": 680 }, { "epoch": 0.7535997846857758, "grad_norm": 0.3136354088783264, "learning_rate": 3.192660404137729e-05, "loss": 0.5873, "step": 700 }, { "epoch": 0.7535997846857758, "eval_loss": 0.5847632884979248, "eval_runtime": 368.982, "eval_samples_per_second": 10.085, "eval_steps_per_second": 2.523, "step": 700 }, { "epoch": 0.7751312071053694, "grad_norm": 0.290301650762558, "learning_rate": 2.6868692465060828e-05, "loss": 0.6044, "step": 720 }, { "epoch": 0.796662629524963, "grad_norm": 0.28396815061569214, "learning_rate": 2.2184289308744844e-05, "loss": 0.5994, "step": 740 }, { "epoch": 0.8181940519445566, "grad_norm": 0.28993701934814453, "learning_rate": 1.7897319538090962e-05, "loss": 0.5965, "step": 760 }, { "epoch": 0.8397254743641501, "grad_norm": 0.31562483310699463, "learning_rate": 1.402967828063897e-05, "loss": 0.5828, "step": 780 }, { "epoch": 0.8612568967837437, "grad_norm": 0.27920928597450256, "learning_rate": 1.0601118999356907e-05, "loss": 0.5896, "step": 800 }, { "epoch": 0.8612568967837437, "eval_loss": 0.5801649689674377, "eval_runtime": 369.5001, "eval_samples_per_second": 10.07, "eval_steps_per_second": 2.52, "step": 800 }, { "epoch": 0.8827883192033373, "grad_norm": 0.3463917076587677, "learning_rate": 7.629152604458156e-06, "loss": 0.5849, "step": 820 }, { "epoch": 0.9043197416229309, "grad_norm": 0.2968030273914337, "learning_rate": 5.128958018758012e-06, "loss": 0.5733, "step": 840 }, { "epoch": 0.9258511640425245, "grad_norm": 0.3239639699459076, "learning_rate": 3.1133046533455947e-06, "loss": 0.5851, "step": 860 }, { "epoch": 0.9473825864621181, "grad_norm": 0.27936938405036926, "learning_rate": 1.592487189516212e-06, "loss": 0.5851, "step": 880 }, { "epoch": 0.9689140088817118, "grad_norm": 0.3098885715007782, "learning_rate": 5.742730000568908e-07, "loss": 0.5968, "step": 900 }, { "epoch": 0.9689140088817118, "eval_loss": 0.5792015790939331, "eval_runtime": 370.9508, "eval_samples_per_second": 10.031, "eval_steps_per_second": 2.51, "step": 900 }, { "epoch": 0.9904454313013054, "grad_norm": 0.33122149109840393, "learning_rate": 6.386247842353754e-08, "loss": 0.5844, "step": 920 } ], "logging_steps": 20, "max_steps": 929, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2627950806210068e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }