| { | |
| "best_global_step": 900, | |
| "best_metric": 0.5792015790939331, | |
| "best_model_checkpoint": "/content/drive/MyDrive/Colab Notebooks/AITF_baseCPT_V1.2/checkpoint-900", | |
| "epoch": 1.0, | |
| "eval_steps": 100, | |
| "global_step": 929, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.021531422419593594, | |
| "grad_norm": 0.16254645586013794, | |
| "learning_rate": 7.6e-05, | |
| "loss": 0.8647, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.04306284483918719, | |
| "grad_norm": 0.24263904988765717, | |
| "learning_rate": 0.00015600000000000002, | |
| "loss": 0.8267, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.06459426725878079, | |
| "grad_norm": 0.25156837701797485, | |
| "learning_rate": 0.0001999482703462211, | |
| "loss": 0.7922, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.08612568967837438, | |
| "grad_norm": 0.23544861376285553, | |
| "learning_rate": 0.00019946334007549978, | |
| "loss": 0.8013, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.10765711209796797, | |
| "grad_norm": 0.26032447814941406, | |
| "learning_rate": 0.0001984704140331751, | |
| "loss": 0.7578, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.10765711209796797, | |
| "eval_loss": 0.7207502126693726, | |
| "eval_runtime": 369.3192, | |
| "eval_samples_per_second": 10.075, | |
| "eval_steps_per_second": 2.521, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.12918853451756157, | |
| "grad_norm": 0.2662807106971741, | |
| "learning_rate": 0.0001969745634568572, | |
| "loss": 0.7343, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.15071995693715515, | |
| "grad_norm": 0.2714097499847412, | |
| "learning_rate": 0.00019498342820427794, | |
| "loss": 0.7366, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.17225137935674875, | |
| "grad_norm": 0.2746363580226898, | |
| "learning_rate": 0.00019250717773373462, | |
| "loss": 0.7002, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.19378280177634236, | |
| "grad_norm": 0.28092408180236816, | |
| "learning_rate": 0.0001895584591649349, | |
| "loss": 0.7018, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.21531422419593593, | |
| "grad_norm": 0.2575222849845886, | |
| "learning_rate": 0.00018615233268551643, | |
| "loss": 0.6814, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.21531422419593593, | |
| "eval_loss": 0.6637697219848633, | |
| "eval_runtime": 369.0562, | |
| "eval_samples_per_second": 10.082, | |
| "eval_steps_per_second": 2.523, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.23684564661552954, | |
| "grad_norm": 0.2600899636745453, | |
| "learning_rate": 0.00018230619463314266, | |
| "loss": 0.6969, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.25837706903512314, | |
| "grad_norm": 0.2799288332462311, | |
| "learning_rate": 0.0001780396886460237, | |
| "loss": 0.6711, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.27990849145471675, | |
| "grad_norm": 0.29837536811828613, | |
| "learning_rate": 0.00017337460533564845, | |
| "loss": 0.6669, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.3014399138743103, | |
| "grad_norm": 0.2809407114982605, | |
| "learning_rate": 0.0001683347709941367, | |
| "loss": 0.647, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.3229713362939039, | |
| "grad_norm": 0.2516048550605774, | |
| "learning_rate": 0.00016294592590462316, | |
| "loss": 0.6595, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.3229713362939039, | |
| "eval_loss": 0.636076033115387, | |
| "eval_runtime": 369.0513, | |
| "eval_samples_per_second": 10.083, | |
| "eval_steps_per_second": 2.523, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.3445027587134975, | |
| "grad_norm": 0.27277883887290955, | |
| "learning_rate": 0.00015723559287618728, | |
| "loss": 0.6409, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.3660341811330911, | |
| "grad_norm": 0.2745457887649536, | |
| "learning_rate": 0.00015123293667476887, | |
| "loss": 0.6455, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.3875656035526847, | |
| "grad_norm": 0.245443657040596, | |
| "learning_rate": 0.00014496861506800758, | |
| "loss": 0.6478, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.4090970259722783, | |
| "grad_norm": 0.27047595381736755, | |
| "learning_rate": 0.00013847462224477538, | |
| "loss": 0.6428, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.43062844839187187, | |
| "grad_norm": 0.30521273612976074, | |
| "learning_rate": 0.00013178412540911457, | |
| "loss": 0.6405, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.43062844839187187, | |
| "eval_loss": 0.6157485246658325, | |
| "eval_runtime": 369.0589, | |
| "eval_samples_per_second": 10.082, | |
| "eval_steps_per_second": 2.523, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.45215987081146547, | |
| "grad_norm": 0.27309557795524597, | |
| "learning_rate": 0.00012493129538315788, | |
| "loss": 0.6339, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.4736912932310591, | |
| "grad_norm": 0.2948486804962158, | |
| "learning_rate": 0.00011795113208420208, | |
| "loss": 0.6236, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.4952227156506527, | |
| "grad_norm": 0.30423271656036377, | |
| "learning_rate": 0.00011087928576728865, | |
| "loss": 0.5999, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.5167541380702463, | |
| "grad_norm": 0.31128746271133423, | |
| "learning_rate": 0.00010375187494627098, | |
| "loss": 0.6202, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.5382855604898399, | |
| "grad_norm": 0.2759592533111572, | |
| "learning_rate": 9.660530192331191e-05, | |
| "loss": 0.6207, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.5382855604898399, | |
| "eval_loss": 0.6023540496826172, | |
| "eval_runtime": 368.8563, | |
| "eval_samples_per_second": 10.088, | |
| "eval_steps_per_second": 2.524, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.5598169829094335, | |
| "grad_norm": 0.2957970201969147, | |
| "learning_rate": 8.947606686897045e-05, | |
| "loss": 0.6128, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.5813484053290271, | |
| "grad_norm": 0.285124808549881, | |
| "learning_rate": 8.240058140243834e-05, | |
| "loss": 0.6164, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.6028798277486206, | |
| "grad_norm": 0.28131911158561707, | |
| "learning_rate": 7.541498262404125e-05, | |
| "loss": 0.6132, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.6244112501682142, | |
| "grad_norm": 0.28777876496315, | |
| "learning_rate": 6.855494854980857e-05, | |
| "loss": 0.6137, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.6459426725878078, | |
| "grad_norm": 0.2806089222431183, | |
| "learning_rate": 6.185551589075482e-05, | |
| "loss": 0.6047, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.6459426725878078, | |
| "eval_loss": 0.5914610624313354, | |
| "eval_runtime": 369.201, | |
| "eval_samples_per_second": 10.079, | |
| "eval_steps_per_second": 2.522, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.6674740950074014, | |
| "grad_norm": 0.2653355598449707, | |
| "learning_rate": 5.535090110754131e-05, | |
| "loss": 0.6166, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.689005517426995, | |
| "grad_norm": 0.2984977960586548, | |
| "learning_rate": 4.9074325654457446e-05, | |
| "loss": 0.5918, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.7105369398465886, | |
| "grad_norm": 0.3091660141944885, | |
| "learning_rate": 4.305784630526416e-05, | |
| "loss": 0.589, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.7320683622661822, | |
| "grad_norm": 0.28470903635025024, | |
| "learning_rate": 3.7332191427488784e-05, | |
| "loss": 0.6055, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.7535997846857758, | |
| "grad_norm": 0.3136354088783264, | |
| "learning_rate": 3.192660404137729e-05, | |
| "loss": 0.5873, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.7535997846857758, | |
| "eval_loss": 0.5847632884979248, | |
| "eval_runtime": 368.982, | |
| "eval_samples_per_second": 10.085, | |
| "eval_steps_per_second": 2.523, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.7751312071053694, | |
| "grad_norm": 0.290301650762558, | |
| "learning_rate": 2.6868692465060828e-05, | |
| "loss": 0.6044, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.796662629524963, | |
| "grad_norm": 0.28396815061569214, | |
| "learning_rate": 2.2184289308744844e-05, | |
| "loss": 0.5994, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.8181940519445566, | |
| "grad_norm": 0.28993701934814453, | |
| "learning_rate": 1.7897319538090962e-05, | |
| "loss": 0.5965, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.8397254743641501, | |
| "grad_norm": 0.31562483310699463, | |
| "learning_rate": 1.402967828063897e-05, | |
| "loss": 0.5828, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.8612568967837437, | |
| "grad_norm": 0.27920928597450256, | |
| "learning_rate": 1.0601118999356907e-05, | |
| "loss": 0.5896, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.8612568967837437, | |
| "eval_loss": 0.5801649689674377, | |
| "eval_runtime": 369.5001, | |
| "eval_samples_per_second": 10.07, | |
| "eval_steps_per_second": 2.52, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.8827883192033373, | |
| "grad_norm": 0.3463917076587677, | |
| "learning_rate": 7.629152604458156e-06, | |
| "loss": 0.5849, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.9043197416229309, | |
| "grad_norm": 0.2968030273914337, | |
| "learning_rate": 5.128958018758012e-06, | |
| "loss": 0.5733, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.9258511640425245, | |
| "grad_norm": 0.3239639699459076, | |
| "learning_rate": 3.1133046533455947e-06, | |
| "loss": 0.5851, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.9473825864621181, | |
| "grad_norm": 0.27936938405036926, | |
| "learning_rate": 1.592487189516212e-06, | |
| "loss": 0.5851, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.9689140088817118, | |
| "grad_norm": 0.3098885715007782, | |
| "learning_rate": 5.742730000568908e-07, | |
| "loss": 0.5968, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.9689140088817118, | |
| "eval_loss": 0.5792015790939331, | |
| "eval_runtime": 370.9508, | |
| "eval_samples_per_second": 10.031, | |
| "eval_steps_per_second": 2.51, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.9904454313013054, | |
| "grad_norm": 0.33122149109840393, | |
| "learning_rate": 6.386247842353754e-08, | |
| "loss": 0.5844, | |
| "step": 920 | |
| } | |
| ], | |
| "logging_steps": 20, | |
| "max_steps": 929, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.2627950806210068e+18, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |