{
  "best_metric": 0.5498164296150208,
  "best_model_checkpoint": "ck3-localization/checkpoint-36492",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 36492,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.041104899704044726,
      "grad_norm": 6.12867546081543,
      "learning_rate": 2.7397260273972604e-06,
      "loss": 2.3513,
      "step": 500
    },
    {
      "epoch": 0.08220979940808945,
      "grad_norm": 6.480109691619873,
      "learning_rate": 5.479452054794521e-06,
      "loss": 1.4689,
      "step": 1000
    },
    {
      "epoch": 0.12331469911213416,
      "grad_norm": 4.740694046020508,
      "learning_rate": 8.219178082191782e-06,
      "loss": 1.19,
      "step": 1500
    },
    {
      "epoch": 0.1644195988161789,
      "grad_norm": 5.501825332641602,
      "learning_rate": 1.0958904109589042e-05,
      "loss": 1.0318,
      "step": 2000
    },
    {
      "epoch": 0.20552449852022361,
      "grad_norm": 5.322640419006348,
      "learning_rate": 1.3698630136986302e-05,
      "loss": 0.9556,
      "step": 2500
    },
    {
      "epoch": 0.24662939822426833,
      "grad_norm": 5.773024559020996,
      "learning_rate": 1.6438356164383563e-05,
      "loss": 0.9137,
      "step": 3000
    },
    {
      "epoch": 0.28773429792831307,
      "grad_norm": 4.7130656242370605,
      "learning_rate": 1.9178082191780822e-05,
      "loss": 0.8705,
      "step": 3500
    },
    {
      "epoch": 0.3288391976323578,
      "grad_norm": 3.756901741027832,
      "learning_rate": 1.9786858291212475e-05,
      "loss": 0.8531,
      "step": 4000
    },
    {
      "epoch": 0.3699440973364025,
      "grad_norm": 4.033214092254639,
      "learning_rate": 1.948237013580172e-05,
      "loss": 0.8138,
      "step": 4500
    },
    {
      "epoch": 0.41104899704044723,
      "grad_norm": 4.564835071563721,
      "learning_rate": 1.9177881980390966e-05,
      "loss": 0.7946,
      "step": 5000
    },
    {
      "epoch": 0.45215389674449197,
      "grad_norm": 4.850255012512207,
      "learning_rate": 1.887339382498021e-05,
      "loss": 0.7833,
      "step": 5500
    },
    {
      "epoch": 0.49325879644853665,
      "grad_norm": 4.716800689697266,
      "learning_rate": 1.8568905669569456e-05,
      "loss": 0.7662,
      "step": 6000
    },
    {
      "epoch": 0.5343636961525814,
      "grad_norm": 4.0613274574279785,
      "learning_rate": 1.82644175141587e-05,
      "loss": 0.7668,
      "step": 6500
    },
    {
      "epoch": 0.5754685958566261,
      "grad_norm": 4.6606831550598145,
      "learning_rate": 1.7959929358747943e-05,
      "loss": 0.7518,
      "step": 7000
    },
    {
      "epoch": 0.6165734955606709,
      "grad_norm": 4.745144844055176,
      "learning_rate": 1.765544120333719e-05,
      "loss": 0.7477,
      "step": 7500
    },
    {
      "epoch": 0.6576783952647156,
      "grad_norm": 4.414773464202881,
      "learning_rate": 1.7350953047926437e-05,
      "loss": 0.751,
      "step": 8000
    },
    {
      "epoch": 0.6987832949687602,
      "grad_norm": 4.531556606292725,
      "learning_rate": 1.704646489251568e-05,
      "loss": 0.7424,
      "step": 8500
    },
    {
      "epoch": 0.739888194672805,
      "grad_norm": 4.841054439544678,
      "learning_rate": 1.6741976737104928e-05,
      "loss": 0.7328,
      "step": 9000
    },
    {
      "epoch": 0.7809930943768497,
      "grad_norm": 4.302633762359619,
      "learning_rate": 1.6437488581694175e-05,
      "loss": 0.725,
      "step": 9500
    },
    {
      "epoch": 0.8220979940808945,
      "grad_norm": 3.530836343765259,
      "learning_rate": 1.6133000426283418e-05,
      "loss": 0.7072,
      "step": 10000
    },
    {
      "epoch": 0.8632028937849392,
      "grad_norm": 4.2016191482543945,
      "learning_rate": 1.5828512270872665e-05,
      "loss": 0.6895,
      "step": 10500
    },
    {
      "epoch": 0.9043077934889839,
      "grad_norm": 3.463226079940796,
      "learning_rate": 1.5524024115461912e-05,
      "loss": 0.7093,
      "step": 11000
    },
    {
      "epoch": 0.9454126931930286,
      "grad_norm": 3.3261220455169678,
      "learning_rate": 1.5219535960051155e-05,
      "loss": 0.6941,
      "step": 11500
    },
    {
      "epoch": 0.9865175928970733,
      "grad_norm": 3.778331756591797,
      "learning_rate": 1.49150478046404e-05,
      "loss": 0.7008,
      "step": 12000
    },
    {
      "epoch": 1.0,
      "eval_gen_len": 33.8514,
      "eval_loss": 0.6079365611076355,
      "eval_rouge1": 31.9733,
      "eval_rouge2": 27.2686,
      "eval_rougeL": 31.3526,
      "eval_rougeLsum": 31.4543,
      "eval_runtime": 1591.2259,
      "eval_samples_per_second": 13.59,
      "eval_steps_per_second": 0.425,
      "step": 12164
    },
    {
      "epoch": 1.0276224926011182,
      "grad_norm": 3.7499120235443115,
      "learning_rate": 1.4610559649229646e-05,
      "loss": 0.6284,
      "step": 12500
    },
    {
      "epoch": 1.0687273923051628,
      "grad_norm": 3.52150821685791,
      "learning_rate": 1.4306071493818891e-05,
      "loss": 0.6337,
      "step": 13000
    },
    {
      "epoch": 1.1098322920092074,
      "grad_norm": 4.520621299743652,
      "learning_rate": 1.4001583338408138e-05,
      "loss": 0.6292,
      "step": 13500
    },
    {
      "epoch": 1.1509371917132523,
      "grad_norm": 4.275709629058838,
      "learning_rate": 1.3697095182997382e-05,
      "loss": 0.6195,
      "step": 14000
    },
    {
      "epoch": 1.192042091417297,
      "grad_norm": 3.500743865966797,
      "learning_rate": 1.3392607027586628e-05,
      "loss": 0.6168,
      "step": 14500
    },
    {
      "epoch": 1.2331469911213417,
      "grad_norm": 2.587315320968628,
      "learning_rate": 1.3088118872175875e-05,
      "loss": 0.6169,
      "step": 15000
    },
    {
      "epoch": 1.2742518908253864,
      "grad_norm": 3.4539663791656494,
      "learning_rate": 1.2783630716765119e-05,
      "loss": 0.619,
      "step": 15500
    },
    {
      "epoch": 1.3153567905294312,
      "grad_norm": 3.759758710861206,
      "learning_rate": 1.2479142561354364e-05,
      "loss": 0.612,
      "step": 16000
    },
    {
      "epoch": 1.3564616902334758,
      "grad_norm": 2.6160857677459717,
      "learning_rate": 1.217465440594361e-05,
      "loss": 0.6179,
      "step": 16500
    },
    {
      "epoch": 1.3975665899375205,
      "grad_norm": 3.3576176166534424,
      "learning_rate": 1.1870166250532855e-05,
      "loss": 0.6203,
      "step": 17000
    },
    {
      "epoch": 1.4386714896415653,
      "grad_norm": 2.6305747032165527,
      "learning_rate": 1.1565678095122101e-05,
      "loss": 0.609,
      "step": 17500
    },
    {
      "epoch": 1.47977638934561,
      "grad_norm": 3.3986129760742188,
      "learning_rate": 1.1261189939711345e-05,
      "loss": 0.6076,
      "step": 18000
    },
    {
      "epoch": 1.5208812890496546,
      "grad_norm": 5.06044864654541,
      "learning_rate": 1.0956701784300592e-05,
      "loss": 0.6187,
      "step": 18500
    },
    {
      "epoch": 1.5619861887536994,
      "grad_norm": 4.3670549392700195,
      "learning_rate": 1.0652213628889839e-05,
      "loss": 0.6063,
      "step": 19000
    },
    {
      "epoch": 1.6030910884577443,
      "grad_norm": 8.928343772888184,
      "learning_rate": 1.0347725473479082e-05,
      "loss": 0.5948,
      "step": 19500
    },
    {
      "epoch": 1.644195988161789,
      "grad_norm": 3.5316221714019775,
      "learning_rate": 1.004323731806833e-05,
      "loss": 0.6108,
      "step": 20000
    },
    {
      "epoch": 1.6853008878658335,
      "grad_norm": 3.8091230392456055,
      "learning_rate": 9.738749162657574e-06,
      "loss": 0.6024,
      "step": 20500
    },
    {
      "epoch": 1.7264057875698784,
      "grad_norm": 2.5065314769744873,
      "learning_rate": 9.43426100724682e-06,
      "loss": 0.595,
      "step": 21000
    },
    {
      "epoch": 1.767510687273923,
      "grad_norm": 4.371850490570068,
      "learning_rate": 9.129772851836063e-06,
      "loss": 0.6063,
      "step": 21500
    },
    {
      "epoch": 1.8086155869779676,
      "grad_norm": 3.6098592281341553,
      "learning_rate": 8.82528469642531e-06,
      "loss": 0.6158,
      "step": 22000
    },
    {
      "epoch": 1.8497204866820125,
      "grad_norm": 4.037623405456543,
      "learning_rate": 8.520796541014555e-06,
      "loss": 0.6045,
      "step": 22500
    },
    {
      "epoch": 1.8908253863860573,
      "grad_norm": 4.389125823974609,
      "learning_rate": 8.2163083856038e-06,
      "loss": 0.5876,
      "step": 23000
    },
    {
      "epoch": 1.931930286090102,
      "grad_norm": 4.564250946044922,
      "learning_rate": 7.911820230193046e-06,
      "loss": 0.5857,
      "step": 23500
    },
    {
      "epoch": 1.9730351857941466,
      "grad_norm": 4.7007365226745605,
      "learning_rate": 7.607332074782292e-06,
      "loss": 0.6188,
      "step": 24000
    },
    {
      "epoch": 2.0,
      "eval_gen_len": 34.1084,
      "eval_loss": 0.5606569051742554,
      "eval_rouge1": 32.2894,
      "eval_rouge2": 27.8387,
      "eval_rougeL": 31.7251,
      "eval_rougeLsum": 31.8152,
      "eval_runtime": 1524.3799,
      "eval_samples_per_second": 14.185,
      "eval_steps_per_second": 0.443,
      "step": 24328
    },
    {
      "epoch": 2.0141400854981915,
      "grad_norm": 1.9762619733810425,
      "learning_rate": 7.302843919371537e-06,
      "loss": 0.5736,
      "step": 24500
    },
    {
      "epoch": 2.0552449852022363,
      "grad_norm": 4.161807060241699,
      "learning_rate": 6.998355763960782e-06,
      "loss": 0.5458,
      "step": 25000
    },
    {
      "epoch": 2.0963498849062807,
      "grad_norm": 2.221466541290283,
      "learning_rate": 6.693867608550027e-06,
      "loss": 0.5468,
      "step": 25500
    },
    {
      "epoch": 2.1374547846103256,
      "grad_norm": 3.642603874206543,
      "learning_rate": 6.3893794531392735e-06,
      "loss": 0.5556,
      "step": 26000
    },
    {
      "epoch": 2.1785596843143704,
      "grad_norm": 3.3420753479003906,
      "learning_rate": 6.084891297728519e-06,
      "loss": 0.5451,
      "step": 26500
    },
    {
      "epoch": 2.219664584018415,
      "grad_norm": 3.2656307220458984,
      "learning_rate": 5.780403142317764e-06,
      "loss": 0.5303,
      "step": 27000
    },
    {
      "epoch": 2.2607694837224597,
      "grad_norm": 3.4211599826812744,
      "learning_rate": 5.475914986907009e-06,
      "loss": 0.5496,
      "step": 27500
    },
    {
      "epoch": 2.3018743834265045,
      "grad_norm": 4.965065956115723,
      "learning_rate": 5.171426831496256e-06,
      "loss": 0.5392,
      "step": 28000
    },
    {
      "epoch": 2.342979283130549,
      "grad_norm": 2.4368653297424316,
      "learning_rate": 4.866938676085501e-06,
      "loss": 0.5443,
      "step": 28500
    },
    {
      "epoch": 2.384084182834594,
      "grad_norm": 2.6363422870635986,
      "learning_rate": 4.562450520674746e-06,
      "loss": 0.5418,
      "step": 29000
    },
    {
      "epoch": 2.4251890825386386,
      "grad_norm": 4.417261123657227,
      "learning_rate": 4.257962365263992e-06,
      "loss": 0.5377,
      "step": 29500
    },
    {
      "epoch": 2.4662939822426835,
      "grad_norm": 2.850874900817871,
      "learning_rate": 3.953474209853237e-06,
      "loss": 0.5406,
      "step": 30000
    },
    {
      "epoch": 2.507398881946728,
      "grad_norm": 3.0470714569091797,
      "learning_rate": 3.6489860544424825e-06,
      "loss": 0.5277,
      "step": 30500
    },
    {
      "epoch": 2.5485037816507727,
      "grad_norm": 3.77066707611084,
      "learning_rate": 3.3444978990317278e-06,
      "loss": 0.5404,
      "step": 31000
    },
    {
      "epoch": 2.5896086813548176,
      "grad_norm": 2.103886127471924,
      "learning_rate": 3.0400097436209734e-06,
      "loss": 0.5511,
      "step": 31500
    },
    {
      "epoch": 2.6307135810588624,
      "grad_norm": 2.784128427505493,
      "learning_rate": 2.7355215882102186e-06,
      "loss": 0.5466,
      "step": 32000
    },
    {
      "epoch": 2.671818480762907,
      "grad_norm": 2.9157445430755615,
      "learning_rate": 2.4310334327994643e-06,
      "loss": 0.5502,
      "step": 32500
    },
    {
      "epoch": 2.7129233804669517,
      "grad_norm": 3.7201719284057617,
      "learning_rate": 2.1265452773887095e-06,
      "loss": 0.5435,
      "step": 33000
    },
    {
      "epoch": 2.7540282801709965,
      "grad_norm": 3.0712995529174805,
      "learning_rate": 1.8220571219779551e-06,
      "loss": 0.5442,
      "step": 33500
    },
    {
      "epoch": 2.795133179875041,
      "grad_norm": 3.5120229721069336,
      "learning_rate": 1.5175689665672005e-06,
      "loss": 0.5392,
      "step": 34000
    },
    {
      "epoch": 2.836238079579086,
      "grad_norm": 3.09930682182312,
      "learning_rate": 1.2130808111564462e-06,
      "loss": 0.5355,
      "step": 34500
    },
    {
      "epoch": 2.8773429792831307,
      "grad_norm": 2.3439698219299316,
      "learning_rate": 9.085926557456916e-07,
      "loss": 0.5531,
      "step": 35000
    },
    {
      "epoch": 2.918447878987175,
      "grad_norm": 2.935701608657837,
      "learning_rate": 6.04104500334937e-07,
      "loss": 0.5355,
      "step": 35500
    },
    {
      "epoch": 2.95955277869122,
      "grad_norm": 3.3096513748168945,
      "learning_rate": 2.9961634492418246e-07,
      "loss": 0.5528,
      "step": 36000
    },
    {
      "epoch": 3.0,
      "eval_gen_len": 33.8306,
      "eval_loss": 0.5498164296150208,
      "eval_rouge1": 32.6478,
      "eval_rouge2": 28.3022,
      "eval_rougeL": 32.1001,
      "eval_rougeLsum": 32.1838,
      "eval_runtime": 1535.5456,
      "eval_samples_per_second": 14.082,
      "eval_steps_per_second": 0.44,
      "step": 36492
    }
  ],
  "logging_steps": 500,
  "max_steps": 36492,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.2577075189776384e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}