| { |
| "best_metric": 0.7704865330126336, |
| "best_model_checkpoint": "output/checkpoint-12000", |
| "epoch": 4.752475247524752, |
| "eval_steps": 250, |
| "global_step": 12000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.09900990099009901, |
| "grad_norm": 1.5793111324310303, |
| "learning_rate": 1e-05, |
| "loss": 0.1198, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.09900990099009901, |
| "eval_f1": 0.713525737782454, |
| "eval_loss": 0.12419664114713669, |
| "eval_precision": 0.7395227595992926, |
| "eval_recall": 0.6962858255614207, |
| "eval_runtime": 321.2972, |
| "eval_samples_per_second": 13.467, |
| "eval_steps_per_second": 0.675, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.19801980198019803, |
| "grad_norm": 0.9309703707695007, |
| "learning_rate": 1e-05, |
| "loss": 0.1045, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.19801980198019803, |
| "eval_f1": 0.7206475251105509, |
| "eval_loss": 0.11620143055915833, |
| "eval_precision": 0.7629260609509252, |
| "eval_recall": 0.6966676403290979, |
| "eval_runtime": 329.1747, |
| "eval_samples_per_second": 13.145, |
| "eval_steps_per_second": 0.659, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.297029702970297, |
| "grad_norm": 0.3302266299724579, |
| "learning_rate": 1e-05, |
| "loss": 0.1063, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.297029702970297, |
| "eval_f1": 0.7252193838416583, |
| "eval_loss": 0.11440324038267136, |
| "eval_precision": 0.7635823487211817, |
| "eval_recall": 0.7023769534102526, |
| "eval_runtime": 330.0354, |
| "eval_samples_per_second": 13.111, |
| "eval_steps_per_second": 0.658, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.39603960396039606, |
| "grad_norm": 2.2105612754821777, |
| "learning_rate": 1e-05, |
| "loss": 0.1021, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.39603960396039606, |
| "eval_f1": 0.7330270803885934, |
| "eval_loss": 0.113602414727211, |
| "eval_precision": 0.7541870386601477, |
| "eval_recall": 0.7176272889760549, |
| "eval_runtime": 330.4375, |
| "eval_samples_per_second": 13.095, |
| "eval_steps_per_second": 0.657, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.49504950495049505, |
| "grad_norm": 2.663264274597168, |
| "learning_rate": 1e-05, |
| "loss": 0.1065, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.49504950495049505, |
| "eval_f1": 0.7386791782384943, |
| "eval_loss": 0.1129036471247673, |
| "eval_precision": 0.7609068815282719, |
| "eval_recall": 0.7225689735511556, |
| "eval_runtime": 330.3036, |
| "eval_samples_per_second": 13.1, |
| "eval_steps_per_second": 0.657, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.594059405940594, |
| "grad_norm": 0.2760612964630127, |
| "learning_rate": 1e-05, |
| "loss": 0.1038, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.594059405940594, |
| "eval_f1": 0.7218572845371175, |
| "eval_loss": 0.11184883862733841, |
| "eval_precision": 0.7802744438382736, |
| "eval_recall": 0.6929390348223414, |
| "eval_runtime": 330.314, |
| "eval_samples_per_second": 13.1, |
| "eval_steps_per_second": 0.657, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.693069306930693, |
| "grad_norm": 0.3853608965873718, |
| "learning_rate": 1e-05, |
| "loss": 0.1013, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.693069306930693, |
| "eval_f1": 0.74378295527466, |
| "eval_loss": 0.10998328775167465, |
| "eval_precision": 0.7703728976253078, |
| "eval_recall": 0.7252778415294846, |
| "eval_runtime": 330.8211, |
| "eval_samples_per_second": 13.08, |
| "eval_steps_per_second": 0.656, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.7920792079207921, |
| "grad_norm": 0.4774291217327118, |
| "learning_rate": 1e-05, |
| "loss": 0.1074, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.7920792079207921, |
| "eval_f1": 0.740637178295641, |
| "eval_loss": 0.10963314771652222, |
| "eval_precision": 0.7560941570162041, |
| "eval_recall": 0.7284361590408035, |
| "eval_runtime": 330.2879, |
| "eval_samples_per_second": 13.101, |
| "eval_steps_per_second": 0.657, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.8910891089108911, |
| "grad_norm": 2.6813926696777344, |
| "learning_rate": 1e-05, |
| "loss": 0.1016, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.8910891089108911, |
| "eval_f1": 0.736115628557489, |
| "eval_loss": 0.10999644547700882, |
| "eval_precision": 0.7713716773177735, |
| "eval_recall": 0.7139623192639112, |
| "eval_runtime": 330.3901, |
| "eval_samples_per_second": 13.097, |
| "eval_steps_per_second": 0.657, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.9900990099009901, |
| "grad_norm": 0.26967763900756836, |
| "learning_rate": 1e-05, |
| "loss": 0.1027, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.9900990099009901, |
| "eval_f1": 0.7451604650395085, |
| "eval_loss": 0.10895609110593796, |
| "eval_precision": 0.7685630379034225, |
| "eval_recall": 0.7282629862226766, |
| "eval_runtime": 329.6713, |
| "eval_samples_per_second": 13.125, |
| "eval_steps_per_second": 0.658, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.0891089108910892, |
| "grad_norm": 0.9345588684082031, |
| "learning_rate": 1e-05, |
| "loss": 0.0947, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.0891089108910892, |
| "eval_f1": 0.7540236340162096, |
| "eval_loss": 0.11148978024721146, |
| "eval_precision": 0.7576570195811476, |
| "eval_recall": 0.7506000194732486, |
| "eval_runtime": 329.265, |
| "eval_samples_per_second": 13.141, |
| "eval_steps_per_second": 0.659, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.188118811881188, |
| "grad_norm": 1.7818280458450317, |
| "learning_rate": 1e-05, |
| "loss": 0.0941, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.188118811881188, |
| "eval_f1": 0.7513449964701451, |
| "eval_loss": 0.11351278424263, |
| "eval_precision": 0.7627544097693352, |
| "eval_recall": 0.7417476892904087, |
| "eval_runtime": 330.7392, |
| "eval_samples_per_second": 13.083, |
| "eval_steps_per_second": 0.656, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.2871287128712872, |
| "grad_norm": 0.29553142189979553, |
| "learning_rate": 1e-05, |
| "loss": 0.093, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.2871287128712872, |
| "eval_f1": 0.7310646699770766, |
| "eval_loss": 0.11457356810569763, |
| "eval_precision": 0.7899499010545827, |
| "eval_recall": 0.701254111985089, |
| "eval_runtime": 330.9605, |
| "eval_samples_per_second": 13.074, |
| "eval_steps_per_second": 0.656, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.386138613861386, |
| "grad_norm": 0.8314707279205322, |
| "learning_rate": 1e-05, |
| "loss": 0.0965, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.386138613861386, |
| "eval_f1": 0.7446224165970077, |
| "eval_loss": 0.11156675964593887, |
| "eval_precision": 0.7669786860620493, |
| "eval_recall": 0.7283024543247999, |
| "eval_runtime": 329.2254, |
| "eval_samples_per_second": 13.143, |
| "eval_steps_per_second": 0.659, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.4851485148514851, |
| "grad_norm": 0.238117054104805, |
| "learning_rate": 1e-05, |
| "loss": 0.0949, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.4851485148514851, |
| "eval_f1": 0.7545433038240896, |
| "eval_loss": 0.11255145817995071, |
| "eval_precision": 0.7551434520731723, |
| "eval_recall": 0.753949244368406, |
| "eval_runtime": 329.0801, |
| "eval_samples_per_second": 13.149, |
| "eval_steps_per_second": 0.659, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.5841584158415842, |
| "grad_norm": 0.36143016815185547, |
| "learning_rate": 1e-05, |
| "loss": 0.0935, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.5841584158415842, |
| "eval_f1": 0.7567706745300815, |
| "eval_loss": 0.11153747141361237, |
| "eval_precision": 0.7570114849625239, |
| "eval_recall": 0.7565308407575093, |
| "eval_runtime": 328.785, |
| "eval_samples_per_second": 13.161, |
| "eval_steps_per_second": 0.66, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.6831683168316833, |
| "grad_norm": 1.6590158939361572, |
| "learning_rate": 1e-05, |
| "loss": 0.0923, |
| "step": 4250 |
| }, |
| { |
| "epoch": 1.6831683168316833, |
| "eval_f1": 0.750615488110628, |
| "eval_loss": 0.11224941164255142, |
| "eval_precision": 0.7794619961345578, |
| "eval_recall": 0.730789814100023, |
| "eval_runtime": 328.0765, |
| "eval_samples_per_second": 13.189, |
| "eval_steps_per_second": 0.661, |
| "step": 4250 |
| }, |
| { |
| "epoch": 1.7821782178217822, |
| "grad_norm": 0.7645926475524902, |
| "learning_rate": 1e-05, |
| "loss": 0.0957, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.7821782178217822, |
| "eval_f1": 0.7547025065064623, |
| "eval_loss": 0.1093941256403923, |
| "eval_precision": 0.7719325552656011, |
| "eval_recall": 0.741186268577827, |
| "eval_runtime": 328.088, |
| "eval_samples_per_second": 13.189, |
| "eval_steps_per_second": 0.661, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.881188118811881, |
| "grad_norm": 1.7966728210449219, |
| "learning_rate": 1e-05, |
| "loss": 0.0914, |
| "step": 4750 |
| }, |
| { |
| "epoch": 1.881188118811881, |
| "eval_f1": 0.7618047956088785, |
| "eval_loss": 0.11087872087955475, |
| "eval_precision": 0.7672179303677568, |
| "eval_recall": 0.756824852038084, |
| "eval_runtime": 328.324, |
| "eval_samples_per_second": 13.179, |
| "eval_steps_per_second": 0.661, |
| "step": 4750 |
| }, |
| { |
| "epoch": 1.9801980198019802, |
| "grad_norm": 0.28777483105659485, |
| "learning_rate": 1e-05, |
| "loss": 0.0963, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.9801980198019802, |
| "eval_f1": 0.7554586723328673, |
| "eval_loss": 0.11140972375869751, |
| "eval_precision": 0.7716512789760404, |
| "eval_recall": 0.7425878208739316, |
| "eval_runtime": 329.8561, |
| "eval_samples_per_second": 13.118, |
| "eval_steps_per_second": 0.658, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.0792079207920793, |
| "grad_norm": 1.7094814777374268, |
| "learning_rate": 1e-05, |
| "loss": 0.0896, |
| "step": 5250 |
| }, |
| { |
| "epoch": 2.0792079207920793, |
| "eval_f1": 0.7604882007256375, |
| "eval_loss": 0.11669965833425522, |
| "eval_precision": 0.7473437356807244, |
| "eval_recall": 0.7778062342214526, |
| "eval_runtime": 329.4978, |
| "eval_samples_per_second": 13.132, |
| "eval_steps_per_second": 0.659, |
| "step": 5250 |
| }, |
| { |
| "epoch": 2.1782178217821784, |
| "grad_norm": 0.47071635723114014, |
| "learning_rate": 1e-05, |
| "loss": 0.0896, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.1782178217821784, |
| "eval_f1": 0.7668274682246945, |
| "eval_loss": 0.11288785934448242, |
| "eval_precision": 0.767203513653942, |
| "eval_recall": 0.766453678009834, |
| "eval_runtime": 329.9865, |
| "eval_samples_per_second": 13.113, |
| "eval_steps_per_second": 0.658, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.2772277227722775, |
| "grad_norm": 0.6770134568214417, |
| "learning_rate": 1e-05, |
| "loss": 0.0838, |
| "step": 5750 |
| }, |
| { |
| "epoch": 2.2772277227722775, |
| "eval_f1": 0.7671750432534277, |
| "eval_loss": 0.11229284107685089, |
| "eval_precision": 0.7630772158891853, |
| "eval_recall": 0.7715685354030615, |
| "eval_runtime": 330.0378, |
| "eval_samples_per_second": 13.111, |
| "eval_steps_per_second": 0.658, |
| "step": 5750 |
| }, |
| { |
| "epoch": 2.376237623762376, |
| "grad_norm": 1.9502440690994263, |
| "learning_rate": 1e-05, |
| "loss": 0.0846, |
| "step": 6000 |
| }, |
| { |
| "epoch": 2.376237623762376, |
| "eval_f1": 0.7651510799847114, |
| "eval_loss": 0.11747279018163681, |
| "eval_precision": 0.7683948018832312, |
| "eval_recall": 0.7620669810205374, |
| "eval_runtime": 330.3569, |
| "eval_samples_per_second": 13.098, |
| "eval_steps_per_second": 0.657, |
| "step": 6000 |
| }, |
| { |
| "epoch": 2.4752475247524752, |
| "grad_norm": 0.43725672364234924, |
| "learning_rate": 1e-05, |
| "loss": 0.0885, |
| "step": 6250 |
| }, |
| { |
| "epoch": 2.4752475247524752, |
| "eval_f1": 0.7655915615026703, |
| "eval_loss": 0.11502809077501297, |
| "eval_precision": 0.7608718307527647, |
| "eval_recall": 0.770713103409905, |
| "eval_runtime": 330.4941, |
| "eval_samples_per_second": 13.093, |
| "eval_steps_per_second": 0.657, |
| "step": 6250 |
| }, |
| { |
| "epoch": 2.5742574257425743, |
| "grad_norm": 1.3364715576171875, |
| "learning_rate": 1e-05, |
| "loss": 0.0847, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.5742574257425743, |
| "eval_f1": 0.7656431107915714, |
| "eval_loss": 0.11593661457300186, |
| "eval_precision": 0.7612439448703502, |
| "eval_recall": 0.7703884913100628, |
| "eval_runtime": 329.9536, |
| "eval_samples_per_second": 13.114, |
| "eval_steps_per_second": 0.658, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.6732673267326734, |
| "grad_norm": 0.8091257810592651, |
| "learning_rate": 1e-05, |
| "loss": 0.0898, |
| "step": 6750 |
| }, |
| { |
| "epoch": 2.6732673267326734, |
| "eval_f1": 0.7642224417610171, |
| "eval_loss": 0.11731592565774918, |
| "eval_precision": 0.7507351472736794, |
| "eval_recall": 0.7820350588022562, |
| "eval_runtime": 330.5066, |
| "eval_samples_per_second": 13.092, |
| "eval_steps_per_second": 0.657, |
| "step": 6750 |
| }, |
| { |
| "epoch": 2.772277227722772, |
| "grad_norm": 0.6623993515968323, |
| "learning_rate": 1e-05, |
| "loss": 0.0877, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.772277227722772, |
| "eval_f1": 0.7683511472377138, |
| "eval_loss": 0.11674495786428452, |
| "eval_precision": 0.7588554349876182, |
| "eval_recall": 0.779668537489481, |
| "eval_runtime": 329.2959, |
| "eval_samples_per_second": 13.14, |
| "eval_steps_per_second": 0.659, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.871287128712871, |
| "grad_norm": 0.19114291667938232, |
| "learning_rate": 1e-05, |
| "loss": 0.0815, |
| "step": 7250 |
| }, |
| { |
| "epoch": 2.871287128712871, |
| "eval_f1": 0.7577154015271775, |
| "eval_loss": 0.11400625854730606, |
| "eval_precision": 0.7864181813154634, |
| "eval_recall": 0.7377428070687893, |
| "eval_runtime": 328.8093, |
| "eval_samples_per_second": 13.16, |
| "eval_steps_per_second": 0.66, |
| "step": 7250 |
| }, |
| { |
| "epoch": 2.9702970297029703, |
| "grad_norm": 0.7965342998504639, |
| "learning_rate": 1e-05, |
| "loss": 0.0902, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.9702970297029703, |
| "eval_f1": 0.7484147336968241, |
| "eval_loss": 0.11417645215988159, |
| "eval_precision": 0.7828750315445061, |
| "eval_recall": 0.7260785050108842, |
| "eval_runtime": 328.5671, |
| "eval_samples_per_second": 13.169, |
| "eval_steps_per_second": 0.66, |
| "step": 7500 |
| }, |
| { |
| "epoch": 3.0693069306930694, |
| "grad_norm": 2.098968982696533, |
| "learning_rate": 1e-05, |
| "loss": 0.0802, |
| "step": 7750 |
| }, |
| { |
| "epoch": 3.0693069306930694, |
| "eval_f1": 0.7688979882499555, |
| "eval_loss": 0.1173376813530922, |
| "eval_precision": 0.7535579745567124, |
| "eval_recall": 0.7900408242747954, |
| "eval_runtime": 329.2583, |
| "eval_samples_per_second": 13.142, |
| "eval_steps_per_second": 0.659, |
| "step": 7750 |
| }, |
| { |
| "epoch": 3.1683168316831685, |
| "grad_norm": 2.423370361328125, |
| "learning_rate": 1e-05, |
| "loss": 0.0748, |
| "step": 8000 |
| }, |
| { |
| "epoch": 3.1683168316831685, |
| "eval_f1": 0.7671750432534277, |
| "eval_loss": 0.11995264887809753, |
| "eval_precision": 0.7630772158891853, |
| "eval_recall": 0.7715685354030615, |
| "eval_runtime": 330.3633, |
| "eval_samples_per_second": 13.098, |
| "eval_steps_per_second": 0.657, |
| "step": 8000 |
| }, |
| { |
| "epoch": 3.2673267326732676, |
| "grad_norm": 0.2435981184244156, |
| "learning_rate": 1e-05, |
| "loss": 0.0877, |
| "step": 8250 |
| }, |
| { |
| "epoch": 3.2673267326732676, |
| "eval_f1": 0.7682401568437223, |
| "eval_loss": 0.11544305831193924, |
| "eval_precision": 0.7605157685857473, |
| "eval_recall": 0.7771111087928673, |
| "eval_runtime": 331.1394, |
| "eval_samples_per_second": 13.067, |
| "eval_steps_per_second": 0.655, |
| "step": 8250 |
| }, |
| { |
| "epoch": 3.366336633663366, |
| "grad_norm": 0.3148539960384369, |
| "learning_rate": 1e-05, |
| "loss": 0.0776, |
| "step": 8500 |
| }, |
| { |
| "epoch": 3.366336633663366, |
| "eval_f1": 0.7506009456389866, |
| "eval_loss": 0.12167887389659882, |
| "eval_precision": 0.7774573692979017, |
| "eval_recall": 0.7317636503995493, |
| "eval_runtime": 330.4561, |
| "eval_samples_per_second": 13.094, |
| "eval_steps_per_second": 0.657, |
| "step": 8500 |
| }, |
| { |
| "epoch": 3.4653465346534653, |
| "grad_norm": 1.8756177425384521, |
| "learning_rate": 1e-05, |
| "loss": 0.075, |
| "step": 8750 |
| }, |
| { |
| "epoch": 3.4653465346534653, |
| "eval_f1": 0.7520150871817991, |
| "eval_loss": 0.12817060947418213, |
| "eval_precision": 0.7849829873110825, |
| "eval_recall": 0.7302042256949516, |
| "eval_runtime": 330.0881, |
| "eval_samples_per_second": 13.109, |
| "eval_steps_per_second": 0.657, |
| "step": 8750 |
| }, |
| { |
| "epoch": 3.5643564356435644, |
| "grad_norm": 1.7097699642181396, |
| "learning_rate": 1e-05, |
| "loss": 0.0786, |
| "step": 9000 |
| }, |
| { |
| "epoch": 3.5643564356435644, |
| "eval_f1": 0.7664651841589353, |
| "eval_loss": 0.126222163438797, |
| "eval_precision": 0.7584130472040356, |
| "eval_recall": 0.7757884927010091, |
| "eval_runtime": 330.0335, |
| "eval_samples_per_second": 13.111, |
| "eval_steps_per_second": 0.658, |
| "step": 9000 |
| }, |
| { |
| "epoch": 3.6633663366336635, |
| "grad_norm": 0.3647657036781311, |
| "learning_rate": 1e-05, |
| "loss": 0.0828, |
| "step": 9250 |
| }, |
| { |
| "epoch": 3.6633663366336635, |
| "eval_f1": 0.7644029198454667, |
| "eval_loss": 0.12351341545581818, |
| "eval_precision": 0.7535178310806523, |
| "eval_recall": 0.7778545696064317, |
| "eval_runtime": 329.9562, |
| "eval_samples_per_second": 13.114, |
| "eval_steps_per_second": 0.658, |
| "step": 9250 |
| }, |
| { |
| "epoch": 3.762376237623762, |
| "grad_norm": 0.8577436804771423, |
| "learning_rate": 1e-05, |
| "loss": 0.0658, |
| "step": 9500 |
| }, |
| { |
| "epoch": 3.762376237623762, |
| "eval_f1": 0.7625425953787222, |
| "eval_loss": 0.13211406767368317, |
| "eval_precision": 0.7572532685294451, |
| "eval_recall": 0.7683530152239075, |
| "eval_runtime": 330.4643, |
| "eval_samples_per_second": 13.094, |
| "eval_steps_per_second": 0.657, |
| "step": 9500 |
| }, |
| { |
| "epoch": 3.8613861386138613, |
| "grad_norm": 1.1118955612182617, |
| "learning_rate": 1e-05, |
| "loss": 0.0929, |
| "step": 9750 |
| }, |
| { |
| "epoch": 3.8613861386138613, |
| "eval_f1": 0.7654929694539482, |
| "eval_loss": 0.1250331550836563, |
| "eval_precision": 0.7551473309426884, |
| "eval_recall": 0.7781002455020274, |
| "eval_runtime": 330.8519, |
| "eval_samples_per_second": 13.078, |
| "eval_steps_per_second": 0.656, |
| "step": 9750 |
| }, |
| { |
| "epoch": 3.9603960396039604, |
| "grad_norm": 0.2617945969104767, |
| "learning_rate": 1e-05, |
| "loss": 0.079, |
| "step": 10000 |
| }, |
| { |
| "epoch": 3.9603960396039604, |
| "eval_f1": 0.7647937419076998, |
| "eval_loss": 0.12892000377178192, |
| "eval_precision": 0.7670959127005181, |
| "eval_recall": 0.7625736332213622, |
| "eval_runtime": 329.8245, |
| "eval_samples_per_second": 13.119, |
| "eval_steps_per_second": 0.658, |
| "step": 10000 |
| }, |
| { |
| "epoch": 4.0594059405940595, |
| "grad_norm": 1.7066285610198975, |
| "learning_rate": 1e-05, |
| "loss": 0.0827, |
| "step": 10250 |
| }, |
| { |
| "epoch": 4.0594059405940595, |
| "eval_f1": 0.758990760980963, |
| "eval_loss": 0.12950246036052704, |
| "eval_precision": 0.741826860901623, |
| "eval_recall": 0.78480512841912, |
| "eval_runtime": 330.1148, |
| "eval_samples_per_second": 13.108, |
| "eval_steps_per_second": 0.657, |
| "step": 10250 |
| }, |
| { |
| "epoch": 4.158415841584159, |
| "grad_norm": 3.2379682064056396, |
| "learning_rate": 1e-05, |
| "loss": 0.0709, |
| "step": 10500 |
| }, |
| { |
| "epoch": 4.158415841584159, |
| "eval_f1": 0.7665793170128494, |
| "eval_loss": 0.13100141286849976, |
| "eval_precision": 0.765117406734896, |
| "eval_recall": 0.7680767385090446, |
| "eval_runtime": 328.8925, |
| "eval_samples_per_second": 13.156, |
| "eval_steps_per_second": 0.66, |
| "step": 10500 |
| }, |
| { |
| "epoch": 4.257425742574258, |
| "grad_norm": 0.4713508188724518, |
| "learning_rate": 1e-05, |
| "loss": 0.067, |
| "step": 10750 |
| }, |
| { |
| "epoch": 4.257425742574258, |
| "eval_f1": 0.7622919716228407, |
| "eval_loss": 0.13499750196933746, |
| "eval_precision": 0.7648353770910121, |
| "eval_recall": 0.7598494648333993, |
| "eval_runtime": 329.8598, |
| "eval_samples_per_second": 13.118, |
| "eval_steps_per_second": 0.658, |
| "step": 10750 |
| }, |
| { |
| "epoch": 4.356435643564357, |
| "grad_norm": 1.8301159143447876, |
| "learning_rate": 1e-05, |
| "loss": 0.0775, |
| "step": 11000 |
| }, |
| { |
| "epoch": 4.356435643564357, |
| "eval_f1": 0.7618188925299318, |
| "eval_loss": 0.1320166289806366, |
| "eval_precision": 0.7623117959801409, |
| "eval_recall": 0.7613299533337505, |
| "eval_runtime": 330.5622, |
| "eval_samples_per_second": 13.09, |
| "eval_steps_per_second": 0.656, |
| "step": 11000 |
| }, |
| { |
| "epoch": 4.455445544554456, |
| "grad_norm": 2.722707509994507, |
| "learning_rate": 1e-05, |
| "loss": 0.064, |
| "step": 11250 |
| }, |
| { |
| "epoch": 4.455445544554456, |
| "eval_f1": 0.7610237264171784, |
| "eval_loss": 0.1370055377483368, |
| "eval_precision": 0.755460351976167, |
| "eval_recall": 0.767172971130909, |
| "eval_runtime": 330.6803, |
| "eval_samples_per_second": 13.085, |
| "eval_steps_per_second": 0.656, |
| "step": 11250 |
| }, |
| { |
| "epoch": 4.554455445544555, |
| "grad_norm": 0.3536905348300934, |
| "learning_rate": 1e-05, |
| "loss": 0.0752, |
| "step": 11500 |
| }, |
| { |
| "epoch": 4.554455445544555, |
| "eval_f1": 0.761344776580898, |
| "eval_loss": 0.13751940429210663, |
| "eval_precision": 0.7645423335256452, |
| "eval_recall": 0.7583053405384352, |
| "eval_runtime": 330.8743, |
| "eval_samples_per_second": 13.077, |
| "eval_steps_per_second": 0.656, |
| "step": 11500 |
| }, |
| { |
| "epoch": 4.653465346534653, |
| "grad_norm": 2.0691871643066406, |
| "learning_rate": 1e-05, |
| "loss": 0.0753, |
| "step": 11750 |
| }, |
| { |
| "epoch": 4.653465346534653, |
| "eval_f1": 0.7634977092513291, |
| "eval_loss": 0.13433216512203217, |
| "eval_precision": 0.7707010020069438, |
| "eval_recall": 0.7570310598315564, |
| "eval_runtime": 330.8577, |
| "eval_samples_per_second": 13.078, |
| "eval_steps_per_second": 0.656, |
| "step": 11750 |
| }, |
| { |
| "epoch": 4.752475247524752, |
| "grad_norm": 1.6911152601242065, |
| "learning_rate": 1e-05, |
| "loss": 0.065, |
| "step": 12000 |
| }, |
| { |
| "epoch": 4.752475247524752, |
| "eval_f1": 0.7704865330126336, |
| "eval_loss": 0.13676360249519348, |
| "eval_precision": 0.7719050041777025, |
| "eval_recall": 0.7690989101935501, |
| "eval_runtime": 329.4666, |
| "eval_samples_per_second": 13.133, |
| "eval_steps_per_second": 0.659, |
| "step": 12000 |
| } |
| ], |
| "logging_steps": 250, |
| "max_steps": 25250, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 250, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.1756719325184e+16, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|