{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 698,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.014344629729245113,
      "grad_norm": 1.18152568041847,
      "learning_rate": 9.99589838831346e-05,
      "loss": 0.4095,
      "step": 10
    },
    {
      "epoch": 0.028689259458490227,
      "grad_norm": 0.7801380066445918,
      "learning_rate": 9.981728616718234e-05,
      "loss": 0.2924,
      "step": 20
    },
    {
      "epoch": 0.04303388918773534,
      "grad_norm": 0.4005181099055157,
      "learning_rate": 9.957468740965011e-05,
      "loss": 0.2772,
      "step": 30
    },
    {
      "epoch": 0.05737851891698045,
      "grad_norm": 0.37151913590170765,
      "learning_rate": 9.923167897546773e-05,
      "loss": 0.2624,
      "step": 40
    },
    {
      "epoch": 0.07172314864622557,
      "grad_norm": 0.3658403711930329,
      "learning_rate": 9.878895560156171e-05,
      "loss": 0.2597,
      "step": 50
    },
    {
      "epoch": 0.08606777837547068,
      "grad_norm": 0.36404458095370545,
      "learning_rate": 9.824741398971966e-05,
      "loss": 0.257,
      "step": 60
    },
    {
      "epoch": 0.1004124081047158,
      "grad_norm": 0.34348583857313997,
      "learning_rate": 9.760815099039044e-05,
      "loss": 0.2586,
      "step": 70
    },
    {
      "epoch": 0.1147570378339609,
      "grad_norm": 0.3546267311821283,
      "learning_rate": 9.687246138109887e-05,
      "loss": 0.2581,
      "step": 80
    },
    {
      "epoch": 0.129101667563206,
      "grad_norm": 0.3518328253962369,
      "learning_rate": 9.604183524397439e-05,
      "loss": 0.259,
      "step": 90
    },
    {
      "epoch": 0.14344629729245115,
      "grad_norm": 0.37777416169138317,
      "learning_rate": 9.511795494770563e-05,
      "loss": 0.2561,
      "step": 100
    },
    {
      "epoch": 0.15779092702169625,
      "grad_norm": 0.34978239214614343,
      "learning_rate": 9.410269174003334e-05,
      "loss": 0.2548,
      "step": 110
    },
    {
      "epoch": 0.17213555675094136,
      "grad_norm": 0.35146197680272717,
      "learning_rate": 9.29981019576834e-05,
      "loss": 0.2526,
      "step": 120
    },
    {
      "epoch": 0.1864801864801865,
      "grad_norm": 0.3322142658858729,
      "learning_rate": 9.180642286141678e-05,
      "loss": 0.2495,
      "step": 130
    },
    {
      "epoch": 0.2008248162094316,
      "grad_norm": 0.32127511964369854,
      "learning_rate": 9.053006810463156e-05,
      "loss": 0.2499,
      "step": 140
    },
    {
      "epoch": 0.2151694459386767,
      "grad_norm": 0.3306286284306928,
      "learning_rate": 8.917162284469569e-05,
      "loss": 0.2461,
      "step": 150
    },
    {
      "epoch": 0.2295140756679218,
      "grad_norm": 0.33792378428094794,
      "learning_rate": 8.773383850691155e-05,
      "loss": 0.2406,
      "step": 160
    },
    {
      "epoch": 0.24385870539716695,
      "grad_norm": 0.3222015509279155,
      "learning_rate": 8.621962721171787e-05,
      "loss": 0.2365,
      "step": 170
    },
    {
      "epoch": 0.258203335126412,
      "grad_norm": 0.3224913690900499,
      "learning_rate": 8.463205587641614e-05,
      "loss": 0.2302,
      "step": 180
    },
    {
      "epoch": 0.2725479648556572,
      "grad_norm": 0.3282689638964441,
      "learning_rate": 8.297434000336781e-05,
      "loss": 0.226,
      "step": 190
    },
    {
      "epoch": 0.2868925945849023,
      "grad_norm": 0.314785088454373,
      "learning_rate": 8.124983716724434e-05,
      "loss": 0.2261,
      "step": 200
    },
    {
      "epoch": 0.3012372243141474,
      "grad_norm": 0.31867034269672606,
      "learning_rate": 7.94620402145205e-05,
      "loss": 0.2246,
      "step": 210
    },
    {
      "epoch": 0.3155818540433925,
      "grad_norm": 0.34102642937121,
      "learning_rate": 7.761457018898536e-05,
      "loss": 0.219,
      "step": 220
    },
    {
      "epoch": 0.3299264837726376,
      "grad_norm": 0.34109961975692005,
      "learning_rate": 7.571116899759945e-05,
      "loss": 0.2172,
      "step": 230
    },
    {
      "epoch": 0.3442711135018827,
      "grad_norm": 0.31361120419506927,
      "learning_rate": 7.375569183155305e-05,
      "loss": 0.2139,
      "step": 240
    },
    {
      "epoch": 0.3586157432311278,
      "grad_norm": 0.3214909612073443,
      "learning_rate": 7.175209935787604e-05,
      "loss": 0.2133,
      "step": 250
    },
    {
      "epoch": 0.372960372960373,
      "grad_norm": 0.3185074747194894,
      "learning_rate": 6.970444969741462e-05,
      "loss": 0.2152,
      "step": 260
    },
    {
      "epoch": 0.3873050026896181,
      "grad_norm": 0.2974383790136214,
      "learning_rate": 6.761689020542288e-05,
      "loss": 0.2106,
      "step": 270
    },
    {
      "epoch": 0.4016496324188632,
      "grad_norm": 0.2971210798316127,
      "learning_rate": 6.549364907141713e-05,
      "loss": 0.2053,
      "step": 280
    },
    {
      "epoch": 0.4159942621481083,
      "grad_norm": 0.31411603403554667,
      "learning_rate": 6.333902675530656e-05,
      "loss": 0.1984,
      "step": 290
    },
    {
      "epoch": 0.4303388918773534,
      "grad_norm": 0.3179089356943495,
      "learning_rate": 6.115738727714592e-05,
      "loss": 0.1976,
      "step": 300
    },
    {
      "epoch": 0.4446835216065985,
      "grad_norm": 0.31363246830376357,
      "learning_rate": 5.895314937815206e-05,
      "loss": 0.1922,
      "step": 310
    },
    {
      "epoch": 0.4590281513358436,
      "grad_norm": 0.28488457188548555,
      "learning_rate": 5.673077757088699e-05,
      "loss": 0.1887,
      "step": 320
    },
    {
      "epoch": 0.47337278106508873,
      "grad_norm": 0.29503052547081504,
      "learning_rate": 5.449477309673462e-05,
      "loss": 0.1864,
      "step": 330
    },
    {
      "epoch": 0.4877174107943339,
      "grad_norm": 0.2991655081063554,
      "learning_rate": 5.2249664808986244e-05,
      "loss": 0.1823,
      "step": 340
    },
    {
      "epoch": 0.502062040523579,
      "grad_norm": 0.2980600376554953,
      "learning_rate": 5e-05,
      "loss": 0.1819,
      "step": 350
    },
    {
      "epoch": 0.516406670252824,
      "grad_norm": 0.2783466488589357,
      "learning_rate": 4.775033519101378e-05,
      "loss": 0.1774,
      "step": 360
    },
    {
      "epoch": 0.5307512999820692,
      "grad_norm": 0.28992763991000786,
      "learning_rate": 4.5505226903265375e-05,
      "loss": 0.1762,
      "step": 370
    },
    {
      "epoch": 0.5450959297113144,
      "grad_norm": 0.28259667009343253,
      "learning_rate": 4.326922242911301e-05,
      "loss": 0.1727,
      "step": 380
    },
    {
      "epoch": 0.5594405594405595,
      "grad_norm": 0.2847313831772219,
      "learning_rate": 4.104685062184794e-05,
      "loss": 0.168,
      "step": 390
    },
    {
      "epoch": 0.5737851891698046,
      "grad_norm": 0.28155885295327826,
      "learning_rate": 3.884261272285409e-05,
      "loss": 0.1654,
      "step": 400
    },
    {
      "epoch": 0.5881298188990497,
      "grad_norm": 0.2684898894630876,
      "learning_rate": 3.666097324469344e-05,
      "loss": 0.1613,
      "step": 410
    },
    {
      "epoch": 0.6024744486282948,
      "grad_norm": 0.27221979514870387,
      "learning_rate": 3.4506350928582876e-05,
      "loss": 0.1567,
      "step": 420
    },
    {
      "epoch": 0.6168190783575399,
      "grad_norm": 0.2710486003553914,
      "learning_rate": 3.238310979457713e-05,
      "loss": 0.1544,
      "step": 430
    },
    {
      "epoch": 0.631163708086785,
      "grad_norm": 0.2697397554786478,
      "learning_rate": 3.0295550302585402e-05,
      "loss": 0.1541,
      "step": 440
    },
    {
      "epoch": 0.6455083378160301,
      "grad_norm": 0.2668796661785585,
      "learning_rate": 2.824790064212396e-05,
      "loss": 0.1514,
      "step": 450
    },
    {
      "epoch": 0.6598529675452752,
      "grad_norm": 0.2647838900048377,
      "learning_rate": 2.6244308168446956e-05,
      "loss": 0.1492,
      "step": 460
    },
    {
      "epoch": 0.6741975972745203,
      "grad_norm": 0.2562570406700666,
      "learning_rate": 2.4288831002400574e-05,
      "loss": 0.1444,
      "step": 470
    },
    {
      "epoch": 0.6885422270037654,
      "grad_norm": 0.26315566627182685,
      "learning_rate": 2.2385429811014654e-05,
      "loss": 0.1431,
      "step": 480
    },
    {
      "epoch": 0.7028868567330105,
      "grad_norm": 0.2637558074085645,
      "learning_rate": 2.0537959785479517e-05,
      "loss": 0.1405,
      "step": 490
    },
    {
      "epoch": 0.7172314864622557,
      "grad_norm": 0.26909423269028165,
      "learning_rate": 1.875016283275567e-05,
      "loss": 0.1383,
      "step": 500
    },
    {
      "epoch": 0.7315761161915008,
      "grad_norm": 0.2670316420743111,
      "learning_rate": 1.70256599966322e-05,
      "loss": 0.1355,
      "step": 510
    },
    {
      "epoch": 0.745920745920746,
      "grad_norm": 0.26381577703337905,
      "learning_rate": 1.5367944123583882e-05,
      "loss": 0.1349,
      "step": 520
    },
    {
      "epoch": 0.7602653756499911,
      "grad_norm": 0.2573409032083678,
      "learning_rate": 1.378037278828212e-05,
      "loss": 0.1321,
      "step": 530
    },
    {
      "epoch": 0.7746100053792362,
      "grad_norm": 0.2703336954231402,
      "learning_rate": 1.2266161493088463e-05,
      "loss": 0.1317,
      "step": 540
    },
    {
      "epoch": 0.7889546351084813,
      "grad_norm": 0.25704442767816826,
      "learning_rate": 1.0828377155304331e-05,
      "loss": 0.1287,
      "step": 550
    },
    {
      "epoch": 0.8032992648377264,
      "grad_norm": 0.2540832845252926,
      "learning_rate": 9.469931895368461e-06,
      "loss": 0.1265,
      "step": 560
    },
    {
      "epoch": 0.8176438945669715,
      "grad_norm": 0.25355857325017345,
      "learning_rate": 8.193577138583241e-06,
      "loss": 0.1241,
      "step": 570
    },
    {
      "epoch": 0.8319885242962166,
      "grad_norm": 0.256496187113757,
      "learning_rate": 7.0018980423166016e-06,
      "loss": 0.1241,
      "step": 580
    },
    {
      "epoch": 0.8463331540254617,
      "grad_norm": 0.25854825665201925,
      "learning_rate": 5.897308259966672e-06,
      "loss": 0.1247,
      "step": 590
    },
    {
      "epoch": 0.8606777837547068,
      "grad_norm": 0.2538181796857953,
      "learning_rate": 4.882045052294371e-06,
      "loss": 0.1227,
      "step": 600
    },
    {
      "epoch": 0.8750224134839519,
      "grad_norm": 0.24626447646186575,
      "learning_rate": 3.958164756025618e-06,
      "loss": 0.1208,
      "step": 610
    },
    {
      "epoch": 0.889367043213197,
      "grad_norm": 0.2515303871680775,
      "learning_rate": 3.1275386189011433e-06,
      "loss": 0.121,
      "step": 620
    },
    {
      "epoch": 0.9037116729424421,
      "grad_norm": 0.24642826227075423,
      "learning_rate": 2.391849009609559e-06,
      "loss": 0.1214,
      "step": 630
    },
    {
      "epoch": 0.9180563026716873,
      "grad_norm": 0.2519477684757865,
      "learning_rate": 1.7525860102803438e-06,
      "loss": 0.1214,
      "step": 640
    },
    {
      "epoch": 0.9324009324009324,
      "grad_norm": 0.24918787252594898,
      "learning_rate": 1.2110443984382936e-06,
      "loss": 0.1244,
      "step": 650
    },
    {
      "epoch": 0.9467455621301775,
      "grad_norm": 0.2513072821154767,
      "learning_rate": 7.68321024532287e-07,
      "loss": 0.1212,
      "step": 660
    },
    {
      "epoch": 0.9610901918594227,
      "grad_norm": 0.25470600465243254,
      "learning_rate": 4.2531259034989667e-07,
      "loss": 0.1224,
      "step": 670
    },
    {
      "epoch": 0.9754348215886678,
      "grad_norm": 0.24818225431416227,
      "learning_rate": 1.827138328176603e-07,
      "loss": 0.1226,
      "step": 680
    },
    {
      "epoch": 0.9897794513179129,
      "grad_norm": 0.24705387852393956,
      "learning_rate": 4.1016116865394417e-08,
      "loss": 0.1215,
      "step": 690
    },
    {
      "epoch": 1.0,
      "step": 698,
      "total_flos": 473362986434560.0,
      "train_loss": 0.18736316028843636,
      "train_runtime": 57518.7952,
      "train_samples_per_second": 0.776,
      "train_steps_per_second": 0.012
    }
  ],
  "logging_steps": 10,
  "max_steps": 698,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 473362986434560.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}