Instructions to use kiolPL/naturavision with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use kiolPL/naturavision with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3.5-4B")
model = PeftModel.from_pretrained(base_model, "kiolPL/naturavision")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": 1359, | |
| "best_metric": 0.05789753, | |
| "best_model_checkpoint": "/home/kiol/runs-v2-full-clean-r1/qwen35-qlora-forest/v0-20260424-170332/checkpoint-1359", | |
| "epoch": 3.0, | |
| "eval_steps": 200, | |
| "global_step": 1359, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.002210708117443869, | |
| "grad_norm": 2.3902769088745117, | |
| "learning_rate": 2.173913043478261e-06, | |
| "loss": 0.5411382913589478, | |
| "step": 1, | |
| "token_acc": 0.8289156626506025 | |
| }, | |
| { | |
| "epoch": 0.011053540587219343, | |
| "grad_norm": 2.6440868377685547, | |
| "learning_rate": 1.0869565217391305e-05, | |
| "loss": 0.5509852766990662, | |
| "step": 5, | |
| "token_acc": 0.8427947598253275 | |
| }, | |
| { | |
| "epoch": 0.022107081174438686, | |
| "grad_norm": 2.8517158031463623, | |
| "learning_rate": 2.173913043478261e-05, | |
| "loss": 0.5423528671264648, | |
| "step": 10, | |
| "token_acc": 0.8413485374318295 | |
| }, | |
| { | |
| "epoch": 0.03316062176165803, | |
| "grad_norm": 3.078794479370117, | |
| "learning_rate": 3.260869565217392e-05, | |
| "loss": 0.4979440689086914, | |
| "step": 15, | |
| "token_acc": 0.8463063956370848 | |
| }, | |
| { | |
| "epoch": 0.04421416234887737, | |
| "grad_norm": 3.6568212509155273, | |
| "learning_rate": 4.347826086956522e-05, | |
| "loss": 0.4140318393707275, | |
| "step": 20, | |
| "token_acc": 0.8631159780985564 | |
| }, | |
| { | |
| "epoch": 0.055267702936096716, | |
| "grad_norm": 2.4268248081207275, | |
| "learning_rate": 4.9997331144187255e-05, | |
| "loss": 0.292206335067749, | |
| "step": 25, | |
| "token_acc": 0.9111776447105788 | |
| }, | |
| { | |
| "epoch": 0.06632124352331606, | |
| "grad_norm": 3.583557605743408, | |
| "learning_rate": 4.996731305997416e-05, | |
| "loss": 0.2570572137832642, | |
| "step": 30, | |
| "token_acc": 0.9214214214214215 | |
| }, | |
| { | |
| "epoch": 0.0773747841105354, | |
| "grad_norm": 3.932553768157959, | |
| "learning_rate": 4.990398100856367e-05, | |
| "loss": 0.20562427043914794, | |
| "step": 35, | |
| "token_acc": 0.9317507418397626 | |
| }, | |
| { | |
| "epoch": 0.08842832469775475, | |
| "grad_norm": 2.240527391433716, | |
| "learning_rate": 4.980741949411839e-05, | |
| "loss": 0.21206333637237548, | |
| "step": 40, | |
| "token_acc": 0.932 | |
| }, | |
| { | |
| "epoch": 0.09948186528497409, | |
| "grad_norm": 2.624908447265625, | |
| "learning_rate": 4.967775735898179e-05, | |
| "loss": 0.21093401908874512, | |
| "step": 45, | |
| "token_acc": 0.9330024813895782 | |
| }, | |
| { | |
| "epoch": 0.11053540587219343, | |
| "grad_norm": 1.4830845594406128, | |
| "learning_rate": 4.9515167611763434e-05, | |
| "loss": 0.1953817367553711, | |
| "step": 50, | |
| "token_acc": 0.9338308457711443 | |
| }, | |
| { | |
| "epoch": 0.12158894645941278, | |
| "grad_norm": 2.913989305496216, | |
| "learning_rate": 4.931986719649299e-05, | |
| "loss": 0.16938740015029907, | |
| "step": 55, | |
| "token_acc": 0.9406234537357744 | |
| }, | |
| { | |
| "epoch": 0.13264248704663212, | |
| "grad_norm": 2.571464776992798, | |
| "learning_rate": 4.909211670315114e-05, | |
| "loss": 0.18647342920303345, | |
| "step": 60, | |
| "token_acc": 0.9361914257228315 | |
| }, | |
| { | |
| "epoch": 0.14369602763385148, | |
| "grad_norm": 2.3652737140655518, | |
| "learning_rate": 4.8832220019963514e-05, | |
| "loss": 0.16005475521087648, | |
| "step": 65, | |
| "token_acc": 0.9426719840478565 | |
| }, | |
| { | |
| "epoch": 0.1547495682210708, | |
| "grad_norm": 2.1498990058898926, | |
| "learning_rate": 4.8540523927921616e-05, | |
| "loss": 0.17245489358901978, | |
| "step": 70, | |
| "token_acc": 0.9344672336168084 | |
| }, | |
| { | |
| "epoch": 0.16580310880829016, | |
| "grad_norm": 2.2657954692840576, | |
| "learning_rate": 4.821741763807186e-05, | |
| "loss": 0.1709640145301819, | |
| "step": 75, | |
| "token_acc": 0.9368473396320238 | |
| }, | |
| { | |
| "epoch": 0.1768566493955095, | |
| "grad_norm": 2.3392601013183594, | |
| "learning_rate": 4.786333227218995e-05, | |
| "loss": 0.16215839385986328, | |
| "step": 80, | |
| "token_acc": 0.9388667992047713 | |
| }, | |
| { | |
| "epoch": 0.18791018998272885, | |
| "grad_norm": 3.259490489959717, | |
| "learning_rate": 4.747874028753375e-05, | |
| "loss": 0.16813175678253173, | |
| "step": 85, | |
| "token_acc": 0.9411177644710579 | |
| }, | |
| { | |
| "epoch": 0.19896373056994818, | |
| "grad_norm": 2.805907726287842, | |
| "learning_rate": 4.706415484644195e-05, | |
| "loss": 0.16006848812103272, | |
| "step": 90, | |
| "token_acc": 0.944666001994018 | |
| }, | |
| { | |
| "epoch": 0.21001727115716753, | |
| "grad_norm": 2.46066951751709, | |
| "learning_rate": 4.662012913161997e-05, | |
| "loss": 0.15026205778121948, | |
| "step": 95, | |
| "token_acc": 0.9417910447761194 | |
| }, | |
| { | |
| "epoch": 0.22107081174438686, | |
| "grad_norm": 2.5938236713409424, | |
| "learning_rate": 4.6147255608026394e-05, | |
| "loss": 0.15484832525253295, | |
| "step": 100, | |
| "token_acc": 0.9400099157164105 | |
| }, | |
| { | |
| "epoch": 0.23212435233160622, | |
| "grad_norm": 2.2375521659851074, | |
| "learning_rate": 4.564616523234511e-05, | |
| "loss": 0.16213221549987794, | |
| "step": 105, | |
| "token_acc": 0.9409704852426213 | |
| }, | |
| { | |
| "epoch": 0.24317789291882555, | |
| "grad_norm": 2.779315233230591, | |
| "learning_rate": 4.511752661109768e-05, | |
| "loss": 0.15692013502120972, | |
| "step": 110, | |
| "token_acc": 0.9458519622454049 | |
| }, | |
| { | |
| "epoch": 0.2542314335060449, | |
| "grad_norm": 2.144108295440674, | |
| "learning_rate": 4.4562045108519565e-05, | |
| "loss": 0.1405424118041992, | |
| "step": 115, | |
| "token_acc": 0.9512437810945273 | |
| }, | |
| { | |
| "epoch": 0.26528497409326424, | |
| "grad_norm": 2.1729953289031982, | |
| "learning_rate": 4.398046190539025e-05, | |
| "loss": 0.13323441743850709, | |
| "step": 120, | |
| "token_acc": 0.9441953163926258 | |
| }, | |
| { | |
| "epoch": 0.2763385146804836, | |
| "grad_norm": 2.5072147846221924, | |
| "learning_rate": 4.3373553010073355e-05, | |
| "loss": 0.1478448271751404, | |
| "step": 125, | |
| "token_acc": 0.9433681073025335 | |
| }, | |
| { | |
| "epoch": 0.28739205526770295, | |
| "grad_norm": 3.7264955043792725, | |
| "learning_rate": 4.274212822308612e-05, | |
| "loss": 0.13619284629821776, | |
| "step": 130, | |
| "token_acc": 0.9491778774289985 | |
| }, | |
| { | |
| "epoch": 0.29844559585492225, | |
| "grad_norm": 3.988476276397705, | |
| "learning_rate": 4.208703005657999e-05, | |
| "loss": 0.1341521382331848, | |
| "step": 135, | |
| "token_acc": 0.9512922465208747 | |
| }, | |
| { | |
| "epoch": 0.3094991364421416, | |
| "grad_norm": 3.7429981231689453, | |
| "learning_rate": 4.140913261017382e-05, | |
| "loss": 0.13466038703918456, | |
| "step": 140, | |
| "token_acc": 0.9512074913750616 | |
| }, | |
| { | |
| "epoch": 0.32055267702936097, | |
| "grad_norm": 2.5042636394500732, | |
| "learning_rate": 4.070934040463998e-05, | |
| "loss": 0.13430129289627074, | |
| "step": 145, | |
| "token_acc": 0.9491525423728814 | |
| }, | |
| { | |
| "epoch": 0.3316062176165803, | |
| "grad_norm": 3.5047497749328613, | |
| "learning_rate": 3.998858717499931e-05, | |
| "loss": 0.1267208695411682, | |
| "step": 150, | |
| "token_acc": 0.9575636545182227 | |
| }, | |
| { | |
| "epoch": 0.3426597582037997, | |
| "grad_norm": 3.953479528427124, | |
| "learning_rate": 3.924783462463541e-05, | |
| "loss": 0.11603262424468994, | |
| "step": 155, | |
| "token_acc": 0.9582297364495276 | |
| }, | |
| { | |
| "epoch": 0.353713298791019, | |
| "grad_norm": 4.1243791580200195, | |
| "learning_rate": 3.848807114209074e-05, | |
| "loss": 0.1214677095413208, | |
| "step": 160, | |
| "token_acc": 0.9600591715976331 | |
| }, | |
| { | |
| "epoch": 0.36476683937823834, | |
| "grad_norm": 4.218201160430908, | |
| "learning_rate": 3.7710310482256526e-05, | |
| "loss": 0.12736471891403198, | |
| "step": 165, | |
| "token_acc": 0.9536390827517448 | |
| }, | |
| { | |
| "epoch": 0.3758203799654577, | |
| "grad_norm": 5.623844623565674, | |
| "learning_rate": 3.691559041371631e-05, | |
| "loss": 0.11791330575942993, | |
| "step": 170, | |
| "token_acc": 0.9556109725685785 | |
| }, | |
| { | |
| "epoch": 0.38687392055267705, | |
| "grad_norm": 2.831035852432251, | |
| "learning_rate": 3.6104971334047956e-05, | |
| "loss": 0.10760444402694702, | |
| "step": 175, | |
| "token_acc": 0.9595461272816971 | |
| }, | |
| { | |
| "epoch": 0.39792746113989635, | |
| "grad_norm": 3.888122797012329, | |
| "learning_rate": 3.527953485493168e-05, | |
| "loss": 0.11927105188369751, | |
| "step": 180, | |
| "token_acc": 0.956737941322725 | |
| }, | |
| { | |
| "epoch": 0.4089810017271157, | |
| "grad_norm": 3.259575128555298, | |
| "learning_rate": 3.444038235895212e-05, | |
| "loss": 0.12609773874282837, | |
| "step": 185, | |
| "token_acc": 0.955 | |
| }, | |
| { | |
| "epoch": 0.42003454231433507, | |
| "grad_norm": 4.910781383514404, | |
| "learning_rate": 3.358863353001987e-05, | |
| "loss": 0.12669839859008789, | |
| "step": 190, | |
| "token_acc": 0.9573412698412699 | |
| }, | |
| { | |
| "epoch": 0.4310880829015544, | |
| "grad_norm": 2.9438295364379883, | |
| "learning_rate": 3.272542485937369e-05, | |
| "loss": 0.11617603302001953, | |
| "step": 195, | |
| "token_acc": 0.958958958958959 | |
| }, | |
| { | |
| "epoch": 0.4421416234887737, | |
| "grad_norm": 5.1633195877075195, | |
| "learning_rate": 3.185190812915646e-05, | |
| "loss": 0.10707591772079468, | |
| "step": 200, | |
| "token_acc": 0.959479739869935 | |
| }, | |
| { | |
| "epoch": 0.4421416234887737, | |
| "eval_loss": 0.11586810648441315, | |
| "eval_runtime": 1992.0002, | |
| "eval_samples_per_second": 1.178, | |
| "eval_steps_per_second": 1.178, | |
| "eval_token_acc": 0.9600378980137381, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.4531951640759931, | |
| "grad_norm": 4.371393203735352, | |
| "learning_rate": 3.096924887558855e-05, | |
| "loss": 0.11527643203735352, | |
| "step": 205, | |
| "token_acc": 0.9623202776400595 | |
| }, | |
| { | |
| "epoch": 0.46424870466321244, | |
| "grad_norm": 5.050312519073486, | |
| "learning_rate": 3.007862483378906e-05, | |
| "loss": 0.12530215978622436, | |
| "step": 210, | |
| "token_acc": 0.9602583209140586 | |
| }, | |
| { | |
| "epoch": 0.4753022452504318, | |
| "grad_norm": 2.541715145111084, | |
| "learning_rate": 2.9181224366319947e-05, | |
| "loss": 0.10488080978393555, | |
| "step": 215, | |
| "token_acc": 0.9631474103585658 | |
| }, | |
| { | |
| "epoch": 0.4863557858376511, | |
| "grad_norm": 5.020773410797119, | |
| "learning_rate": 2.827824487755007e-05, | |
| "loss": 0.13031703233718872, | |
| "step": 220, | |
| "token_acc": 0.9523809523809523 | |
| }, | |
| { | |
| "epoch": 0.49740932642487046, | |
| "grad_norm": 5.226169586181641, | |
| "learning_rate": 2.7370891215954568e-05, | |
| "loss": 0.13065972328186035, | |
| "step": 225, | |
| "token_acc": 0.9522150323544052 | |
| }, | |
| { | |
| "epoch": 0.5084628670120898, | |
| "grad_norm": 3.837796211242676, | |
| "learning_rate": 2.646037406648165e-05, | |
| "loss": 0.11477760076522828, | |
| "step": 230, | |
| "token_acc": 0.9575848303393214 | |
| }, | |
| { | |
| "epoch": 0.5195164075993092, | |
| "grad_norm": 3.0600061416625977, | |
| "learning_rate": 2.5547908335131704e-05, | |
| "loss": 0.1025763750076294, | |
| "step": 235, | |
| "token_acc": 0.9629446640316206 | |
| }, | |
| { | |
| "epoch": 0.5305699481865285, | |
| "grad_norm": 4.423151016235352, | |
| "learning_rate": 2.4634711527904272e-05, | |
| "loss": 0.11243470907211303, | |
| "step": 240, | |
| "token_acc": 0.9575212393803099 | |
| }, | |
| { | |
| "epoch": 0.5416234887737479, | |
| "grad_norm": 3.2377634048461914, | |
| "learning_rate": 2.3722002126275824e-05, | |
| "loss": 0.11171900033950806, | |
| "step": 245, | |
| "token_acc": 0.9635182408795602 | |
| }, | |
| { | |
| "epoch": 0.5526770293609672, | |
| "grad_norm": 3.354611396789551, | |
| "learning_rate": 2.281099796137594e-05, | |
| "loss": 0.10156643390655518, | |
| "step": 250, | |
| "token_acc": 0.965965965965966 | |
| }, | |
| { | |
| "epoch": 0.5637305699481865, | |
| "grad_norm": 4.366595268249512, | |
| "learning_rate": 2.19029145890313e-05, | |
| "loss": 0.09984329342842102, | |
| "step": 255, | |
| "token_acc": 0.9646061814556331 | |
| }, | |
| { | |
| "epoch": 0.5747841105354059, | |
| "grad_norm": 4.832893371582031, | |
| "learning_rate": 2.0998963667845535e-05, | |
| "loss": 0.08920307159423828, | |
| "step": 260, | |
| "token_acc": 0.9660847880299251 | |
| }, | |
| { | |
| "epoch": 0.5858376511226252, | |
| "grad_norm": 10.402474403381348, | |
| "learning_rate": 2.0100351342479216e-05, | |
| "loss": 0.10913920402526855, | |
| "step": 265, | |
| "token_acc": 0.9623389494549058 | |
| }, | |
| { | |
| "epoch": 0.5968911917098445, | |
| "grad_norm": 5.610108852386475, | |
| "learning_rate": 1.9208276634287143e-05, | |
| "loss": 0.11966934204101562, | |
| "step": 270, | |
| "token_acc": 0.9542060726729716 | |
| }, | |
| { | |
| "epoch": 0.6079447322970639, | |
| "grad_norm": 5.160745143890381, | |
| "learning_rate": 1.832392984146018e-05, | |
| "loss": 0.12343072891235352, | |
| "step": 275, | |
| "token_acc": 0.9561752988047809 | |
| }, | |
| { | |
| "epoch": 0.6189982728842832, | |
| "grad_norm": 4.599337100982666, | |
| "learning_rate": 1.7448490950806552e-05, | |
| "loss": 0.09566409587860107, | |
| "step": 280, | |
| "token_acc": 0.9677579365079365 | |
| }, | |
| { | |
| "epoch": 0.6300518134715026, | |
| "grad_norm": 4.375885486602783, | |
| "learning_rate": 1.6583128063291576e-05, | |
| "loss": 0.12313523292541503, | |
| "step": 285, | |
| "token_acc": 0.9544328875681031 | |
| }, | |
| { | |
| "epoch": 0.6411053540587219, | |
| "grad_norm": 6.371300220489502, | |
| "learning_rate": 1.572899583543671e-05, | |
| "loss": 0.10101137161254883, | |
| "step": 290, | |
| "token_acc": 0.96250616674889 | |
| }, | |
| { | |
| "epoch": 0.6521588946459412, | |
| "grad_norm": 4.738733291625977, | |
| "learning_rate": 1.488723393865766e-05, | |
| "loss": 0.10386931896209717, | |
| "step": 295, | |
| "token_acc": 0.963681592039801 | |
| }, | |
| { | |
| "epoch": 0.6632124352331606, | |
| "grad_norm": 6.841185569763184, | |
| "learning_rate": 1.4058965538597033e-05, | |
| "loss": 0.1303364634513855, | |
| "step": 300, | |
| "token_acc": 0.9556772908366534 | |
| }, | |
| { | |
| "epoch": 0.67426597582038, | |
| "grad_norm": 14.118032455444336, | |
| "learning_rate": 1.3245295796480789e-05, | |
| "loss": 0.12113407850265503, | |
| "step": 305, | |
| "token_acc": 0.9616342800199302 | |
| }, | |
| { | |
| "epoch": 0.6853195164075994, | |
| "grad_norm": 3.676441192626953, | |
| "learning_rate": 1.2447310394498019e-05, | |
| "loss": 0.09884743690490723, | |
| "step": 310, | |
| "token_acc": 0.9665518937530743 | |
| }, | |
| { | |
| "epoch": 0.6963730569948187, | |
| "grad_norm": 5.252943992614746, | |
| "learning_rate": 1.1666074087171627e-05, | |
| "loss": 0.1010090470314026, | |
| "step": 315, | |
| "token_acc": 0.9651741293532339 | |
| }, | |
| { | |
| "epoch": 0.707426597582038, | |
| "grad_norm": 4.592480182647705, | |
| "learning_rate": 1.0902629280652931e-05, | |
| "loss": 0.10196793079376221, | |
| "step": 320, | |
| "token_acc": 0.9611166500498505 | |
| }, | |
| { | |
| "epoch": 0.7184801381692574, | |
| "grad_norm": 6.885665416717529, | |
| "learning_rate": 1.0157994641835736e-05, | |
| "loss": 0.11955760717391968, | |
| "step": 325, | |
| "token_acc": 0.95773247140726 | |
| }, | |
| { | |
| "epoch": 0.7295336787564767, | |
| "grad_norm": 5.868223667144775, | |
| "learning_rate": 9.433163739145773e-06, | |
| "loss": 0.09359505772590637, | |
| "step": 330, | |
| "token_acc": 0.9641434262948207 | |
| }, | |
| { | |
| "epoch": 0.740587219343696, | |
| "grad_norm": 6.838636875152588, | |
| "learning_rate": 8.729103716819112e-06, | |
| "loss": 0.09510601162910462, | |
| "step": 335, | |
| "token_acc": 0.9697870232788509 | |
| }, | |
| { | |
| "epoch": 0.7516407599309154, | |
| "grad_norm": 5.597217559814453, | |
| "learning_rate": 8.046754004438429e-06, | |
| "loss": 0.0947553813457489, | |
| "step": 340, | |
| "token_acc": 0.9647992067426872 | |
| }, | |
| { | |
| "epoch": 0.7626943005181347, | |
| "grad_norm": 4.449385643005371, | |
| "learning_rate": 7.387025063449082e-06, | |
| "loss": 0.11492215394973755, | |
| "step": 345, | |
| "token_acc": 0.9548834903321765 | |
| }, | |
| { | |
| "epoch": 0.7737478411053541, | |
| "grad_norm": 5.535495758056641, | |
| "learning_rate": 6.750797172327442e-06, | |
| "loss": 0.10709909200668336, | |
| "step": 350, | |
| "token_acc": 0.9646590343454455 | |
| }, | |
| { | |
| "epoch": 0.7848013816925734, | |
| "grad_norm": 3.9693377017974854, | |
| "learning_rate": 6.138919252022435e-06, | |
| "loss": 0.09377566576004029, | |
| "step": 355, | |
| "token_acc": 0.963220675944334 | |
| }, | |
| { | |
| "epoch": 0.7958549222797927, | |
| "grad_norm": 4.245842456817627, | |
| "learning_rate": 5.5522077332375436e-06, | |
| "loss": 0.11761529445648193, | |
| "step": 360, | |
| "token_acc": 0.9603371343579573 | |
| }, | |
| { | |
| "epoch": 0.8069084628670121, | |
| "grad_norm": 5.86805534362793, | |
| "learning_rate": 4.99144546706469e-06, | |
| "loss": 0.11018631458282471, | |
| "step": 365, | |
| "token_acc": 0.9619000494804553 | |
| }, | |
| { | |
| "epoch": 0.8179620034542314, | |
| "grad_norm": 3.946427583694458, | |
| "learning_rate": 4.457380680423434e-06, | |
| "loss": 0.09712615013122558, | |
| "step": 370, | |
| "token_acc": 0.9691695673794132 | |
| }, | |
| { | |
| "epoch": 0.8290155440414507, | |
| "grad_norm": 3.8146259784698486, | |
| "learning_rate": 3.950725977699396e-06, | |
| "loss": 0.10309855937957764, | |
| "step": 375, | |
| "token_acc": 0.9674837418709354 | |
| }, | |
| { | |
| "epoch": 0.8400690846286701, | |
| "grad_norm": 6.905686855316162, | |
| "learning_rate": 3.4721573899138743e-06, | |
| "loss": 0.12204140424728394, | |
| "step": 380, | |
| "token_acc": 0.9587064676616915 | |
| }, | |
| { | |
| "epoch": 0.8511226252158894, | |
| "grad_norm": 5.100115776062012, | |
| "learning_rate": 3.0223134726934472e-06, | |
| "loss": 0.10104206800460816, | |
| "step": 385, | |
| "token_acc": 0.9645885286783042 | |
| }, | |
| { | |
| "epoch": 0.8621761658031089, | |
| "grad_norm": 6.091329574584961, | |
| "learning_rate": 2.6017944542431393e-06, | |
| "loss": 0.09742544889450074, | |
| "step": 390, | |
| "token_acc": 0.9626307922272048 | |
| }, | |
| { | |
| "epoch": 0.8732297063903282, | |
| "grad_norm": 5.460122585296631, | |
| "learning_rate": 2.2111614344599683e-06, | |
| "loss": 0.10580202341079711, | |
| "step": 395, | |
| "token_acc": 0.9581673306772909 | |
| }, | |
| { | |
| "epoch": 0.8842832469775475, | |
| "grad_norm": 3.7578561305999756, | |
| "learning_rate": 1.8509356362554963e-06, | |
| "loss": 0.09627346396446228, | |
| "step": 400, | |
| "token_acc": 0.9677898909811695 | |
| }, | |
| { | |
| "epoch": 0.8842832469775475, | |
| "eval_loss": 0.10459936410188675, | |
| "eval_runtime": 2136.0319, | |
| "eval_samples_per_second": 1.099, | |
| "eval_steps_per_second": 1.099, | |
| "eval_token_acc": 0.9643014245592664, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8953367875647669, | |
| "grad_norm": 4.357903480529785, | |
| "learning_rate": 1.5215977100864392e-06, | |
| "loss": 0.11347759962081909, | |
| "step": 405, | |
| "token_acc": 0.9596814335490294 | |
| }, | |
| { | |
| "epoch": 0.9063903281519862, | |
| "grad_norm": 2.9337828159332275, | |
| "learning_rate": 1.2235870926211619e-06, | |
| "loss": 0.11455904245376587, | |
| "step": 410, | |
| "token_acc": 0.9613095238095238 | |
| }, | |
| { | |
| "epoch": 0.9174438687392056, | |
| "grad_norm": 2.6067817211151123, | |
| "learning_rate": 9.573014203979242e-07, | |
| "loss": 0.0897371768951416, | |
| "step": 415, | |
| "token_acc": 0.9702970297029703 | |
| }, | |
| { | |
| "epoch": 0.9284974093264249, | |
| "grad_norm": 4.809932708740234, | |
| "learning_rate": 7.230959992571368e-07, | |
| "loss": 0.10621033906936646, | |
| "step": 420, | |
| "token_acc": 0.9666001994017946 | |
| }, | |
| { | |
| "epoch": 0.9395509499136442, | |
| "grad_norm": 6.890130996704102, | |
| "learning_rate": 5.212833302556258e-07, | |
| "loss": 0.10602649450302123, | |
| "step": 425, | |
| "token_acc": 0.9638076351016361 | |
| }, | |
| { | |
| "epoch": 0.9506044905008636, | |
| "grad_norm": 4.578585147857666, | |
| "learning_rate": 3.521326926954532e-07, | |
| "loss": 0.08783534765243531, | |
| "step": 430, | |
| "token_acc": 0.9670822942643391 | |
| }, | |
| { | |
| "epoch": 0.9616580310880829, | |
| "grad_norm": 4.462143898010254, | |
| "learning_rate": 2.158697848236607e-07, | |
| "loss": 0.08878173232078553, | |
| "step": 435, | |
| "token_acc": 0.9683950617283951 | |
| }, | |
| { | |
| "epoch": 0.9727115716753022, | |
| "grad_norm": 3.5091371536254883, | |
| "learning_rate": 1.1267642268238121e-07, | |
| "loss": 0.11377729177474975, | |
| "step": 440, | |
| "token_acc": 0.9592647789369101 | |
| }, | |
| { | |
| "epoch": 0.9837651122625216, | |
| "grad_norm": 5.187692165374756, | |
| "learning_rate": 4.26902975110749e-08, | |
| "loss": 0.08824495673179626, | |
| "step": 445, | |
| "token_acc": 0.9681750372948782 | |
| }, | |
| { | |
| "epoch": 0.9948186528497409, | |
| "grad_norm": 4.813872337341309, | |
| "learning_rate": 6.004792024680295e-09, | |
| "loss": 0.12380951642990112, | |
| "step": 450, | |
| "token_acc": 0.9557213930348258 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.10443862527608871, | |
| "eval_runtime": 2140.8222, | |
| "eval_samples_per_second": 1.096, | |
| "eval_steps_per_second": 1.096, | |
| "eval_token_acc": 0.9642337495347342, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.0044214162348877, | |
| "grad_norm": 2.683554172515869, | |
| "learning_rate": 2.9411764705882355e-06, | |
| "loss": 0.08228109031915665, | |
| "step": 455, | |
| "token_acc": 0.9726708074534162 | |
| }, | |
| { | |
| "epoch": 1.015474956822107, | |
| "grad_norm": 3.1228041648864746, | |
| "learning_rate": 1.0294117647058824e-05, | |
| "loss": 0.08381627202033996, | |
| "step": 460, | |
| "token_acc": 0.9706905116741182 | |
| }, | |
| { | |
| "epoch": 1.0265284974093265, | |
| "grad_norm": 4.306940078735352, | |
| "learning_rate": 1.7647058823529414e-05, | |
| "loss": 0.09802506566047668, | |
| "step": 465, | |
| "token_acc": 0.9675810473815462 | |
| }, | |
| { | |
| "epoch": 1.0375820379965457, | |
| "grad_norm": 3.2758865356445312, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.08836065530776978, | |
| "step": 470, | |
| "token_acc": 0.9721531576330183 | |
| }, | |
| { | |
| "epoch": 1.048635578583765, | |
| "grad_norm": 5.335510730743408, | |
| "learning_rate": 3.235294117647059e-05, | |
| "loss": 0.09924425482749939, | |
| "step": 475, | |
| "token_acc": 0.963220675944334 | |
| }, | |
| { | |
| "epoch": 1.0596891191709845, | |
| "grad_norm": 6.677077770233154, | |
| "learning_rate": 3.970588235294117e-05, | |
| "loss": 0.09869781732559205, | |
| "step": 480, | |
| "token_acc": 0.9626307922272048 | |
| }, | |
| { | |
| "epoch": 1.0707426597582037, | |
| "grad_norm": 7.235245227813721, | |
| "learning_rate": 4.705882352941177e-05, | |
| "loss": 0.09256232976913452, | |
| "step": 485, | |
| "token_acc": 0.967869500741473 | |
| }, | |
| { | |
| "epoch": 1.0817962003454231, | |
| "grad_norm": 2.617497682571411, | |
| "learning_rate": 5.441176470588235e-05, | |
| "loss": 0.11165302991867065, | |
| "step": 490, | |
| "token_acc": 0.9609804902451226 | |
| }, | |
| { | |
| "epoch": 1.0928497409326425, | |
| "grad_norm": 4.969885349273682, | |
| "learning_rate": 6.176470588235295e-05, | |
| "loss": 0.10181330442428589, | |
| "step": 495, | |
| "token_acc": 0.9626865671641791 | |
| }, | |
| { | |
| "epoch": 1.103903281519862, | |
| "grad_norm": 3.402235746383667, | |
| "learning_rate": 6.911764705882354e-05, | |
| "loss": 0.09162335991859435, | |
| "step": 500, | |
| "token_acc": 0.9699062654168722 | |
| }, | |
| { | |
| "epoch": 1.1149568221070811, | |
| "grad_norm": 3.815495729446411, | |
| "learning_rate": 7.647058823529411e-05, | |
| "loss": 0.11382390260696411, | |
| "step": 505, | |
| "token_acc": 0.954228855721393 | |
| }, | |
| { | |
| "epoch": 1.1260103626943005, | |
| "grad_norm": 4.899413108825684, | |
| "learning_rate": 8.382352941176471e-05, | |
| "loss": 0.1054335355758667, | |
| "step": 510, | |
| "token_acc": 0.9592850049652433 | |
| }, | |
| { | |
| "epoch": 1.1370639032815197, | |
| "grad_norm": 4.382949352264404, | |
| "learning_rate": 9.11764705882353e-05, | |
| "loss": 0.0767556071281433, | |
| "step": 515, | |
| "token_acc": 0.9732540861812778 | |
| }, | |
| { | |
| "epoch": 1.1481174438687392, | |
| "grad_norm": 4.010786533355713, | |
| "learning_rate": 9.852941176470589e-05, | |
| "loss": 0.0905315101146698, | |
| "step": 520, | |
| "token_acc": 0.9681274900398407 | |
| }, | |
| { | |
| "epoch": 1.1591709844559586, | |
| "grad_norm": 8.248323440551758, | |
| "learning_rate": 9.99976313340166e-05, | |
| "loss": 0.14047706127166748, | |
| "step": 525, | |
| "token_acc": 0.9527127924340468 | |
| }, | |
| { | |
| "epoch": 1.170224525043178, | |
| "grad_norm": 6.752266883850098, | |
| "learning_rate": 9.998800901308916e-05, | |
| "loss": 0.1028751015663147, | |
| "step": 530, | |
| "token_acc": 0.9620947630922694 | |
| }, | |
| { | |
| "epoch": 1.1812780656303972, | |
| "grad_norm": 4.091604709625244, | |
| "learning_rate": 9.997098641899562e-05, | |
| "loss": 0.1167901635169983, | |
| "step": 535, | |
| "token_acc": 0.9626679940268791 | |
| }, | |
| { | |
| "epoch": 1.1923316062176166, | |
| "grad_norm": 3.13606858253479, | |
| "learning_rate": 9.994656607177722e-05, | |
| "loss": 0.11653541326522827, | |
| "step": 540, | |
| "token_acc": 0.955050505050505 | |
| }, | |
| { | |
| "epoch": 1.203385146804836, | |
| "grad_norm": 4.286221027374268, | |
| "learning_rate": 9.991475158664578e-05, | |
| "loss": 0.09516905546188355, | |
| "step": 545, | |
| "token_acc": 0.968031968031968 | |
| }, | |
| { | |
| "epoch": 1.2144386873920552, | |
| "grad_norm": 5.616212368011475, | |
| "learning_rate": 9.987554767344845e-05, | |
| "loss": 0.12145495414733887, | |
| "step": 550, | |
| "token_acc": 0.9621890547263682 | |
| }, | |
| { | |
| "epoch": 1.2254922279792746, | |
| "grad_norm": 5.72205114364624, | |
| "learning_rate": 9.982896013597038e-05, | |
| "loss": 0.12007032632827759, | |
| "step": 555, | |
| "token_acc": 0.9582089552238806 | |
| }, | |
| { | |
| "epoch": 1.236545768566494, | |
| "grad_norm": 3.8363678455352783, | |
| "learning_rate": 9.977499587107569e-05, | |
| "loss": 0.08600590825080871, | |
| "step": 560, | |
| "token_acc": 0.967196819085487 | |
| }, | |
| { | |
| "epoch": 1.2475993091537134, | |
| "grad_norm": 2.206713914871216, | |
| "learning_rate": 9.971366286768629e-05, | |
| "loss": 0.11412311792373657, | |
| "step": 565, | |
| "token_acc": 0.961824491819534 | |
| }, | |
| { | |
| "epoch": 1.2586528497409326, | |
| "grad_norm": 2.48056697845459, | |
| "learning_rate": 9.964497020559926e-05, | |
| "loss": 0.09548119902610779, | |
| "step": 570, | |
| "token_acc": 0.968222442899702 | |
| }, | |
| { | |
| "epoch": 1.269706390328152, | |
| "grad_norm": 4.197746276855469, | |
| "learning_rate": 9.956892805414272e-05, | |
| "loss": 0.11575849056243896, | |
| "step": 575, | |
| "token_acc": 0.9597215315763302 | |
| }, | |
| { | |
| "epoch": 1.2807599309153712, | |
| "grad_norm": 3.9247732162475586, | |
| "learning_rate": 9.948554767067025e-05, | |
| "loss": 0.09247745275497436, | |
| "step": 580, | |
| "token_acc": 0.9692001987083955 | |
| }, | |
| { | |
| "epoch": 1.2918134715025906, | |
| "grad_norm": 3.7010436058044434, | |
| "learning_rate": 9.93948413988944e-05, | |
| "loss": 0.11627188920974732, | |
| "step": 585, | |
| "token_acc": 0.9602780536246276 | |
| }, | |
| { | |
| "epoch": 1.30286701208981, | |
| "grad_norm": 6.0411858558654785, | |
| "learning_rate": 9.92968226670593e-05, | |
| "loss": 0.09203023314476014, | |
| "step": 590, | |
| "token_acc": 0.9705882352941176 | |
| }, | |
| { | |
| "epoch": 1.3139205526770295, | |
| "grad_norm": 4.776832103729248, | |
| "learning_rate": 9.919150598595276e-05, | |
| "loss": 0.07992117404937744, | |
| "step": 595, | |
| "token_acc": 0.9711155378486056 | |
| }, | |
| { | |
| "epoch": 1.3249740932642486, | |
| "grad_norm": 2.1442465782165527, | |
| "learning_rate": 9.907890694675803e-05, | |
| "loss": 0.08411768078804016, | |
| "step": 600, | |
| "token_acc": 0.971301335972291 | |
| }, | |
| { | |
| "epoch": 1.3249740932642486, | |
| "eval_loss": 0.10072976350784302, | |
| "eval_runtime": 2790.9554, | |
| "eval_samples_per_second": 0.841, | |
| "eval_steps_per_second": 0.841, | |
| "eval_token_acc": 0.9663655128074984, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.336027633851468, | |
| "grad_norm": 3.444389820098877, | |
| "learning_rate": 9.89590422187457e-05, | |
| "loss": 0.0943886399269104, | |
| "step": 605, | |
| "token_acc": 0.969261279127417 | |
| }, | |
| { | |
| "epoch": 1.3470811744386875, | |
| "grad_norm": 3.3243448734283447, | |
| "learning_rate": 9.883192954680593e-05, | |
| "loss": 0.07429519295692444, | |
| "step": 610, | |
| "token_acc": 0.9723046488625123 | |
| }, | |
| { | |
| "epoch": 1.3581347150259067, | |
| "grad_norm": 3.897686243057251, | |
| "learning_rate": 9.869758774882154e-05, | |
| "loss": 0.10087257623672485, | |
| "step": 615, | |
| "token_acc": 0.9645 | |
| }, | |
| { | |
| "epoch": 1.369188255613126, | |
| "grad_norm": 3.0886733531951904, | |
| "learning_rate": 9.855603671288215e-05, | |
| "loss": 0.0957147240638733, | |
| "step": 620, | |
| "token_acc": 0.9651394422310757 | |
| }, | |
| { | |
| "epoch": 1.3802417962003455, | |
| "grad_norm": 5.4413604736328125, | |
| "learning_rate": 9.840729739433992e-05, | |
| "loss": 0.0882586419582367, | |
| "step": 625, | |
| "token_acc": 0.9705441837244134 | |
| }, | |
| { | |
| "epoch": 1.391295336787565, | |
| "grad_norm": 3.051844358444214, | |
| "learning_rate": 9.82513918127073e-05, | |
| "loss": 0.09511439800262451, | |
| "step": 630, | |
| "token_acc": 0.9663532904502722 | |
| }, | |
| { | |
| "epoch": 1.402348877374784, | |
| "grad_norm": 2.9811363220214844, | |
| "learning_rate": 9.808834304839729e-05, | |
| "loss": 0.10007621049880981, | |
| "step": 635, | |
| "token_acc": 0.9672943508424182 | |
| }, | |
| { | |
| "epoch": 1.4134024179620035, | |
| "grad_norm": 3.030879497528076, | |
| "learning_rate": 9.791817523930653e-05, | |
| "loss": 0.08152814507484436, | |
| "step": 640, | |
| "token_acc": 0.9720837487537388 | |
| }, | |
| { | |
| "epoch": 1.4244559585492227, | |
| "grad_norm": 2.544180154800415, | |
| "learning_rate": 9.774091357724196e-05, | |
| "loss": 0.07389838099479676, | |
| "step": 645, | |
| "token_acc": 0.9755854509217738 | |
| }, | |
| { | |
| "epoch": 1.435509499136442, | |
| "grad_norm": 5.591005802154541, | |
| "learning_rate": 9.755658430419132e-05, | |
| "loss": 0.09485760927200318, | |
| "step": 650, | |
| "token_acc": 0.9637357178340785 | |
| }, | |
| { | |
| "epoch": 1.4465630397236615, | |
| "grad_norm": 4.964202880859375, | |
| "learning_rate": 9.736521470843838e-05, | |
| "loss": 0.08160382509231567, | |
| "step": 655, | |
| "token_acc": 0.9705441837244134 | |
| }, | |
| { | |
| "epoch": 1.457616580310881, | |
| "grad_norm": 4.984673500061035, | |
| "learning_rate": 9.7166833120523e-05, | |
| "loss": 0.08802146315574647, | |
| "step": 660, | |
| "token_acc": 0.9641076769690927 | |
| }, | |
| { | |
| "epoch": 1.4686701208981001, | |
| "grad_norm": 2.584303140640259, | |
| "learning_rate": 9.696146890904722e-05, | |
| "loss": 0.09760611653327941, | |
| "step": 665, | |
| "token_acc": 0.9701343952215032 | |
| }, | |
| { | |
| "epoch": 1.4797236614853195, | |
| "grad_norm": 2.9796664714813232, | |
| "learning_rate": 9.674915247632739e-05, | |
| "loss": 0.09098277688026428, | |
| "step": 670, | |
| "token_acc": 0.9660678642714571 | |
| }, | |
| { | |
| "epoch": 1.490777202072539, | |
| "grad_norm": 10.275652885437012, | |
| "learning_rate": 9.652991525389337e-05, | |
| "loss": 0.08257744312286378, | |
| "step": 675, | |
| "token_acc": 0.9722084367245658 | |
| }, | |
| { | |
| "epoch": 1.5018307426597581, | |
| "grad_norm": 4.8155131340026855, | |
| "learning_rate": 9.630378969783547e-05, | |
| "loss": 0.07055800557136535, | |
| "step": 680, | |
| "token_acc": 0.974090682610862 | |
| }, | |
| { | |
| "epoch": 1.5128842832469775, | |
| "grad_norm": 3.6014211177825928, | |
| "learning_rate": 9.607080928399958e-05, | |
| "loss": 0.09370391964912414, | |
| "step": 685, | |
| "token_acc": 0.9658584858980702 | |
| }, | |
| { | |
| "epoch": 1.523937823834197, | |
| "grad_norm": 2.675119400024414, | |
| "learning_rate": 9.58310085030313e-05, | |
| "loss": 0.09670426845550537, | |
| "step": 690, | |
| "token_acc": 0.9641969169567379 | |
| }, | |
| { | |
| "epoch": 1.5349913644214164, | |
| "grad_norm": 3.324349880218506, | |
| "learning_rate": 9.558442285527e-05, | |
| "loss": 0.08441510200500488, | |
| "step": 695, | |
| "token_acc": 0.9696819085487077 | |
| }, | |
| { | |
| "epoch": 1.5460449050086356, | |
| "grad_norm": 3.4498190879821777, | |
| "learning_rate": 9.533108884549333e-05, | |
| "loss": 0.06717776656150817, | |
| "step": 700, | |
| "token_acc": 0.975597609561753 | |
| }, | |
| { | |
| "epoch": 1.557098445595855, | |
| "grad_norm": 2.959386110305786, | |
| "learning_rate": 9.50710439775129e-05, | |
| "loss": 0.08139981031417846, | |
| "step": 705, | |
| "token_acc": 0.9747023809523809 | |
| }, | |
| { | |
| "epoch": 1.5681519861830742, | |
| "grad_norm": 2.3681604862213135, | |
| "learning_rate": 9.480432674862232e-05, | |
| "loss": 0.07764554619789124, | |
| "step": 710, | |
| "token_acc": 0.9675810473815462 | |
| }, | |
| { | |
| "epoch": 1.5792055267702936, | |
| "grad_norm": 2.840590715408325, | |
| "learning_rate": 9.453097664389789e-05, | |
| "loss": 0.08232161402702332, | |
| "step": 715, | |
| "token_acc": 0.9701789264413518 | |
| }, | |
| { | |
| "epoch": 1.590259067357513, | |
| "grad_norm": 2.655217409133911, | |
| "learning_rate": 9.425103413035335e-05, | |
| "loss": 0.0968110740184784, | |
| "step": 720, | |
| "token_acc": 0.9689534301452178 | |
| }, | |
| { | |
| "epoch": 1.6013126079447324, | |
| "grad_norm": 2.4610400199890137, | |
| "learning_rate": 9.396454065094891e-05, | |
| "loss": 0.09739276766777039, | |
| "step": 725, | |
| "token_acc": 0.964729259811227 | |
| }, | |
| { | |
| "epoch": 1.6123661485319516, | |
| "grad_norm": 4.132114887237549, | |
| "learning_rate": 9.367153861845617e-05, | |
| "loss": 0.08105069994926453, | |
| "step": 730, | |
| "token_acc": 0.9716981132075472 | |
| }, | |
| { | |
| "epoch": 1.623419689119171, | |
| "grad_norm": 3.3622491359710693, | |
| "learning_rate": 9.337207140917919e-05, | |
| "loss": 0.09018557667732238, | |
| "step": 735, | |
| "token_acc": 0.9642324888226528 | |
| }, | |
| { | |
| "epoch": 1.6344732297063902, | |
| "grad_norm": 2.985978364944458, | |
| "learning_rate": 9.306618335653307e-05, | |
| "loss": 0.08649082779884339, | |
| "step": 740, | |
| "token_acc": 0.9683011391778108 | |
| }, | |
| { | |
| "epoch": 1.6455267702936096, | |
| "grad_norm": 3.509003162384033, | |
| "learning_rate": 9.275391974448076e-05, | |
| "loss": 0.0744367241859436, | |
| "step": 745, | |
| "token_acc": 0.9770687936191426 | |
| }, | |
| { | |
| "epoch": 1.656580310880829, | |
| "grad_norm": 2.937560796737671, | |
| "learning_rate": 9.243532680082915e-05, | |
| "loss": 0.07034647464752197, | |
| "step": 750, | |
| "token_acc": 0.9767211490837048 | |
| }, | |
| { | |
| "epoch": 1.6676338514680484, | |
| "grad_norm": 3.8314759731292725, | |
| "learning_rate": 9.211045169038554e-05, | |
| "loss": 0.07900274395942689, | |
| "step": 755, | |
| "token_acc": 0.9711729622266402 | |
| }, | |
| { | |
| "epoch": 1.6786873920552678, | |
| "grad_norm": 5.002687931060791, | |
| "learning_rate": 9.17793425079753e-05, | |
| "loss": 0.07675303220748901, | |
| "step": 760, | |
| "token_acc": 0.9689349112426036 | |
| }, | |
| { | |
| "epoch": 1.689740932642487, | |
| "grad_norm": 1.7825733423233032, | |
| "learning_rate": 9.144204827132175e-05, | |
| "loss": 0.08085300326347351, | |
| "step": 765, | |
| "token_acc": 0.9716981132075472 | |
| }, | |
| { | |
| "epoch": 1.7007944732297062, | |
| "grad_norm": 1.8953182697296143, | |
| "learning_rate": 9.10986189137897e-05, | |
| "loss": 0.07259147167205811, | |
| "step": 770, | |
| "token_acc": 0.9760479041916168 | |
| }, | |
| { | |
| "epoch": 1.7118480138169256, | |
| "grad_norm": 3.7877821922302246, | |
| "learning_rate": 9.074910527699313e-05, | |
| "loss": 0.08823164105415345, | |
| "step": 775, | |
| "token_acc": 0.972139303482587 | |
| }, | |
| { | |
| "epoch": 1.722901554404145, | |
| "grad_norm": 3.614501714706421, | |
| "learning_rate": 9.039355910326863e-05, | |
| "loss": 0.10905979871749878, | |
| "step": 780, | |
| "token_acc": 0.9652087475149106 | |
| }, | |
| { | |
| "epoch": 1.7339550949913645, | |
| "grad_norm": 3.0243847370147705, | |
| "learning_rate": 9.00320330280154e-05, | |
| "loss": 0.07965280413627625, | |
| "step": 785, | |
| "token_acc": 0.9723593287265548 | |
| }, | |
| { | |
| "epoch": 1.7450086355785839, | |
| "grad_norm": 4.025770664215088, | |
| "learning_rate": 8.966458057190301e-05, | |
| "loss": 0.07108275294303894, | |
| "step": 790, | |
| "token_acc": 0.9760956175298805 | |
| }, | |
| { | |
| "epoch": 1.756062176165803, | |
| "grad_norm": 2.9816761016845703, | |
| "learning_rate": 8.92912561329482e-05, | |
| "loss": 0.0776334285736084, | |
| "step": 795, | |
| "token_acc": 0.9706467661691542 | |
| }, | |
| { | |
| "epoch": 1.7671157167530225, | |
| "grad_norm": 3.0242762565612793, | |
| "learning_rate": 8.891211497846171e-05, | |
| "loss": 0.07837628722190856, | |
| "step": 800, | |
| "token_acc": 0.974155069582505 | |
| }, | |
| { | |
| "epoch": 1.7671157167530225, | |
| "eval_loss": 0.08094792068004608, | |
| "eval_runtime": 2286.4128, | |
| "eval_samples_per_second": 1.026, | |
| "eval_steps_per_second": 1.026, | |
| "eval_token_acc": 0.9727608026257909, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.7781692573402417, | |
| "grad_norm": 3.771763324737549, | |
| "learning_rate": 8.852721323686648e-05, | |
| "loss": 0.08302398324012757, | |
| "step": 805, | |
| "token_acc": 0.9725411882176734 | |
| }, | |
| { | |
| "epoch": 1.789222797927461, | |
| "grad_norm": 3.6321651935577393, | |
| "learning_rate": 8.813660788938833e-05, | |
| "loss": 0.06937822699546814, | |
| "step": 810, | |
| "token_acc": 0.9772839506172839 | |
| }, | |
| { | |
| "epoch": 1.8002763385146805, | |
| "grad_norm": 6.7241644859313965, | |
| "learning_rate": 8.774035676162043e-05, | |
| "loss": 0.05159105062484741, | |
| "step": 815, | |
| "token_acc": 0.9829488465396189 | |
| }, | |
| { | |
| "epoch": 1.8113298791019, | |
| "grad_norm": 4.365586757659912, | |
| "learning_rate": 8.733851851496268e-05, | |
| "loss": 0.08399490118026734, | |
| "step": 820, | |
| "token_acc": 0.9721254355400697 | |
| }, | |
| { | |
| "epoch": 1.8223834196891193, | |
| "grad_norm": 3.0722031593322754, | |
| "learning_rate": 8.693115263793747e-05, | |
| "loss": 0.07215502858161926, | |
| "step": 825, | |
| "token_acc": 0.9740648379052369 | |
| }, | |
| { | |
| "epoch": 1.8334369602763385, | |
| "grad_norm": 4.519292831420898, | |
| "learning_rate": 8.651831943738296e-05, | |
| "loss": 0.06996339559555054, | |
| "step": 830, | |
| "token_acc": 0.9770459081836327 | |
| }, | |
| { | |
| "epoch": 1.8444905008635577, | |
| "grad_norm": 4.166755199432373, | |
| "learning_rate": 8.610008002952513e-05, | |
| "loss": 0.07142719030380248, | |
| "step": 835, | |
| "token_acc": 0.9767211490837048 | |
| }, | |
| { | |
| "epoch": 1.8555440414507771, | |
| "grad_norm": 4.686975002288818, | |
| "learning_rate": 8.567649633093016e-05, | |
| "loss": 0.06802060008049012, | |
| "step": 840, | |
| "token_acc": 0.9775 | |
| }, | |
| { | |
| "epoch": 1.8665975820379965, | |
| "grad_norm": 3.0628044605255127, | |
| "learning_rate": 8.524763104933816e-05, | |
| "loss": 0.06818159222602845, | |
| "step": 845, | |
| "token_acc": 0.973644952759821 | |
| }, | |
| { | |
| "epoch": 1.877651122625216, | |
| "grad_norm": 3.939176321029663, | |
| "learning_rate": 8.481354767437988e-05, | |
| "loss": 0.07347306013107299, | |
| "step": 850, | |
| "token_acc": 0.972568578553616 | |
| }, | |
| { | |
| "epoch": 1.8887046632124354, | |
| "grad_norm": 2.8834567070007324, | |
| "learning_rate": 8.437431046817769e-05, | |
| "loss": 0.06994418501853943, | |
| "step": 855, | |
| "token_acc": 0.971712158808933 | |
| }, | |
| { | |
| "epoch": 1.8997582037996545, | |
| "grad_norm": 3.4866409301757812, | |
| "learning_rate": 8.392998445583212e-05, | |
| "loss": 0.07565975189208984, | |
| "step": 860, | |
| "token_acc": 0.9760956175298805 | |
| }, | |
| { | |
| "epoch": 1.910811744386874, | |
| "grad_norm": 3.4372429847717285, | |
| "learning_rate": 8.348063541579545e-05, | |
| "loss": 0.07984944581985473, | |
| "step": 865, | |
| "token_acc": 0.9727452923686819 | |
| }, | |
| { | |
| "epoch": 1.9218652849740931, | |
| "grad_norm": 4.127252578735352, | |
| "learning_rate": 8.302632987013388e-05, | |
| "loss": 0.07774015665054321, | |
| "step": 870, | |
| "token_acc": 0.9744872436218109 | |
| }, | |
| { | |
| "epoch": 1.9329188255613126, | |
| "grad_norm": 3.960955858230591, | |
| "learning_rate": 8.256713507467941e-05, | |
| "loss": 0.08486457467079163, | |
| "step": 875, | |
| "token_acc": 0.9731743666169895 | |
| }, | |
| { | |
| "epoch": 1.943972366148532, | |
| "grad_norm": 3.087674617767334, | |
| "learning_rate": 8.210311900907339e-05, | |
| "loss": 0.07507517337799072, | |
| "step": 880, | |
| "token_acc": 0.9787023278850916 | |
| }, | |
| { | |
| "epoch": 1.9550259067357514, | |
| "grad_norm": 2.5197293758392334, | |
| "learning_rate": 8.163435036670261e-05, | |
| "loss": 0.08100587725639344, | |
| "step": 885, | |
| "token_acc": 0.9724724724724725 | |
| }, | |
| { | |
| "epoch": 1.9660794473229708, | |
| "grad_norm": 1.6736986637115479, | |
| "learning_rate": 8.116089854452995e-05, | |
| "loss": 0.07375568151473999, | |
| "step": 890, | |
| "token_acc": 0.9772727272727273 | |
| }, | |
| { | |
| "epoch": 1.97713298791019, | |
| "grad_norm": 3.322634220123291, | |
| "learning_rate": 8.068283363282074e-05, | |
| "loss": 0.07798144817352295, | |
| "step": 895, | |
| "token_acc": 0.9738400789733465 | |
| }, | |
| { | |
| "epoch": 1.9881865284974092, | |
| "grad_norm": 1.9556145668029785, | |
| "learning_rate": 8.020022640476654e-05, | |
| "loss": 0.06203848123550415, | |
| "step": 900, | |
| "token_acc": 0.9791666666666666 | |
| }, | |
| { | |
| "epoch": 1.9992400690846286, | |
| "grad_norm": 3.805701494216919, | |
| "learning_rate": 7.971314830600783e-05, | |
| "loss": 0.06745657324790955, | |
| "step": 905, | |
| "token_acc": 0.9751491053677932 | |
| }, | |
| { | |
| "epoch": 2.0088428324697754, | |
| "grad_norm": 2.904778003692627, | |
| "learning_rate": 7.922167144405706e-05, | |
| "loss": 0.06268702149391174, | |
| "step": 910, | |
| "token_acc": 0.9777777777777777 | |
| }, | |
| { | |
| "epoch": 2.0198963730569948, | |
| "grad_norm": 3.050584554672241, | |
| "learning_rate": 7.87258685776239e-05, | |
| "loss": 0.07978938817977906, | |
| "step": 915, | |
| "token_acc": 0.9681116093672147 | |
| }, | |
| { | |
| "epoch": 2.030949913644214, | |
| "grad_norm": 3.2201292514801025, | |
| "learning_rate": 7.822581310584388e-05, | |
| "loss": 0.07445316910743713, | |
| "step": 920, | |
| "token_acc": 0.9754509018036072 | |
| }, | |
| { | |
| "epoch": 2.0420034542314336, | |
| "grad_norm": 4.6090545654296875, | |
| "learning_rate": 7.772157905741231e-05, | |
| "loss": 0.06728174090385437, | |
| "step": 925, | |
| "token_acc": 0.977205153617443 | |
| }, | |
| { | |
| "epoch": 2.053056994818653, | |
| "grad_norm": 3.3497800827026367, | |
| "learning_rate": 7.721324107962506e-05, | |
| "loss": 0.06557589173316955, | |
| "step": 930, | |
| "token_acc": 0.9775784753363229 | |
| }, | |
| { | |
| "epoch": 2.064110535405872, | |
| "grad_norm": 2.1937358379364014, | |
| "learning_rate": 7.670087442732763e-05, | |
| "loss": 0.05688057541847229, | |
| "step": 935, | |
| "token_acc": 0.981028457314029 | |
| }, | |
| { | |
| "epoch": 2.0751640759930914, | |
| "grad_norm": 5.671027660369873, | |
| "learning_rate": 7.618455495177445e-05, | |
| "loss": 0.08629457950592041, | |
| "step": 940, | |
| "token_acc": 0.9695304695304695 | |
| }, | |
| { | |
| "epoch": 2.086217616580311, | |
| "grad_norm": 3.635037899017334, | |
| "learning_rate": 7.566435908939967e-05, | |
| "loss": 0.0566463053226471, | |
| "step": 945, | |
| "token_acc": 0.9820717131474104 | |
| }, | |
| { | |
| "epoch": 2.09727115716753, | |
| "grad_norm": 3.0799365043640137, | |
| "learning_rate": 7.514036385050147e-05, | |
| "loss": 0.06808796525001526, | |
| "step": 950, | |
| "token_acc": 0.9766052762568442 | |
| }, | |
| { | |
| "epoch": 2.1083246977547496, | |
| "grad_norm": 5.38563871383667, | |
| "learning_rate": 7.461264680784151e-05, | |
| "loss": 0.07369622588157654, | |
| "step": 955, | |
| "token_acc": 0.9737363726461843 | |
| }, | |
| { | |
| "epoch": 2.119378238341969, | |
| "grad_norm": 3.824101448059082, | |
| "learning_rate": 7.408128608516077e-05, | |
| "loss": 0.06465582847595215, | |
| "step": 960, | |
| "token_acc": 0.9786917740336968 | |
| }, | |
| { | |
| "epoch": 2.1304317789291884, | |
| "grad_norm": 3.410547971725464, | |
| "learning_rate": 7.354636034561418e-05, | |
| "loss": 0.051229971647262576, | |
| "step": 965, | |
| "token_acc": 0.9821428571428571 | |
| }, | |
| { | |
| "epoch": 2.1414853195164074, | |
| "grad_norm": 3.721679210662842, | |
| "learning_rate": 7.30079487801252e-05, | |
| "loss": 0.06965258717536926, | |
| "step": 970, | |
| "token_acc": 0.9755854509217738 | |
| }, | |
| { | |
| "epoch": 2.152538860103627, | |
| "grad_norm": 2.798208236694336, | |
| "learning_rate": 7.246613109566238e-05, | |
| "loss": 0.07870134711265564, | |
| "step": 975, | |
| "token_acc": 0.9751243781094527 | |
| }, | |
| { | |
| "epoch": 2.1635924006908462, | |
| "grad_norm": 1.8357700109481812, | |
| "learning_rate": 7.192098750343935e-05, | |
| "loss": 0.0715235412120819, | |
| "step": 980, | |
| "token_acc": 0.97675568743818 | |
| }, | |
| { | |
| "epoch": 2.1746459412780657, | |
| "grad_norm": 3.544813632965088, | |
| "learning_rate": 7.137259870704036e-05, | |
| "loss": 0.055529987812042235, | |
| "step": 985, | |
| "token_acc": 0.9841112214498511 | |
| }, | |
| { | |
| "epoch": 2.185699481865285, | |
| "grad_norm": 2.2707366943359375, | |
| "learning_rate": 7.082104589047285e-05, | |
| "loss": 0.05665128231048584, | |
| "step": 990, | |
| "token_acc": 0.9795102448775612 | |
| }, | |
| { | |
| "epoch": 2.1967530224525045, | |
| "grad_norm": 4.324965476989746, | |
| "learning_rate": 7.026641070614884e-05, | |
| "loss": 0.06112373471260071, | |
| "step": 995, | |
| "token_acc": 0.9775112443778111 | |
| }, | |
| { | |
| "epoch": 2.207806563039724, | |
| "grad_norm": 3.2108352184295654, | |
| "learning_rate": 6.970877526279702e-05, | |
| "loss": 0.061422485113143924, | |
| "step": 1000, | |
| "token_acc": 0.9781854238968766 | |
| }, | |
| { | |
| "epoch": 2.207806563039724, | |
| "eval_loss": 0.07163181900978088, | |
| "eval_runtime": 2173.3435, | |
| "eval_samples_per_second": 1.08, | |
| "eval_steps_per_second": 1.08, | |
| "eval_token_acc": 0.9747910533617569, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.218860103626943, | |
| "grad_norm": 4.137533187866211, | |
| "learning_rate": 6.914822211330742e-05, | |
| "loss": 0.06986818313598633, | |
| "step": 1005, | |
| "token_acc": 0.9751367478866235 | |
| }, | |
| { | |
| "epoch": 2.2299136442141623, | |
| "grad_norm": 1.7135125398635864, | |
| "learning_rate": 6.858483424251001e-05, | |
| "loss": 0.0670811414718628, | |
| "step": 1010, | |
| "token_acc": 0.9776341948310139 | |
| }, | |
| { | |
| "epoch": 2.2409671848013817, | |
| "grad_norm": 2.10587739944458, | |
| "learning_rate": 6.801869505488969e-05, | |
| "loss": 0.06850314140319824, | |
| "step": 1015, | |
| "token_acc": 0.9761312779711586 | |
| }, | |
| { | |
| "epoch": 2.252020725388601, | |
| "grad_norm": 3.5003225803375244, | |
| "learning_rate": 6.744988836223893e-05, | |
| "loss": 0.06779593229293823, | |
| "step": 1020, | |
| "token_acc": 0.9794692038057086 | |
| }, | |
| { | |
| "epoch": 2.2630742659758205, | |
| "grad_norm": 2.2275619506835938, | |
| "learning_rate": 6.687849837125027e-05, | |
| "loss": 0.05072577595710755, | |
| "step": 1025, | |
| "token_acc": 0.9836309523809523 | |
| }, | |
| { | |
| "epoch": 2.2741278065630395, | |
| "grad_norm": 2.6360952854156494, | |
| "learning_rate": 6.630460967105018e-05, | |
| "loss": 0.05415867567062378, | |
| "step": 1030, | |
| "token_acc": 0.9781746031746031 | |
| }, | |
| { | |
| "epoch": 2.285181347150259, | |
| "grad_norm": 1.4451478719711304, | |
| "learning_rate": 6.572830722067653e-05, | |
| "loss": 0.055239105224609376, | |
| "step": 1035, | |
| "token_acc": 0.981094527363184 | |
| }, | |
| { | |
| "epoch": 2.2962348877374783, | |
| "grad_norm": 1.9783443212509155, | |
| "learning_rate": 6.5149676336501e-05, | |
| "loss": 0.05858151912689209, | |
| "step": 1040, | |
| "token_acc": 0.9850224663005491 | |
| }, | |
| { | |
| "epoch": 2.3072884283246977, | |
| "grad_norm": 2.043297529220581, | |
| "learning_rate": 6.456880267959894e-05, | |
| "loss": 0.05577117800712585, | |
| "step": 1045, | |
| "token_acc": 0.9800697558545092 | |
| }, | |
| { | |
| "epoch": 2.318341968911917, | |
| "grad_norm": 4.235545635223389, | |
| "learning_rate": 6.39857722430679e-05, | |
| "loss": 0.05815597772598267, | |
| "step": 1050, | |
| "token_acc": 0.9801587301587301 | |
| }, | |
| { | |
| "epoch": 2.3293955094991365, | |
| "grad_norm": 5.21077823638916, | |
| "learning_rate": 6.340067133929719e-05, | |
| "loss": 0.054069459438323975, | |
| "step": 1055, | |
| "token_acc": 0.9801291604570294 | |
| }, | |
| { | |
| "epoch": 2.340449050086356, | |
| "grad_norm": 2.011702537536621, | |
| "learning_rate": 6.281358658719011e-05, | |
| "loss": 0.07169802188873291, | |
| "step": 1060, | |
| "token_acc": 0.9752107089737233 | |
| }, | |
| { | |
| "epoch": 2.351502590673575, | |
| "grad_norm": 2.3686680793762207, | |
| "learning_rate": 6.22246048993407e-05, | |
| "loss": 0.05615015029907226, | |
| "step": 1065, | |
| "token_acc": 0.9800299550673989 | |
| }, | |
| { | |
| "epoch": 2.3625561312607943, | |
| "grad_norm": 7.002840995788574, | |
| "learning_rate": 6.163381346916732e-05, | |
| "loss": 0.06114639043807983, | |
| "step": 1070, | |
| "token_acc": 0.9760239760239761 | |
| }, | |
| { | |
| "epoch": 2.3736096718480137, | |
| "grad_norm": 4.902541160583496, | |
| "learning_rate": 6.104129975800427e-05, | |
| "loss": 0.07762741446495056, | |
| "step": 1075, | |
| "token_acc": 0.9730807577268196 | |
| }, | |
| { | |
| "epoch": 2.384663212435233, | |
| "grad_norm": 4.661471843719482, | |
| "learning_rate": 6.0447151482153955e-05, | |
| "loss": 0.06509206891059875, | |
| "step": 1080, | |
| "token_acc": 0.9771144278606965 | |
| }, | |
| { | |
| "epoch": 2.3957167530224526, | |
| "grad_norm": 1.999237298965454, | |
| "learning_rate": 5.985145659990138e-05, | |
| "loss": 0.06380823254585266, | |
| "step": 1085, | |
| "token_acc": 0.9796626984126984 | |
| }, | |
| { | |
| "epoch": 2.406770293609672, | |
| "grad_norm": 2.382763385772705, | |
| "learning_rate": 5.925430329849264e-05, | |
| "loss": 0.05442737936973572, | |
| "step": 1090, | |
| "token_acc": 0.9821073558648111 | |
| }, | |
| { | |
| "epoch": 2.4178238341968914, | |
| "grad_norm": 1.8456308841705322, | |
| "learning_rate": 5.865577998107961e-05, | |
| "loss": 0.04835757613182068, | |
| "step": 1095, | |
| "token_acc": 0.9831013916500994 | |
| }, | |
| { | |
| "epoch": 2.4288773747841104, | |
| "grad_norm": 4.092327117919922, | |
| "learning_rate": 5.805597525363263e-05, | |
| "loss": 0.07175707817077637, | |
| "step": 1100, | |
| "token_acc": 0.977589641434263 | |
| }, | |
| { | |
| "epoch": 2.43993091537133, | |
| "grad_norm": 2.878070831298828, | |
| "learning_rate": 5.745497791182325e-05, | |
| "loss": 0.054905033111572264, | |
| "step": 1105, | |
| "token_acc": 0.9791459781529295 | |
| }, | |
| { | |
| "epoch": 2.450984455958549, | |
| "grad_norm": 3.2568767070770264, | |
| "learning_rate": 5.685287692787883e-05, | |
| "loss": 0.060244417190551756, | |
| "step": 1110, | |
| "token_acc": 0.9787549407114624 | |
| }, | |
| { | |
| "epoch": 2.4620379965457686, | |
| "grad_norm": 3.9391286373138428, | |
| "learning_rate": 5.6249761437410895e-05, | |
| "loss": 0.07208690047264099, | |
| "step": 1115, | |
| "token_acc": 0.9765234765234765 | |
| }, | |
| { | |
| "epoch": 2.473091537132988, | |
| "grad_norm": 1.7733020782470703, | |
| "learning_rate": 5.5645720726219584e-05, | |
| "loss": 0.05974746346473694, | |
| "step": 1120, | |
| "token_acc": 0.9806066633515664 | |
| }, | |
| { | |
| "epoch": 2.4841450777202074, | |
| "grad_norm": 4.430456161499023, | |
| "learning_rate": 5.504084421707555e-05, | |
| "loss": 0.0642861008644104, | |
| "step": 1125, | |
| "token_acc": 0.9781529294935452 | |
| }, | |
| { | |
| "epoch": 2.495198618307427, | |
| "grad_norm": 2.7552433013916016, | |
| "learning_rate": 5.443522145648181e-05, | |
| "loss": 0.06047917008399963, | |
| "step": 1130, | |
| "token_acc": 0.9830677290836654 | |
| }, | |
| { | |
| "epoch": 2.506252158894646, | |
| "grad_norm": 1.864843487739563, | |
| "learning_rate": 5.3828942101417136e-05, | |
| "loss": 0.044628658890724184, | |
| "step": 1135, | |
| "token_acc": 0.9856719367588933 | |
| }, | |
| { | |
| "epoch": 2.5173056994818652, | |
| "grad_norm": 4.15530252456665, | |
| "learning_rate": 5.322209590606323e-05, | |
| "loss": 0.0662376880645752, | |
| "step": 1140, | |
| "token_acc": 0.9765234765234765 | |
| }, | |
| { | |
| "epoch": 2.5283592400690846, | |
| "grad_norm": 3.1273319721221924, | |
| "learning_rate": 5.2614772708517324e-05, | |
| "loss": 0.06211344003677368, | |
| "step": 1145, | |
| "token_acc": 0.9807217004448838 | |
| }, | |
| { | |
| "epoch": 2.539412780656304, | |
| "grad_norm": 2.2132909297943115, | |
| "learning_rate": 5.200706241749257e-05, | |
| "loss": 0.05160966515541077, | |
| "step": 1150, | |
| "token_acc": 0.9830423940149626 | |
| }, | |
| { | |
| "epoch": 2.5504663212435235, | |
| "grad_norm": 2.4676833152770996, | |
| "learning_rate": 5.1399054999007756e-05, | |
| "loss": 0.05153646469116211, | |
| "step": 1155, | |
| "token_acc": 0.9795816733067729 | |
| }, | |
| { | |
| "epoch": 2.5615198618307424, | |
| "grad_norm": 2.5266482830047607, | |
| "learning_rate": 5.079084046306877e-05, | |
| "loss": 0.05694507360458374, | |
| "step": 1160, | |
| "token_acc": 0.9826474962816063 | |
| }, | |
| { | |
| "epoch": 2.572573402417962, | |
| "grad_norm": 3.3381104469299316, | |
| "learning_rate": 5.018250885034328e-05, | |
| "loss": 0.056800955533981325, | |
| "step": 1165, | |
| "token_acc": 0.9816377171215881 | |
| }, | |
| { | |
| "epoch": 2.5836269430051813, | |
| "grad_norm": 3.0276429653167725, | |
| "learning_rate": 4.957415021883121e-05, | |
| "loss": 0.061768895387649535, | |
| "step": 1170, | |
| "token_acc": 0.9804413239719157 | |
| }, | |
| { | |
| "epoch": 2.5946804835924007, | |
| "grad_norm": 1.4948302507400513, | |
| "learning_rate": 4.89658546305323e-05, | |
| "loss": 0.048909342288970946, | |
| "step": 1175, | |
| "token_acc": 0.9846306395637084 | |
| }, | |
| { | |
| "epoch": 2.60573402417962, | |
| "grad_norm": 2.0935652256011963, | |
| "learning_rate": 4.835771213811336e-05, | |
| "loss": 0.05250586867332459, | |
| "step": 1180, | |
| "token_acc": 0.9846534653465346 | |
| }, | |
| { | |
| "epoch": 2.6167875647668395, | |
| "grad_norm": 2.5400516986846924, | |
| "learning_rate": 4.774981277157673e-05, | |
| "loss": 0.05397605299949646, | |
| "step": 1185, | |
| "token_acc": 0.9816740960871718 | |
| }, | |
| { | |
| "epoch": 2.627841105354059, | |
| "grad_norm": 1.8448779582977295, | |
| "learning_rate": 4.714224652493212e-05, | |
| "loss": 0.0550678551197052, | |
| "step": 1190, | |
| "token_acc": 0.9800299550673989 | |
| }, | |
| { | |
| "epoch": 2.638894645941278, | |
| "grad_norm": 2.3702774047851562, | |
| "learning_rate": 4.6535103342873885e-05, | |
| "loss": 0.055988776683807376, | |
| "step": 1195, | |
| "token_acc": 0.9795511221945137 | |
| }, | |
| { | |
| "epoch": 2.6499481865284973, | |
| "grad_norm": 3.8877556324005127, | |
| "learning_rate": 4.592847310746549e-05, | |
| "loss": 0.054580336809158324, | |
| "step": 1200, | |
| "token_acc": 0.9802078179119248 | |
| }, | |
| { | |
| "epoch": 2.6499481865284973, | |
| "eval_loss": 0.06643825024366379, | |
| "eval_runtime": 2174.0597, | |
| "eval_samples_per_second": 1.08, | |
| "eval_steps_per_second": 1.08, | |
| "eval_token_acc": 0.9772273542449159, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.6610017271157167, | |
| "grad_norm": 4.951501846313477, | |
| "learning_rate": 4.5322445624833255e-05, | |
| "loss": 0.05614232420921326, | |
| "step": 1205, | |
| "token_acc": 0.979571499750872 | |
| }, | |
| { | |
| "epoch": 2.672055267702936, | |
| "grad_norm": 1.8844807147979736, | |
| "learning_rate": 4.471711061187144e-05, | |
| "loss": 0.05867302417755127, | |
| "step": 1210, | |
| "token_acc": 0.9790523690773068 | |
| }, | |
| { | |
| "epoch": 2.6831088082901555, | |
| "grad_norm": 2.765587329864502, | |
| "learning_rate": 4.411255768296038e-05, | |
| "loss": 0.05516909956932068, | |
| "step": 1215, | |
| "token_acc": 0.9800697558545092 | |
| }, | |
| { | |
| "epoch": 2.694162348877375, | |
| "grad_norm": 3.7630977630615234, | |
| "learning_rate": 4.3508876336699974e-05, | |
| "loss": 0.05011783838272095, | |
| "step": 1220, | |
| "token_acc": 0.981555333998006 | |
| }, | |
| { | |
| "epoch": 2.7052158894645943, | |
| "grad_norm": 3.2775371074676514, | |
| "learning_rate": 4.290615594266013e-05, | |
| "loss": 0.04247501492500305, | |
| "step": 1225, | |
| "token_acc": 0.9855 | |
| }, | |
| { | |
| "epoch": 2.7162694300518133, | |
| "grad_norm": 2.6794540882110596, | |
| "learning_rate": 4.230448572815053e-05, | |
| "loss": 0.04988014101982117, | |
| "step": 1230, | |
| "token_acc": 0.9826130153999006 | |
| }, | |
| { | |
| "epoch": 2.7273229706390327, | |
| "grad_norm": 2.045193910598755, | |
| "learning_rate": 4.170395476501119e-05, | |
| "loss": 0.04342162907123566, | |
| "step": 1235, | |
| "token_acc": 0.9841269841269841 | |
| }, | |
| { | |
| "epoch": 2.738376511226252, | |
| "grad_norm": 2.58183217048645, | |
| "learning_rate": 4.1104651956426296e-05, | |
| "loss": 0.04766501486301422, | |
| "step": 1240, | |
| "token_acc": 0.9831097863884749 | |
| }, | |
| { | |
| "epoch": 2.7494300518134716, | |
| "grad_norm": 3.195136785507202, | |
| "learning_rate": 4.050666602376287e-05, | |
| "loss": 0.05665205121040344, | |
| "step": 1245, | |
| "token_acc": 0.9766749379652605 | |
| }, | |
| { | |
| "epoch": 2.760483592400691, | |
| "grad_norm": 3.2019379138946533, | |
| "learning_rate": 3.991008549343626e-05, | |
| "loss": 0.07038918733596802, | |
| "step": 1250, | |
| "token_acc": 0.9775449101796407 | |
| }, | |
| { | |
| "epoch": 2.77153713298791, | |
| "grad_norm": 4.3877854347229, | |
| "learning_rate": 3.931499868380482e-05, | |
| "loss": 0.06642740964889526, | |
| "step": 1255, | |
| "token_acc": 0.9761904761904762 | |
| }, | |
| { | |
| "epoch": 2.78259067357513, | |
| "grad_norm": 1.3834000825881958, | |
| "learning_rate": 3.872149369209491e-05, | |
| "loss": 0.0616798460483551, | |
| "step": 1260, | |
| "token_acc": 0.979571499750872 | |
| }, | |
| { | |
| "epoch": 2.7936442141623488, | |
| "grad_norm": 4.06312370300293, | |
| "learning_rate": 3.8129658381359156e-05, | |
| "loss": 0.07107862830162048, | |
| "step": 1265, | |
| "token_acc": 0.9755244755244755 | |
| }, | |
| { | |
| "epoch": 2.804697754749568, | |
| "grad_norm": 3.0957045555114746, | |
| "learning_rate": 3.753958036746894e-05, | |
| "loss": 0.0476302444934845, | |
| "step": 1270, | |
| "token_acc": 0.9815645241654211 | |
| }, | |
| { | |
| "epoch": 2.8157512953367876, | |
| "grad_norm": 4.347245693206787, | |
| "learning_rate": 3.695134700614372e-05, | |
| "loss": 0.06514678001403809, | |
| "step": 1275, | |
| "token_acc": 0.9790836653386454 | |
| }, | |
| { | |
| "epoch": 2.826804835924007, | |
| "grad_norm": 3.311340808868408, | |
| "learning_rate": 3.636504538001882e-05, | |
| "loss": 0.0548922598361969, | |
| "step": 1280, | |
| "token_acc": 0.9831432821021319 | |
| }, | |
| { | |
| "epoch": 2.8378583765112264, | |
| "grad_norm": 1.910610556602478, | |
| "learning_rate": 3.5780762285753616e-05, | |
| "loss": 0.04039471745491028, | |
| "step": 1285, | |
| "token_acc": 0.9861454725383474 | |
| }, | |
| { | |
| "epoch": 2.8489119170984454, | |
| "grad_norm": 3.0203163623809814, | |
| "learning_rate": 3.519858422118206e-05, | |
| "loss": 0.06901986002922059, | |
| "step": 1290, | |
| "token_acc": 0.9781312127236581 | |
| }, | |
| { | |
| "epoch": 2.859965457685665, | |
| "grad_norm": 7.763772487640381, | |
| "learning_rate": 3.461859737250752e-05, | |
| "loss": 0.042749127745628356, | |
| "step": 1295, | |
| "token_acc": 0.9840637450199203 | |
| }, | |
| { | |
| "epoch": 2.871018998272884, | |
| "grad_norm": 4.355055332183838, | |
| "learning_rate": 3.4040887601543574e-05, | |
| "loss": 0.06063474416732788, | |
| "step": 1300, | |
| "token_acc": 0.9815277084373439 | |
| }, | |
| { | |
| "epoch": 2.8820725388601036, | |
| "grad_norm": 3.254500389099121, | |
| "learning_rate": 3.346554043300308e-05, | |
| "loss": 0.058100783824920656, | |
| "step": 1305, | |
| "token_acc": 0.9791976225854383 | |
| }, | |
| { | |
| "epoch": 2.893126079447323, | |
| "grad_norm": 4.576897621154785, | |
| "learning_rate": 3.289264104183691e-05, | |
| "loss": 0.05097652673721313, | |
| "step": 1310, | |
| "token_acc": 0.983201581027668 | |
| }, | |
| { | |
| "epoch": 2.9041796200345424, | |
| "grad_norm": 4.716442584991455, | |
| "learning_rate": 3.232227424062464e-05, | |
| "loss": 0.05045266747474671, | |
| "step": 1315, | |
| "token_acc": 0.9829059829059829 | |
| }, | |
| { | |
| "epoch": 2.915233160621762, | |
| "grad_norm": 3.8253066539764404, | |
| "learning_rate": 3.175452446701873e-05, | |
| "loss": 0.05482856035232544, | |
| "step": 1320, | |
| "token_acc": 0.9791356184798807 | |
| }, | |
| { | |
| "epoch": 2.926286701208981, | |
| "grad_norm": 4.78739595413208, | |
| "learning_rate": 3.118947577124439e-05, | |
| "loss": 0.056392842531204225, | |
| "step": 1325, | |
| "token_acc": 0.9797130133597229 | |
| }, | |
| { | |
| "epoch": 2.9373402417962002, | |
| "grad_norm": 2.2553937435150146, | |
| "learning_rate": 3.062721180365669e-05, | |
| "loss": 0.05316250324249268, | |
| "step": 1330, | |
| "token_acc": 0.9816831683168317 | |
| }, | |
| { | |
| "epoch": 2.9483937823834196, | |
| "grad_norm": 3.2007408142089844, | |
| "learning_rate": 3.0067815802356714e-05, | |
| "loss": 0.055870598554611205, | |
| "step": 1335, | |
| "token_acc": 0.9796526054590571 | |
| }, | |
| { | |
| "epoch": 2.959447322970639, | |
| "grad_norm": 1.5691827535629272, | |
| "learning_rate": 2.9511370580869213e-05, | |
| "loss": 0.04847137331962585, | |
| "step": 1340, | |
| "token_acc": 0.980635551142006 | |
| }, | |
| { | |
| "epoch": 2.9705008635578585, | |
| "grad_norm": 4.0064826011657715, | |
| "learning_rate": 2.895795851588252e-05, | |
| "loss": 0.061286211013793945, | |
| "step": 1345, | |
| "token_acc": 0.9805583250249252 | |
| }, | |
| { | |
| "epoch": 2.981554404145078, | |
| "grad_norm": 3.6499500274658203, | |
| "learning_rate": 2.8407661535053588e-05, | |
| "loss": 0.0678468644618988, | |
| "step": 1350, | |
| "token_acc": 0.9766401590457257 | |
| }, | |
| { | |
| "epoch": 2.9926079447322973, | |
| "grad_norm": 3.093632936477661, | |
| "learning_rate": 2.7860561104879357e-05, | |
| "loss": 0.04808221161365509, | |
| "step": 1355, | |
| "token_acc": 0.9815920398009951 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 0.057897526770830154, | |
| "eval_runtime": 2238.3251, | |
| "eval_samples_per_second": 1.049, | |
| "eval_steps_per_second": 1.049, | |
| "eval_token_acc": 0.9798328426894055, | |
| "step": 1359 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1359, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.83259479920255e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |