Audio-Text-to-Text
Transformers
Safetensors
English
Chinese
qwen2_5_omni
text-to-audio
audio
audio-language-model
instruction-following
rubric-based-evaluation
judge-model
Instructions to use cucl2/AnyAudio-Judge-7B with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use cucl2/AnyAudio-Judge-7B with Transformers:
```python
# Load model directly
from transformers import AutoProcessor, AutoModelForTextToWaveform

processor = AutoProcessor.from_pretrained("cucl2/AnyAudio-Judge-7B")
model = AutoModelForTextToWaveform.from_pretrained("cucl2/AnyAudio-Judge-7B")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 200.0, | |
| "global_step": 1641, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0006093845216331506, | |
| "grad_norm": 5.139511599456721, | |
| "learning_rate": 2.0000000000000002e-07, | |
| "loss": 0.97658371925354, | |
| "step": 1, | |
| "token_acc": 0.752757254524039 | |
| }, | |
| { | |
| "epoch": 0.006093845216331505, | |
| "grad_norm": 4.02627916602829, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 0.979477776421441, | |
| "step": 10, | |
| "token_acc": 0.7513394629500156 | |
| }, | |
| { | |
| "epoch": 0.01218769043266301, | |
| "grad_norm": 2.133243665962445, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.8085936546325684, | |
| "step": 20, | |
| "token_acc": 0.7841330046869243 | |
| }, | |
| { | |
| "epoch": 0.018281535648994516, | |
| "grad_norm": 1.6130698467539089, | |
| "learning_rate": 6e-06, | |
| "loss": 0.6815763473510742, | |
| "step": 30, | |
| "token_acc": 0.8088598674958689 | |
| }, | |
| { | |
| "epoch": 0.02437538086532602, | |
| "grad_norm": 1.5958993846193437, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.6228148937225342, | |
| "step": 40, | |
| "token_acc": 0.820709892041175 | |
| }, | |
| { | |
| "epoch": 0.030469226081657527, | |
| "grad_norm": 1.5835525047156958, | |
| "learning_rate": 1e-05, | |
| "loss": 0.5660243034362793, | |
| "step": 50, | |
| "token_acc": 0.8334044587872124 | |
| }, | |
| { | |
| "epoch": 0.03656307129798903, | |
| "grad_norm": 1.486912724454441, | |
| "learning_rate": 9.999025267866269e-06, | |
| "loss": 0.5425637722015381, | |
| "step": 60, | |
| "token_acc": 0.8377751665687463 | |
| }, | |
| { | |
| "epoch": 0.042656916514320534, | |
| "grad_norm": 1.5337888751937572, | |
| "learning_rate": 9.996101451506166e-06, | |
| "loss": 0.5277560710906982, | |
| "step": 70, | |
| "token_acc": 0.8436990187965481 | |
| }, | |
| { | |
| "epoch": 0.04875076173065204, | |
| "grad_norm": 1.3940886051095893, | |
| "learning_rate": 9.991229690894796e-06, | |
| "loss": 0.5164490699768066, | |
| "step": 80, | |
| "token_acc": 0.843745632858913 | |
| }, | |
| { | |
| "epoch": 0.054844606946983544, | |
| "grad_norm": 1.732808941800786, | |
| "learning_rate": 9.984411885496807e-06, | |
| "loss": 0.5111546516418457, | |
| "step": 90, | |
| "token_acc": 0.8453230842547292 | |
| }, | |
| { | |
| "epoch": 0.06093845216331505, | |
| "grad_norm": 1.502010177161109, | |
| "learning_rate": 9.975650693525798e-06, | |
| "loss": 0.5041120052337646, | |
| "step": 100, | |
| "token_acc": 0.8452364415692656 | |
| }, | |
| { | |
| "epoch": 0.06703229737964655, | |
| "grad_norm": 1.4601020474033113, | |
| "learning_rate": 9.964949530907907e-06, | |
| "loss": 0.5016684532165527, | |
| "step": 110, | |
| "token_acc": 0.8476517417815049 | |
| }, | |
| { | |
| "epoch": 0.07312614259597806, | |
| "grad_norm": 1.2952511202685608, | |
| "learning_rate": 9.952312569949963e-06, | |
| "loss": 0.481311559677124, | |
| "step": 120, | |
| "token_acc": 0.8533387907153323 | |
| }, | |
| { | |
| "epoch": 0.07921998781230957, | |
| "grad_norm": 1.4971652792627679, | |
| "learning_rate": 9.937744737712734e-06, | |
| "loss": 0.4774615287780762, | |
| "step": 130, | |
| "token_acc": 0.8534425009304056 | |
| }, | |
| { | |
| "epoch": 0.08531383302864107, | |
| "grad_norm": 1.3779208298780503, | |
| "learning_rate": 9.921251714089898e-06, | |
| "loss": 0.4781217575073242, | |
| "step": 140, | |
| "token_acc": 0.8540719832383131 | |
| }, | |
| { | |
| "epoch": 0.09140767824497258, | |
| "grad_norm": 1.2833076700573953, | |
| "learning_rate": 9.9028399295935e-06, | |
| "loss": 0.470335865020752, | |
| "step": 150, | |
| "token_acc": 0.855729364137813 | |
| }, | |
| { | |
| "epoch": 0.09750152346130408, | |
| "grad_norm": 1.275667403101274, | |
| "learning_rate": 9.882516562846735e-06, | |
| "loss": 0.4639917850494385, | |
| "step": 160, | |
| "token_acc": 0.856720295350119 | |
| }, | |
| { | |
| "epoch": 0.1035953686776356, | |
| "grad_norm": 1.3612747480512406, | |
| "learning_rate": 9.860289537785058e-06, | |
| "loss": 0.46750926971435547, | |
| "step": 170, | |
| "token_acc": 0.8558511446900368 | |
| }, | |
| { | |
| "epoch": 0.10968921389396709, | |
| "grad_norm": 1.415269189793894, | |
| "learning_rate": 9.83616752056669e-06, | |
| "loss": 0.4647522926330566, | |
| "step": 180, | |
| "token_acc": 0.8561545157582173 | |
| }, | |
| { | |
| "epoch": 0.1157830591102986, | |
| "grad_norm": 1.1892931159809241, | |
| "learning_rate": 9.810159916193763e-06, | |
| "loss": 0.45995321273803713, | |
| "step": 190, | |
| "token_acc": 0.8573448602405447 | |
| }, | |
| { | |
| "epoch": 0.1218769043266301, | |
| "grad_norm": 1.3064527572301536, | |
| "learning_rate": 9.782276864845351e-06, | |
| "loss": 0.4638189792633057, | |
| "step": 200, | |
| "token_acc": 0.8556243509610675 | |
| }, | |
| { | |
| "epoch": 0.12797074954296161, | |
| "grad_norm": 1.314500952457575, | |
| "learning_rate": 9.752529237923914e-06, | |
| "loss": 0.4490074634552002, | |
| "step": 210, | |
| "token_acc": 0.8597137978120026 | |
| }, | |
| { | |
| "epoch": 0.1340645947592931, | |
| "grad_norm": 1.1373343380715917, | |
| "learning_rate": 9.720928633816596e-06, | |
| "loss": 0.4501980781555176, | |
| "step": 220, | |
| "token_acc": 0.859648604947868 | |
| }, | |
| { | |
| "epoch": 0.14015843997562463, | |
| "grad_norm": 1.230125123804479, | |
| "learning_rate": 9.687487373373103e-06, | |
| "loss": 0.44935040473937987, | |
| "step": 230, | |
| "token_acc": 0.8588833095343918 | |
| }, | |
| { | |
| "epoch": 0.14625228519195613, | |
| "grad_norm": 1.6077603667743245, | |
| "learning_rate": 9.652218495101894e-06, | |
| "loss": 0.44729223251342776, | |
| "step": 240, | |
| "token_acc": 0.85995841942315 | |
| }, | |
| { | |
| "epoch": 0.15234613040828762, | |
| "grad_norm": 1.2688011877880512, | |
| "learning_rate": 9.61513575008656e-06, | |
| "loss": 0.43803844451904295, | |
| "step": 250, | |
| "token_acc": 0.8627407782309685 | |
| }, | |
| { | |
| "epoch": 0.15843997562461914, | |
| "grad_norm": 1.178112411608835, | |
| "learning_rate": 9.576253596624367e-06, | |
| "loss": 0.43675899505615234, | |
| "step": 260, | |
| "token_acc": 0.8637918234089942 | |
| }, | |
| { | |
| "epoch": 0.16453382084095064, | |
| "grad_norm": 1.294840110909924, | |
| "learning_rate": 9.53558719458908e-06, | |
| "loss": 0.4456604480743408, | |
| "step": 270, | |
| "token_acc": 0.8607995996575384 | |
| }, | |
| { | |
| "epoch": 0.17062766605728213, | |
| "grad_norm": 1.2275948762976965, | |
| "learning_rate": 9.49315239952023e-06, | |
| "loss": 0.44009056091308596, | |
| "step": 280, | |
| "token_acc": 0.8624380989923378 | |
| }, | |
| { | |
| "epoch": 0.17672151127361366, | |
| "grad_norm": 1.1835184130640346, | |
| "learning_rate": 9.448965756441154e-06, | |
| "loss": 0.43228535652160643, | |
| "step": 290, | |
| "token_acc": 0.8642425086011517 | |
| }, | |
| { | |
| "epoch": 0.18281535648994515, | |
| "grad_norm": 1.0506109771841785, | |
| "learning_rate": 9.403044493408205e-06, | |
| "loss": 0.4331789970397949, | |
| "step": 300, | |
| "token_acc": 0.8650773124725752 | |
| }, | |
| { | |
| "epoch": 0.18890920170627665, | |
| "grad_norm": 1.2538833747077607, | |
| "learning_rate": 9.355406514793667e-06, | |
| "loss": 0.44378862380981443, | |
| "step": 310, | |
| "token_acc": 0.8612388746191983 | |
| }, | |
| { | |
| "epoch": 0.19500304692260817, | |
| "grad_norm": 1.1649745576637627, | |
| "learning_rate": 9.306070394304955e-06, | |
| "loss": 0.4216612339019775, | |
| "step": 320, | |
| "token_acc": 0.8665872154728236 | |
| }, | |
| { | |
| "epoch": 0.20109689213893966, | |
| "grad_norm": 1.1640287810040342, | |
| "learning_rate": 9.255055367742868e-06, | |
| "loss": 0.43276224136352537, | |
| "step": 330, | |
| "token_acc": 0.864183550146075 | |
| }, | |
| { | |
| "epoch": 0.2071907373552712, | |
| "grad_norm": 1.1205258611684763, | |
| "learning_rate": 9.202381325501683e-06, | |
| "loss": 0.42910175323486327, | |
| "step": 340, | |
| "token_acc": 0.8651370039640893 | |
| }, | |
| { | |
| "epoch": 0.21328458257160268, | |
| "grad_norm": 1.1044569461174318, | |
| "learning_rate": 9.148068804814032e-06, | |
| "loss": 0.425107479095459, | |
| "step": 350, | |
| "token_acc": 0.8655852823220787 | |
| }, | |
| { | |
| "epoch": 0.21937842778793418, | |
| "grad_norm": 1.256804160100686, | |
| "learning_rate": 9.092138981743588e-06, | |
| "loss": 0.4197092533111572, | |
| "step": 360, | |
| "token_acc": 0.8678029564108461 | |
| }, | |
| { | |
| "epoch": 0.2254722730042657, | |
| "grad_norm": 1.219596805077906, | |
| "learning_rate": 9.034613662928665e-06, | |
| "loss": 0.4218160629272461, | |
| "step": 370, | |
| "token_acc": 0.8669598748703018 | |
| }, | |
| { | |
| "epoch": 0.2315661182205972, | |
| "grad_norm": 1.0651000148469036, | |
| "learning_rate": 8.975515277079961e-06, | |
| "loss": 0.4222999095916748, | |
| "step": 380, | |
| "token_acc": 0.8668562219942147 | |
| }, | |
| { | |
| "epoch": 0.2376599634369287, | |
| "grad_norm": 1.1478789808745513, | |
| "learning_rate": 8.91486686623577e-06, | |
| "loss": 0.41972966194152833, | |
| "step": 390, | |
| "token_acc": 0.8667315262188772 | |
| }, | |
| { | |
| "epoch": 0.2437538086532602, | |
| "grad_norm": 1.0060533858058822, | |
| "learning_rate": 8.85269207677806e-06, | |
| "loss": 0.4143358707427979, | |
| "step": 400, | |
| "token_acc": 0.8689943563130941 | |
| }, | |
| { | |
| "epoch": 0.2498476538695917, | |
| "grad_norm": 1.2219261737292129, | |
| "learning_rate": 8.789015150212907e-06, | |
| "loss": 0.41486186981201173, | |
| "step": 410, | |
| "token_acc": 0.867653374528066 | |
| }, | |
| { | |
| "epoch": 0.25594149908592323, | |
| "grad_norm": 1.2842286146778168, | |
| "learning_rate": 8.72386091371891e-06, | |
| "loss": 0.4264723777770996, | |
| "step": 420, | |
| "token_acc": 0.865345114787771 | |
| }, | |
| { | |
| "epoch": 0.2620353443022547, | |
| "grad_norm": 1.0550559155752623, | |
| "learning_rate": 8.657254770467252e-06, | |
| "loss": 0.40860881805419924, | |
| "step": 430, | |
| "token_acc": 0.8694270527928576 | |
| }, | |
| { | |
| "epoch": 0.2681291895185862, | |
| "grad_norm": 1.1246909396790437, | |
| "learning_rate": 8.58922268971719e-06, | |
| "loss": 0.4148720264434814, | |
| "step": 440, | |
| "token_acc": 0.86849521403236 | |
| }, | |
| { | |
| "epoch": 0.2742230347349177, | |
| "grad_norm": 1.1877909840033853, | |
| "learning_rate": 8.51979119669081e-06, | |
| "loss": 0.4155715465545654, | |
| "step": 450, | |
| "token_acc": 0.8686248236499153 | |
| }, | |
| { | |
| "epoch": 0.28031687995124926, | |
| "grad_norm": 1.03946599413896, | |
| "learning_rate": 8.448987362231054e-06, | |
| "loss": 0.4156056880950928, | |
| "step": 460, | |
| "token_acc": 0.8682606492506055 | |
| }, | |
| { | |
| "epoch": 0.28641072516758076, | |
| "grad_norm": 1.1045440790462375, | |
| "learning_rate": 8.376838792246978e-06, | |
| "loss": 0.41259098052978516, | |
| "step": 470, | |
| "token_acc": 0.868615067345492 | |
| }, | |
| { | |
| "epoch": 0.29250457038391225, | |
| "grad_norm": 1.1044055109636997, | |
| "learning_rate": 8.303373616950408e-06, | |
| "loss": 0.41626744270324706, | |
| "step": 480, | |
| "token_acc": 0.867445116993405 | |
| }, | |
| { | |
| "epoch": 0.29859841560024375, | |
| "grad_norm": 1.0612884186160958, | |
| "learning_rate": 8.228620479888172e-06, | |
| "loss": 0.4087618350982666, | |
| "step": 490, | |
| "token_acc": 0.869433255622514 | |
| }, | |
| { | |
| "epoch": 0.30469226081657524, | |
| "grad_norm": 1.079879116921211, | |
| "learning_rate": 8.152608526774188e-06, | |
| "loss": 0.40863656997680664, | |
| "step": 500, | |
| "token_acc": 0.8705444341829626 | |
| }, | |
| { | |
| "epoch": 0.31078610603290674, | |
| "grad_norm": 1.0470334273877924, | |
| "learning_rate": 8.075367394125755e-06, | |
| "loss": 0.41130657196044923, | |
| "step": 510, | |
| "token_acc": 0.8699947913802195 | |
| }, | |
| { | |
| "epoch": 0.3168799512492383, | |
| "grad_norm": 1.2778777056879977, | |
| "learning_rate": 7.996927197708486e-06, | |
| "loss": 0.4074504852294922, | |
| "step": 520, | |
| "token_acc": 0.8711178129454153 | |
| }, | |
| { | |
| "epoch": 0.3229737964655698, | |
| "grad_norm": 1.133795250933889, | |
| "learning_rate": 7.917318520794395e-06, | |
| "loss": 0.4040180206298828, | |
| "step": 530, | |
| "token_acc": 0.8719991647774729 | |
| }, | |
| { | |
| "epoch": 0.3290676416819013, | |
| "grad_norm": 1.1320221274981666, | |
| "learning_rate": 7.836572402237683e-06, | |
| "loss": 0.4074112892150879, | |
| "step": 540, | |
| "token_acc": 0.8696679374619692 | |
| }, | |
| { | |
| "epoch": 0.3351614868982328, | |
| "grad_norm": 1.0153565229717176, | |
| "learning_rate": 7.754720324372924e-06, | |
| "loss": 0.4030743598937988, | |
| "step": 550, | |
| "token_acc": 0.8720831783254012 | |
| }, | |
| { | |
| "epoch": 0.34125533211456427, | |
| "grad_norm": 1.0985579621580885, | |
| "learning_rate": 7.67179420074032e-06, | |
| "loss": 0.3988363742828369, | |
| "step": 560, | |
| "token_acc": 0.8726780258889484 | |
| }, | |
| { | |
| "epoch": 0.3473491773308958, | |
| "grad_norm": 1.0584699143582574, | |
| "learning_rate": 7.587826363642845e-06, | |
| "loss": 0.4028042793273926, | |
| "step": 570, | |
| "token_acc": 0.8709437860238254 | |
| }, | |
| { | |
| "epoch": 0.3534430225472273, | |
| "grad_norm": 1.1632651891282637, | |
| "learning_rate": 7.502849551540106e-06, | |
| "loss": 0.3974143028259277, | |
| "step": 580, | |
| "token_acc": 0.8732772418431721 | |
| }, | |
| { | |
| "epoch": 0.3595368677635588, | |
| "grad_norm": 0.9585380945132779, | |
| "learning_rate": 7.4168968962838524e-06, | |
| "loss": 0.40021185874938964, | |
| "step": 590, | |
| "token_acc": 0.8715715660830257 | |
| }, | |
| { | |
| "epoch": 0.3656307129798903, | |
| "grad_norm": 0.939779800665415, | |
| "learning_rate": 7.330001910200111e-06, | |
| "loss": 0.39843976497650146, | |
| "step": 600, | |
| "token_acc": 0.8733910783350537 | |
| }, | |
| { | |
| "epoch": 0.3717245581962218, | |
| "grad_norm": 0.9815164073943617, | |
| "learning_rate": 7.242198473022958e-06, | |
| "loss": 0.3972899913787842, | |
| "step": 610, | |
| "token_acc": 0.8731910420095998 | |
| }, | |
| { | |
| "epoch": 0.3778184034125533, | |
| "grad_norm": 1.0569386302509218, | |
| "learning_rate": 7.15352081868506e-06, | |
| "loss": 0.4026960372924805, | |
| "step": 620, | |
| "token_acc": 0.8716591305210795 | |
| }, | |
| { | |
| "epoch": 0.38391224862888484, | |
| "grad_norm": 1.0897077358900225, | |
| "learning_rate": 7.0640035219701085e-06, | |
| "loss": 0.39238433837890624, | |
| "step": 630, | |
| "token_acc": 0.8741110700683207 | |
| }, | |
| { | |
| "epoch": 0.39000609384521634, | |
| "grad_norm": 1.0094259905078886, | |
| "learning_rate": 6.973681485032359e-06, | |
| "loss": 0.3934662342071533, | |
| "step": 640, | |
| "token_acc": 0.874180305698641 | |
| }, | |
| { | |
| "epoch": 0.39609993906154783, | |
| "grad_norm": 0.9880095870102604, | |
| "learning_rate": 6.8825899237885215e-06, | |
| "loss": 0.3929059743881226, | |
| "step": 650, | |
| "token_acc": 0.873847849697677 | |
| }, | |
| { | |
| "epoch": 0.40219378427787933, | |
| "grad_norm": 0.9583618057687778, | |
| "learning_rate": 6.7907643541873446e-06, | |
| "loss": 0.38638834953308104, | |
| "step": 660, | |
| "token_acc": 0.8764517709444076 | |
| }, | |
| { | |
| "epoch": 0.4082876294942108, | |
| "grad_norm": 1.1091462631909463, | |
| "learning_rate": 6.698240578362179e-06, | |
| "loss": 0.3935162782669067, | |
| "step": 670, | |
| "token_acc": 0.8743182876186542 | |
| }, | |
| { | |
| "epoch": 0.4143814747105424, | |
| "grad_norm": 0.959273015275344, | |
| "learning_rate": 6.6050546706719984e-06, | |
| "loss": 0.38172011375427245, | |
| "step": 680, | |
| "token_acc": 0.8772576395099669 | |
| }, | |
| { | |
| "epoch": 0.42047531992687387, | |
| "grad_norm": 1.0010757728338364, | |
| "learning_rate": 6.511242963636257e-06, | |
| "loss": 0.3927836179733276, | |
| "step": 690, | |
| "token_acc": 0.8740263817041508 | |
| }, | |
| { | |
| "epoch": 0.42656916514320536, | |
| "grad_norm": 1.045230237684538, | |
| "learning_rate": 6.416842033769106e-06, | |
| "loss": 0.38949809074401853, | |
| "step": 700, | |
| "token_acc": 0.8748742675586352 | |
| }, | |
| { | |
| "epoch": 0.43266301035953686, | |
| "grad_norm": 0.9849032327305663, | |
| "learning_rate": 6.321888687318457e-06, | |
| "loss": 0.39299988746643066, | |
| "step": 710, | |
| "token_acc": 0.8744398373706392 | |
| }, | |
| { | |
| "epoch": 0.43875685557586835, | |
| "grad_norm": 0.9773426657855283, | |
| "learning_rate": 6.2264199459155105e-06, | |
| "loss": 0.38987624645233154, | |
| "step": 720, | |
| "token_acc": 0.8749521585172907 | |
| }, | |
| { | |
| "epoch": 0.4448507007921999, | |
| "grad_norm": 1.037517468712357, | |
| "learning_rate": 6.130473032140272e-06, | |
| "loss": 0.38550682067871095, | |
| "step": 730, | |
| "token_acc": 0.8752092114104209 | |
| }, | |
| { | |
| "epoch": 0.4509445460085314, | |
| "grad_norm": 1.0310013780608072, | |
| "learning_rate": 6.0340853550087345e-06, | |
| "loss": 0.378936243057251, | |
| "step": 740, | |
| "token_acc": 0.878043851367452 | |
| }, | |
| { | |
| "epoch": 0.4570383912248629, | |
| "grad_norm": 0.8055934899750623, | |
| "learning_rate": 5.937294495387377e-06, | |
| "loss": 0.38777313232421873, | |
| "step": 750, | |
| "token_acc": 0.8762303990063655 | |
| }, | |
| { | |
| "epoch": 0.4631322364411944, | |
| "grad_norm": 1.0076731680308868, | |
| "learning_rate": 5.840138191340651e-06, | |
| "loss": 0.3867051601409912, | |
| "step": 760, | |
| "token_acc": 0.875447200037364 | |
| }, | |
| { | |
| "epoch": 0.4692260816575259, | |
| "grad_norm": 0.9392775195574543, | |
| "learning_rate": 5.7426543234171736e-06, | |
| "loss": 0.3799318552017212, | |
| "step": 770, | |
| "token_acc": 0.8780739671196323 | |
| }, | |
| { | |
| "epoch": 0.4753199268738574, | |
| "grad_norm": 0.9059297874010275, | |
| "learning_rate": 5.644880899880382e-06, | |
| "loss": 0.38845138549804686, | |
| "step": 780, | |
| "token_acc": 0.8756513846485855 | |
| }, | |
| { | |
| "epoch": 0.48141377209018893, | |
| "grad_norm": 1.0364591251718924, | |
| "learning_rate": 5.546856041889374e-06, | |
| "loss": 0.384658670425415, | |
| "step": 790, | |
| "token_acc": 0.8760285406658391 | |
| }, | |
| { | |
| "epoch": 0.4875076173065204, | |
| "grad_norm": 0.9573686942596932, | |
| "learning_rate": 5.448617968635741e-06, | |
| "loss": 0.3791942596435547, | |
| "step": 800, | |
| "token_acc": 0.8779162415307187 | |
| }, | |
| { | |
| "epoch": 0.4936014625228519, | |
| "grad_norm": 0.9636242802763855, | |
| "learning_rate": 5.35020498244219e-06, | |
| "loss": 0.37176291942596434, | |
| "step": 810, | |
| "token_acc": 0.8793090876456928 | |
| }, | |
| { | |
| "epoch": 0.4996953077391834, | |
| "grad_norm": 1.037660587481492, | |
| "learning_rate": 5.251655453828728e-06, | |
| "loss": 0.37394251823425295, | |
| "step": 820, | |
| "token_acc": 0.8786210190654307 | |
| }, | |
| { | |
| "epoch": 0.505789152955515, | |
| "grad_norm": 1.0719330406024963, | |
| "learning_rate": 5.153007806552275e-06, | |
| "loss": 0.3745760679244995, | |
| "step": 830, | |
| "token_acc": 0.8784241641412887 | |
| }, | |
| { | |
| "epoch": 0.5118829981718465, | |
| "grad_norm": 0.8899515496236061, | |
| "learning_rate": 5.054300502625517e-06, | |
| "loss": 0.3706503868103027, | |
| "step": 840, | |
| "token_acc": 0.8798184912767585 | |
| }, | |
| { | |
| "epoch": 0.517976843388178, | |
| "grad_norm": 0.9136772226114551, | |
| "learning_rate": 4.9555720273208475e-06, | |
| "loss": 0.3767611742019653, | |
| "step": 850, | |
| "token_acc": 0.8780427238279765 | |
| }, | |
| { | |
| "epoch": 0.5240706886045094, | |
| "grad_norm": 0.9760538746168989, | |
| "learning_rate": 4.856860874165218e-06, | |
| "loss": 0.37979438304901125, | |
| "step": 860, | |
| "token_acc": 0.8784071947906439 | |
| }, | |
| { | |
| "epoch": 0.5301645338208409, | |
| "grad_norm": 0.9424993647974058, | |
| "learning_rate": 4.758205529931808e-06, | |
| "loss": 0.3839302062988281, | |
| "step": 870, | |
| "token_acc": 0.8770481761661205 | |
| }, | |
| { | |
| "epoch": 0.5362583790371724, | |
| "grad_norm": 1.0293112779306877, | |
| "learning_rate": 4.659644459634293e-06, | |
| "loss": 0.3767723321914673, | |
| "step": 880, | |
| "token_acc": 0.8782181679486365 | |
| }, | |
| { | |
| "epoch": 0.5423522242535039, | |
| "grad_norm": 1.0743397927299763, | |
| "learning_rate": 4.56121609152961e-06, | |
| "loss": 0.3791919946670532, | |
| "step": 890, | |
| "token_acc": 0.8769342677312787 | |
| }, | |
| { | |
| "epoch": 0.5484460694698354, | |
| "grad_norm": 0.8651643017417293, | |
| "learning_rate": 4.462958802135069e-06, | |
| "loss": 0.36331801414489745, | |
| "step": 900, | |
| "token_acc": 0.8819762679763837 | |
| }, | |
| { | |
| "epoch": 0.5545399146861669, | |
| "grad_norm": 0.9197439306994798, | |
| "learning_rate": 4.364910901265607e-06, | |
| "loss": 0.3720353603363037, | |
| "step": 910, | |
| "token_acc": 0.8795370329732339 | |
| }, | |
| { | |
| "epoch": 0.5606337599024985, | |
| "grad_norm": 0.9973864478854872, | |
| "learning_rate": 4.2671106170970734e-06, | |
| "loss": 0.37818198204040526, | |
| "step": 920, | |
| "token_acc": 0.8787091854009224 | |
| }, | |
| { | |
| "epoch": 0.56672760511883, | |
| "grad_norm": 0.9979320322546561, | |
| "learning_rate": 4.169596081261332e-06, | |
| "loss": 0.368232798576355, | |
| "step": 930, | |
| "token_acc": 0.8808049967885766 | |
| }, | |
| { | |
| "epoch": 0.5728214503351615, | |
| "grad_norm": 0.9817455772913783, | |
| "learning_rate": 4.072405313979021e-06, | |
| "loss": 0.37091827392578125, | |
| "step": 940, | |
| "token_acc": 0.8796466097957818 | |
| }, | |
| { | |
| "epoch": 0.578915295551493, | |
| "grad_norm": 1.0935297334377472, | |
| "learning_rate": 3.975576209235726e-06, | |
| "loss": 0.3674028396606445, | |
| "step": 950, | |
| "token_acc": 0.8807917695163083 | |
| }, | |
| { | |
| "epoch": 0.5850091407678245, | |
| "grad_norm": 0.9835469765967159, | |
| "learning_rate": 3.879146520007399e-06, | |
| "loss": 0.3728478908538818, | |
| "step": 960, | |
| "token_acc": 0.8795413152600885 | |
| }, | |
| { | |
| "epoch": 0.591102985984156, | |
| "grad_norm": 0.9625183356689964, | |
| "learning_rate": 3.7831538435407344e-06, | |
| "loss": 0.37494525909423826, | |
| "step": 970, | |
| "token_acc": 0.8792245580635571 | |
| }, | |
| { | |
| "epoch": 0.5971968312004875, | |
| "grad_norm": 0.9012795424730173, | |
| "learning_rate": 3.687635606694271e-06, | |
| "loss": 0.3702352046966553, | |
| "step": 980, | |
| "token_acc": 0.8801223453080008 | |
| }, | |
| { | |
| "epoch": 0.603290676416819, | |
| "grad_norm": 0.9782757486531443, | |
| "learning_rate": 3.592629051345936e-06, | |
| "loss": 0.3673159837722778, | |
| "step": 990, | |
| "token_acc": 0.8810825035648933 | |
| }, | |
| { | |
| "epoch": 0.6093845216331505, | |
| "grad_norm": 1.0059100640922563, | |
| "learning_rate": 3.4981712198726956e-06, | |
| "loss": 0.3642214059829712, | |
| "step": 1000, | |
| "token_acc": 0.8818312088488447 | |
| }, | |
| { | |
| "epoch": 0.615478366849482, | |
| "grad_norm": 0.9395189399708234, | |
| "learning_rate": 3.4042989407079986e-06, | |
| "loss": 0.3784639358520508, | |
| "step": 1010, | |
| "token_acc": 0.8780194366406157 | |
| }, | |
| { | |
| "epoch": 0.6215722120658135, | |
| "grad_norm": 1.0425592930772825, | |
| "learning_rate": 3.311048813982627e-06, | |
| "loss": 0.36695384979248047, | |
| "step": 1020, | |
| "token_acc": 0.8809777292779815 | |
| }, | |
| { | |
| "epoch": 0.6276660572821451, | |
| "grad_norm": 0.9146308056797927, | |
| "learning_rate": 3.218457197254583e-06, | |
| "loss": 0.36698212623596194, | |
| "step": 1030, | |
| "token_acc": 0.8810339710207495 | |
| }, | |
| { | |
| "epoch": 0.6337599024984766, | |
| "grad_norm": 0.976263078958663, | |
| "learning_rate": 3.1265601913335196e-06, | |
| "loss": 0.365465784072876, | |
| "step": 1040, | |
| "token_acc": 0.8814162812670944 | |
| }, | |
| { | |
| "epoch": 0.6398537477148081, | |
| "grad_norm": 1.0567379406046713, | |
| "learning_rate": 3.035393626205306e-06, | |
| "loss": 0.3610874891281128, | |
| "step": 1050, | |
| "token_acc": 0.8824792140002385 | |
| }, | |
| { | |
| "epoch": 0.6459475929311396, | |
| "grad_norm": 1.0205537815943757, | |
| "learning_rate": 2.944993047062161e-06, | |
| "loss": 0.35759830474853516, | |
| "step": 1060, | |
| "token_acc": 0.8834624031976018 | |
| }, | |
| { | |
| "epoch": 0.6520414381474711, | |
| "grad_norm": 1.0280714401242652, | |
| "learning_rate": 2.8553937004438425e-06, | |
| "loss": 0.3574142217636108, | |
| "step": 1070, | |
| "token_acc": 0.884169503378651 | |
| }, | |
| { | |
| "epoch": 0.6581352833638026, | |
| "grad_norm": 1.0187298702407688, | |
| "learning_rate": 2.766630520495277e-06, | |
| "loss": 0.36029987335205077, | |
| "step": 1080, | |
| "token_acc": 0.8823869756562952 | |
| }, | |
| { | |
| "epoch": 0.664229128580134, | |
| "grad_norm": 0.9191494153561297, | |
| "learning_rate": 2.67873811534598e-06, | |
| "loss": 0.35897092819213866, | |
| "step": 1090, | |
| "token_acc": 0.8827260508533868 | |
| }, | |
| { | |
| "epoch": 0.6703229737964655, | |
| "grad_norm": 0.9492740813391064, | |
| "learning_rate": 2.591750753616596e-06, | |
| "loss": 0.36168532371520995, | |
| "step": 1100, | |
| "token_acc": 0.8825941425209475 | |
| }, | |
| { | |
| "epoch": 0.676416819012797, | |
| "grad_norm": 0.9644543574186545, | |
| "learning_rate": 2.505702351057804e-06, | |
| "loss": 0.3665107488632202, | |
| "step": 1110, | |
| "token_acc": 0.8816928952036972 | |
| }, | |
| { | |
| "epoch": 0.6825106642291285, | |
| "grad_norm": 0.9521683470371731, | |
| "learning_rate": 2.4206264573268174e-06, | |
| "loss": 0.35790448188781737, | |
| "step": 1120, | |
| "token_acc": 0.8832886728694526 | |
| }, | |
| { | |
| "epoch": 0.68860450944546, | |
| "grad_norm": 1.0783164983743936, | |
| "learning_rate": 2.336556242906608e-06, | |
| "loss": 0.3561516284942627, | |
| "step": 1130, | |
| "token_acc": 0.8839432945670233 | |
| }, | |
| { | |
| "epoch": 0.6946983546617916, | |
| "grad_norm": 0.9994299291097577, | |
| "learning_rate": 2.2535244861729707e-06, | |
| "loss": 0.3557067632675171, | |
| "step": 1140, | |
| "token_acc": 0.8837923958883728 | |
| }, | |
| { | |
| "epoch": 0.7007921998781231, | |
| "grad_norm": 1.039214819811771, | |
| "learning_rate": 2.1715635606144653e-06, | |
| "loss": 0.3563429832458496, | |
| "step": 1150, | |
| "token_acc": 0.8836427544336156 | |
| }, | |
| { | |
| "epoch": 0.7068860450944546, | |
| "grad_norm": 0.8549094000634878, | |
| "learning_rate": 2.0907054222102367e-06, | |
| "loss": 0.35337374210357664, | |
| "step": 1160, | |
| "token_acc": 0.8852147256677358 | |
| }, | |
| { | |
| "epoch": 0.7129798903107861, | |
| "grad_norm": 0.894156191232295, | |
| "learning_rate": 2.0109815969705922e-06, | |
| "loss": 0.359290337562561, | |
| "step": 1170, | |
| "token_acc": 0.8828725266946272 | |
| }, | |
| { | |
| "epoch": 0.7190737355271176, | |
| "grad_norm": 0.8673526133846996, | |
| "learning_rate": 1.9324231686452478e-06, | |
| "loss": 0.35991313457489016, | |
| "step": 1180, | |
| "token_acc": 0.8837700799671174 | |
| }, | |
| { | |
| "epoch": 0.7251675807434491, | |
| "grad_norm": 0.9356232121590031, | |
| "learning_rate": 1.8550607666039877e-06, | |
| "loss": 0.3538203716278076, | |
| "step": 1190, | |
| "token_acc": 0.8850202284200351 | |
| }, | |
| { | |
| "epoch": 0.7312614259597806, | |
| "grad_norm": 1.0163312252270116, | |
| "learning_rate": 1.7789245538944971e-06, | |
| "loss": 0.3607466459274292, | |
| "step": 1200, | |
| "token_acc": 0.8824661130842316 | |
| }, | |
| { | |
| "epoch": 0.7373552711761121, | |
| "grad_norm": 0.8390316456040804, | |
| "learning_rate": 1.7040442154820036e-06, | |
| "loss": 0.35505869388580324, | |
| "step": 1210, | |
| "token_acc": 0.8845901901507859 | |
| }, | |
| { | |
| "epoch": 0.7434491163924436, | |
| "grad_norm": 0.921086850463397, | |
| "learning_rate": 1.6304489466753237e-06, | |
| "loss": 0.35682291984558107, | |
| "step": 1220, | |
| "token_acc": 0.884017590582417 | |
| }, | |
| { | |
| "epoch": 0.7495429616087751, | |
| "grad_norm": 0.8352814372993298, | |
| "learning_rate": 1.5581674417438143e-06, | |
| "loss": 0.3599454164505005, | |
| "step": 1230, | |
| "token_acc": 0.8830610223076613 | |
| }, | |
| { | |
| "epoch": 0.7556368068251066, | |
| "grad_norm": 0.9561368940432438, | |
| "learning_rate": 1.4872278827296855e-06, | |
| "loss": 0.3544511079788208, | |
| "step": 1240, | |
| "token_acc": 0.884971241183666 | |
| }, | |
| { | |
| "epoch": 0.7617306520414382, | |
| "grad_norm": 0.9963256225377098, | |
| "learning_rate": 1.417657928460029e-06, | |
| "loss": 0.35143122673034666, | |
| "step": 1250, | |
| "token_acc": 0.8854597977852672 | |
| }, | |
| { | |
| "epoch": 0.7678244972577697, | |
| "grad_norm": 1.0464860200353496, | |
| "learning_rate": 1.349484703762834e-06, | |
| "loss": 0.3545159101486206, | |
| "step": 1260, | |
| "token_acc": 0.8848001191868091 | |
| }, | |
| { | |
| "epoch": 0.7739183424741012, | |
| "grad_norm": 0.9553675018651967, | |
| "learning_rate": 1.2827347888912057e-06, | |
| "loss": 0.3540821552276611, | |
| "step": 1270, | |
| "token_acc": 0.8845431750704823 | |
| }, | |
| { | |
| "epoch": 0.7800121876904327, | |
| "grad_norm": 0.9171124221466627, | |
| "learning_rate": 1.2174342091599277e-06, | |
| "loss": 0.3459270477294922, | |
| "step": 1280, | |
| "token_acc": 0.8876378370255273 | |
| }, | |
| { | |
| "epoch": 0.7861060329067642, | |
| "grad_norm": 0.9897434740336704, | |
| "learning_rate": 1.1536084247983626e-06, | |
| "loss": 0.3577150821685791, | |
| "step": 1290, | |
| "token_acc": 0.8842498302783435 | |
| }, | |
| { | |
| "epoch": 0.7921998781230957, | |
| "grad_norm": 0.88979092902762, | |
| "learning_rate": 1.0912823210237033e-06, | |
| "loss": 0.350811505317688, | |
| "step": 1300, | |
| "token_acc": 0.8856008373344852 | |
| }, | |
| { | |
| "epoch": 0.7982937233394272, | |
| "grad_norm": 0.9287859784083828, | |
| "learning_rate": 1.0304801983383989e-06, | |
| "loss": 0.3551754951477051, | |
| "step": 1310, | |
| "token_acc": 0.8848410538592661 | |
| }, | |
| { | |
| "epoch": 0.8043875685557587, | |
| "grad_norm": 0.8802985747226686, | |
| "learning_rate": 9.712257630555589e-07, | |
| "loss": 0.35124433040618896, | |
| "step": 1320, | |
| "token_acc": 0.8857088187898194 | |
| }, | |
| { | |
| "epoch": 0.8104814137720902, | |
| "grad_norm": 0.9867993671885138, | |
| "learning_rate": 9.135421180560394e-07, | |
| "loss": 0.3533953666687012, | |
| "step": 1330, | |
| "token_acc": 0.8847630099080603 | |
| }, | |
| { | |
| "epoch": 0.8165752589884216, | |
| "grad_norm": 0.926446790364043, | |
| "learning_rate": 8.574517537807897e-07, | |
| "loss": 0.345960807800293, | |
| "step": 1340, | |
| "token_acc": 0.8876687663254338 | |
| }, | |
| { | |
| "epoch": 0.8226691042047533, | |
| "grad_norm": 0.9083144656784397, | |
| "learning_rate": 8.029765394619899e-07, | |
| "loss": 0.35233092308044434, | |
| "step": 1350, | |
| "token_acc": 0.8852270821778219 | |
| }, | |
| { | |
| "epoch": 0.8287629494210847, | |
| "grad_norm": 0.8865701179019319, | |
| "learning_rate": 7.501377145963939e-07, | |
| "loss": 0.35347394943237304, | |
| "step": 1360, | |
| "token_acc": 0.8848507491917527 | |
| }, | |
| { | |
| "epoch": 0.8348567946374162, | |
| "grad_norm": 0.8797844443235806, | |
| "learning_rate": 6.98955880664205e-07, | |
| "loss": 0.35233142375946047, | |
| "step": 1370, | |
| "token_acc": 0.8857494626572902 | |
| }, | |
| { | |
| "epoch": 0.8409506398537477, | |
| "grad_norm": 0.985930499180468, | |
| "learning_rate": 6.494509930967019e-07, | |
| "loss": 0.3484508991241455, | |
| "step": 1380, | |
| "token_acc": 0.8862226663569039 | |
| }, | |
| { | |
| "epoch": 0.8470444850700792, | |
| "grad_norm": 0.8385926015490823, | |
| "learning_rate": 6.016423534957616e-07, | |
| "loss": 0.34513344764709475, | |
| "step": 1390, | |
| "token_acc": 0.88766630420385 | |
| }, | |
| { | |
| "epoch": 0.8531383302864107, | |
| "grad_norm": 0.9469060182104153, | |
| "learning_rate": 5.555486021082979e-07, | |
| "loss": 0.3453853130340576, | |
| "step": 1400, | |
| "token_acc": 0.8872980190401473 | |
| }, | |
| { | |
| "epoch": 0.8592321755027422, | |
| "grad_norm": 1.0619116209691746, | |
| "learning_rate": 5.111877105585672e-07, | |
| "loss": 0.35715694427490235, | |
| "step": 1410, | |
| "token_acc": 0.8840584828365589 | |
| }, | |
| { | |
| "epoch": 0.8653260207190737, | |
| "grad_norm": 0.9990471419637711, | |
| "learning_rate": 4.6857697484116006e-07, | |
| "loss": 0.34844698905944826, | |
| "step": 1420, | |
| "token_acc": 0.8861595746957418 | |
| }, | |
| { | |
| "epoch": 0.8714198659354052, | |
| "grad_norm": 0.8653174829680845, | |
| "learning_rate": 4.277330085774156e-07, | |
| "loss": 0.34473817348480223, | |
| "step": 1430, | |
| "token_acc": 0.8869124712097335 | |
| }, | |
| { | |
| "epoch": 0.8775137111517367, | |
| "grad_norm": 0.978323586867761, | |
| "learning_rate": 3.886717365378867e-07, | |
| "loss": 0.3523882865905762, | |
| "step": 1440, | |
| "token_acc": 0.8849034480348013 | |
| }, | |
| { | |
| "epoch": 0.8836075563680682, | |
| "grad_norm": 1.0140389777919647, | |
| "learning_rate": 3.5140838843339073e-07, | |
| "loss": 0.3476292848587036, | |
| "step": 1450, | |
| "token_acc": 0.8866329934005767 | |
| }, | |
| { | |
| "epoch": 0.8897014015843998, | |
| "grad_norm": 1.0064657138214737, | |
| "learning_rate": 3.159574929770515e-07, | |
| "loss": 0.35365211963653564, | |
| "step": 1460, | |
| "token_acc": 0.8852465385385505 | |
| }, | |
| { | |
| "epoch": 0.8957952468007313, | |
| "grad_norm": 0.9324871915195588, | |
| "learning_rate": 2.8233287221965555e-07, | |
| "loss": 0.3441819190979004, | |
| "step": 1470, | |
| "token_acc": 0.8871095878318941 | |
| }, | |
| { | |
| "epoch": 0.9018890920170628, | |
| "grad_norm": 0.9055988245681192, | |
| "learning_rate": 2.5054763616053967e-07, | |
| "loss": 0.34738845825195314, | |
| "step": 1480, | |
| "token_acc": 0.8870410481583068 | |
| }, | |
| { | |
| "epoch": 0.9079829372333943, | |
| "grad_norm": 0.8845337059700371, | |
| "learning_rate": 2.2061417763608818e-07, | |
| "loss": 0.3496507167816162, | |
| "step": 1490, | |
| "token_acc": 0.8858089991712572 | |
| }, | |
| { | |
| "epoch": 0.9140767824497258, | |
| "grad_norm": 0.8884170981985747, | |
| "learning_rate": 1.9254416748786086e-07, | |
| "loss": 0.34417023658752444, | |
| "step": 1500, | |
| "token_acc": 0.8876897324425693 | |
| }, | |
| { | |
| "epoch": 0.9201706276660573, | |
| "grad_norm": 0.991921395955364, | |
| "learning_rate": 1.6634855001221195e-07, | |
| "loss": 0.3475677490234375, | |
| "step": 1510, | |
| "token_acc": 0.8866243585461391 | |
| }, | |
| { | |
| "epoch": 0.9262644728823888, | |
| "grad_norm": 0.8822494961130495, | |
| "learning_rate": 1.4203753869318882e-07, | |
| "loss": 0.35834810733795164, | |
| "step": 1520, | |
| "token_acc": 0.8836910930175179 | |
| }, | |
| { | |
| "epoch": 0.9323583180987203, | |
| "grad_norm": 1.0007870631310825, | |
| "learning_rate": 1.196206122203647e-07, | |
| "loss": 0.3498887777328491, | |
| "step": 1530, | |
| "token_acc": 0.8859136668935295 | |
| }, | |
| { | |
| "epoch": 0.9384521633150518, | |
| "grad_norm": 0.9115641715955437, | |
| "learning_rate": 9.910651079316824e-08, | |
| "loss": 0.3380606651306152, | |
| "step": 1540, | |
| "token_acc": 0.8888504997761748 | |
| }, | |
| { | |
| "epoch": 0.9445460085313833, | |
| "grad_norm": 0.9336474945041258, | |
| "learning_rate": 8.050323271314331e-08, | |
| "loss": 0.34683611392974856, | |
| "step": 1550, | |
| "token_acc": 0.8867626671565236 | |
| }, | |
| { | |
| "epoch": 0.9506398537477148, | |
| "grad_norm": 0.9153041920993996, | |
| "learning_rate": 6.381803126546405e-08, | |
| "loss": 0.3438985824584961, | |
| "step": 1560, | |
| "token_acc": 0.8876278171714178 | |
| }, | |
| { | |
| "epoch": 0.9567336989640464, | |
| "grad_norm": 0.8723491469657118, | |
| "learning_rate": 4.9057411890933714e-08, | |
| "loss": 0.35089046955108644, | |
| "step": 1570, | |
| "token_acc": 0.8854520115332541 | |
| }, | |
| { | |
| "epoch": 0.9628275441803779, | |
| "grad_norm": 0.8955372459045878, | |
| "learning_rate": 3.622712964956032e-08, | |
| "loss": 0.34657576084136965, | |
| "step": 1580, | |
| "token_acc": 0.8870879211520062 | |
| }, | |
| { | |
| "epoch": 0.9689213893967094, | |
| "grad_norm": 0.9416702515233623, | |
| "learning_rate": 2.5332186976697037e-08, | |
| "loss": 0.35133283138275145, | |
| "step": 1590, | |
| "token_acc": 0.8860156117328086 | |
| }, | |
| { | |
| "epoch": 0.9750152346130408, | |
| "grad_norm": 0.8711895076149625, | |
| "learning_rate": 1.637683173263238e-08, | |
| "loss": 0.35227146148681643, | |
| "step": 1600, | |
| "token_acc": 0.8855705009128142 | |
| }, | |
| { | |
| "epoch": 0.9811090798293723, | |
| "grad_norm": 0.983729311544991, | |
| "learning_rate": 9.364555546375054e-09, | |
| "loss": 0.34629082679748535, | |
| "step": 1610, | |
| "token_acc": 0.8869587094319709 | |
| }, | |
| { | |
| "epoch": 0.9872029250457038, | |
| "grad_norm": 0.94198406227018, | |
| "learning_rate": 4.2980924542984634e-09, | |
| "loss": 0.3403524875640869, | |
| "step": 1620, | |
| "token_acc": 0.8887918722020187 | |
| }, | |
| { | |
| "epoch": 0.9932967702620353, | |
| "grad_norm": 0.8500481071531769, | |
| "learning_rate": 1.179417834153429e-09, | |
| "loss": 0.3546321868896484, | |
| "step": 1630, | |
| "token_acc": 0.8848728077900511 | |
| }, | |
| { | |
| "epoch": 0.9993906154783668, | |
| "grad_norm": 0.9946340081463461, | |
| "learning_rate": 9.74763488759134e-12, | |
| "loss": 0.35070624351501467, | |
| "step": 1640, | |
| "token_acc": 0.8863686895606487 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1641, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2214001985716224.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |