Audio-Text-to-Text
Transformers
Safetensors
English
Chinese
qwen3_omni_moe
text-to-audio
audio
audio-language-model
instruction-following
rubric-based-evaluation
judge-model
Instructions to use cucl2/AnyAudio-Judge-30B with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use cucl2/AnyAudio-Judge-30B with Transformers:
# Load model directly from transformers import AutoProcessor, AutoModelForTextToWaveform processor = AutoProcessor.from_pretrained("cucl2/AnyAudio-Judge-30B") model = AutoModelForTextToWaveform.from_pretrained("cucl2/AnyAudio-Judge-30B") - Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 200.0, | |
| "global_step": 1641, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0006093845216331506, | |
| "grad_norm": 7.29836574807662, | |
| "learning_rate": 2.0000000000000002e-07, | |
| "loss": 0.6879574656486511, | |
| "step": 1, | |
| "token_acc": 0.8069400259219983 | |
| }, | |
| { | |
| "epoch": 0.006093845216331505, | |
| "grad_norm": 3.9069477397555814, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 0.6863329675462511, | |
| "step": 10, | |
| "token_acc": 0.8062499341798103 | |
| }, | |
| { | |
| "epoch": 0.01218769043266301, | |
| "grad_norm": 1.6499482285217686, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.5632639408111573, | |
| "step": 20, | |
| "token_acc": 0.832315593221387 | |
| }, | |
| { | |
| "epoch": 0.018281535648994516, | |
| "grad_norm": 1.3391083762662725, | |
| "learning_rate": 6e-06, | |
| "loss": 0.4509421348571777, | |
| "step": 30, | |
| "token_acc": 0.859350495238436 | |
| }, | |
| { | |
| "epoch": 0.02437538086532602, | |
| "grad_norm": 1.0720631754188148, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.4215505599975586, | |
| "step": 40, | |
| "token_acc": 0.8660933700604836 | |
| }, | |
| { | |
| "epoch": 0.030469226081657527, | |
| "grad_norm": 1.300502159057601, | |
| "learning_rate": 1e-05, | |
| "loss": 0.3916645526885986, | |
| "step": 50, | |
| "token_acc": 0.8749446762027628 | |
| }, | |
| { | |
| "epoch": 0.03656307129798903, | |
| "grad_norm": 1.2222066565570464, | |
| "learning_rate": 9.999025267866269e-06, | |
| "loss": 0.37738680839538574, | |
| "step": 60, | |
| "token_acc": 0.8773714810281518 | |
| }, | |
| { | |
| "epoch": 0.042656916514320534, | |
| "grad_norm": 1.1003373829333203, | |
| "learning_rate": 9.996101451506166e-06, | |
| "loss": 0.36339468955993653, | |
| "step": 70, | |
| "token_acc": 0.8811438359423324 | |
| }, | |
| { | |
| "epoch": 0.04875076173065204, | |
| "grad_norm": 0.9139572833064542, | |
| "learning_rate": 9.991229690894796e-06, | |
| "loss": 0.35523133277893065, | |
| "step": 80, | |
| "token_acc": 0.8833139693331612 | |
| }, | |
| { | |
| "epoch": 0.054844606946983544, | |
| "grad_norm": 1.0649357265795398, | |
| "learning_rate": 9.984411885496807e-06, | |
| "loss": 0.36147160530090333, | |
| "step": 90, | |
| "token_acc": 0.8827829089555647 | |
| }, | |
| { | |
| "epoch": 0.06093845216331505, | |
| "grad_norm": 1.0474850371515747, | |
| "learning_rate": 9.975650693525798e-06, | |
| "loss": 0.35729637145996096, | |
| "step": 100, | |
| "token_acc": 0.8842431348706591 | |
| }, | |
| { | |
| "epoch": 0.06703229737964655, | |
| "grad_norm": 1.0275221386997506, | |
| "learning_rate": 9.964949530907907e-06, | |
| "loss": 0.3474123477935791, | |
| "step": 110, | |
| "token_acc": 0.8856792866706549 | |
| }, | |
| { | |
| "epoch": 0.07312614259597806, | |
| "grad_norm": 1.2902357608957626, | |
| "learning_rate": 9.952312569949963e-06, | |
| "loss": 0.3479644775390625, | |
| "step": 120, | |
| "token_acc": 0.8849104859335039 | |
| }, | |
| { | |
| "epoch": 0.07921998781230957, | |
| "grad_norm": 1.3451160019315398, | |
| "learning_rate": 9.937744737712734e-06, | |
| "loss": 0.3466474533081055, | |
| "step": 130, | |
| "token_acc": 0.8861058585962355 | |
| }, | |
| { | |
| "epoch": 0.08531383302864107, | |
| "grad_norm": 1.0790851469433436, | |
| "learning_rate": 9.921251714089898e-06, | |
| "loss": 0.34614810943603513, | |
| "step": 140, | |
| "token_acc": 0.8854811515034624 | |
| }, | |
| { | |
| "epoch": 0.09140767824497258, | |
| "grad_norm": 1.0038008030386316, | |
| "learning_rate": 9.9028399295935e-06, | |
| "loss": 0.3384540557861328, | |
| "step": 150, | |
| "token_acc": 0.8879619162858826 | |
| }, | |
| { | |
| "epoch": 0.09750152346130408, | |
| "grad_norm": 1.027349218243697, | |
| "learning_rate": 9.882516562846735e-06, | |
| "loss": 0.33826944828033445, | |
| "step": 160, | |
| "token_acc": 0.8878969612617404 | |
| }, | |
| { | |
| "epoch": 0.1035953686776356, | |
| "grad_norm": 1.0944757240532788, | |
| "learning_rate": 9.860289537785058e-06, | |
| "loss": 0.3368839740753174, | |
| "step": 170, | |
| "token_acc": 0.8883107398785887 | |
| }, | |
| { | |
| "epoch": 0.10968921389396709, | |
| "grad_norm": 0.9672890666466603, | |
| "learning_rate": 9.83616752056669e-06, | |
| "loss": 0.3455761194229126, | |
| "step": 180, | |
| "token_acc": 0.8851188684923262 | |
| }, | |
| { | |
| "epoch": 0.1157830591102986, | |
| "grad_norm": 0.8886431201198384, | |
| "learning_rate": 9.810159916193763e-06, | |
| "loss": 0.32952630519866943, | |
| "step": 190, | |
| "token_acc": 0.8905697047489018 | |
| }, | |
| { | |
| "epoch": 0.1218769043266301, | |
| "grad_norm": 0.9444272816074001, | |
| "learning_rate": 9.782276864845351e-06, | |
| "loss": 0.33125505447387693, | |
| "step": 200, | |
| "token_acc": 0.8897695109589824 | |
| }, | |
| { | |
| "epoch": 0.12797074954296161, | |
| "grad_norm": 1.0093535144294836, | |
| "learning_rate": 9.752529237923914e-06, | |
| "loss": 0.3311288833618164, | |
| "step": 210, | |
| "token_acc": 0.8905364268561583 | |
| }, | |
| { | |
| "epoch": 0.1340645947592931, | |
| "grad_norm": 1.1532189931201864, | |
| "learning_rate": 9.720928633816596e-06, | |
| "loss": 0.3244771003723145, | |
| "step": 220, | |
| "token_acc": 0.8915392526998382 | |
| }, | |
| { | |
| "epoch": 0.14015843997562463, | |
| "grad_norm": 0.9598378464215558, | |
| "learning_rate": 9.687487373373103e-06, | |
| "loss": 0.3279410362243652, | |
| "step": 230, | |
| "token_acc": 0.8906910502215741 | |
| }, | |
| { | |
| "epoch": 0.14625228519195613, | |
| "grad_norm": 0.9540187948014937, | |
| "learning_rate": 9.652218495101894e-06, | |
| "loss": 0.3265339136123657, | |
| "step": 240, | |
| "token_acc": 0.8910028614336833 | |
| }, | |
| { | |
| "epoch": 0.15234613040828762, | |
| "grad_norm": 1.0470189428654273, | |
| "learning_rate": 9.61513575008656e-06, | |
| "loss": 0.33319640159606934, | |
| "step": 250, | |
| "token_acc": 0.8888186484938951 | |
| }, | |
| { | |
| "epoch": 0.15843997562461914, | |
| "grad_norm": 0.9615038649371879, | |
| "learning_rate": 9.576253596624367e-06, | |
| "loss": 0.32928056716918946, | |
| "step": 260, | |
| "token_acc": 0.8897860391237342 | |
| }, | |
| { | |
| "epoch": 0.16453382084095064, | |
| "grad_norm": 1.09800599181465, | |
| "learning_rate": 9.53558719458908e-06, | |
| "loss": 0.32557024955749514, | |
| "step": 270, | |
| "token_acc": 0.8914715787293208 | |
| }, | |
| { | |
| "epoch": 0.17062766605728213, | |
| "grad_norm": 1.0743262974854428, | |
| "learning_rate": 9.49315239952023e-06, | |
| "loss": 0.32053494453430176, | |
| "step": 280, | |
| "token_acc": 0.8929576222604401 | |
| }, | |
| { | |
| "epoch": 0.17672151127361366, | |
| "grad_norm": 1.060412117175443, | |
| "learning_rate": 9.448965756441154e-06, | |
| "loss": 0.3243874073028564, | |
| "step": 290, | |
| "token_acc": 0.8921667614562232 | |
| }, | |
| { | |
| "epoch": 0.18281535648994515, | |
| "grad_norm": 0.9594753815838422, | |
| "learning_rate": 9.403044493408205e-06, | |
| "loss": 0.3233642578125, | |
| "step": 300, | |
| "token_acc": 0.891047436596846 | |
| }, | |
| { | |
| "epoch": 0.18890920170627665, | |
| "grad_norm": 0.9403281436285019, | |
| "learning_rate": 9.355406514793667e-06, | |
| "loss": 0.31829214096069336, | |
| "step": 310, | |
| "token_acc": 0.8938932609968795 | |
| }, | |
| { | |
| "epoch": 0.19500304692260817, | |
| "grad_norm": 0.9536634938537397, | |
| "learning_rate": 9.306070394304955e-06, | |
| "loss": 0.3202193260192871, | |
| "step": 320, | |
| "token_acc": 0.8931520198180799 | |
| }, | |
| { | |
| "epoch": 0.20109689213893966, | |
| "grad_norm": 1.119379822493263, | |
| "learning_rate": 9.255055367742868e-06, | |
| "loss": 0.3239091396331787, | |
| "step": 330, | |
| "token_acc": 0.8923521607278241 | |
| }, | |
| { | |
| "epoch": 0.2071907373552712, | |
| "grad_norm": 1.0373576096304553, | |
| "learning_rate": 9.202381325501683e-06, | |
| "loss": 0.31700589656829836, | |
| "step": 340, | |
| "token_acc": 0.8944783352337514 | |
| }, | |
| { | |
| "epoch": 0.21328458257160268, | |
| "grad_norm": 1.0632563437214946, | |
| "learning_rate": 9.148068804814032e-06, | |
| "loss": 0.31794281005859376, | |
| "step": 350, | |
| "token_acc": 0.8930956012903548 | |
| }, | |
| { | |
| "epoch": 0.21937842778793418, | |
| "grad_norm": 1.0242050960110551, | |
| "learning_rate": 9.092138981743588e-06, | |
| "loss": 0.3202871799468994, | |
| "step": 360, | |
| "token_acc": 0.8935469022061816 | |
| }, | |
| { | |
| "epoch": 0.2254722730042657, | |
| "grad_norm": 0.8239921572139911, | |
| "learning_rate": 9.034613662928665e-06, | |
| "loss": 0.3142183542251587, | |
| "step": 370, | |
| "token_acc": 0.8951745718050066 | |
| }, | |
| { | |
| "epoch": 0.2315661182205972, | |
| "grad_norm": 0.9147511550012487, | |
| "learning_rate": 8.975515277079961e-06, | |
| "loss": 0.3087962865829468, | |
| "step": 380, | |
| "token_acc": 0.8958298740422705 | |
| }, | |
| { | |
| "epoch": 0.2376599634369287, | |
| "grad_norm": 0.8794833827260621, | |
| "learning_rate": 8.91486686623577e-06, | |
| "loss": 0.3132402658462524, | |
| "step": 390, | |
| "token_acc": 0.8948639533970186 | |
| }, | |
| { | |
| "epoch": 0.2437538086532602, | |
| "grad_norm": 1.0069623307664877, | |
| "learning_rate": 8.85269207677806e-06, | |
| "loss": 0.31006736755371095, | |
| "step": 400, | |
| "token_acc": 0.8951928192311975 | |
| }, | |
| { | |
| "epoch": 0.2498476538695917, | |
| "grad_norm": 0.9808015041824597, | |
| "learning_rate": 8.789015150212907e-06, | |
| "loss": 0.30683579444885256, | |
| "step": 410, | |
| "token_acc": 0.8967586393232839 | |
| }, | |
| { | |
| "epoch": 0.25594149908592323, | |
| "grad_norm": 0.9081237770188716, | |
| "learning_rate": 8.72386091371891e-06, | |
| "loss": 0.3061988830566406, | |
| "step": 420, | |
| "token_acc": 0.8959391589507399 | |
| }, | |
| { | |
| "epoch": 0.2620353443022547, | |
| "grad_norm": 1.04219527083527, | |
| "learning_rate": 8.657254770467252e-06, | |
| "loss": 0.3091754674911499, | |
| "step": 430, | |
| "token_acc": 0.8954508616603208 | |
| }, | |
| { | |
| "epoch": 0.2681291895185862, | |
| "grad_norm": 1.0065133793639498, | |
| "learning_rate": 8.58922268971719e-06, | |
| "loss": 0.30993127822875977, | |
| "step": 440, | |
| "token_acc": 0.895664191270881 | |
| }, | |
| { | |
| "epoch": 0.2742230347349177, | |
| "grad_norm": 0.9080797671925362, | |
| "learning_rate": 8.51979119669081e-06, | |
| "loss": 0.31555490493774413, | |
| "step": 450, | |
| "token_acc": 0.8941405988077487 | |
| }, | |
| { | |
| "epoch": 0.28031687995124926, | |
| "grad_norm": 0.9841139463866474, | |
| "learning_rate": 8.448987362231054e-06, | |
| "loss": 0.30534186363220217, | |
| "step": 460, | |
| "token_acc": 0.8968707588256722 | |
| }, | |
| { | |
| "epoch": 0.28641072516758076, | |
| "grad_norm": 0.9677823622528902, | |
| "learning_rate": 8.376838792246978e-06, | |
| "loss": 0.3050978422164917, | |
| "step": 470, | |
| "token_acc": 0.8967596979985816 | |
| }, | |
| { | |
| "epoch": 0.29250457038391225, | |
| "grad_norm": 0.8117589456035273, | |
| "learning_rate": 8.303373616950408e-06, | |
| "loss": 0.3012993335723877, | |
| "step": 480, | |
| "token_acc": 0.898916481794861 | |
| }, | |
| { | |
| "epoch": 0.29859841560024375, | |
| "grad_norm": 0.8967761049487325, | |
| "learning_rate": 8.228620479888172e-06, | |
| "loss": 0.2984607219696045, | |
| "step": 490, | |
| "token_acc": 0.8986162002706045 | |
| }, | |
| { | |
| "epoch": 0.30469226081657524, | |
| "grad_norm": 0.7934114582439064, | |
| "learning_rate": 8.152608526774188e-06, | |
| "loss": 0.3049586057662964, | |
| "step": 500, | |
| "token_acc": 0.8968112886022876 | |
| }, | |
| { | |
| "epoch": 0.31078610603290674, | |
| "grad_norm": 0.825580955342704, | |
| "learning_rate": 8.075367394125755e-06, | |
| "loss": 0.30215206146240237, | |
| "step": 510, | |
| "token_acc": 0.8978885397098497 | |
| }, | |
| { | |
| "epoch": 0.3168799512492383, | |
| "grad_norm": 0.8296290441677941, | |
| "learning_rate": 7.996927197708486e-06, | |
| "loss": 0.3088541507720947, | |
| "step": 520, | |
| "token_acc": 0.8963321107035679 | |
| }, | |
| { | |
| "epoch": 0.3229737964655698, | |
| "grad_norm": 0.8755135202445912, | |
| "learning_rate": 7.917318520794395e-06, | |
| "loss": 0.30083427429199217, | |
| "step": 530, | |
| "token_acc": 0.899119480167394 | |
| }, | |
| { | |
| "epoch": 0.3290676416819013, | |
| "grad_norm": 0.9101072984644949, | |
| "learning_rate": 7.836572402237683e-06, | |
| "loss": 0.3058091878890991, | |
| "step": 540, | |
| "token_acc": 0.896643718272106 | |
| }, | |
| { | |
| "epoch": 0.3351614868982328, | |
| "grad_norm": 0.9771967807763615, | |
| "learning_rate": 7.754720324372924e-06, | |
| "loss": 0.30214991569519045, | |
| "step": 550, | |
| "token_acc": 0.8980588639486945 | |
| }, | |
| { | |
| "epoch": 0.34125533211456427, | |
| "grad_norm": 1.0026225580388461, | |
| "learning_rate": 7.67179420074032e-06, | |
| "loss": 0.3041478395462036, | |
| "step": 560, | |
| "token_acc": 0.8965942594865093 | |
| }, | |
| { | |
| "epoch": 0.3473491773308958, | |
| "grad_norm": 0.9388665918318329, | |
| "learning_rate": 7.587826363642845e-06, | |
| "loss": 0.30187268257141114, | |
| "step": 570, | |
| "token_acc": 0.8980740928392202 | |
| }, | |
| { | |
| "epoch": 0.3534430225472273, | |
| "grad_norm": 0.9610197211126468, | |
| "learning_rate": 7.502849551540106e-06, | |
| "loss": 0.2962314605712891, | |
| "step": 580, | |
| "token_acc": 0.8994921135841125 | |
| }, | |
| { | |
| "epoch": 0.3595368677635588, | |
| "grad_norm": 0.832216076371822, | |
| "learning_rate": 7.4168968962838524e-06, | |
| "loss": 0.2948365926742554, | |
| "step": 590, | |
| "token_acc": 0.8995369426034115 | |
| }, | |
| { | |
| "epoch": 0.3656307129798903, | |
| "grad_norm": 0.9377431212404606, | |
| "learning_rate": 7.330001910200111e-06, | |
| "loss": 0.29007649421691895, | |
| "step": 600, | |
| "token_acc": 0.9010131261293394 | |
| }, | |
| { | |
| "epoch": 0.3717245581962218, | |
| "grad_norm": 0.8726611852126548, | |
| "learning_rate": 7.242198473022958e-06, | |
| "loss": 0.2962885856628418, | |
| "step": 610, | |
| "token_acc": 0.9000062303355035 | |
| }, | |
| { | |
| "epoch": 0.3778184034125533, | |
| "grad_norm": 0.9153282793617801, | |
| "learning_rate": 7.15352081868506e-06, | |
| "loss": 0.30144367218017576, | |
| "step": 620, | |
| "token_acc": 0.8989331770222744 | |
| }, | |
| { | |
| "epoch": 0.38391224862888484, | |
| "grad_norm": 0.993391313101372, | |
| "learning_rate": 7.0640035219701085e-06, | |
| "loss": 0.301465106010437, | |
| "step": 630, | |
| "token_acc": 0.8974685325619576 | |
| }, | |
| { | |
| "epoch": 0.39000609384521634, | |
| "grad_norm": 1.0046408788594328, | |
| "learning_rate": 6.973681485032359e-06, | |
| "loss": 0.2955395460128784, | |
| "step": 640, | |
| "token_acc": 0.8996091046695718 | |
| }, | |
| { | |
| "epoch": 0.39609993906154783, | |
| "grad_norm": 0.822820271911727, | |
| "learning_rate": 6.8825899237885215e-06, | |
| "loss": 0.2931050300598145, | |
| "step": 650, | |
| "token_acc": 0.901203589259751 | |
| }, | |
| { | |
| "epoch": 0.40219378427787933, | |
| "grad_norm": 0.8482496681393756, | |
| "learning_rate": 6.7907643541873446e-06, | |
| "loss": 0.29596996307373047, | |
| "step": 660, | |
| "token_acc": 0.8996866207121305 | |
| }, | |
| { | |
| "epoch": 0.4082876294942108, | |
| "grad_norm": 0.8775663994372018, | |
| "learning_rate": 6.698240578362179e-06, | |
| "loss": 0.29141840934753416, | |
| "step": 670, | |
| "token_acc": 0.9003262426482238 | |
| }, | |
| { | |
| "epoch": 0.4143814747105424, | |
| "grad_norm": 0.984669646190565, | |
| "learning_rate": 6.6050546706719984e-06, | |
| "loss": 0.29290521144866943, | |
| "step": 680, | |
| "token_acc": 0.9014104043327218 | |
| }, | |
| { | |
| "epoch": 0.42047531992687387, | |
| "grad_norm": 0.8784418931211103, | |
| "learning_rate": 6.511242963636257e-06, | |
| "loss": 0.29056534767150877, | |
| "step": 690, | |
| "token_acc": 0.9016642094853267 | |
| }, | |
| { | |
| "epoch": 0.42656916514320536, | |
| "grad_norm": 1.0470361792821843, | |
| "learning_rate": 6.416842033769106e-06, | |
| "loss": 0.2978256940841675, | |
| "step": 700, | |
| "token_acc": 0.8997917186822428 | |
| }, | |
| { | |
| "epoch": 0.43266301035953686, | |
| "grad_norm": 0.9613791001197699, | |
| "learning_rate": 6.321888687318457e-06, | |
| "loss": 0.2870903253555298, | |
| "step": 710, | |
| "token_acc": 0.903113691147251 | |
| }, | |
| { | |
| "epoch": 0.43875685557586835, | |
| "grad_norm": 0.8405716630112535, | |
| "learning_rate": 6.2264199459155105e-06, | |
| "loss": 0.29581589698791505, | |
| "step": 720, | |
| "token_acc": 0.9003898532372131 | |
| }, | |
| { | |
| "epoch": 0.4448507007921999, | |
| "grad_norm": 0.9817927857442479, | |
| "learning_rate": 6.130473032140272e-06, | |
| "loss": 0.29129691123962403, | |
| "step": 730, | |
| "token_acc": 0.9009383225625913 | |
| }, | |
| { | |
| "epoch": 0.4509445460085314, | |
| "grad_norm": 0.9100915684781385, | |
| "learning_rate": 6.0340853550087345e-06, | |
| "loss": 0.29650187492370605, | |
| "step": 740, | |
| "token_acc": 0.9002656385758284 | |
| }, | |
| { | |
| "epoch": 0.4570383912248629, | |
| "grad_norm": 0.9238619342391209, | |
| "learning_rate": 5.937294495387377e-06, | |
| "loss": 0.2921621561050415, | |
| "step": 750, | |
| "token_acc": 0.9008455874319925 | |
| }, | |
| { | |
| "epoch": 0.4631322364411944, | |
| "grad_norm": 0.8289061064281614, | |
| "learning_rate": 5.840138191340651e-06, | |
| "loss": 0.28725643157958985, | |
| "step": 760, | |
| "token_acc": 0.9028466795835374 | |
| }, | |
| { | |
| "epoch": 0.4692260816575259, | |
| "grad_norm": 0.8901360785145829, | |
| "learning_rate": 5.7426543234171736e-06, | |
| "loss": 0.2865636348724365, | |
| "step": 770, | |
| "token_acc": 0.90197109501604 | |
| }, | |
| { | |
| "epoch": 0.4753199268738574, | |
| "grad_norm": 0.8709058451908881, | |
| "learning_rate": 5.644880899880382e-06, | |
| "loss": 0.2886040687561035, | |
| "step": 780, | |
| "token_acc": 0.9023270689287564 | |
| }, | |
| { | |
| "epoch": 0.48141377209018893, | |
| "grad_norm": 0.9306196525173549, | |
| "learning_rate": 5.546856041889374e-06, | |
| "loss": 0.28833470344543455, | |
| "step": 790, | |
| "token_acc": 0.9016039529639475 | |
| }, | |
| { | |
| "epoch": 0.4875076173065204, | |
| "grad_norm": 0.9401250944884257, | |
| "learning_rate": 5.448617968635741e-06, | |
| "loss": 0.28241567611694335, | |
| "step": 800, | |
| "token_acc": 0.9046351860634857 | |
| }, | |
| { | |
| "epoch": 0.4936014625228519, | |
| "grad_norm": 0.849983180158667, | |
| "learning_rate": 5.35020498244219e-06, | |
| "loss": 0.2863471508026123, | |
| "step": 810, | |
| "token_acc": 0.9020820443108771 | |
| }, | |
| { | |
| "epoch": 0.4996953077391834, | |
| "grad_norm": 0.7275676892245573, | |
| "learning_rate": 5.251655453828728e-06, | |
| "loss": 0.28403263092041015, | |
| "step": 820, | |
| "token_acc": 0.9032200331101135 | |
| }, | |
| { | |
| "epoch": 0.505789152955515, | |
| "grad_norm": 0.8630110541652776, | |
| "learning_rate": 5.153007806552275e-06, | |
| "loss": 0.28420357704162597, | |
| "step": 830, | |
| "token_acc": 0.9033704118180856 | |
| }, | |
| { | |
| "epoch": 0.5118829981718465, | |
| "grad_norm": 0.8835421688612489, | |
| "learning_rate": 5.054300502625517e-06, | |
| "loss": 0.2866727352142334, | |
| "step": 840, | |
| "token_acc": 0.9032091030720939 | |
| }, | |
| { | |
| "epoch": 0.517976843388178, | |
| "grad_norm": 0.8544875287993453, | |
| "learning_rate": 4.9555720273208475e-06, | |
| "loss": 0.289061975479126, | |
| "step": 850, | |
| "token_acc": 0.9017317721145331 | |
| }, | |
| { | |
| "epoch": 0.5240706886045094, | |
| "grad_norm": 0.8549205024097043, | |
| "learning_rate": 4.856860874165218e-06, | |
| "loss": 0.2889714241027832, | |
| "step": 860, | |
| "token_acc": 0.9025821278082484 | |
| }, | |
| { | |
| "epoch": 0.5301645338208409, | |
| "grad_norm": 0.9236105201664164, | |
| "learning_rate": 4.758205529931808e-06, | |
| "loss": 0.2887147903442383, | |
| "step": 870, | |
| "token_acc": 0.9019780647042623 | |
| }, | |
| { | |
| "epoch": 0.5362583790371724, | |
| "grad_norm": 0.8682794949168545, | |
| "learning_rate": 4.659644459634293e-06, | |
| "loss": 0.27901973724365237, | |
| "step": 880, | |
| "token_acc": 0.9043348147353298 | |
| }, | |
| { | |
| "epoch": 0.5423522242535039, | |
| "grad_norm": 0.8729641279912889, | |
| "learning_rate": 4.56121609152961e-06, | |
| "loss": 0.2851783275604248, | |
| "step": 890, | |
| "token_acc": 0.9031912203833561 | |
| }, | |
| { | |
| "epoch": 0.5484460694698354, | |
| "grad_norm": 0.8418875200344721, | |
| "learning_rate": 4.462958802135069e-06, | |
| "loss": 0.27748913764953614, | |
| "step": 900, | |
| "token_acc": 0.9059390881360567 | |
| }, | |
| { | |
| "epoch": 0.5545399146861669, | |
| "grad_norm": 0.8894129853584928, | |
| "learning_rate": 4.364910901265607e-06, | |
| "loss": 0.28034243583679197, | |
| "step": 910, | |
| "token_acc": 0.9040050510001095 | |
| }, | |
| { | |
| "epoch": 0.5606337599024985, | |
| "grad_norm": 0.8334588350840866, | |
| "learning_rate": 4.2671106170970734e-06, | |
| "loss": 0.2801810264587402, | |
| "step": 920, | |
| "token_acc": 0.9042555097117814 | |
| }, | |
| { | |
| "epoch": 0.56672760511883, | |
| "grad_norm": 0.8763484647820953, | |
| "learning_rate": 4.169596081261332e-06, | |
| "loss": 0.2837662696838379, | |
| "step": 930, | |
| "token_acc": 0.9037383810780553 | |
| }, | |
| { | |
| "epoch": 0.5728214503351615, | |
| "grad_norm": 0.8713237221620964, | |
| "learning_rate": 4.072405313979021e-06, | |
| "loss": 0.27712116241455076, | |
| "step": 940, | |
| "token_acc": 0.9053036654966837 | |
| }, | |
| { | |
| "epoch": 0.578915295551493, | |
| "grad_norm": 0.8844118885887313, | |
| "learning_rate": 3.975576209235726e-06, | |
| "loss": 0.2806640625, | |
| "step": 950, | |
| "token_acc": 0.9047340125759082 | |
| }, | |
| { | |
| "epoch": 0.5850091407678245, | |
| "grad_norm": 0.8719900072150049, | |
| "learning_rate": 3.879146520007399e-06, | |
| "loss": 0.27962145805358884, | |
| "step": 960, | |
| "token_acc": 0.9052189543003484 | |
| }, | |
| { | |
| "epoch": 0.591102985984156, | |
| "grad_norm": 0.8621214557871747, | |
| "learning_rate": 3.7831538435407344e-06, | |
| "loss": 0.281157398223877, | |
| "step": 970, | |
| "token_acc": 0.9040866660422715 | |
| }, | |
| { | |
| "epoch": 0.5971968312004875, | |
| "grad_norm": 0.85966956497571, | |
| "learning_rate": 3.687635606694271e-06, | |
| "loss": 0.2849492073059082, | |
| "step": 980, | |
| "token_acc": 0.9041384613065175 | |
| }, | |
| { | |
| "epoch": 0.603290676416819, | |
| "grad_norm": 0.8505152160082087, | |
| "learning_rate": 3.592629051345936e-06, | |
| "loss": 0.2792569637298584, | |
| "step": 990, | |
| "token_acc": 0.9054755884673447 | |
| }, | |
| { | |
| "epoch": 0.6093845216331505, | |
| "grad_norm": 0.9214402604733031, | |
| "learning_rate": 3.4981712198726956e-06, | |
| "loss": 0.2757925033569336, | |
| "step": 1000, | |
| "token_acc": 0.9061934946027913 | |
| }, | |
| { | |
| "epoch": 0.615478366849482, | |
| "grad_norm": 0.8580050185956459, | |
| "learning_rate": 3.4042989407079986e-06, | |
| "loss": 0.2790709972381592, | |
| "step": 1010, | |
| "token_acc": 0.9051715866568587 | |
| }, | |
| { | |
| "epoch": 0.6215722120658135, | |
| "grad_norm": 0.7762593811197912, | |
| "learning_rate": 3.311048813982627e-06, | |
| "loss": 0.2719182014465332, | |
| "step": 1020, | |
| "token_acc": 0.9072872717021148 | |
| }, | |
| { | |
| "epoch": 0.6276660572821451, | |
| "grad_norm": 0.8305900083620258, | |
| "learning_rate": 3.218457197254583e-06, | |
| "loss": 0.27586350440979, | |
| "step": 1030, | |
| "token_acc": 0.9060086339753238 | |
| }, | |
| { | |
| "epoch": 0.6337599024984766, | |
| "grad_norm": 0.8955059982745348, | |
| "learning_rate": 3.1265601913335196e-06, | |
| "loss": 0.2731196403503418, | |
| "step": 1040, | |
| "token_acc": 0.9076037121001682 | |
| }, | |
| { | |
| "epoch": 0.6398537477148081, | |
| "grad_norm": 0.8712242634564721, | |
| "learning_rate": 3.035393626205306e-06, | |
| "loss": 0.2795309066772461, | |
| "step": 1050, | |
| "token_acc": 0.9047484454494065 | |
| }, | |
| { | |
| "epoch": 0.6459475929311396, | |
| "grad_norm": 0.8162886626845998, | |
| "learning_rate": 2.944993047062161e-06, | |
| "loss": 0.26994550228118896, | |
| "step": 1060, | |
| "token_acc": 0.9082915598041501 | |
| }, | |
| { | |
| "epoch": 0.6520414381474711, | |
| "grad_norm": 0.8874044395879559, | |
| "learning_rate": 2.8553937004438425e-06, | |
| "loss": 0.2744093418121338, | |
| "step": 1070, | |
| "token_acc": 0.9072907727436752 | |
| }, | |
| { | |
| "epoch": 0.6581352833638026, | |
| "grad_norm": 0.8288310546310844, | |
| "learning_rate": 2.766630520495277e-06, | |
| "loss": 0.2674886226654053, | |
| "step": 1080, | |
| "token_acc": 0.9087633615660454 | |
| }, | |
| { | |
| "epoch": 0.664229128580134, | |
| "grad_norm": 0.8828846811452266, | |
| "learning_rate": 2.67873811534598e-06, | |
| "loss": 0.2735260486602783, | |
| "step": 1090, | |
| "token_acc": 0.9060899523658108 | |
| }, | |
| { | |
| "epoch": 0.6703229737964655, | |
| "grad_norm": 0.8055682508984224, | |
| "learning_rate": 2.591750753616596e-06, | |
| "loss": 0.2687216758728027, | |
| "step": 1100, | |
| "token_acc": 0.9077474362897096 | |
| }, | |
| { | |
| "epoch": 0.676416819012797, | |
| "grad_norm": 0.8527567804445506, | |
| "learning_rate": 2.505702351057804e-06, | |
| "loss": 0.27487955093383787, | |
| "step": 1110, | |
| "token_acc": 0.9064443638076686 | |
| }, | |
| { | |
| "epoch": 0.6825106642291285, | |
| "grad_norm": 0.8043496565707575, | |
| "learning_rate": 2.4206264573268174e-06, | |
| "loss": 0.2709942102432251, | |
| "step": 1120, | |
| "token_acc": 0.9082038753361505 | |
| }, | |
| { | |
| "epoch": 0.68860450944546, | |
| "grad_norm": 0.8177848047582682, | |
| "learning_rate": 2.336556242906608e-06, | |
| "loss": 0.26909465789794923, | |
| "step": 1130, | |
| "token_acc": 0.907756650686803 | |
| }, | |
| { | |
| "epoch": 0.6946983546617916, | |
| "grad_norm": 0.8281752422683824, | |
| "learning_rate": 2.2535244861729707e-06, | |
| "loss": 0.27281508445739744, | |
| "step": 1140, | |
| "token_acc": 0.9068872307019957 | |
| }, | |
| { | |
| "epoch": 0.7007921998781231, | |
| "grad_norm": 0.7368812719716331, | |
| "learning_rate": 2.1715635606144653e-06, | |
| "loss": 0.2704050064086914, | |
| "step": 1150, | |
| "token_acc": 0.9086829548350435 | |
| }, | |
| { | |
| "epoch": 0.7068860450944546, | |
| "grad_norm": 0.8983810091681733, | |
| "learning_rate": 2.0907054222102367e-06, | |
| "loss": 0.2690997362136841, | |
| "step": 1160, | |
| "token_acc": 0.9079458353782861 | |
| }, | |
| { | |
| "epoch": 0.7129798903107861, | |
| "grad_norm": 0.976946993038541, | |
| "learning_rate": 2.0109815969705922e-06, | |
| "loss": 0.2747433423995972, | |
| "step": 1170, | |
| "token_acc": 0.9060301301519122 | |
| }, | |
| { | |
| "epoch": 0.7190737355271176, | |
| "grad_norm": 0.8007237087596002, | |
| "learning_rate": 1.9324231686452478e-06, | |
| "loss": 0.2671233654022217, | |
| "step": 1180, | |
| "token_acc": 0.9086050565301521 | |
| }, | |
| { | |
| "epoch": 0.7251675807434491, | |
| "grad_norm": 0.8064570085543009, | |
| "learning_rate": 1.8550607666039877e-06, | |
| "loss": 0.27011594772338865, | |
| "step": 1190, | |
| "token_acc": 0.9079702457204528 | |
| }, | |
| { | |
| "epoch": 0.7312614259597806, | |
| "grad_norm": 0.8831329237202693, | |
| "learning_rate": 1.7789245538944971e-06, | |
| "loss": 0.2661958456039429, | |
| "step": 1200, | |
| "token_acc": 0.909048799129166 | |
| }, | |
| { | |
| "epoch": 0.7373552711761121, | |
| "grad_norm": 0.8430483750865159, | |
| "learning_rate": 1.7040442154820036e-06, | |
| "loss": 0.2669236183166504, | |
| "step": 1210, | |
| "token_acc": 0.9086229167124993 | |
| }, | |
| { | |
| "epoch": 0.7434491163924436, | |
| "grad_norm": 0.8347549917161227, | |
| "learning_rate": 1.6304489466753237e-06, | |
| "loss": 0.26542019844055176, | |
| "step": 1220, | |
| "token_acc": 0.9091426534148126 | |
| }, | |
| { | |
| "epoch": 0.7495429616087751, | |
| "grad_norm": 0.830454588444548, | |
| "learning_rate": 1.5581674417438143e-06, | |
| "loss": 0.2647353410720825, | |
| "step": 1230, | |
| "token_acc": 0.909506020348688 | |
| }, | |
| { | |
| "epoch": 0.7556368068251066, | |
| "grad_norm": 0.8676010280531331, | |
| "learning_rate": 1.4872278827296855e-06, | |
| "loss": 0.2685891628265381, | |
| "step": 1240, | |
| "token_acc": 0.9081622979570555 | |
| }, | |
| { | |
| "epoch": 0.7617306520414382, | |
| "grad_norm": 0.707455832514829, | |
| "learning_rate": 1.417657928460029e-06, | |
| "loss": 0.2678367614746094, | |
| "step": 1250, | |
| "token_acc": 0.9088005125349524 | |
| }, | |
| { | |
| "epoch": 0.7678244972577697, | |
| "grad_norm": 0.9332592296684585, | |
| "learning_rate": 1.349484703762834e-06, | |
| "loss": 0.2678724765777588, | |
| "step": 1260, | |
| "token_acc": 0.9090774872882107 | |
| }, | |
| { | |
| "epoch": 0.7739183424741012, | |
| "grad_norm": 0.9124536066814944, | |
| "learning_rate": 1.2827347888912057e-06, | |
| "loss": 0.2636892795562744, | |
| "step": 1270, | |
| "token_acc": 0.9094603622970171 | |
| }, | |
| { | |
| "epoch": 0.7800121876904327, | |
| "grad_norm": 0.8868523419233089, | |
| "learning_rate": 1.2174342091599277e-06, | |
| "loss": 0.2640355587005615, | |
| "step": 1280, | |
| "token_acc": 0.9101203136208611 | |
| }, | |
| { | |
| "epoch": 0.7861060329067642, | |
| "grad_norm": 0.8162281839833351, | |
| "learning_rate": 1.1536084247983626e-06, | |
| "loss": 0.2618927717208862, | |
| "step": 1290, | |
| "token_acc": 0.9093984578881031 | |
| }, | |
| { | |
| "epoch": 0.7921998781230957, | |
| "grad_norm": 0.8334510756887459, | |
| "learning_rate": 1.0912823210237033e-06, | |
| "loss": 0.2639930725097656, | |
| "step": 1300, | |
| "token_acc": 0.9095154304277207 | |
| }, | |
| { | |
| "epoch": 0.7982937233394272, | |
| "grad_norm": 0.9484830756554262, | |
| "learning_rate": 1.0304801983383989e-06, | |
| "loss": 0.2679661750793457, | |
| "step": 1310, | |
| "token_acc": 0.9085439305540266 | |
| }, | |
| { | |
| "epoch": 0.8043875685557587, | |
| "grad_norm": 0.7917038864004372, | |
| "learning_rate": 9.712257630555589e-07, | |
| "loss": 0.263914155960083, | |
| "step": 1320, | |
| "token_acc": 0.9098282765579997 | |
| }, | |
| { | |
| "epoch": 0.8104814137720902, | |
| "grad_norm": 0.8164310323072432, | |
| "learning_rate": 9.135421180560394e-07, | |
| "loss": 0.27391440868377687, | |
| "step": 1330, | |
| "token_acc": 0.9072812991094814 | |
| }, | |
| { | |
| "epoch": 0.8165752589884216, | |
| "grad_norm": 0.7878349824156636, | |
| "learning_rate": 8.574517537807897e-07, | |
| "loss": 0.2658750057220459, | |
| "step": 1340, | |
| "token_acc": 0.9089495350890863 | |
| }, | |
| { | |
| "epoch": 0.8226691042047533, | |
| "grad_norm": 0.7620095983862565, | |
| "learning_rate": 8.029765394619899e-07, | |
| "loss": 0.25719194412231444, | |
| "step": 1350, | |
| "token_acc": 0.911888654763225 | |
| }, | |
| { | |
| "epoch": 0.8287629494210847, | |
| "grad_norm": 0.8206579913283775, | |
| "learning_rate": 7.501377145963939e-07, | |
| "loss": 0.2592960834503174, | |
| "step": 1360, | |
| "token_acc": 0.9114338606023208 | |
| }, | |
| { | |
| "epoch": 0.8348567946374162, | |
| "grad_norm": 0.8789992765077687, | |
| "learning_rate": 6.98955880664205e-07, | |
| "loss": 0.26435413360595705, | |
| "step": 1370, | |
| "token_acc": 0.9108234231521902 | |
| }, | |
| { | |
| "epoch": 0.8409506398537477, | |
| "grad_norm": 0.9837537034286392, | |
| "learning_rate": 6.494509930967019e-07, | |
| "loss": 0.2641714572906494, | |
| "step": 1380, | |
| "token_acc": 0.9101989856105199 | |
| }, | |
| { | |
| "epoch": 0.8470444850700792, | |
| "grad_norm": 0.8346126227296959, | |
| "learning_rate": 6.016423534957616e-07, | |
| "loss": 0.26149678230285645, | |
| "step": 1390, | |
| "token_acc": 0.9105589320112891 | |
| }, | |
| { | |
| "epoch": 0.8531383302864107, | |
| "grad_norm": 0.789773058927434, | |
| "learning_rate": 5.555486021082979e-07, | |
| "loss": 0.25979223251342776, | |
| "step": 1400, | |
| "token_acc": 0.9105615762961907 | |
| }, | |
| { | |
| "epoch": 0.8592321755027422, | |
| "grad_norm": 0.7391262213112039, | |
| "learning_rate": 5.111877105585672e-07, | |
| "loss": 0.2619319915771484, | |
| "step": 1410, | |
| "token_acc": 0.9112515917773331 | |
| }, | |
| { | |
| "epoch": 0.8653260207190737, | |
| "grad_norm": 0.732756554862386, | |
| "learning_rate": 4.6857697484116006e-07, | |
| "loss": 0.26052017211914064, | |
| "step": 1420, | |
| "token_acc": 0.9111355670436785 | |
| }, | |
| { | |
| "epoch": 0.8714198659354052, | |
| "grad_norm": 0.9052605008388693, | |
| "learning_rate": 4.277330085774156e-07, | |
| "loss": 0.26050865650177, | |
| "step": 1430, | |
| "token_acc": 0.9113159185335296 | |
| }, | |
| { | |
| "epoch": 0.8775137111517367, | |
| "grad_norm": 0.8239425361941399, | |
| "learning_rate": 3.886717365378867e-07, | |
| "loss": 0.2652243137359619, | |
| "step": 1440, | |
| "token_acc": 0.9098248347337728 | |
| }, | |
| { | |
| "epoch": 0.8836075563680682, | |
| "grad_norm": 0.8321718064306127, | |
| "learning_rate": 3.5140838843339073e-07, | |
| "loss": 0.2614146709442139, | |
| "step": 1450, | |
| "token_acc": 0.9103242825028786 | |
| }, | |
| { | |
| "epoch": 0.8897014015843998, | |
| "grad_norm": 0.9427110487674982, | |
| "learning_rate": 3.159574929770515e-07, | |
| "loss": 0.26317219734191893, | |
| "step": 1460, | |
| "token_acc": 0.9102542106779491 | |
| }, | |
| { | |
| "epoch": 0.8957952468007313, | |
| "grad_norm": 0.8005907233947733, | |
| "learning_rate": 2.8233287221965555e-07, | |
| "loss": 0.2689415216445923, | |
| "step": 1470, | |
| "token_acc": 0.9084669140620019 | |
| }, | |
| { | |
| "epoch": 0.9018890920170628, | |
| "grad_norm": 0.8834142513691242, | |
| "learning_rate": 2.5054763616053967e-07, | |
| "loss": 0.26386346817016604, | |
| "step": 1480, | |
| "token_acc": 0.9098926633899981 | |
| }, | |
| { | |
| "epoch": 0.9079829372333943, | |
| "grad_norm": 0.8652226986660423, | |
| "learning_rate": 2.2061417763608818e-07, | |
| "loss": 0.2603492259979248, | |
| "step": 1490, | |
| "token_acc": 0.9111148919621807 | |
| }, | |
| { | |
| "epoch": 0.9140767824497258, | |
| "grad_norm": 0.7761477175475302, | |
| "learning_rate": 1.9254416748786086e-07, | |
| "loss": 0.2592171669006348, | |
| "step": 1500, | |
| "token_acc": 0.9112373322356396 | |
| }, | |
| { | |
| "epoch": 0.9201706276660573, | |
| "grad_norm": 0.7766751712855907, | |
| "learning_rate": 1.6634855001221195e-07, | |
| "loss": 0.258951997756958, | |
| "step": 1510, | |
| "token_acc": 0.9106356546794409 | |
| }, | |
| { | |
| "epoch": 0.9262644728823888, | |
| "grad_norm": 0.856909898768609, | |
| "learning_rate": 1.4203753869318882e-07, | |
| "loss": 0.2605564117431641, | |
| "step": 1520, | |
| "token_acc": 0.9109015609309732 | |
| }, | |
| { | |
| "epoch": 0.9323583180987203, | |
| "grad_norm": 0.8678261922910359, | |
| "learning_rate": 1.196206122203647e-07, | |
| "loss": 0.267201566696167, | |
| "step": 1530, | |
| "token_acc": 0.9091924387660025 | |
| }, | |
| { | |
| "epoch": 0.9384521633150518, | |
| "grad_norm": 0.8245437796092319, | |
| "learning_rate": 9.910651079316824e-08, | |
| "loss": 0.25865275859832765, | |
| "step": 1540, | |
| "token_acc": 0.9117370919567883 | |
| }, | |
| { | |
| "epoch": 0.9445460085313833, | |
| "grad_norm": 0.7648349491441419, | |
| "learning_rate": 8.050323271314331e-08, | |
| "loss": 0.2569366216659546, | |
| "step": 1550, | |
| "token_acc": 0.9122892575583048 | |
| }, | |
| { | |
| "epoch": 0.9506398537477148, | |
| "grad_norm": 0.844132664732268, | |
| "learning_rate": 6.381803126546405e-08, | |
| "loss": 0.26746933460235595, | |
| "step": 1560, | |
| "token_acc": 0.9087516916083089 | |
| }, | |
| { | |
| "epoch": 0.9567336989640464, | |
| "grad_norm": 0.8550282187735159, | |
| "learning_rate": 4.9057411890933714e-08, | |
| "loss": 0.2634291172027588, | |
| "step": 1570, | |
| "token_acc": 0.9101502847948816 | |
| }, | |
| { | |
| "epoch": 0.9628275441803779, | |
| "grad_norm": 0.8962920945122091, | |
| "learning_rate": 3.622712964956032e-08, | |
| "loss": 0.26028733253479003, | |
| "step": 1580, | |
| "token_acc": 0.9110691577022408 | |
| }, | |
| { | |
| "epoch": 0.9689213893967094, | |
| "grad_norm": 0.8191620838439264, | |
| "learning_rate": 2.5332186976697037e-08, | |
| "loss": 0.26295406818389894, | |
| "step": 1590, | |
| "token_acc": 0.9106372558253433 | |
| }, | |
| { | |
| "epoch": 0.9750152346130408, | |
| "grad_norm": 0.803005796954641, | |
| "learning_rate": 1.637683173263238e-08, | |
| "loss": 0.2601941585540771, | |
| "step": 1600, | |
| "token_acc": 0.9106438532047947 | |
| }, | |
| { | |
| "epoch": 0.9811090798293723, | |
| "grad_norm": 1.0200184560604955, | |
| "learning_rate": 9.364555546375054e-09, | |
| "loss": 0.265762186050415, | |
| "step": 1610, | |
| "token_acc": 0.9099375217270665 | |
| }, | |
| { | |
| "epoch": 0.9872029250457038, | |
| "grad_norm": 0.8217240197064228, | |
| "learning_rate": 4.2980924542984634e-09, | |
| "loss": 0.261862587928772, | |
| "step": 1620, | |
| "token_acc": 0.9104295425993519 | |
| }, | |
| { | |
| "epoch": 0.9932967702620353, | |
| "grad_norm": 0.8981159929317022, | |
| "learning_rate": 1.179417834153429e-09, | |
| "loss": 0.2626341342926025, | |
| "step": 1630, | |
| "token_acc": 0.9100063135380294 | |
| }, | |
| { | |
| "epoch": 0.9993906154783668, | |
| "grad_norm": 0.8766885423326849, | |
| "learning_rate": 9.74763488759134e-12, | |
| "loss": 0.2605599880218506, | |
| "step": 1640, | |
| "token_acc": 0.9109949846594887 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1641, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1566399809454080.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |