| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9999751200457792, |
| "eval_steps": 500, |
| "global_step": 10048, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0009951981688353694, |
| "grad_norm": 0.6644615530967712, |
| "learning_rate": 3.578528827037773e-05, |
| "loss": -0.2368, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0019903963376707388, |
| "grad_norm": 0.23524807393550873, |
| "learning_rate": 7.554671968190855e-05, |
| "loss": -0.2801, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.002985594506506108, |
| "grad_norm": 0.22816260159015656, |
| "learning_rate": 0.00011530815109343937, |
| "loss": -0.3487, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0039807926753414775, |
| "grad_norm": 0.23517341911792755, |
| "learning_rate": 0.00015506958250497018, |
| "loss": -0.421, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.004975990844176847, |
| "grad_norm": 0.14163829386234283, |
| "learning_rate": 0.000194831013916501, |
| "loss": -0.474, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.005971189013012216, |
| "grad_norm": 0.17084212601184845, |
| "learning_rate": 0.00023459244532803182, |
| "loss": -0.5109, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.006966387181847586, |
| "grad_norm": 0.15078520774841309, |
| "learning_rate": 0.00027435387673956264, |
| "loss": -0.5484, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.007961585350682955, |
| "grad_norm": 0.095310278236866, |
| "learning_rate": 0.00031411530815109347, |
| "loss": -0.5527, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.008956783519518324, |
| "grad_norm": 0.10798652470111847, |
| "learning_rate": 0.0003538767395626243, |
| "loss": -0.555, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.009951981688353694, |
| "grad_norm": 0.14966444671154022, |
| "learning_rate": 0.00039363817097415506, |
| "loss": -0.5603, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.010947179857189062, |
| "grad_norm": 0.11505836993455887, |
| "learning_rate": 0.0004333996023856859, |
| "loss": -0.5812, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.011942378026024433, |
| "grad_norm": 0.10001794248819351, |
| "learning_rate": 0.0004731610337972167, |
| "loss": -0.6047, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.012937576194859801, |
| "grad_norm": 0.09755612164735794, |
| "learning_rate": 0.0005129224652087476, |
| "loss": -0.6262, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.013932774363695171, |
| "grad_norm": 0.09725947678089142, |
| "learning_rate": 0.0005526838966202783, |
| "loss": -0.5985, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.01492797253253054, |
| "grad_norm": 0.11158037185668945, |
| "learning_rate": 0.0005924453280318092, |
| "loss": -0.6026, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.01592317070136591, |
| "grad_norm": 0.14563938975334167, |
| "learning_rate": 0.00063220675944334, |
| "loss": -0.65, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.01691836887020128, |
| "grad_norm": 0.1282544583082199, |
| "learning_rate": 0.0006719681908548709, |
| "loss": -0.6005, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.017913567039036647, |
| "grad_norm": 0.10261674225330353, |
| "learning_rate": 0.0007117296222664016, |
| "loss": -0.6009, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.01890876520787202, |
| "grad_norm": 0.09247754514217377, |
| "learning_rate": 0.0007514910536779325, |
| "loss": -0.6396, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.019903963376707388, |
| "grad_norm": 0.10132607072591782, |
| "learning_rate": 0.0007912524850894633, |
| "loss": -0.6591, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.020899161545542756, |
| "grad_norm": 0.16978560388088226, |
| "learning_rate": 0.000831013916500994, |
| "loss": -0.6426, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.021894359714378125, |
| "grad_norm": 0.11510445177555084, |
| "learning_rate": 0.0008707753479125249, |
| "loss": -0.6005, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.022889557883213493, |
| "grad_norm": 0.07095818221569061, |
| "learning_rate": 0.0009105367793240557, |
| "loss": -0.6672, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.023884756052048865, |
| "grad_norm": 0.09719249606132507, |
| "learning_rate": 0.0009502982107355866, |
| "loss": -0.6317, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.024879954220884234, |
| "grad_norm": 0.07893206924200058, |
| "learning_rate": 0.0009900596421471173, |
| "loss": -0.6573, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.025875152389719602, |
| "grad_norm": 0.09626351296901703, |
| "learning_rate": 0.001029821073558648, |
| "loss": -0.6316, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.02687035055855497, |
| "grad_norm": 0.0947069525718689, |
| "learning_rate": 0.0010695825049701789, |
| "loss": -0.6453, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.027865548727390343, |
| "grad_norm": 0.10596685111522675, |
| "learning_rate": 0.0011093439363817096, |
| "loss": -0.6316, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.02886074689622571, |
| "grad_norm": 0.09962307661771774, |
| "learning_rate": 0.0011491053677932406, |
| "loss": -0.6847, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.02985594506506108, |
| "grad_norm": 0.09802088141441345, |
| "learning_rate": 0.0011888667992047714, |
| "loss": -0.6957, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.03085114323389645, |
| "grad_norm": 0.10051831603050232, |
| "learning_rate": 0.0012286282306163022, |
| "loss": -0.6896, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.03184634140273182, |
| "grad_norm": 0.0771615281701088, |
| "learning_rate": 0.001268389662027833, |
| "loss": -0.6673, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.032841539571567185, |
| "grad_norm": 0.09795242547988892, |
| "learning_rate": 0.001308151093439364, |
| "loss": -0.6615, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.03383673774040256, |
| "grad_norm": 0.06744644790887833, |
| "learning_rate": 0.0013479125248508945, |
| "loss": -0.6949, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.03483193590923793, |
| "grad_norm": 0.12363786995410919, |
| "learning_rate": 0.0013876739562624254, |
| "loss": -0.6797, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.035827134078073294, |
| "grad_norm": 0.11588004976511002, |
| "learning_rate": 0.0014274353876739562, |
| "loss": -0.6947, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.036822332246908666, |
| "grad_norm": 0.07055709511041641, |
| "learning_rate": 0.0014671968190854872, |
| "loss": -0.6801, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.03781753041574404, |
| "grad_norm": 0.11170762777328491, |
| "learning_rate": 0.0015069582504970177, |
| "loss": -0.6894, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.0388127285845794, |
| "grad_norm": 0.06324685364961624, |
| "learning_rate": 0.0015467196819085487, |
| "loss": -0.7317, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.039807926753414775, |
| "grad_norm": 0.06563621014356613, |
| "learning_rate": 0.0015864811133200797, |
| "loss": -0.7251, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.04080312492225014, |
| "grad_norm": 0.06519268453121185, |
| "learning_rate": 0.0016262425447316103, |
| "loss": -0.7299, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.04179832309108551, |
| "grad_norm": 0.09023866802453995, |
| "learning_rate": 0.0016660039761431412, |
| "loss": -0.7148, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.042793521259920884, |
| "grad_norm": 0.11152295768260956, |
| "learning_rate": 0.001705765407554672, |
| "loss": -0.728, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.04378871942875625, |
| "grad_norm": 0.07573758065700531, |
| "learning_rate": 0.001745526838966203, |
| "loss": -0.7095, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.04478391759759162, |
| "grad_norm": 0.12177475541830063, |
| "learning_rate": 0.0017852882703777335, |
| "loss": -0.7138, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.04577911576642699, |
| "grad_norm": 0.10095696151256561, |
| "learning_rate": 0.0018250497017892645, |
| "loss": -0.7141, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.04677431393526236, |
| "grad_norm": 0.06338244676589966, |
| "learning_rate": 0.0018648111332007953, |
| "loss": -0.7236, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.04776951210409773, |
| "grad_norm": 0.14773637056350708, |
| "learning_rate": 0.001904572564612326, |
| "loss": -0.7243, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.048764710272933096, |
| "grad_norm": 0.08487069606781006, |
| "learning_rate": 0.0019443339960238568, |
| "loss": -0.7226, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.04975990844176847, |
| "grad_norm": 0.0774376168847084, |
| "learning_rate": 0.001984095427435388, |
| "loss": -0.7551, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.05075510661060384, |
| "grad_norm": 0.07008124142885208, |
| "learning_rate": 0.0019999980500645096, |
| "loss": -0.7235, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.051750304779439205, |
| "grad_norm": 0.0964338481426239, |
| "learning_rate": 0.0019999861338196065, |
| "loss": -0.7487, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.05274550294827458, |
| "grad_norm": 0.06681355834007263, |
| "learning_rate": 0.0019999633847562264, |
| "loss": -0.7639, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.05374070111710994, |
| "grad_norm": 0.1224050298333168, |
| "learning_rate": 0.0019999298031208096, |
| "loss": -0.758, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.054735899285945314, |
| "grad_norm": 0.08830223232507706, |
| "learning_rate": 0.001999885389277145, |
| "loss": -0.7676, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.055731097454780686, |
| "grad_norm": 0.06694019585847855, |
| "learning_rate": 0.001999830143706366, |
| "loss": -0.7881, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.05672629562361605, |
| "grad_norm": 0.10447929799556732, |
| "learning_rate": 0.0019997640670069467, |
| "loss": -0.7783, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.05772149379245142, |
| "grad_norm": 0.18173834681510925, |
| "learning_rate": 0.0019996871598946934, |
| "loss": -0.7783, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.058716691961286795, |
| "grad_norm": 0.06380622833967209, |
| "learning_rate": 0.001999599423202739, |
| "loss": -0.7888, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.05971189013012216, |
| "grad_norm": 0.04775335267186165, |
| "learning_rate": 0.0019995008578815314, |
| "loss": -0.799, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.06070708829895753, |
| "grad_norm": 0.08990439772605896, |
| "learning_rate": 0.0019993914649988264, |
| "loss": -0.7885, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.0617022864677929, |
| "grad_norm": 0.05452219769358635, |
| "learning_rate": 0.0019992712457396733, |
| "loss": -0.794, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.06269748463662826, |
| "grad_norm": 0.047960225492715836, |
| "learning_rate": 0.0019991402014064037, |
| "loss": -0.7613, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.06369268280546364, |
| "grad_norm": 0.0659102350473404, |
| "learning_rate": 0.001998998333418617, |
| "loss": -0.7943, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.064687880974299, |
| "grad_norm": 0.09903737157583237, |
| "learning_rate": 0.0019988456433131644, |
| "loss": -0.7935, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.06568307914313437, |
| "grad_norm": 0.10571643710136414, |
| "learning_rate": 0.0019986821327441333, |
| "loss": -0.826, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.06667827731196975, |
| "grad_norm": 0.09088798612356186, |
| "learning_rate": 0.001998507803482828, |
| "loss": -0.7864, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.06767347548080511, |
| "grad_norm": 0.06041436642408371, |
| "learning_rate": 0.0019983226574177525, |
| "loss": -0.8122, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.06866867364964048, |
| "grad_norm": 0.04567006230354309, |
| "learning_rate": 0.0019981266965545877, |
| "loss": -0.8225, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.06966387181847586, |
| "grad_norm": 0.1008988469839096, |
| "learning_rate": 0.0019979199230161725, |
| "loss": -0.7983, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.07065906998731122, |
| "grad_norm": 0.0660218819975853, |
| "learning_rate": 0.0019977023390424778, |
| "loss": -0.8171, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.07165426815614659, |
| "grad_norm": 0.0423499271273613, |
| "learning_rate": 0.0019974739469905828, |
| "loss": -0.8051, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.07264946632498197, |
| "grad_norm": 0.09598391503095627, |
| "learning_rate": 0.001997234749334653, |
| "loss": -0.7979, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.07364466449381733, |
| "grad_norm": 0.04957730695605278, |
| "learning_rate": 0.001996984748665908, |
| "loss": -0.8326, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.0746398626626527, |
| "grad_norm": 0.09694412350654602, |
| "learning_rate": 0.0019967239476925986, |
| "loss": -0.8425, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.07563506083148808, |
| "grad_norm": 0.040948230773210526, |
| "learning_rate": 0.001996452349239972, |
| "loss": -0.8062, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.07663025900032344, |
| "grad_norm": 0.1682979166507721, |
| "learning_rate": 0.001996169956250247, |
| "loss": -0.7646, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.0776254571691588, |
| "grad_norm": 0.058550719171762466, |
| "learning_rate": 0.001995876771782577, |
| "loss": -0.7909, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.07862065533799417, |
| "grad_norm": 0.11844189465045929, |
| "learning_rate": 0.001995572799013021, |
| "loss": -0.8219, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.07961585350682955, |
| "grad_norm": 0.0915180891752243, |
| "learning_rate": 0.001995258041234506, |
| "loss": -0.8018, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.08061105167566492, |
| "grad_norm": 0.053632501512765884, |
| "learning_rate": 0.001994932501856793, |
| "loss": -0.82, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.08160624984450028, |
| "grad_norm": 0.10094787180423737, |
| "learning_rate": 0.00199459618440644, |
| "loss": -0.7917, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.08260144801333566, |
| "grad_norm": 0.05512174591422081, |
| "learning_rate": 0.001994249092526764, |
| "loss": -0.842, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.08359664618217102, |
| "grad_norm": 0.05738908424973488, |
| "learning_rate": 0.0019938912299778, |
| "loss": -0.8688, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.08459184435100639, |
| "grad_norm": 0.11923105269670486, |
| "learning_rate": 0.001993522600636262, |
| "loss": -0.8018, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.08558704251984177, |
| "grad_norm": 0.22091729938983917, |
| "learning_rate": 0.0019931432084954992, |
| "loss": -0.8532, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.08658224068867713, |
| "grad_norm": 0.08304735273122787, |
| "learning_rate": 0.0019927530576654565, |
| "loss": -0.8289, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.0875774388575125, |
| "grad_norm": 0.1574501097202301, |
| "learning_rate": 0.001992352152372624, |
| "loss": -0.8044, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.08857263702634788, |
| "grad_norm": 0.0682770311832428, |
| "learning_rate": 0.001991940496959997, |
| "loss": -0.7923, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.08956783519518324, |
| "grad_norm": 0.1527094841003418, |
| "learning_rate": 0.001991518095887025, |
| "loss": -0.83, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.09056303336401861, |
| "grad_norm": 0.053579773753881454, |
| "learning_rate": 0.001991084953729567, |
| "loss": -0.809, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.09155823153285397, |
| "grad_norm": 0.06435493379831314, |
| "learning_rate": 0.001990641075179837, |
| "loss": -0.7872, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.09255342970168935, |
| "grad_norm": 0.045854952186346054, |
| "learning_rate": 0.0019901864650463583, |
| "loss": -0.8038, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.09354862787052472, |
| "grad_norm": 0.053723808377981186, |
| "learning_rate": 0.0019897211282539078, |
| "loss": -0.8497, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.09454382603936008, |
| "grad_norm": 0.2113220989704132, |
| "learning_rate": 0.0019892450698434645, |
| "loss": -0.8268, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.09553902420819546, |
| "grad_norm": 0.06267759203910828, |
| "learning_rate": 0.001988758294972154, |
| "loss": -0.8163, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.09653422237703083, |
| "grad_norm": 0.06445565819740295, |
| "learning_rate": 0.0019882608089131937, |
| "loss": -0.8093, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.09752942054586619, |
| "grad_norm": 0.16995398700237274, |
| "learning_rate": 0.0019877526170558346, |
| "loss": -0.8083, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.09852461871470157, |
| "grad_norm": 0.03674469143152237, |
| "learning_rate": 0.0019872337249053026, |
| "loss": -0.8669, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.09951981688353694, |
| "grad_norm": 0.12962763011455536, |
| "learning_rate": 0.0019867041380827407, |
| "loss": -0.8912, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.1005150150523723, |
| "grad_norm": 0.043658774346113205, |
| "learning_rate": 0.0019861638623251457, |
| "loss": -0.8531, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.10151021322120768, |
| "grad_norm": 0.0549648217856884, |
| "learning_rate": 0.0019856129034853086, |
| "loss": -0.8226, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.10250541139004304, |
| "grad_norm": 0.05147390440106392, |
| "learning_rate": 0.001985051267531749, |
| "loss": -0.8368, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.10350060955887841, |
| "grad_norm": 0.059379711747169495, |
| "learning_rate": 0.0019844789605486515, |
| "loss": -0.8074, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.10449580772771377, |
| "grad_norm": 0.10174992680549622, |
| "learning_rate": 0.0019838959887358, |
| "loss": -0.8322, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.10549100589654915, |
| "grad_norm": 0.19209684431552887, |
| "learning_rate": 0.0019833023584085096, |
| "loss": -0.8304, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.10648620406538452, |
| "grad_norm": 0.0454883873462677, |
| "learning_rate": 0.001982698075997559, |
| "loss": -0.8354, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.10748140223421988, |
| "grad_norm": 0.12441568821668625, |
| "learning_rate": 0.001982083148049121, |
| "loss": -0.8562, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.10847660040305526, |
| "grad_norm": 0.06760125607252121, |
| "learning_rate": 0.0019814575812246906, |
| "loss": -0.8471, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.10947179857189063, |
| "grad_norm": 0.04590695723891258, |
| "learning_rate": 0.0019808213823010136, |
| "loss": -0.845, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.11046699674072599, |
| "grad_norm": 0.14543893933296204, |
| "learning_rate": 0.001980174558170013, |
| "loss": -0.8236, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.11146219490956137, |
| "grad_norm": 0.044470854103565216, |
| "learning_rate": 0.001979517115838715, |
| "loss": -0.8177, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.11245739307839674, |
| "grad_norm": 0.05811746418476105, |
| "learning_rate": 0.0019788490624291707, |
| "loss": -0.8414, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.1134525912472321, |
| "grad_norm": 0.05510469153523445, |
| "learning_rate": 0.0019781704051783826, |
| "loss": -0.8606, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.11444778941606748, |
| "grad_norm": 0.04229406639933586, |
| "learning_rate": 0.0019774811514382223, |
| "loss": -0.8722, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.11544298758490285, |
| "grad_norm": 0.0429520383477211, |
| "learning_rate": 0.0019767813086753556, |
| "loss": -0.8211, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.11643818575373821, |
| "grad_norm": 0.061233002692461014, |
| "learning_rate": 0.0019760708844711564, |
| "loss": -0.8616, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.11743338392257359, |
| "grad_norm": 0.04444749280810356, |
| "learning_rate": 0.0019753498865216278, |
| "loss": -0.8412, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.11842858209140895, |
| "grad_norm": 0.0725698322057724, |
| "learning_rate": 0.001974618322637318, |
| "loss": -0.867, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.11942378026024432, |
| "grad_norm": 0.05399654805660248, |
| "learning_rate": 0.0019738762007432357, |
| "loss": -0.8645, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.12041897842907968, |
| "grad_norm": 0.07678170502185822, |
| "learning_rate": 0.0019731235288787644, |
| "loss": -0.8491, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.12141417659791506, |
| "grad_norm": 0.07065637409687042, |
| "learning_rate": 0.001972360315197575, |
| "loss": -0.802, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.12240937476675043, |
| "grad_norm": 0.07059847563505173, |
| "learning_rate": 0.0019715865679675363, |
| "loss": -0.8345, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.1234045729355858, |
| "grad_norm": 0.06182623654603958, |
| "learning_rate": 0.0019708022955706294, |
| "loss": -0.8061, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.12439977110442117, |
| "grad_norm": 0.0434928797185421, |
| "learning_rate": 0.001970007506502851, |
| "loss": -0.847, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.12539496927325652, |
| "grad_norm": 0.18933677673339844, |
| "learning_rate": 0.0019692022093741276, |
| "loss": -0.8603, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.12639016744209192, |
| "grad_norm": 0.08330973982810974, |
| "learning_rate": 0.001968386412908217, |
| "loss": -0.8027, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.12738536561092728, |
| "grad_norm": 0.04857812821865082, |
| "learning_rate": 0.001967560125942617, |
| "loss": -0.8462, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.12838056377976265, |
| "grad_norm": 0.05622467026114464, |
| "learning_rate": 0.001966723357428468, |
| "loss": -0.8372, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.129375761948598, |
| "grad_norm": 0.057230498641729355, |
| "learning_rate": 0.001965876116430458, |
| "loss": -0.8392, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.13037096011743338, |
| "grad_norm": 0.0401107594370842, |
| "learning_rate": 0.0019650184121267214, |
| "loss": -0.8606, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.13136615828626874, |
| "grad_norm": 0.0766066461801529, |
| "learning_rate": 0.0019641502538087423, |
| "loss": -0.8347, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.13236135645510413, |
| "grad_norm": 0.279532790184021, |
| "learning_rate": 0.001963271650881253, |
| "loss": -0.8474, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.1333565546239395, |
| "grad_norm": 0.06721749901771545, |
| "learning_rate": 0.0019623826128621308, |
| "loss": -0.8662, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.13435175279277486, |
| "grad_norm": 0.06590744107961655, |
| "learning_rate": 0.0019614831493822973, |
| "loss": -0.8409, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.13534695096161023, |
| "grad_norm": 0.047027770429849625, |
| "learning_rate": 0.0019605732701856115, |
| "loss": -0.8455, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.1363421491304456, |
| "grad_norm": 0.038147736340761185, |
| "learning_rate": 0.001959652985128767, |
| "loss": -0.8335, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.13733734729928096, |
| "grad_norm": 0.03474709019064903, |
| "learning_rate": 0.001958722304181183, |
| "loss": -0.8443, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.13833254546811635, |
| "grad_norm": 0.04871761053800583, |
| "learning_rate": 0.001957781237424896, |
| "loss": -0.8282, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.13932774363695172, |
| "grad_norm": 0.039951737970113754, |
| "learning_rate": 0.0019568297950544543, |
| "loss": -0.8458, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.14032294180578708, |
| "grad_norm": 0.07247880846261978, |
| "learning_rate": 0.0019558679873768023, |
| "loss": -0.8035, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.14131813997462245, |
| "grad_norm": 0.12037922441959381, |
| "learning_rate": 0.0019548958248111724, |
| "loss": -0.8209, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.1423133381434578, |
| "grad_norm": 0.1445380449295044, |
| "learning_rate": 0.0019539133178889715, |
| "loss": -0.8602, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.14330853631229318, |
| "grad_norm": 0.04830145835876465, |
| "learning_rate": 0.0019529204772536664, |
| "loss": -0.8569, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.14430373448112854, |
| "grad_norm": 0.0444733202457428, |
| "learning_rate": 0.0019519173136606685, |
| "loss": -0.8213, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.14529893264996394, |
| "grad_norm": 0.04037511348724365, |
| "learning_rate": 0.0019509038379772177, |
| "loss": -0.8319, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.1462941308187993, |
| "grad_norm": 0.0549684576690197, |
| "learning_rate": 0.0019498800611822645, |
| "loss": -0.8654, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.14728932898763467, |
| "grad_norm": 0.03622014820575714, |
| "learning_rate": 0.0019488459943663502, |
| "loss": -0.8603, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.14828452715647003, |
| "grad_norm": 0.05168438330292702, |
| "learning_rate": 0.0019478016487314888, |
| "loss": -0.8359, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.1492797253253054, |
| "grad_norm": 0.05102885514497757, |
| "learning_rate": 0.0019467470355910438, |
| "loss": -0.8362, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.15027492349414076, |
| "grad_norm": 0.08411931991577148, |
| "learning_rate": 0.0019456821663696063, |
| "loss": -0.8557, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.15127012166297615, |
| "grad_norm": 0.02999095432460308, |
| "learning_rate": 0.001944607052602871, |
| "loss": -0.8318, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.15226531983181152, |
| "grad_norm": 0.06085268408060074, |
| "learning_rate": 0.0019435217059375121, |
| "loss": -0.866, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.15326051800064688, |
| "grad_norm": 0.03816385567188263, |
| "learning_rate": 0.0019424261381310558, |
| "loss": -0.8348, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.15425571616948225, |
| "grad_norm": 0.03999185562133789, |
| "learning_rate": 0.0019413203610517537, |
| "loss": -0.8375, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.1552509143383176, |
| "grad_norm": 0.06329697370529175, |
| "learning_rate": 0.0019402043866784545, |
| "loss": -0.8598, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.15624611250715298, |
| "grad_norm": 0.29359814524650574, |
| "learning_rate": 0.0019390782271004735, |
| "loss": -0.8305, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.15724131067598834, |
| "grad_norm": 0.04354254528880119, |
| "learning_rate": 0.0019379418945174624, |
| "loss": -0.8428, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.15823650884482374, |
| "grad_norm": 0.09485089778900146, |
| "learning_rate": 0.001936795401239276, |
| "loss": -0.8524, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.1592317070136591, |
| "grad_norm": 0.052271757274866104, |
| "learning_rate": 0.0019356387596858404, |
| "loss": -0.8889, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.16022690518249447, |
| "grad_norm": 0.07193049043416977, |
| "learning_rate": 0.0019344719823870175, |
| "loss": -0.8396, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.16122210335132983, |
| "grad_norm": 0.04632144048810005, |
| "learning_rate": 0.001933295081982469, |
| "loss": -0.8434, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.1622173015201652, |
| "grad_norm": 0.05413699895143509, |
| "learning_rate": 0.0019321080712215205, |
| "loss": -0.8523, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.16321249968900056, |
| "grad_norm": 0.045868393033742905, |
| "learning_rate": 0.0019309109629630217, |
| "loss": -0.8795, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.16420769785783595, |
| "grad_norm": 0.03584539145231247, |
| "learning_rate": 0.0019297037701752095, |
| "loss": -0.8834, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.16520289602667132, |
| "grad_norm": 0.05212506279349327, |
| "learning_rate": 0.0019284865059355654, |
| "loss": -0.8491, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.16619809419550668, |
| "grad_norm": 0.039999254047870636, |
| "learning_rate": 0.0019272591834306745, |
| "loss": -0.832, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.16719329236434205, |
| "grad_norm": 0.04333388805389404, |
| "learning_rate": 0.0019260218159560837, |
| "loss": -0.8568, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.16818849053317741, |
| "grad_norm": 0.05948438495397568, |
| "learning_rate": 0.0019247744169161552, |
| "loss": -0.8372, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.16918368870201278, |
| "grad_norm": 0.05164005607366562, |
| "learning_rate": 0.0019235169998239247, |
| "loss": -0.8453, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.17017888687084815, |
| "grad_norm": 0.07558200508356094, |
| "learning_rate": 0.0019222495783009516, |
| "loss": -0.837, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.17117408503968354, |
| "grad_norm": 0.039037518203258514, |
| "learning_rate": 0.0019209721660771737, |
| "loss": -0.8494, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.1721692832085189, |
| "grad_norm": 0.18770313262939453, |
| "learning_rate": 0.0019196847769907578, |
| "loss": -0.8765, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.17316448137735427, |
| "grad_norm": 0.08727400749921799, |
| "learning_rate": 0.0019183874249879495, |
| "loss": -0.8125, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.17415967954618963, |
| "grad_norm": 0.07949760556221008, |
| "learning_rate": 0.001917080124122922, |
| "loss": -0.8654, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.175154877715025, |
| "grad_norm": 0.06581337749958038, |
| "learning_rate": 0.0019157628885576252, |
| "loss": -0.8996, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.17615007588386036, |
| "grad_norm": 0.062335576862096786, |
| "learning_rate": 0.0019144357325616306, |
| "loss": -0.8842, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.17714527405269576, |
| "grad_norm": 0.040845975279808044, |
| "learning_rate": 0.0019130986705119773, |
| "loss": -0.8228, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.17814047222153112, |
| "grad_norm": 0.06012045592069626, |
| "learning_rate": 0.0019117517168930167, |
| "loss": -0.8529, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.17913567039036649, |
| "grad_norm": 0.030869802460074425, |
| "learning_rate": 0.0019103948862962555, |
| "loss": -0.8951, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.18013086855920185, |
| "grad_norm": 0.05283189192414284, |
| "learning_rate": 0.0019090281934201964, |
| "loss": -0.9048, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.18112606672803722, |
| "grad_norm": 0.020264575257897377, |
| "learning_rate": 0.0019076516530701815, |
| "loss": -0.8583, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.18212126489687258, |
| "grad_norm": 0.06241228058934212, |
| "learning_rate": 0.0019062652801582285, |
| "loss": -0.8432, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.18311646306570795, |
| "grad_norm": 0.048376064747571945, |
| "learning_rate": 0.001904869089702872, |
| "loss": -0.84, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.18411166123454334, |
| "grad_norm": 0.04957319051027298, |
| "learning_rate": 0.0019034630968289997, |
| "loss": -0.8312, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.1851068594033787, |
| "grad_norm": 0.11068796366453171, |
| "learning_rate": 0.001902047316767688, |
| "loss": -0.8366, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.18610205757221407, |
| "grad_norm": 0.03778199851512909, |
| "learning_rate": 0.0019006217648560382, |
| "loss": -0.8542, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.18709725574104943, |
| "grad_norm": 0.03918764367699623, |
| "learning_rate": 0.0018991864565370096, |
| "loss": -0.8395, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.1880924539098848, |
| "grad_norm": 0.0716870054602623, |
| "learning_rate": 0.0018977414073592521, |
| "loss": -0.7759, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.18908765207872016, |
| "grad_norm": 0.08251062780618668, |
| "learning_rate": 0.001896286632976938, |
| "loss": -0.8232, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.19008285024755556, |
| "grad_norm": 0.04269665852189064, |
| "learning_rate": 0.001894822149149593, |
| "loss": -0.8706, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.19107804841639092, |
| "grad_norm": 0.07331789284944534, |
| "learning_rate": 0.0018933479717419246, |
| "loss": -0.8209, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.1920732465852263, |
| "grad_norm": 0.03469880297780037, |
| "learning_rate": 0.0018918641167236503, |
| "loss": -0.8526, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.19306844475406165, |
| "grad_norm": 0.0502643845975399, |
| "learning_rate": 0.0018903706001693252, |
| "loss": -0.8058, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.19406364292289702, |
| "grad_norm": 0.04479772225022316, |
| "learning_rate": 0.0018888674382581672, |
| "loss": -0.8794, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.19505884109173238, |
| "grad_norm": 0.12034481018781662, |
| "learning_rate": 0.0018873546472738822, |
| "loss": -0.8684, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.19605403926056775, |
| "grad_norm": 0.10239536315202713, |
| "learning_rate": 0.0018858322436044875, |
| "loss": -0.8758, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.19704923742940314, |
| "grad_norm": 0.0984937995672226, |
| "learning_rate": 0.0018843002437421345, |
| "loss": -0.8669, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.1980444355982385, |
| "grad_norm": 0.04999193921685219, |
| "learning_rate": 0.00188275866428293, |
| "loss": -0.8509, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.19903963376707387, |
| "grad_norm": 0.02357977069914341, |
| "learning_rate": 0.001881207521926756, |
| "loss": -0.8546, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.20003483193590924, |
| "grad_norm": 0.05344194918870926, |
| "learning_rate": 0.0018796468334770884, |
| "loss": -0.8483, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.2010300301047446, |
| "grad_norm": 0.026923952624201775, |
| "learning_rate": 0.0018780766158408167, |
| "loss": -0.8808, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.20202522827357997, |
| "grad_norm": 0.06462886184453964, |
| "learning_rate": 0.0018764968860280598, |
| "loss": -0.8494, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.20302042644241536, |
| "grad_norm": 0.11881168186664581, |
| "learning_rate": 0.0018749076611519807, |
| "loss": -0.8483, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.20401562461125072, |
| "grad_norm": 0.0413346029818058, |
| "learning_rate": 0.001873308958428603, |
| "loss": -0.8356, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.2050108227800861, |
| "grad_norm": 0.08872512727975845, |
| "learning_rate": 0.0018717007951766233, |
| "loss": -0.8571, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.20600602094892145, |
| "grad_norm": 0.04187199845910072, |
| "learning_rate": 0.0018700831888172236, |
| "loss": -0.9066, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.20700121911775682, |
| "grad_norm": 0.08172470331192017, |
| "learning_rate": 0.001868456156873883, |
| "loss": -0.854, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.20799641728659218, |
| "grad_norm": 0.039352960884571075, |
| "learning_rate": 0.0018668197169721874, |
| "loss": -0.8822, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.20899161545542755, |
| "grad_norm": 0.0712110847234726, |
| "learning_rate": 0.0018651738868396394, |
| "loss": -0.8432, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.20998681362426294, |
| "grad_norm": 0.035332091152668, |
| "learning_rate": 0.0018635186843054651, |
| "loss": -0.8688, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.2109820117930983, |
| "grad_norm": 0.042279988527297974, |
| "learning_rate": 0.001861854127300422, |
| "loss": -0.8718, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.21197720996193367, |
| "grad_norm": 0.04759962484240532, |
| "learning_rate": 0.0018601802338566037, |
| "loss": -0.8676, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.21297240813076904, |
| "grad_norm": 0.06393526494503021, |
| "learning_rate": 0.0018584970221072453, |
| "loss": -0.8408, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.2139676062996044, |
| "grad_norm": 0.062339067459106445, |
| "learning_rate": 0.001856804510286527, |
| "loss": -0.8687, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.21496280446843977, |
| "grad_norm": 0.030178282409906387, |
| "learning_rate": 0.0018551027167293768, |
| "loss": -0.8563, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.21595800263727516, |
| "grad_norm": 0.0338846780359745, |
| "learning_rate": 0.0018533916598712707, |
| "loss": -0.8596, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.21695320080611052, |
| "grad_norm": 0.03212130814790726, |
| "learning_rate": 0.0018516713582480341, |
| "loss": -0.8531, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.2179483989749459, |
| "grad_norm": 0.08240335434675217, |
| "learning_rate": 0.001849941830495641, |
| "loss": -0.8391, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.21894359714378125, |
| "grad_norm": 0.0584181547164917, |
| "learning_rate": 0.001848203095350011, |
| "loss": -0.8469, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.21993879531261662, |
| "grad_norm": 0.05169396847486496, |
| "learning_rate": 0.0018464551716468071, |
| "loss": -0.8808, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.22093399348145198, |
| "grad_norm": 0.10674053430557251, |
| "learning_rate": 0.0018446980783212328, |
| "loss": -0.8367, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.22192919165028735, |
| "grad_norm": 0.03972488269209862, |
| "learning_rate": 0.0018429318344078246, |
| "loss": -0.8194, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.22292438981912274, |
| "grad_norm": 0.0668836236000061, |
| "learning_rate": 0.001841156459040248, |
| "loss": -0.831, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.2239195879879581, |
| "grad_norm": 0.06554841250181198, |
| "learning_rate": 0.001839371971451088, |
| "loss": -0.8217, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.22491478615679347, |
| "grad_norm": 0.13248029351234436, |
| "learning_rate": 0.0018375783909716432, |
| "loss": -0.8556, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.22590998432562884, |
| "grad_norm": 0.06625150144100189, |
| "learning_rate": 0.0018357757370317152, |
| "loss": -0.9072, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.2269051824944642, |
| "grad_norm": 0.07411986589431763, |
| "learning_rate": 0.0018339640291593971, |
| "loss": -0.8659, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.22790038066329957, |
| "grad_norm": 0.03291834518313408, |
| "learning_rate": 0.0018321432869808638, |
| "loss": -0.8748, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.22889557883213496, |
| "grad_norm": 0.15677815675735474, |
| "learning_rate": 0.0018303135302201578, |
| "loss": -0.8674, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.22989077700097033, |
| "grad_norm": 0.053248967975378036, |
| "learning_rate": 0.0018284747786989778, |
| "loss": -0.8293, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.2308859751698057, |
| "grad_norm": 0.021416958421468735, |
| "learning_rate": 0.0018266270523364608, |
| "loss": -0.8894, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.23188117333864106, |
| "grad_norm": 0.08533802628517151, |
| "learning_rate": 0.0018247703711489684, |
| "loss": -0.8688, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.23287637150747642, |
| "grad_norm": 0.0453949049115181, |
| "learning_rate": 0.0018229047552498706, |
| "loss": -0.848, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.23387156967631179, |
| "grad_norm": 0.20563171803951263, |
| "learning_rate": 0.0018210302248493253, |
| "loss": -0.7981, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.23486676784514718, |
| "grad_norm": 0.061649810522794724, |
| "learning_rate": 0.0018191468002540616, |
| "loss": -0.9015, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.23586196601398254, |
| "grad_norm": 0.050693582743406296, |
| "learning_rate": 0.0018172545018671595, |
| "loss": -0.8103, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.2368571641828179, |
| "grad_norm": 0.11253263801336288, |
| "learning_rate": 0.0018153533501878284, |
| "loss": -0.8513, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.23785236235165327, |
| "grad_norm": 0.05799407884478569, |
| "learning_rate": 0.0018134433658111844, |
| "loss": -0.8725, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.23884756052048864, |
| "grad_norm": 0.12612490355968475, |
| "learning_rate": 0.0018115245694280287, |
| "loss": -0.8313, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.239842758689324, |
| "grad_norm": 0.052735164761543274, |
| "learning_rate": 0.0018095969818246224, |
| "loss": -0.8749, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.24083795685815937, |
| "grad_norm": 0.062358558177948, |
| "learning_rate": 0.0018076606238824615, |
| "loss": -0.8253, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.24183315502699476, |
| "grad_norm": 0.05003003403544426, |
| "learning_rate": 0.0018057155165780512, |
| "loss": -0.869, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.24282835319583013, |
| "grad_norm": 0.038384635001420975, |
| "learning_rate": 0.001803761680982678, |
| "loss": -0.8525, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.2438235513646655, |
| "grad_norm": 0.05786055326461792, |
| "learning_rate": 0.0018017991382621814, |
| "loss": -0.8248, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.24481874953350086, |
| "grad_norm": 0.05332957208156586, |
| "learning_rate": 0.0017998279096767256, |
| "loss": -0.8151, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.24581394770233622, |
| "grad_norm": 0.05446227639913559, |
| "learning_rate": 0.0017978480165805682, |
| "loss": -0.8425, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.2468091458711716, |
| "grad_norm": 0.061572350561618805, |
| "learning_rate": 0.001795859480421829, |
| "loss": -0.8527, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.24780434404000698, |
| "grad_norm": 0.07180485129356384, |
| "learning_rate": 0.0017938623227422576, |
| "loss": -0.8539, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.24879954220884234, |
| "grad_norm": 0.040774550288915634, |
| "learning_rate": 0.0017918565651770012, |
| "loss": -0.845, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.2497947403776777, |
| "grad_norm": 0.08509187400341034, |
| "learning_rate": 0.001789842229454368, |
| "loss": -0.8622, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.25078993854651305, |
| "grad_norm": 0.03797999024391174, |
| "learning_rate": 0.0017878193373955947, |
| "loss": -0.8216, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.25178513671534847, |
| "grad_norm": 0.06903784722089767, |
| "learning_rate": 0.0017857879109146068, |
| "loss": -0.8296, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.25278033488418383, |
| "grad_norm": 0.05143648013472557, |
| "learning_rate": 0.0017837479720177844, |
| "loss": -0.8783, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.2537755330530192, |
| "grad_norm": 0.06422683596611023, |
| "learning_rate": 0.0017816995428037213, |
| "loss": -0.8332, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.25477073122185456, |
| "grad_norm": 0.12899571657180786, |
| "learning_rate": 0.001779642645462987, |
| "loss": -0.8527, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.25576592939068993, |
| "grad_norm": 0.06423981487751007, |
| "learning_rate": 0.0017775773022778863, |
| "loss": -0.8072, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.2567611275595253, |
| "grad_norm": 0.06049381569027901, |
| "learning_rate": 0.0017755035356222173, |
| "loss": -0.8617, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.25775632572836066, |
| "grad_norm": 0.05178122967481613, |
| "learning_rate": 0.0017734213679610287, |
| "loss": -0.8668, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.258751523897196, |
| "grad_norm": 0.07370316982269287, |
| "learning_rate": 0.001771330821850378, |
| "loss": -0.8563, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.2597467220660314, |
| "grad_norm": 0.055624667555093765, |
| "learning_rate": 0.0017692319199370857, |
| "loss": -0.8197, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.26074192023486675, |
| "grad_norm": 0.0338563434779644, |
| "learning_rate": 0.0017671246849584902, |
| "loss": -0.8392, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.2617371184037021, |
| "grad_norm": 0.056284110993146896, |
| "learning_rate": 0.0017650091397422027, |
| "loss": -0.8636, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.2627323165725375, |
| "grad_norm": 0.08218339830636978, |
| "learning_rate": 0.001762885307205858, |
| "loss": -0.8284, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.26372751474137285, |
| "grad_norm": 0.04653181508183479, |
| "learning_rate": 0.0017607532103568672, |
| "loss": -0.8151, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.26472271291020827, |
| "grad_norm": 0.05832862854003906, |
| "learning_rate": 0.001758612872292169, |
| "loss": -0.893, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.26571791107904363, |
| "grad_norm": 0.12119904160499573, |
| "learning_rate": 0.0017564643161979783, |
| "loss": -0.8758, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.266713109247879, |
| "grad_norm": 0.09644432365894318, |
| "learning_rate": 0.0017543075653495364, |
| "loss": -0.8273, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.26770830741671436, |
| "grad_norm": 0.05218861624598503, |
| "learning_rate": 0.0017521426431108573, |
| "loss": -0.8439, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.26870350558554973, |
| "grad_norm": 0.043938472867012024, |
| "learning_rate": 0.0017499695729344764, |
| "loss": -0.8499, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.2696987037543851, |
| "grad_norm": 0.05423975735902786, |
| "learning_rate": 0.0017477883783611943, |
| "loss": -0.8484, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.27069390192322046, |
| "grad_norm": 0.05785686895251274, |
| "learning_rate": 0.001745599083019824, |
| "loss": -0.9024, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.2716891000920558, |
| "grad_norm": 0.06736781448125839, |
| "learning_rate": 0.0017434017106269326, |
| "loss": -0.8415, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.2726842982608912, |
| "grad_norm": 0.04341396689414978, |
| "learning_rate": 0.0017411962849865873, |
| "loss": -0.849, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.27367949642972655, |
| "grad_norm": 0.060457851737737656, |
| "learning_rate": 0.0017389828299900947, |
| "loss": -0.8814, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.2746746945985619, |
| "grad_norm": 0.06649858504533768, |
| "learning_rate": 0.0017367613696157435, |
| "loss": -0.8329, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.2756698927673973, |
| "grad_norm": 0.20052272081375122, |
| "learning_rate": 0.0017345319279285438, |
| "loss": -0.8024, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.2766650909362327, |
| "grad_norm": 0.07833337783813477, |
| "learning_rate": 0.0017322945290799687, |
| "loss": -0.8234, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.27766028910506807, |
| "grad_norm": 0.07275154441595078, |
| "learning_rate": 0.001730049197307689, |
| "loss": -0.8805, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.27865548727390343, |
| "grad_norm": 0.042492885142564774, |
| "learning_rate": 0.0017277959569353138, |
| "loss": -0.8511, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.2796506854427388, |
| "grad_norm": 0.04260088875889778, |
| "learning_rate": 0.0017255348323721256, |
| "loss": -0.8304, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.28064588361157417, |
| "grad_norm": 0.03392525017261505, |
| "learning_rate": 0.0017232658481128157, |
| "loss": -0.8563, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.28164108178040953, |
| "grad_norm": 0.05141417682170868, |
| "learning_rate": 0.0017209890287372199, |
| "loss": -0.844, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.2826362799492449, |
| "grad_norm": 0.11153557151556015, |
| "learning_rate": 0.0017187043989100508, |
| "loss": -0.8143, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.28363147811808026, |
| "grad_norm": 0.07110270857810974, |
| "learning_rate": 0.001716411983380632, |
| "loss": -0.8752, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.2846266762869156, |
| "grad_norm": 0.023993652313947678, |
| "learning_rate": 0.001714111806982629, |
| "loss": -0.8588, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.285621874455751, |
| "grad_norm": 0.052228838205337524, |
| "learning_rate": 0.0017118038946337811, |
| "loss": -0.8507, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.28661707262458636, |
| "grad_norm": 0.022853029891848564, |
| "learning_rate": 0.00170948827133563, |
| "loss": -0.8648, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.2876122707934217, |
| "grad_norm": 0.044159069657325745, |
| "learning_rate": 0.0017071649621732507, |
| "loss": -0.8385, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.2886074689622571, |
| "grad_norm": 0.05615956708788872, |
| "learning_rate": 0.0017048339923149789, |
| "loss": -0.8399, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.2896026671310925, |
| "grad_norm": 0.1265154331922531, |
| "learning_rate": 0.0017024953870121374, |
| "loss": -0.819, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.29059786529992787, |
| "grad_norm": 0.03834633529186249, |
| "learning_rate": 0.0017001491715987643, |
| "loss": -0.859, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.29159306346876324, |
| "grad_norm": 0.07074081897735596, |
| "learning_rate": 0.0016977953714913383, |
| "loss": -0.8078, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.2925882616375986, |
| "grad_norm": 0.06447435915470123, |
| "learning_rate": 0.001695434012188502, |
| "loss": -0.8533, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.29358345980643397, |
| "grad_norm": 0.040345288813114166, |
| "learning_rate": 0.0016930651192707865, |
| "loss": -0.8452, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.29457865797526933, |
| "grad_norm": 0.04355823993682861, |
| "learning_rate": 0.0016906887184003345, |
| "loss": -0.8232, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.2955738561441047, |
| "grad_norm": 0.1037873849272728, |
| "learning_rate": 0.0016883048353206228, |
| "loss": -0.8356, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.29656905431294006, |
| "grad_norm": 0.1285988837480545, |
| "learning_rate": 0.001685913495856182, |
| "loss": -0.8347, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.2975642524817754, |
| "grad_norm": 0.06592582166194916, |
| "learning_rate": 0.0016835147259123176, |
| "loss": -0.8578, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.2985594506506108, |
| "grad_norm": 0.0572790652513504, |
| "learning_rate": 0.001681108551474829, |
| "loss": -0.8624, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.29955464881944616, |
| "grad_norm": 0.02885902300477028, |
| "learning_rate": 0.001678694998609729, |
| "loss": -0.8771, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.3005498469882815, |
| "grad_norm": 0.081394262611866, |
| "learning_rate": 0.00167627409346296, |
| "loss": -0.8258, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.3015450451571169, |
| "grad_norm": 0.09440901130437851, |
| "learning_rate": 0.0016738458622601114, |
| "loss": -0.8845, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.3025402433259523, |
| "grad_norm": 0.0748928114771843, |
| "learning_rate": 0.001671410331306136, |
| "loss": -0.7864, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.30353544149478767, |
| "grad_norm": 0.05985994637012482, |
| "learning_rate": 0.0016689675269850645, |
| "loss": -0.8587, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.30453063966362304, |
| "grad_norm": 0.05486709251999855, |
| "learning_rate": 0.0016665174757597196, |
| "loss": -0.838, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.3055258378324584, |
| "grad_norm": 0.02882876992225647, |
| "learning_rate": 0.0016640602041714293, |
| "loss": -0.8639, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.30652103600129377, |
| "grad_norm": 0.0584944412112236, |
| "learning_rate": 0.00166159573883974, |
| "loss": -0.8264, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.30751623417012913, |
| "grad_norm": 0.04569467529654503, |
| "learning_rate": 0.0016591241064621272, |
| "loss": -0.8153, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.3085114323389645, |
| "grad_norm": 0.04677337408065796, |
| "learning_rate": 0.0016566453338137071, |
| "loss": -0.8432, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.30950663050779986, |
| "grad_norm": 0.04505476728081703, |
| "learning_rate": 0.0016541594477469466, |
| "loss": -0.8812, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.3105018286766352, |
| "grad_norm": 0.04246704652905464, |
| "learning_rate": 0.0016516664751913713, |
| "loss": -0.8448, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.3114970268454706, |
| "grad_norm": 0.06890695542097092, |
| "learning_rate": 0.0016491664431532746, |
| "loss": -0.8286, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.31249222501430596, |
| "grad_norm": 0.04187687113881111, |
| "learning_rate": 0.001646659378715426, |
| "loss": -0.8733, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.3134874231831413, |
| "grad_norm": 0.04906761646270752, |
| "learning_rate": 0.0016441453090367752, |
| "loss": -0.8545, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.3144826213519767, |
| "grad_norm": 0.02713639661669731, |
| "learning_rate": 0.001641624261352161, |
| "loss": -0.8881, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.3154778195208121, |
| "grad_norm": 0.053854767233133316, |
| "learning_rate": 0.0016390962629720138, |
| "loss": -0.8535, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.3164730176896475, |
| "grad_norm": 0.04315832257270813, |
| "learning_rate": 0.001636561341282061, |
| "loss": -0.837, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.31746821585848284, |
| "grad_norm": 0.037244752049446106, |
| "learning_rate": 0.0016340195237430292, |
| "loss": -0.8767, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.3184634140273182, |
| "grad_norm": 0.03590194508433342, |
| "learning_rate": 0.0016314708378903492, |
| "loss": -0.8656, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.31945861219615357, |
| "grad_norm": 0.04543634504079819, |
| "learning_rate": 0.0016289153113338544, |
| "loss": -0.8195, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.32045381036498893, |
| "grad_norm": 0.08021537959575653, |
| "learning_rate": 0.0016263529717574841, |
| "loss": -0.8524, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.3214490085338243, |
| "grad_norm": 0.03517700731754303, |
| "learning_rate": 0.001623783846918983, |
| "loss": -0.8523, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.32244420670265966, |
| "grad_norm": 0.08983421325683594, |
| "learning_rate": 0.0016212079646495995, |
| "loss": -0.8836, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.32343940487149503, |
| "grad_norm": 0.03937257453799248, |
| "learning_rate": 0.001618625352853786, |
| "loss": -0.8303, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.3244346030403304, |
| "grad_norm": 0.05866921320557594, |
| "learning_rate": 0.001616036039508895, |
| "loss": -0.8363, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.32542980120916576, |
| "grad_norm": 0.036655690521001816, |
| "learning_rate": 0.001613440052664877, |
| "loss": -0.8672, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.3264249993780011, |
| "grad_norm": 0.04297897219657898, |
| "learning_rate": 0.0016108374204439767, |
| "loss": -0.819, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.3274201975468365, |
| "grad_norm": 0.13241665065288544, |
| "learning_rate": 0.0016082281710404264, |
| "loss": -0.842, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.3284153957156719, |
| "grad_norm": 0.08025432378053665, |
| "learning_rate": 0.0016056123327201437, |
| "loss": -0.8578, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.3294105938845073, |
| "grad_norm": 0.037778500467538834, |
| "learning_rate": 0.0016029899338204233, |
| "loss": -0.8423, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.33040579205334264, |
| "grad_norm": 0.051869478076696396, |
| "learning_rate": 0.0016003610027496304, |
| "loss": -0.8651, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.331400990222178, |
| "grad_norm": 0.03212331607937813, |
| "learning_rate": 0.0015977255679868931, |
| "loss": -0.8604, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.33239618839101337, |
| "grad_norm": 0.03444599732756615, |
| "learning_rate": 0.0015950836580817928, |
| "loss": -0.8549, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.33339138655984873, |
| "grad_norm": 0.07950103282928467, |
| "learning_rate": 0.001592435301654058, |
| "loss": -0.8384, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.3343865847286841, |
| "grad_norm": 0.06033974885940552, |
| "learning_rate": 0.0015897805273932502, |
| "loss": -0.8699, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.33538178289751946, |
| "grad_norm": 0.041036177426576614, |
| "learning_rate": 0.0015871193640584562, |
| "loss": -0.8253, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.33637698106635483, |
| "grad_norm": 0.08517392724752426, |
| "learning_rate": 0.001584451840477975, |
| "loss": -0.8145, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.3373721792351902, |
| "grad_norm": 0.053120095282793045, |
| "learning_rate": 0.0015817779855490054, |
| "loss": -0.8427, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.33836737740402556, |
| "grad_norm": 0.056728117167949677, |
| "learning_rate": 0.0015790978282373348, |
| "loss": -0.8381, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.3393625755728609, |
| "grad_norm": 0.050754986703395844, |
| "learning_rate": 0.0015764113975770236, |
| "loss": -0.8605, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.3403577737416963, |
| "grad_norm": 0.02987166866660118, |
| "learning_rate": 0.0015737187226700904, |
| "loss": -0.8775, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.3413529719105317, |
| "grad_norm": 0.041901689022779465, |
| "learning_rate": 0.001571019832686199, |
| "loss": -0.8808, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.3423481700793671, |
| "grad_norm": 0.03326866030693054, |
| "learning_rate": 0.001568314756862339, |
| "loss": -0.8301, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.34334336824820244, |
| "grad_norm": 0.08149438351392746, |
| "learning_rate": 0.0015656035245025129, |
| "loss": -0.867, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.3443385664170378, |
| "grad_norm": 0.0481393039226532, |
| "learning_rate": 0.0015628861649774155, |
| "loss": -0.8777, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.34533376458587317, |
| "grad_norm": 0.06255054473876953, |
| "learning_rate": 0.0015601627077241182, |
| "loss": -0.8579, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.34632896275470854, |
| "grad_norm": 0.05797385796904564, |
| "learning_rate": 0.0015574331822457471, |
| "loss": -0.877, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.3473241609235439, |
| "grad_norm": 0.05263502523303032, |
| "learning_rate": 0.0015546976181111671, |
| "loss": -0.8364, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.34831935909237927, |
| "grad_norm": 0.06181139498949051, |
| "learning_rate": 0.0015519560449546584, |
| "loss": -0.8426, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.34931455726121463, |
| "grad_norm": 0.0733242854475975, |
| "learning_rate": 0.0015492084924755972, |
| "loss": -0.8277, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.35030975543005, |
| "grad_norm": 0.041065763682127, |
| "learning_rate": 0.0015464549904381335, |
| "loss": -0.8914, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.35130495359888536, |
| "grad_norm": 0.0405518040060997, |
| "learning_rate": 0.0015436955686708687, |
| "loss": -0.8374, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.3523001517677207, |
| "grad_norm": 0.05705835297703743, |
| "learning_rate": 0.0015409302570665324, |
| "loss": -0.848, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.3532953499365561, |
| "grad_norm": 0.049008939415216446, |
| "learning_rate": 0.0015381590855816586, |
| "loss": -0.839, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.3542905481053915, |
| "grad_norm": 0.23253494501113892, |
| "learning_rate": 0.0015353820842362613, |
| "loss": -0.844, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.3552857462742269, |
| "grad_norm": 0.06854347884654999, |
| "learning_rate": 0.001532599283113509, |
| "loss": -0.8313, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.35628094444306224, |
| "grad_norm": 0.04343127831816673, |
| "learning_rate": 0.001529810712359399, |
| "loss": -0.8745, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.3572761426118976, |
| "grad_norm": 0.03824308142066002, |
| "learning_rate": 0.0015270164021824314, |
| "loss": -0.8711, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.35827134078073297, |
| "grad_norm": 0.03023524396121502, |
| "learning_rate": 0.0015242163828532804, |
| "loss": -0.8434, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.35926653894956834, |
| "grad_norm": 0.041595038026571274, |
| "learning_rate": 0.001521410684704468, |
| "loss": -0.851, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.3602617371184037, |
| "grad_norm": 0.044001661241054535, |
| "learning_rate": 0.0015185993381300346, |
| "loss": -0.8513, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.36125693528723907, |
| "grad_norm": 0.022997932508587837, |
| "learning_rate": 0.0015157823735852088, |
| "loss": -0.86, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.36225213345607443, |
| "grad_norm": 0.0806240439414978, |
| "learning_rate": 0.0015129598215860803, |
| "loss": -0.8273, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.3632473316249098, |
| "grad_norm": 0.039777714759111404, |
| "learning_rate": 0.0015101317127092658, |
| "loss": -0.8552, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.36424252979374516, |
| "grad_norm": 0.04692668467760086, |
| "learning_rate": 0.0015072980775915805, |
| "loss": -0.7963, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.3652377279625805, |
| "grad_norm": 0.03576252609491348, |
| "learning_rate": 0.001504458946929705, |
| "loss": -0.85, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.3662329261314159, |
| "grad_norm": 0.039703838527202606, |
| "learning_rate": 0.001501614351479853, |
| "loss": -0.8753, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.3672281243002513, |
| "grad_norm": 0.054135072976350784, |
| "learning_rate": 0.0014987643220574372, |
| "loss": -0.8365, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.3682233224690867, |
| "grad_norm": 0.05153687298297882, |
| "learning_rate": 0.001495908889536738, |
| "loss": -0.8924, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.36921852063792204, |
| "grad_norm": 0.03708389773964882, |
| "learning_rate": 0.0014930480848505657, |
| "loss": -0.8845, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.3702137188067574, |
| "grad_norm": 0.051574934273958206, |
| "learning_rate": 0.0014901819389899283, |
| "loss": -0.8512, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.3712089169755928, |
| "grad_norm": 0.13312560319900513, |
| "learning_rate": 0.0014873104830036943, |
| "loss": -0.8762, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.37220411514442814, |
| "grad_norm": 0.08459248393774033, |
| "learning_rate": 0.0014844337479982563, |
| "loss": -0.8661, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.3731993133132635, |
| "grad_norm": 0.029227307066321373, |
| "learning_rate": 0.0014815517651371945, |
| "loss": -0.8333, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.37419451148209887, |
| "grad_norm": 0.04313547909259796, |
| "learning_rate": 0.0014786645656409392, |
| "loss": -0.8348, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.37518970965093423, |
| "grad_norm": 0.08348735421895981, |
| "learning_rate": 0.0014757721807864318, |
| "loss": -0.8379, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.3761849078197696, |
| "grad_norm": 0.02443796768784523, |
| "learning_rate": 0.0014728746419067872, |
| "loss": -0.8608, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.37718010598860496, |
| "grad_norm": 0.040778059512376785, |
| "learning_rate": 0.001469971980390953, |
| "loss": -0.8305, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.37817530415744033, |
| "grad_norm": 0.04360215365886688, |
| "learning_rate": 0.001467064227683371, |
| "loss": -0.8496, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.3791705023262757, |
| "grad_norm": 0.040289781987667084, |
| "learning_rate": 0.0014641514152836349, |
| "loss": -0.8688, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.3801657004951111, |
| "grad_norm": 0.03933040797710419, |
| "learning_rate": 0.00146123357474615, |
| "loss": -0.8777, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.3811608986639465, |
| "grad_norm": 0.09043987840414047, |
| "learning_rate": 0.001458310737679792, |
| "loss": -0.857, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.38215609683278184, |
| "grad_norm": 0.034497786313295364, |
| "learning_rate": 0.001455382935747563, |
| "loss": -0.8712, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.3831512950016172, |
| "grad_norm": 0.03928445652127266, |
| "learning_rate": 0.0014524502006662498, |
| "loss": -0.8774, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.3841464931704526, |
| "grad_norm": 0.04413870349526405, |
| "learning_rate": 0.0014495125642060797, |
| "loss": -0.8739, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.38514169133928794, |
| "grad_norm": 0.023376120254397392, |
| "learning_rate": 0.0014465700581903764, |
| "loss": -0.9013, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.3861368895081233, |
| "grad_norm": 0.04416632652282715, |
| "learning_rate": 0.0014436227144952155, |
| "loss": -0.8906, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.38713208767695867, |
| "grad_norm": 0.0480741448700428, |
| "learning_rate": 0.0014406705650490787, |
| "loss": -0.8795, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.38812728584579403, |
| "grad_norm": 0.12349933385848999, |
| "learning_rate": 0.001437713641832509, |
| "loss": -0.8592, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.3891224840146294, |
| "grad_norm": 0.06916704773902893, |
| "learning_rate": 0.0014347519768777627, |
| "loss": -0.8575, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.39011768218346476, |
| "grad_norm": 0.05390172451734543, |
| "learning_rate": 0.0014317856022684637, |
| "loss": -0.8743, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.39111288035230013, |
| "grad_norm": 0.0792585238814354, |
| "learning_rate": 0.0014288145501392552, |
| "loss": -0.8422, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.3921080785211355, |
| "grad_norm": 0.06161206215620041, |
| "learning_rate": 0.0014258388526754517, |
| "loss": -0.8521, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.3931032766899709, |
| "grad_norm": 0.1214885413646698, |
| "learning_rate": 0.001422858542112691, |
| "loss": -0.7868, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.3940984748588063, |
| "grad_norm": 0.022072777152061462, |
| "learning_rate": 0.0014198736507365834, |
| "loss": -0.858, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.39509367302764165, |
| "grad_norm": 0.038208458572626114, |
| "learning_rate": 0.0014168842108823645, |
| "loss": -0.8562, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.396088871196477, |
| "grad_norm": 0.0507245697081089, |
| "learning_rate": 0.0014138902549345428, |
| "loss": -0.8513, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.3970840693653124, |
| "grad_norm": 0.045909419655799866, |
| "learning_rate": 0.0014108918153265485, |
| "loss": -0.852, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.39807926753414774, |
| "grad_norm": 0.09012339264154434, |
| "learning_rate": 0.0014078889245403843, |
| "loss": -0.8763, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.3990744657029831, |
| "grad_norm": 0.08021102845668793, |
| "learning_rate": 0.001404881615106272, |
| "loss": -0.8464, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.40006966387181847, |
| "grad_norm": 0.07724355161190033, |
| "learning_rate": 0.001401869919602301, |
| "loss": -0.8451, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.40106486204065384, |
| "grad_norm": 0.03839779272675514, |
| "learning_rate": 0.001398853870654074, |
| "loss": -0.8635, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.4020600602094892, |
| "grad_norm": 0.059427373111248016, |
| "learning_rate": 0.0013958335009343547, |
| "loss": -0.8255, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.40305525837832457, |
| "grad_norm": 0.050591662526130676, |
| "learning_rate": 0.0013928088431627145, |
| "loss": -0.8643, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.40405045654715993, |
| "grad_norm": 0.0813014954328537, |
| "learning_rate": 0.001389779930105175, |
| "loss": -0.8737, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.4050456547159953, |
| "grad_norm": 0.04178832843899727, |
| "learning_rate": 0.0013867467945738576, |
| "loss": -0.8632, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.4060408528848307, |
| "grad_norm": 0.08438771218061447, |
| "learning_rate": 0.0013837094694266244, |
| "loss": -0.8227, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.4070360510536661, |
| "grad_norm": 0.047368500381708145, |
| "learning_rate": 0.0013806679875667228, |
| "loss": -0.8522, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.40803124922250145, |
| "grad_norm": 0.04647338017821312, |
| "learning_rate": 0.0013776223819424317, |
| "loss": -0.8457, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.4090264473913368, |
| "grad_norm": 0.2708975672721863, |
| "learning_rate": 0.0013745726855467005, |
| "loss": -0.8376, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.4100216455601722, |
| "grad_norm": 0.06989341974258423, |
| "learning_rate": 0.0013715189314167954, |
| "loss": -0.8457, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.41101684372900754, |
| "grad_norm": 0.047301627695560455, |
| "learning_rate": 0.0013684611526339392, |
| "loss": -0.8713, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.4120120418978429, |
| "grad_norm": 0.039312686771154404, |
| "learning_rate": 0.0013653993823229535, |
| "loss": -0.8742, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.41300724006667827, |
| "grad_norm": 0.055356428027153015, |
| "learning_rate": 0.0013623336536519, |
| "loss": -0.8134, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.41400243823551364, |
| "grad_norm": 0.10555850714445114, |
| "learning_rate": 0.001359263999831722, |
| "loss": -0.8181, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.414997636404349, |
| "grad_norm": 0.10872016102075577, |
| "learning_rate": 0.0013561904541158827, |
| "loss": -0.8514, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.41599283457318437, |
| "grad_norm": 0.031025927513837814, |
| "learning_rate": 0.001353113049800007, |
| "loss": -0.8503, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.41698803274201973, |
| "grad_norm": 0.040020719170570374, |
| "learning_rate": 0.0013500318202215192, |
| "loss": -0.842, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.4179832309108551, |
| "grad_norm": 0.16235248744487762, |
| "learning_rate": 0.0013469467987592828, |
| "loss": -0.855, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.4189784290796905, |
| "grad_norm": 0.04261363670229912, |
| "learning_rate": 0.001343858018833239, |
| "loss": -0.8704, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.4199736272485259, |
| "grad_norm": 0.05656075477600098, |
| "learning_rate": 0.0013407655139040435, |
| "loss": -0.8526, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.42096882541736125, |
| "grad_norm": 0.04471505433320999, |
| "learning_rate": 0.0013376693174727064, |
| "loss": -0.8372, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.4219640235861966, |
| "grad_norm": 0.05171617493033409, |
| "learning_rate": 0.001334569463080226, |
| "loss": -0.8628, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.422959221755032, |
| "grad_norm": 0.06456071138381958, |
| "learning_rate": 0.0013314659843072273, |
| "loss": -0.8115, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.42395441992386734, |
| "grad_norm": 0.03985525295138359, |
| "learning_rate": 0.0013283589147735995, |
| "loss": -0.8998, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.4249496180927027, |
| "grad_norm": 0.05016870051622391, |
| "learning_rate": 0.0013252482881381297, |
| "loss": -0.8782, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.4259448162615381, |
| "grad_norm": 0.12391635030508041, |
| "learning_rate": 0.001322134138098138, |
| "loss": -0.8462, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.42694001443037344, |
| "grad_norm": 0.038156382739543915, |
| "learning_rate": 0.0013190164983891148, |
| "loss": -0.8127, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.4279352125992088, |
| "grad_norm": 0.05774056166410446, |
| "learning_rate": 0.0013158954027843537, |
| "loss": -0.8537, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.42893041076804417, |
| "grad_norm": 0.044757694005966187, |
| "learning_rate": 0.0013127708850945855, |
| "loss": -0.8843, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.42992560893687953, |
| "grad_norm": 0.035451311618089676, |
| "learning_rate": 0.0013096429791676122, |
| "loss": -0.8726, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.4309208071057149, |
| "grad_norm": 0.043845247477293015, |
| "learning_rate": 0.0013065117188879407, |
| "loss": -0.8458, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.4319160052745503, |
| "grad_norm": 0.0807977095246315, |
| "learning_rate": 0.001303377138176416, |
| "loss": -0.8799, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.4329112034433857, |
| "grad_norm": 0.030945105478167534, |
| "learning_rate": 0.0013002392709898526, |
| "loss": -0.8306, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.43390640161222105, |
| "grad_norm": 0.10755793750286102, |
| "learning_rate": 0.0012970981513206676, |
| "loss": -0.8119, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.4349015997810564, |
| "grad_norm": 0.04102904722094536, |
| "learning_rate": 0.0012939538131965121, |
| "loss": -0.8274, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.4358967979498918, |
| "grad_norm": 0.02935400977730751, |
| "learning_rate": 0.0012908062906799018, |
| "loss": -0.876, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.43689199611872714, |
| "grad_norm": 0.09526260197162628, |
| "learning_rate": 0.0012876556178678504, |
| "loss": -0.8462, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.4378871942875625, |
| "grad_norm": 0.03269312158226967, |
| "learning_rate": 0.0012845018288914977, |
| "loss": -0.8404, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.4388823924563979, |
| "grad_norm": 0.029963362962007523, |
| "learning_rate": 0.0012813449579157405, |
| "loss": -0.8987, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.43987759062523324, |
| "grad_norm": 0.043722447007894516, |
| "learning_rate": 0.0012781850391388638, |
| "loss": -0.8708, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.4408727887940686, |
| "grad_norm": 0.06401796638965607, |
| "learning_rate": 0.0012750221067921686, |
| "loss": -0.8667, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.44186798696290397, |
| "grad_norm": 0.0654074028134346, |
| "learning_rate": 0.0012718561951396016, |
| "loss": -0.8699, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.44286318513173933, |
| "grad_norm": 0.049930647015571594, |
| "learning_rate": 0.0012686873384773847, |
| "loss": -0.8465, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.4438583833005747, |
| "grad_norm": 0.04100380092859268, |
| "learning_rate": 0.001265515571133643, |
| "loss": -0.8787, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.4448535814694101, |
| "grad_norm": 0.04323037341237068, |
| "learning_rate": 0.0012623409274680334, |
| "loss": -0.8522, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.4458487796382455, |
| "grad_norm": 0.051976267248392105, |
| "learning_rate": 0.0012591634418713706, |
| "loss": -0.8574, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.44684397780708085, |
| "grad_norm": 0.06411243230104446, |
| "learning_rate": 0.0012559831487652566, |
| "loss": -0.8257, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.4478391759759162, |
| "grad_norm": 0.05125259980559349, |
| "learning_rate": 0.0012528000826017075, |
| "loss": -0.8826, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.4488343741447516, |
| "grad_norm": 0.12121732532978058, |
| "learning_rate": 0.001249614277862779, |
| "loss": -0.8667, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.44982957231358695, |
| "grad_norm": 0.24887067079544067, |
| "learning_rate": 0.0012464257690601938, |
| "loss": -0.855, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.4508247704824223, |
| "grad_norm": 0.041807617992162704, |
| "learning_rate": 0.001243234590734969, |
| "loss": -0.89, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.4518199686512577, |
| "grad_norm": 0.07936426997184753, |
| "learning_rate": 0.0012400407774570384, |
| "loss": -0.8489, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.45281516682009304, |
| "grad_norm": 0.07328188419342041, |
| "learning_rate": 0.0012368443638248819, |
| "loss": -0.8469, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.4538103649889284, |
| "grad_norm": 0.037623438984155655, |
| "learning_rate": 0.0012336453844651479, |
| "loss": -0.8402, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.45480556315776377, |
| "grad_norm": 0.06323853880167007, |
| "learning_rate": 0.0012304438740322794, |
| "loss": -0.8408, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.45580076132659914, |
| "grad_norm": 0.050275370478630066, |
| "learning_rate": 0.0012272398672081395, |
| "loss": -0.8539, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.45679595949543456, |
| "grad_norm": 0.045320775359869, |
| "learning_rate": 0.0012240333987016331, |
| "loss": -0.8633, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.4577911576642699, |
| "grad_norm": 0.1679801642894745, |
| "learning_rate": 0.0012208245032483335, |
| "loss": -0.8422, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.4587863558331053, |
| "grad_norm": 0.06080428883433342, |
| "learning_rate": 0.0012176132156101046, |
| "loss": -0.888, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.45978155400194065, |
| "grad_norm": 0.09289630502462387, |
| "learning_rate": 0.0012143995705747245, |
| "loss": -0.8749, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.460776752170776, |
| "grad_norm": 0.03888930380344391, |
| "learning_rate": 0.0012111836029555097, |
| "loss": -0.8391, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.4617719503396114, |
| "grad_norm": 0.05981600657105446, |
| "learning_rate": 0.0012079653475909366, |
| "loss": -0.8282, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.46276714850844675, |
| "grad_norm": 0.0493403784930706, |
| "learning_rate": 0.0012047448393442647, |
| "loss": -0.8631, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.4637623466772821, |
| "grad_norm": 0.028433311730623245, |
| "learning_rate": 0.001201522113103159, |
| "loss": -0.8989, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.4647575448461175, |
| "grad_norm": 0.03943129628896713, |
| "learning_rate": 0.0011982972037793123, |
| "loss": -0.8246, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.46575274301495284, |
| "grad_norm": 0.08593238145112991, |
| "learning_rate": 0.0011950701463080656, |
| "loss": -0.8393, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.4667479411837882, |
| "grad_norm": 0.10303488373756409, |
| "learning_rate": 0.001191840975648032, |
| "loss": -0.8671, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.46774313935262357, |
| "grad_norm": 0.03931227698922157, |
| "learning_rate": 0.0011886097267807159, |
| "loss": -0.861, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.46873833752145894, |
| "grad_norm": 0.06996316462755203, |
| "learning_rate": 0.0011853764347101354, |
| "loss": -0.8654, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.46973353569029436, |
| "grad_norm": 0.05362046882510185, |
| "learning_rate": 0.001182141134462442, |
| "loss": -0.8735, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.4707287338591297, |
| "grad_norm": 0.04977956414222717, |
| "learning_rate": 0.0011789038610855425, |
| "loss": -0.8828, |
| "step": 4730 |
| }, |
| { |
| "epoch": 0.4717239320279651, |
| "grad_norm": 0.05062364414334297, |
| "learning_rate": 0.001175664649648717, |
| "loss": -0.8668, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.47271913019680045, |
| "grad_norm": 0.0940314307808876, |
| "learning_rate": 0.0011724235352422426, |
| "loss": -0.8503, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.4737143283656358, |
| "grad_norm": 0.10042206943035126, |
| "learning_rate": 0.0011691805529770094, |
| "loss": -0.8796, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.4747095265344712, |
| "grad_norm": 0.07659121602773666, |
| "learning_rate": 0.0011659357379841433, |
| "loss": -0.8085, |
| "step": 4770 |
| }, |
| { |
| "epoch": 0.47570472470330655, |
| "grad_norm": 0.04234703257679939, |
| "learning_rate": 0.0011626891254146235, |
| "loss": -0.9062, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.4766999228721419, |
| "grad_norm": 0.041499655693769455, |
| "learning_rate": 0.0011594407504389016, |
| "loss": -0.8454, |
| "step": 4790 |
| }, |
| { |
| "epoch": 0.4776951210409773, |
| "grad_norm": 0.05934852361679077, |
| "learning_rate": 0.001156190648246523, |
| "loss": -0.8612, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.47869031920981264, |
| "grad_norm": 0.035424910485744476, |
| "learning_rate": 0.0011529388540457422, |
| "loss": -0.9154, |
| "step": 4810 |
| }, |
| { |
| "epoch": 0.479685517378648, |
| "grad_norm": 0.09620529413223267, |
| "learning_rate": 0.0011496854030631444, |
| "loss": -0.8585, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.4806807155474834, |
| "grad_norm": 0.09317132830619812, |
| "learning_rate": 0.001146430330543262, |
| "loss": -0.8507, |
| "step": 4830 |
| }, |
| { |
| "epoch": 0.48167591371631874, |
| "grad_norm": 0.03553410992026329, |
| "learning_rate": 0.0011431736717481935, |
| "loss": -0.8593, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.48267111188515416, |
| "grad_norm": 0.23797239363193512, |
| "learning_rate": 0.0011399154619572225, |
| "loss": -0.8728, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.4836663100539895, |
| "grad_norm": 0.11758790910243988, |
| "learning_rate": 0.0011366557364664326, |
| "loss": -0.8411, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.4846615082228249, |
| "grad_norm": 0.028982490301132202, |
| "learning_rate": 0.001133394530588329, |
| "loss": -0.8588, |
| "step": 4870 |
| }, |
| { |
| "epoch": 0.48565670639166025, |
| "grad_norm": 0.030277423560619354, |
| "learning_rate": 0.0011301318796514519, |
| "loss": -0.8613, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.4866519045604956, |
| "grad_norm": 0.0474855974316597, |
| "learning_rate": 0.001126867818999997, |
| "loss": -0.8964, |
| "step": 4890 |
| }, |
| { |
| "epoch": 0.487647102729331, |
| "grad_norm": 0.034340258687734604, |
| "learning_rate": 0.0011236023839934315, |
| "loss": -0.8219, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.48864230089816635, |
| "grad_norm": 0.08748650550842285, |
| "learning_rate": 0.0011203356100061104, |
| "loss": -0.837, |
| "step": 4910 |
| }, |
| { |
| "epoch": 0.4896374990670017, |
| "grad_norm": 0.05180235579609871, |
| "learning_rate": 0.0011170675324268943, |
| "loss": -0.8452, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.4906326972358371, |
| "grad_norm": 0.05069277063012123, |
| "learning_rate": 0.0011137981866587644, |
| "loss": -0.8689, |
| "step": 4930 |
| }, |
| { |
| "epoch": 0.49162789540467244, |
| "grad_norm": 0.0969507172703743, |
| "learning_rate": 0.0011105276081184417, |
| "loss": -0.8616, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.4926230935735078, |
| "grad_norm": 0.03841566666960716, |
| "learning_rate": 0.0011072558322360014, |
| "loss": -0.8737, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.4936182917423432, |
| "grad_norm": 0.03460973501205444, |
| "learning_rate": 0.0011039828944544884, |
| "loss": -0.8892, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.49461348991117854, |
| "grad_norm": 0.052395064383745193, |
| "learning_rate": 0.0011007088302295359, |
| "loss": -0.8513, |
| "step": 4970 |
| }, |
| { |
| "epoch": 0.49560868808001396, |
| "grad_norm": 0.09648430347442627, |
| "learning_rate": 0.0010974336750289788, |
| "loss": -0.8646, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.4966038862488493, |
| "grad_norm": 0.05110828951001167, |
| "learning_rate": 0.001094157464332471, |
| "loss": -0.8282, |
| "step": 4990 |
| }, |
| { |
| "epoch": 0.4975990844176847, |
| "grad_norm": 0.046441659331321716, |
| "learning_rate": 0.0010908802336311002, |
| "loss": -0.8459, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.49859428258652005, |
| "grad_norm": 0.08815504610538483, |
| "learning_rate": 0.0010876020184270039, |
| "loss": -0.8547, |
| "step": 5010 |
| }, |
| { |
| "epoch": 0.4995894807553554, |
| "grad_norm": 0.054612450301647186, |
| "learning_rate": 0.0010843228542329849, |
| "loss": -0.8494, |
| "step": 5020 |
| }, |
| { |
| "epoch": 0.5005846789241908, |
| "grad_norm": 0.0314970538020134, |
| "learning_rate": 0.0010810427765721266, |
| "loss": -0.867, |
| "step": 5030 |
| }, |
| { |
| "epoch": 0.5015798770930261, |
| "grad_norm": 0.05496123805642128, |
| "learning_rate": 0.0010777618209774071, |
| "loss": -0.8536, |
| "step": 5040 |
| }, |
| { |
| "epoch": 0.5025750752618615, |
| "grad_norm": 0.08668066561222076, |
| "learning_rate": 0.001074480022991316, |
| "loss": -0.8833, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.5035702734306969, |
| "grad_norm": 0.04756620153784752, |
| "learning_rate": 0.0010711974181654676, |
| "loss": -0.8271, |
| "step": 5060 |
| }, |
| { |
| "epoch": 0.5045654715995322, |
| "grad_norm": 0.055564671754837036, |
| "learning_rate": 0.0010679140420602176, |
| "loss": -0.8774, |
| "step": 5070 |
| }, |
| { |
| "epoch": 0.5055606697683677, |
| "grad_norm": 0.044413670897483826, |
| "learning_rate": 0.001064629930244276, |
| "loss": -0.7932, |
| "step": 5080 |
| }, |
| { |
| "epoch": 0.506555867937203, |
| "grad_norm": 0.035696543753147125, |
| "learning_rate": 0.0010613451182943244, |
| "loss": -0.8406, |
| "step": 5090 |
| }, |
| { |
| "epoch": 0.5075510661060384, |
| "grad_norm": 0.042886171489953995, |
| "learning_rate": 0.001058059641794627, |
| "loss": -0.8608, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.5085462642748737, |
| "grad_norm": 0.04136540740728378, |
| "learning_rate": 0.001054773536336648, |
| "loss": -0.8654, |
| "step": 5110 |
| }, |
| { |
| "epoch": 0.5095414624437091, |
| "grad_norm": 0.07801708579063416, |
| "learning_rate": 0.0010514868375186646, |
| "loss": -0.8573, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.5105366606125444, |
| "grad_norm": 0.02773255482316017, |
| "learning_rate": 0.0010481995809453826, |
| "loss": -0.8535, |
| "step": 5130 |
| }, |
| { |
| "epoch": 0.5115318587813799, |
| "grad_norm": 0.03874874860048294, |
| "learning_rate": 0.0010449118022275495, |
| "loss": -0.8643, |
| "step": 5140 |
| }, |
| { |
| "epoch": 0.5125270569502152, |
| "grad_norm": 0.06789931654930115, |
| "learning_rate": 0.0010416235369815692, |
| "loss": -0.8693, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.5135222551190506, |
| "grad_norm": 0.09328795224428177, |
| "learning_rate": 0.001038334820829116, |
| "loss": -0.851, |
| "step": 5160 |
| }, |
| { |
| "epoch": 0.5145174532878859, |
| "grad_norm": 0.05135517194867134, |
| "learning_rate": 0.001035045689396749, |
| "loss": -0.9017, |
| "step": 5170 |
| }, |
| { |
| "epoch": 0.5155126514567213, |
| "grad_norm": 0.0431673601269722, |
| "learning_rate": 0.0010317561783155262, |
| "loss": -0.8748, |
| "step": 5180 |
| }, |
| { |
| "epoch": 0.5165078496255567, |
| "grad_norm": 0.03427030146121979, |
| "learning_rate": 0.0010284663232206178, |
| "loss": -0.8548, |
| "step": 5190 |
| }, |
| { |
| "epoch": 0.517503047794392, |
| "grad_norm": 0.07007991522550583, |
| "learning_rate": 0.0010251761597509215, |
| "loss": -0.881, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.5184982459632275, |
| "grad_norm": 0.08876392245292664, |
| "learning_rate": 0.001021885723548675, |
| "loss": -0.8547, |
| "step": 5210 |
| }, |
| { |
| "epoch": 0.5194934441320628, |
| "grad_norm": 0.11546041816473007, |
| "learning_rate": 0.0010185950502590703, |
| "loss": -0.8441, |
| "step": 5220 |
| }, |
| { |
| "epoch": 0.5204886423008982, |
| "grad_norm": 0.03411949425935745, |
| "learning_rate": 0.0010153041755298687, |
| "loss": -0.8651, |
| "step": 5230 |
| }, |
| { |
| "epoch": 0.5214838404697335, |
| "grad_norm": 0.043606266379356384, |
| "learning_rate": 0.0010120131350110125, |
| "loss": -0.9115, |
| "step": 5240 |
| }, |
| { |
| "epoch": 0.5224790386385689, |
| "grad_norm": 0.03440025821328163, |
| "learning_rate": 0.001008721964354241, |
| "loss": -0.9109, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.5234742368074042, |
| "grad_norm": 0.029122570529580116, |
| "learning_rate": 0.0010054306992127028, |
| "loss": -0.8272, |
| "step": 5260 |
| }, |
| { |
| "epoch": 0.5244694349762397, |
| "grad_norm": 0.06181350350379944, |
| "learning_rate": 0.0010021393752405697, |
| "loss": -0.8695, |
| "step": 5270 |
| }, |
| { |
| "epoch": 0.525464633145075, |
| "grad_norm": 0.13611923158168793, |
| "learning_rate": 0.0009988480280926522, |
| "loss": -0.8621, |
| "step": 5280 |
| }, |
| { |
| "epoch": 0.5264598313139104, |
| "grad_norm": 0.04990173131227493, |
| "learning_rate": 0.00099555669342401, |
| "loss": -0.8884, |
| "step": 5290 |
| }, |
| { |
| "epoch": 0.5274550294827457, |
| "grad_norm": 0.030578065663576126, |
| "learning_rate": 0.000992265406889569, |
| "loss": -0.8843, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.5284502276515811, |
| "grad_norm": 0.05405525118112564, |
| "learning_rate": 0.0009889742041437322, |
| "loss": -0.8543, |
| "step": 5310 |
| }, |
| { |
| "epoch": 0.5294454258204165, |
| "grad_norm": 0.025917502120137215, |
| "learning_rate": 0.0009856831208399975, |
| "loss": -0.8611, |
| "step": 5320 |
| }, |
| { |
| "epoch": 0.5304406239892518, |
| "grad_norm": 0.058636005967855453, |
| "learning_rate": 0.0009823921926305661, |
| "loss": -0.8683, |
| "step": 5330 |
| }, |
| { |
| "epoch": 0.5314358221580873, |
| "grad_norm": 0.08390301465988159, |
| "learning_rate": 0.000979101455165961, |
| "loss": -0.8605, |
| "step": 5340 |
| }, |
| { |
| "epoch": 0.5324310203269226, |
| "grad_norm": 0.06718181073665619, |
| "learning_rate": 0.0009758109440946377, |
| "loss": -0.8469, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.533426218495758, |
| "grad_norm": 0.04684724658727646, |
| "learning_rate": 0.000972520695062599, |
| "loss": -0.8649, |
| "step": 5360 |
| }, |
| { |
| "epoch": 0.5344214166645933, |
| "grad_norm": 0.04522204399108887, |
| "learning_rate": 0.0009692307437130106, |
| "loss": -0.8793, |
| "step": 5370 |
| }, |
| { |
| "epoch": 0.5354166148334287, |
| "grad_norm": 0.02280861884355545, |
| "learning_rate": 0.0009659411256858122, |
| "loss": -0.8456, |
| "step": 5380 |
| }, |
| { |
| "epoch": 0.536411813002264, |
| "grad_norm": 0.022606611251831055, |
| "learning_rate": 0.000962651876617333, |
| "loss": -0.8819, |
| "step": 5390 |
| }, |
| { |
| "epoch": 0.5374070111710995, |
| "grad_norm": 0.04157610610127449, |
| "learning_rate": 0.0009593630321399049, |
| "loss": -0.8498, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.5384022093399348, |
| "grad_norm": 0.04567135497927666, |
| "learning_rate": 0.0009560746278814769, |
| "loss": -0.8388, |
| "step": 5410 |
| }, |
| { |
| "epoch": 0.5393974075087702, |
| "grad_norm": 0.04893554747104645, |
| "learning_rate": 0.0009527866994652299, |
| "loss": -0.877, |
| "step": 5420 |
| }, |
| { |
| "epoch": 0.5403926056776056, |
| "grad_norm": 0.03215763717889786, |
| "learning_rate": 0.0009494992825091892, |
| "loss": -0.8619, |
| "step": 5430 |
| }, |
| { |
| "epoch": 0.5413878038464409, |
| "grad_norm": 0.04611560329794884, |
| "learning_rate": 0.0009462124126258401, |
| "loss": -0.882, |
| "step": 5440 |
| }, |
| { |
| "epoch": 0.5423830020152763, |
| "grad_norm": 0.03594022989273071, |
| "learning_rate": 0.0009429261254217408, |
| "loss": -0.8399, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.5433782001841116, |
| "grad_norm": 0.047222770750522614, |
| "learning_rate": 0.0009396404564971374, |
| "loss": -0.8705, |
| "step": 5460 |
| }, |
| { |
| "epoch": 0.5443733983529471, |
| "grad_norm": 0.04804938659071922, |
| "learning_rate": 0.0009363554414455789, |
| "loss": -0.8645, |
| "step": 5470 |
| }, |
| { |
| "epoch": 0.5453685965217824, |
| "grad_norm": 0.07348743081092834, |
| "learning_rate": 0.0009330711158535307, |
| "loss": -0.88, |
| "step": 5480 |
| }, |
| { |
| "epoch": 0.5463637946906178, |
| "grad_norm": 0.03578435257077217, |
| "learning_rate": 0.0009297875152999887, |
| "loss": -0.8764, |
| "step": 5490 |
| }, |
| { |
| "epoch": 0.5473589928594531, |
| "grad_norm": 0.03122727759182453, |
| "learning_rate": 0.000926504675356095, |
| "loss": -0.8684, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.5483541910282885, |
| "grad_norm": 0.04529644921422005, |
| "learning_rate": 0.000923222631584752, |
| "loss": -0.8612, |
| "step": 5510 |
| }, |
| { |
| "epoch": 0.5493493891971238, |
| "grad_norm": 0.21876747906208038, |
| "learning_rate": 0.0009199414195402367, |
| "loss": -0.8319, |
| "step": 5520 |
| }, |
| { |
| "epoch": 0.5503445873659593, |
| "grad_norm": 0.039174988865852356, |
| "learning_rate": 0.000916661074767817, |
| "loss": -0.8831, |
| "step": 5530 |
| }, |
| { |
| "epoch": 0.5513397855347946, |
| "grad_norm": 0.032812558114528656, |
| "learning_rate": 0.0009133816328033649, |
| "loss": -0.8915, |
| "step": 5540 |
| }, |
| { |
| "epoch": 0.55233498370363, |
| "grad_norm": 0.04397067800164223, |
| "learning_rate": 0.0009101031291729726, |
| "loss": -0.8962, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.5533301818724654, |
| "grad_norm": 0.09248298406600952, |
| "learning_rate": 0.000906825599392567, |
| "loss": -0.8603, |
| "step": 5560 |
| }, |
| { |
| "epoch": 0.5543253800413007, |
| "grad_norm": 0.13714151084423065, |
| "learning_rate": 0.0009035490789675257, |
| "loss": -0.8565, |
| "step": 5570 |
| }, |
| { |
| "epoch": 0.5553205782101361, |
| "grad_norm": 0.06470950692892075, |
| "learning_rate": 0.000900273603392292, |
| "loss": -0.8365, |
| "step": 5580 |
| }, |
| { |
| "epoch": 0.5563157763789714, |
| "grad_norm": 0.02791593410074711, |
| "learning_rate": 0.0008969992081499903, |
| "loss": -0.8642, |
| "step": 5590 |
| }, |
| { |
| "epoch": 0.5573109745478069, |
| "grad_norm": 0.12671250104904175, |
| "learning_rate": 0.0008937259287120415, |
| "loss": -0.7883, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.5583061727166422, |
| "grad_norm": 0.034118831157684326, |
| "learning_rate": 0.0008904538005377793, |
| "loss": -0.8028, |
| "step": 5610 |
| }, |
| { |
| "epoch": 0.5593013708854776, |
| "grad_norm": 0.04268977791070938, |
| "learning_rate": 0.000887182859074066, |
| "loss": -0.8865, |
| "step": 5620 |
| }, |
| { |
| "epoch": 0.5602965690543129, |
| "grad_norm": 0.05735842511057854, |
| "learning_rate": 0.0008839131397549074, |
| "loss": -0.8794, |
| "step": 5630 |
| }, |
| { |
| "epoch": 0.5612917672231483, |
| "grad_norm": 0.03807393088936806, |
| "learning_rate": 0.0008806446780010716, |
| "loss": -0.8765, |
| "step": 5640 |
| }, |
| { |
| "epoch": 0.5622869653919836, |
| "grad_norm": 0.05110938474535942, |
| "learning_rate": 0.0008773775092197017, |
| "loss": -0.8541, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.5632821635608191, |
| "grad_norm": 0.05651829019188881, |
| "learning_rate": 0.0008741116688039349, |
| "loss": -0.877, |
| "step": 5660 |
| }, |
| { |
| "epoch": 0.5642773617296544, |
| "grad_norm": 0.16070760786533356, |
| "learning_rate": 0.000870847192132518, |
| "loss": -0.897, |
| "step": 5670 |
| }, |
| { |
| "epoch": 0.5652725598984898, |
| "grad_norm": 0.17324046790599823, |
| "learning_rate": 0.0008675841145694246, |
| "loss": -0.8577, |
| "step": 5680 |
| }, |
| { |
| "epoch": 0.5662677580673252, |
| "grad_norm": 0.0440760962665081, |
| "learning_rate": 0.0008643224714634723, |
| "loss": -0.8694, |
| "step": 5690 |
| }, |
| { |
| "epoch": 0.5672629562361605, |
| "grad_norm": 0.03526591882109642, |
| "learning_rate": 0.0008610622981479383, |
| "loss": -0.8741, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.5682581544049959, |
| "grad_norm": 0.04858911782503128, |
| "learning_rate": 0.0008578036299401784, |
| "loss": -0.8604, |
| "step": 5710 |
| }, |
| { |
| "epoch": 0.5692533525738313, |
| "grad_norm": 0.042971156537532806, |
| "learning_rate": 0.0008545465021412428, |
| "loss": -0.8235, |
| "step": 5720 |
| }, |
| { |
| "epoch": 0.5702485507426667, |
| "grad_norm": 0.03934862092137337, |
| "learning_rate": 0.0008512909500354946, |
| "loss": -0.8501, |
| "step": 5730 |
| }, |
| { |
| "epoch": 0.571243748911502, |
| "grad_norm": 0.12695066630840302, |
| "learning_rate": 0.000848037008890229, |
| "loss": -0.849, |
| "step": 5740 |
| }, |
| { |
| "epoch": 0.5722389470803374, |
| "grad_norm": 0.06290268152952194, |
| "learning_rate": 0.0008447847139552878, |
| "loss": -0.8564, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.5732341452491727, |
| "grad_norm": 0.06408194452524185, |
| "learning_rate": 0.0008415341004626802, |
| "loss": -0.894, |
| "step": 5760 |
| }, |
| { |
| "epoch": 0.5742293434180081, |
| "grad_norm": 0.03536716476082802, |
| "learning_rate": 0.0008382852036262007, |
| "loss": -0.8723, |
| "step": 5770 |
| }, |
| { |
| "epoch": 0.5752245415868434, |
| "grad_norm": 0.10467568039894104, |
| "learning_rate": 0.0008350380586410468, |
| "loss": -0.8447, |
| "step": 5780 |
| }, |
| { |
| "epoch": 0.5762197397556789, |
| "grad_norm": 0.059401463717222214, |
| "learning_rate": 0.000831792700683438, |
| "loss": -0.847, |
| "step": 5790 |
| }, |
| { |
| "epoch": 0.5772149379245142, |
| "grad_norm": 0.028337785974144936, |
| "learning_rate": 0.0008285491649102361, |
| "loss": -0.8471, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.5782101360933496, |
| "grad_norm": 0.0379803366959095, |
| "learning_rate": 0.0008253074864585625, |
| "loss": -0.8737, |
| "step": 5810 |
| }, |
| { |
| "epoch": 0.579205334262185, |
| "grad_norm": 0.0755845308303833, |
| "learning_rate": 0.0008220677004454181, |
| "loss": -0.8041, |
| "step": 5820 |
| }, |
| { |
| "epoch": 0.5802005324310203, |
| "grad_norm": 0.07279976457357407, |
| "learning_rate": 0.0008188298419673036, |
| "loss": -0.8035, |
| "step": 5830 |
| }, |
| { |
| "epoch": 0.5811957305998557, |
| "grad_norm": 0.049286697059869766, |
| "learning_rate": 0.0008155939460998381, |
| "loss": -0.8583, |
| "step": 5840 |
| }, |
| { |
| "epoch": 0.582190928768691, |
| "grad_norm": 0.045156385749578476, |
| "learning_rate": 0.0008123600478973808, |
| "loss": -0.8901, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.5831861269375265, |
| "grad_norm": 0.047281067818403244, |
| "learning_rate": 0.000809128182392649, |
| "loss": -0.8595, |
| "step": 5860 |
| }, |
| { |
| "epoch": 0.5841813251063618, |
| "grad_norm": 0.08644957095384598, |
| "learning_rate": 0.0008058983845963412, |
| "loss": -0.8523, |
| "step": 5870 |
| }, |
| { |
| "epoch": 0.5851765232751972, |
| "grad_norm": 0.05607840046286583, |
| "learning_rate": 0.0008026706894967554, |
| "loss": -0.8779, |
| "step": 5880 |
| }, |
| { |
| "epoch": 0.5861717214440325, |
| "grad_norm": 0.15865112841129303, |
| "learning_rate": 0.0007994451320594113, |
| "loss": -0.8798, |
| "step": 5890 |
| }, |
| { |
| "epoch": 0.5871669196128679, |
| "grad_norm": 0.05027535930275917, |
| "learning_rate": 0.0007962217472266723, |
| "loss": -0.8804, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.5881621177817032, |
| "grad_norm": 0.08624923229217529, |
| "learning_rate": 0.0007930005699173649, |
| "loss": -0.8769, |
| "step": 5910 |
| }, |
| { |
| "epoch": 0.5891573159505387, |
| "grad_norm": 0.0529584065079689, |
| "learning_rate": 0.0007897816350264023, |
| "loss": -0.8858, |
| "step": 5920 |
| }, |
| { |
| "epoch": 0.590152514119374, |
| "grad_norm": 0.08956887573003769, |
| "learning_rate": 0.0007865649774244049, |
| "loss": -0.8535, |
| "step": 5930 |
| }, |
| { |
| "epoch": 0.5911477122882094, |
| "grad_norm": 0.07602567970752716, |
| "learning_rate": 0.0007833506319573244, |
| "loss": -0.8598, |
| "step": 5940 |
| }, |
| { |
| "epoch": 0.5921429104570448, |
| "grad_norm": 0.034962136298418045, |
| "learning_rate": 0.0007801386334460638, |
| "loss": -0.8663, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.5931381086258801, |
| "grad_norm": 0.05293145775794983, |
| "learning_rate": 0.0007769290166861033, |
| "loss": -0.8277, |
| "step": 5960 |
| }, |
| { |
| "epoch": 0.5941333067947155, |
| "grad_norm": 0.04431833326816559, |
| "learning_rate": 0.0007737218164471205, |
| "loss": -0.8222, |
| "step": 5970 |
| }, |
| { |
| "epoch": 0.5951285049635509, |
| "grad_norm": 0.12767989933490753, |
| "learning_rate": 0.0007705170674726148, |
| "loss": -0.8855, |
| "step": 5980 |
| }, |
| { |
| "epoch": 0.5961237031323863, |
| "grad_norm": 0.03889622166752815, |
| "learning_rate": 0.0007673148044795319, |
| "loss": -0.8203, |
| "step": 5990 |
| }, |
| { |
| "epoch": 0.5971189013012216, |
| "grad_norm": 0.04737401381134987, |
| "learning_rate": 0.000764115062157886, |
| "loss": -0.8795, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.598114099470057, |
| "grad_norm": 0.03833894431591034, |
| "learning_rate": 0.0007609178751703861, |
| "loss": -0.8269, |
| "step": 6010 |
| }, |
| { |
| "epoch": 0.5991092976388923, |
| "grad_norm": 0.14908933639526367, |
| "learning_rate": 0.0007577232781520585, |
| "loss": -0.8262, |
| "step": 6020 |
| }, |
| { |
| "epoch": 0.6001044958077277, |
| "grad_norm": 0.05108148232102394, |
| "learning_rate": 0.0007545313057098726, |
| "loss": -0.8171, |
| "step": 6030 |
| }, |
| { |
| "epoch": 0.601099693976563, |
| "grad_norm": 0.05908135324716568, |
| "learning_rate": 0.0007513419924223661, |
| "loss": -0.8625, |
| "step": 6040 |
| }, |
| { |
| "epoch": 0.6020948921453985, |
| "grad_norm": 0.027432410046458244, |
| "learning_rate": 0.0007481553728392692, |
| "loss": -0.8343, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.6030900903142338, |
| "grad_norm": 0.04073173180222511, |
| "learning_rate": 0.0007449714814811333, |
| "loss": -0.8918, |
| "step": 6060 |
| }, |
| { |
| "epoch": 0.6040852884830692, |
| "grad_norm": 0.05039272829890251, |
| "learning_rate": 0.0007417903528389534, |
| "loss": -0.8388, |
| "step": 6070 |
| }, |
| { |
| "epoch": 0.6050804866519046, |
| "grad_norm": 0.04605141282081604, |
| "learning_rate": 0.0007386120213737961, |
| "loss": -0.8493, |
| "step": 6080 |
| }, |
| { |
| "epoch": 0.6060756848207399, |
| "grad_norm": 0.07342466711997986, |
| "learning_rate": 0.0007354365215164267, |
| "loss": -0.8343, |
| "step": 6090 |
| }, |
| { |
| "epoch": 0.6070708829895753, |
| "grad_norm": 0.03411560505628586, |
| "learning_rate": 0.000732263887666936, |
| "loss": -0.8551, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.6080660811584107, |
| "grad_norm": 0.1048220545053482, |
| "learning_rate": 0.0007290941541943664, |
| "loss": -0.8344, |
| "step": 6110 |
| }, |
| { |
| "epoch": 0.6090612793272461, |
| "grad_norm": 0.15538884699344635, |
| "learning_rate": 0.0007259273554363426, |
| "loss": -0.8448, |
| "step": 6120 |
| }, |
| { |
| "epoch": 0.6100564774960814, |
| "grad_norm": 0.03414257988333702, |
| "learning_rate": 0.0007227635256986955, |
| "loss": -0.8815, |
| "step": 6130 |
| }, |
| { |
| "epoch": 0.6110516756649168, |
| "grad_norm": 0.05820440128445625, |
| "learning_rate": 0.0007196026992550941, |
| "loss": -0.8684, |
| "step": 6140 |
| }, |
| { |
| "epoch": 0.6120468738337521, |
| "grad_norm": 0.03307825326919556, |
| "learning_rate": 0.000716444910346672, |
| "loss": -0.8694, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.6130420720025875, |
| "grad_norm": 0.050421956926584244, |
| "learning_rate": 0.0007132901931816571, |
| "loss": -0.8455, |
| "step": 6160 |
| }, |
| { |
| "epoch": 0.6140372701714228, |
| "grad_norm": 0.11055120080709457, |
| "learning_rate": 0.0007101385819350025, |
| "loss": -0.8937, |
| "step": 6170 |
| }, |
| { |
| "epoch": 0.6150324683402583, |
| "grad_norm": 0.041438162326812744, |
| "learning_rate": 0.0007069901107480138, |
| "loss": -0.8734, |
| "step": 6180 |
| }, |
| { |
| "epoch": 0.6160276665090936, |
| "grad_norm": 0.03807257115840912, |
| "learning_rate": 0.000703844813727981, |
| "loss": -0.8929, |
| "step": 6190 |
| }, |
| { |
| "epoch": 0.617022864677929, |
| "grad_norm": 0.06824404746294022, |
| "learning_rate": 0.0007007027249478077, |
| "loss": -0.8985, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.6180180628467644, |
| "grad_norm": 0.07732049375772476, |
| "learning_rate": 0.0006975638784456437, |
| "loss": -0.8537, |
| "step": 6210 |
| }, |
| { |
| "epoch": 0.6190132610155997, |
| "grad_norm": 0.14929652214050293, |
| "learning_rate": 0.0006944283082245149, |
| "loss": -0.8419, |
| "step": 6220 |
| }, |
| { |
| "epoch": 0.6200084591844351, |
| "grad_norm": 0.14729063212871552, |
| "learning_rate": 0.0006912960482519553, |
| "loss": -0.8594, |
| "step": 6230 |
| }, |
| { |
| "epoch": 0.6210036573532705, |
| "grad_norm": 0.023850033059716225, |
| "learning_rate": 0.0006881671324596388, |
| "loss": -0.8411, |
| "step": 6240 |
| }, |
| { |
| "epoch": 0.6219988555221059, |
| "grad_norm": 0.024202531203627586, |
| "learning_rate": 0.0006850415947430126, |
| "loss": -0.8503, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.6229940536909412, |
| "grad_norm": 0.06503470242023468, |
| "learning_rate": 0.0006819194689609287, |
| "loss": -0.8522, |
| "step": 6260 |
| }, |
| { |
| "epoch": 0.6239892518597766, |
| "grad_norm": 0.06802047044038773, |
| "learning_rate": 0.0006788007889352777, |
| "loss": -0.855, |
| "step": 6270 |
| }, |
| { |
| "epoch": 0.6249844500286119, |
| "grad_norm": 0.052513618022203445, |
| "learning_rate": 0.0006756855884506234, |
| "loss": -0.8805, |
| "step": 6280 |
| }, |
| { |
| "epoch": 0.6259796481974473, |
| "grad_norm": 0.03414776176214218, |
| "learning_rate": 0.0006725739012538345, |
| "loss": -0.8627, |
| "step": 6290 |
| }, |
| { |
| "epoch": 0.6269748463662826, |
| "grad_norm": 0.0709032341837883, |
| "learning_rate": 0.0006694657610537211, |
| "loss": -0.8588, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.6279700445351181, |
| "grad_norm": 0.0368841178715229, |
| "learning_rate": 0.0006663612015206687, |
| "loss": -0.8632, |
| "step": 6310 |
| }, |
| { |
| "epoch": 0.6289652427039534, |
| "grad_norm": 0.09083976596593857, |
| "learning_rate": 0.0006632602562862733, |
| "loss": -0.8653, |
| "step": 6320 |
| }, |
| { |
| "epoch": 0.6299604408727888, |
| "grad_norm": 0.037867575883865356, |
| "learning_rate": 0.000660162958942978, |
| "loss": -0.891, |
| "step": 6330 |
| }, |
| { |
| "epoch": 0.6309556390416242, |
| "grad_norm": 0.046128176152706146, |
| "learning_rate": 0.0006570693430437077, |
| "loss": -0.8694, |
| "step": 6340 |
| }, |
| { |
| "epoch": 0.6319508372104595, |
| "grad_norm": 0.078248530626297, |
| "learning_rate": 0.0006539794421015066, |
| "loss": -0.885, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.632946035379295, |
| "grad_norm": 0.047228600829839706, |
| "learning_rate": 0.0006508932895891747, |
| "loss": -0.8547, |
| "step": 6360 |
| }, |
| { |
| "epoch": 0.6339412335481303, |
| "grad_norm": 0.03179970011115074, |
| "learning_rate": 0.0006478109189389056, |
| "loss": -0.8712, |
| "step": 6370 |
| }, |
| { |
| "epoch": 0.6349364317169657, |
| "grad_norm": 0.05649185925722122, |
| "learning_rate": 0.000644732363541924, |
| "loss": -0.854, |
| "step": 6380 |
| }, |
| { |
| "epoch": 0.635931629885801, |
| "grad_norm": 0.03506692498922348, |
| "learning_rate": 0.0006416576567481245, |
| "loss": -0.8737, |
| "step": 6390 |
| }, |
| { |
| "epoch": 0.6369268280546364, |
| "grad_norm": 0.04961368441581726, |
| "learning_rate": 0.0006385868318657091, |
| "loss": -0.8685, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.6379220262234717, |
| "grad_norm": 0.11547062546014786, |
| "learning_rate": 0.0006355199221608277, |
| "loss": -0.8553, |
| "step": 6410 |
| }, |
| { |
| "epoch": 0.6389172243923071, |
| "grad_norm": 0.06517086923122406, |
| "learning_rate": 0.0006324569608572171, |
| "loss": -0.8541, |
| "step": 6420 |
| }, |
| { |
| "epoch": 0.6399124225611424, |
| "grad_norm": 0.03779355809092522, |
| "learning_rate": 0.0006293979811358413, |
| "loss": -0.8782, |
| "step": 6430 |
| }, |
| { |
| "epoch": 0.6409076207299779, |
| "grad_norm": 0.03799843415617943, |
| "learning_rate": 0.0006263430161345316, |
| "loss": -0.876, |
| "step": 6440 |
| }, |
| { |
| "epoch": 0.6419028188988132, |
| "grad_norm": 0.09262421727180481, |
| "learning_rate": 0.0006232920989476285, |
| "loss": -0.8416, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.6428980170676486, |
| "grad_norm": 0.046641264110803604, |
| "learning_rate": 0.0006202452626256223, |
| "loss": -0.8784, |
| "step": 6460 |
| }, |
| { |
| "epoch": 0.643893215236484, |
| "grad_norm": 0.12603308260440826, |
| "learning_rate": 0.0006172025401747955, |
| "loss": -0.8633, |
| "step": 6470 |
| }, |
| { |
| "epoch": 0.6448884134053193, |
| "grad_norm": 0.0811128169298172, |
| "learning_rate": 0.0006141639645568645, |
| "loss": -0.858, |
| "step": 6480 |
| }, |
| { |
| "epoch": 0.6458836115741547, |
| "grad_norm": 0.04808896780014038, |
| "learning_rate": 0.0006111295686886248, |
| "loss": -0.8712, |
| "step": 6490 |
| }, |
| { |
| "epoch": 0.6468788097429901, |
| "grad_norm": 0.07724402099847794, |
| "learning_rate": 0.0006080993854415916, |
| "loss": -0.8559, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.6478740079118255, |
| "grad_norm": 0.0562870092689991, |
| "learning_rate": 0.0006050734476416448, |
| "loss": -0.8769, |
| "step": 6510 |
| }, |
| { |
| "epoch": 0.6488692060806608, |
| "grad_norm": 0.04276958853006363, |
| "learning_rate": 0.0006020517880686738, |
| "loss": -0.8588, |
| "step": 6520 |
| }, |
| { |
| "epoch": 0.6498644042494962, |
| "grad_norm": 0.167204350233078, |
| "learning_rate": 0.0005990344394562226, |
| "loss": -0.8472, |
| "step": 6530 |
| }, |
| { |
| "epoch": 0.6508596024183315, |
| "grad_norm": 0.04978903755545616, |
| "learning_rate": 0.0005960214344911334, |
| "loss": -0.8613, |
| "step": 6540 |
| }, |
| { |
| "epoch": 0.6518548005871669, |
| "grad_norm": 0.04822786897420883, |
| "learning_rate": 0.0005930128058131957, |
| "loss": -0.8988, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.6528499987560022, |
| "grad_norm": 0.08410031348466873, |
| "learning_rate": 0.000590008586014789, |
| "loss": -0.8125, |
| "step": 6560 |
| }, |
| { |
| "epoch": 0.6538451969248377, |
| "grad_norm": 0.10245650261640549, |
| "learning_rate": 0.000587008807640533, |
| "loss": -0.8562, |
| "step": 6570 |
| }, |
| { |
| "epoch": 0.654840395093673, |
| "grad_norm": 0.052541933953762054, |
| "learning_rate": 0.0005840135031869322, |
| "loss": -0.8309, |
| "step": 6580 |
| }, |
| { |
| "epoch": 0.6558355932625084, |
| "grad_norm": 0.09064075350761414, |
| "learning_rate": 0.0005810227051020261, |
| "loss": -0.8508, |
| "step": 6590 |
| }, |
| { |
| "epoch": 0.6568307914313438, |
| "grad_norm": 0.023997120559215546, |
| "learning_rate": 0.0005780364457850369, |
| "loss": -0.8659, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.6578259896001791, |
| "grad_norm": 0.049732670187950134, |
| "learning_rate": 0.0005750547575860184, |
| "loss": -0.8599, |
| "step": 6610 |
| }, |
| { |
| "epoch": 0.6588211877690145, |
| "grad_norm": 0.04103129357099533, |
| "learning_rate": 0.000572077672805505, |
| "loss": -0.8497, |
| "step": 6620 |
| }, |
| { |
| "epoch": 0.6598163859378499, |
| "grad_norm": 0.026592710986733437, |
| "learning_rate": 0.0005691052236941639, |
| "loss": -0.8606, |
| "step": 6630 |
| }, |
| { |
| "epoch": 0.6608115841066853, |
| "grad_norm": 0.06547830998897552, |
| "learning_rate": 0.0005661374424524415, |
| "loss": -0.9132, |
| "step": 6640 |
| }, |
| { |
| "epoch": 0.6618067822755206, |
| "grad_norm": 0.03540598228573799, |
| "learning_rate": 0.000563174361230221, |
| "loss": -0.876, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.662801980444356, |
| "grad_norm": 0.037611719220876694, |
| "learning_rate": 0.0005602160121264677, |
| "loss": -0.8552, |
| "step": 6660 |
| }, |
| { |
| "epoch": 0.6637971786131913, |
| "grad_norm": 0.05235538259148598, |
| "learning_rate": 0.0005572624271888844, |
| "loss": -0.8538, |
| "step": 6670 |
| }, |
| { |
| "epoch": 0.6647923767820267, |
| "grad_norm": 0.03196645900607109, |
| "learning_rate": 0.0005543136384135649, |
| "loss": -0.886, |
| "step": 6680 |
| }, |
| { |
| "epoch": 0.665787574950862, |
| "grad_norm": 0.036912400275468826, |
| "learning_rate": 0.000551369677744645, |
| "loss": -0.8372, |
| "step": 6690 |
| }, |
| { |
| "epoch": 0.6667827731196975, |
| "grad_norm": 0.04574640095233917, |
| "learning_rate": 0.0005484305770739589, |
| "loss": -0.8618, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.6677779712885328, |
| "grad_norm": 0.09847969561815262, |
| "learning_rate": 0.0005454963682406921, |
| "loss": -0.8348, |
| "step": 6710 |
| }, |
| { |
| "epoch": 0.6687731694573682, |
| "grad_norm": 0.05522662028670311, |
| "learning_rate": 0.0005425670830310372, |
| "loss": -0.8791, |
| "step": 6720 |
| }, |
| { |
| "epoch": 0.6697683676262036, |
| "grad_norm": 0.03674080967903137, |
| "learning_rate": 0.0005396427531778492, |
| "loss": -0.8899, |
| "step": 6730 |
| }, |
| { |
| "epoch": 0.6707635657950389, |
| "grad_norm": 0.06172318384051323, |
| "learning_rate": 0.0005367234103603009, |
| "loss": -0.9089, |
| "step": 6740 |
| }, |
| { |
| "epoch": 0.6717587639638744, |
| "grad_norm": 0.034352660179138184, |
| "learning_rate": 0.0005338090862035426, |
| "loss": -0.86, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.6727539621327097, |
| "grad_norm": 0.05538021773099899, |
| "learning_rate": 0.0005308998122783561, |
| "loss": -0.8895, |
| "step": 6760 |
| }, |
| { |
| "epoch": 0.6737491603015451, |
| "grad_norm": 0.04365914314985275, |
| "learning_rate": 0.0005279956201008154, |
| "loss": -0.8885, |
| "step": 6770 |
| }, |
| { |
| "epoch": 0.6747443584703804, |
| "grad_norm": 0.04602828249335289, |
| "learning_rate": 0.0005250965411319427, |
| "loss": -0.8615, |
| "step": 6780 |
| }, |
| { |
| "epoch": 0.6757395566392158, |
| "grad_norm": 0.060164544731378555, |
| "learning_rate": 0.0005222026067773705, |
| "loss": -0.8694, |
| "step": 6790 |
| }, |
| { |
| "epoch": 0.6767347548080511, |
| "grad_norm": 0.09196409583091736, |
| "learning_rate": 0.0005193138483869979, |
| "loss": -0.8795, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.6777299529768865, |
| "grad_norm": 0.050159208476543427, |
| "learning_rate": 0.0005164302972546548, |
| "loss": -0.8552, |
| "step": 6810 |
| }, |
| { |
| "epoch": 0.6787251511457219, |
| "grad_norm": 0.04204307124018669, |
| "learning_rate": 0.0005135519846177609, |
| "loss": -0.8477, |
| "step": 6820 |
| }, |
| { |
| "epoch": 0.6797203493145573, |
| "grad_norm": 0.05275953561067581, |
| "learning_rate": 0.0005106789416569857, |
| "loss": -0.8263, |
| "step": 6830 |
| }, |
| { |
| "epoch": 0.6807155474833926, |
| "grad_norm": 0.04620354622602463, |
| "learning_rate": 0.0005078111994959145, |
| "loss": -0.8495, |
| "step": 6840 |
| }, |
| { |
| "epoch": 0.681710745652228, |
| "grad_norm": 0.043242305517196655, |
| "learning_rate": 0.0005049487892007078, |
| "loss": -0.8876, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.6827059438210634, |
| "grad_norm": 0.06443754583597183, |
| "learning_rate": 0.0005020917417797668, |
| "loss": -0.8523, |
| "step": 6860 |
| }, |
| { |
| "epoch": 0.6837011419898987, |
| "grad_norm": 0.054831694811582565, |
| "learning_rate": 0.0004992400881833973, |
| "loss": -0.8757, |
| "step": 6870 |
| }, |
| { |
| "epoch": 0.6846963401587342, |
| "grad_norm": 0.043946340680122375, |
| "learning_rate": 0.0004963938593034726, |
| "loss": -0.8559, |
| "step": 6880 |
| }, |
| { |
| "epoch": 0.6856915383275695, |
| "grad_norm": 0.04879409447312355, |
| "learning_rate": 0.0004935530859731018, |
| "loss": -0.8686, |
| "step": 6890 |
| }, |
| { |
| "epoch": 0.6866867364964049, |
| "grad_norm": 0.04872363060712814, |
| "learning_rate": 0.0004907177989662926, |
| "loss": -0.8543, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.6876819346652402, |
| "grad_norm": 0.06434311717748642, |
| "learning_rate": 0.00048788802899762094, |
| "loss": -0.8264, |
| "step": 6910 |
| }, |
| { |
| "epoch": 0.6886771328340756, |
| "grad_norm": 0.03144066780805588, |
| "learning_rate": 0.00048506380672189663, |
| "loss": -0.8666, |
| "step": 6920 |
| }, |
| { |
| "epoch": 0.6896723310029109, |
| "grad_norm": 0.050940971821546555, |
| "learning_rate": 0.0004822451627338302, |
| "loss": -0.8725, |
| "step": 6930 |
| }, |
| { |
| "epoch": 0.6906675291717463, |
| "grad_norm": 0.037656985223293304, |
| "learning_rate": 0.00047943212756770473, |
| "loss": -0.8629, |
| "step": 6940 |
| }, |
| { |
| "epoch": 0.6916627273405817, |
| "grad_norm": 0.052575841546058655, |
| "learning_rate": 0.0004766247316970411, |
| "loss": -0.8356, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.6926579255094171, |
| "grad_norm": 0.0732717514038086, |
| "learning_rate": 0.0004738230055342714, |
| "loss": -0.8525, |
| "step": 6960 |
| }, |
| { |
| "epoch": 0.6936531236782524, |
| "grad_norm": 0.06910708546638489, |
| "learning_rate": 0.00047102697943040775, |
| "loss": -0.9031, |
| "step": 6970 |
| }, |
| { |
| "epoch": 0.6946483218470878, |
| "grad_norm": 0.04597754031419754, |
| "learning_rate": 0.0004682366836747126, |
| "loss": -0.8482, |
| "step": 6980 |
| }, |
| { |
| "epoch": 0.6956435200159232, |
| "grad_norm": 0.08330798149108887, |
| "learning_rate": 0.00046545214849437347, |
| "loss": -0.8632, |
| "step": 6990 |
| }, |
| { |
| "epoch": 0.6966387181847585, |
| "grad_norm": 0.03966710716485977, |
| "learning_rate": 0.00046267340405417167, |
| "loss": -0.8473, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.697633916353594, |
| "grad_norm": 0.06126915290951729, |
| "learning_rate": 0.00045990048045615973, |
| "loss": -0.9005, |
| "step": 7010 |
| }, |
| { |
| "epoch": 0.6986291145224293, |
| "grad_norm": 0.0399840883910656, |
| "learning_rate": 0.0004571334077393313, |
| "loss": -0.8857, |
| "step": 7020 |
| }, |
| { |
| "epoch": 0.6996243126912647, |
| "grad_norm": 0.047068994492292404, |
| "learning_rate": 0.000454372215879299, |
| "loss": -0.8877, |
| "step": 7030 |
| }, |
| { |
| "epoch": 0.7006195108601, |
| "grad_norm": 0.05347800627350807, |
| "learning_rate": 0.00045161693478796796, |
| "loss": -0.8843, |
| "step": 7040 |
| }, |
| { |
| "epoch": 0.7016147090289354, |
| "grad_norm": 0.0830862745642662, |
| "learning_rate": 0.0004488675943132113, |
| "loss": -0.8706, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.7026099071977707, |
| "grad_norm": 0.04167691618204117, |
| "learning_rate": 0.00044612422423854917, |
| "loss": -0.8649, |
| "step": 7060 |
| }, |
| { |
| "epoch": 0.7036051053666061, |
| "grad_norm": 0.05179230123758316, |
| "learning_rate": 0.0004433868542828224, |
| "loss": -0.8429, |
| "step": 7070 |
| }, |
| { |
| "epoch": 0.7046003035354415, |
| "grad_norm": 0.0687342956662178, |
| "learning_rate": 0.0004406555140998756, |
| "loss": -0.8253, |
| "step": 7080 |
| }, |
| { |
| "epoch": 0.7055955017042769, |
| "grad_norm": 0.04089014232158661, |
| "learning_rate": 0.00043793023327823067, |
| "loss": -0.8639, |
| "step": 7090 |
| }, |
| { |
| "epoch": 0.7065906998731122, |
| "grad_norm": 0.05847838521003723, |
| "learning_rate": 0.00043521104134076904, |
| "loss": -0.8564, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.7075858980419476, |
| "grad_norm": 0.078822560608387, |
| "learning_rate": 0.00043249796774441255, |
| "loss": -0.8841, |
| "step": 7110 |
| }, |
| { |
| "epoch": 0.708581096210783, |
| "grad_norm": 0.037112757563591, |
| "learning_rate": 0.00042979104187980144, |
| "loss": -0.9033, |
| "step": 7120 |
| }, |
| { |
| "epoch": 0.7095762943796183, |
| "grad_norm": 0.03796043619513512, |
| "learning_rate": 0.00042709029307098033, |
| "loss": -0.8329, |
| "step": 7130 |
| }, |
| { |
| "epoch": 0.7105714925484538, |
| "grad_norm": 0.17252549529075623, |
| "learning_rate": 0.0004243957505750754, |
| "loss": -0.889, |
| "step": 7140 |
| }, |
| { |
| "epoch": 0.7115666907172891, |
| "grad_norm": 0.06463641673326492, |
| "learning_rate": 0.00042170744358198186, |
| "loss": -0.9217, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.7125618888861245, |
| "grad_norm": 0.0993466004729271, |
| "learning_rate": 0.00041902540121404474, |
| "loss": -0.8848, |
| "step": 7160 |
| }, |
| { |
| "epoch": 0.7135570870549598, |
| "grad_norm": 0.09770558774471283, |
| "learning_rate": 0.00041634965252574486, |
| "loss": -0.8752, |
| "step": 7170 |
| }, |
| { |
| "epoch": 0.7145522852237952, |
| "grad_norm": 0.03831467777490616, |
| "learning_rate": 0.00041368022650338423, |
| "loss": -0.9107, |
| "step": 7180 |
| }, |
| { |
| "epoch": 0.7155474833926305, |
| "grad_norm": 0.03482762351632118, |
| "learning_rate": 0.0004110171520647713, |
| "loss": -0.8684, |
| "step": 7190 |
| }, |
| { |
| "epoch": 0.7165426815614659, |
| "grad_norm": 0.07047673314809799, |
| "learning_rate": 0.00040836045805890854, |
| "loss": -0.8774, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.7175378797303013, |
| "grad_norm": 0.0411464087665081, |
| "learning_rate": 0.00040571017326567816, |
| "loss": -0.8507, |
| "step": 7210 |
| }, |
| { |
| "epoch": 0.7185330778991367, |
| "grad_norm": 0.04212634265422821, |
| "learning_rate": 0.0004030663263955332, |
| "loss": -0.8251, |
| "step": 7220 |
| }, |
| { |
| "epoch": 0.719528276067972, |
| "grad_norm": 0.10478588193655014, |
| "learning_rate": 0.000400428946089183, |
| "loss": -0.8466, |
| "step": 7230 |
| }, |
| { |
| "epoch": 0.7205234742368074, |
| "grad_norm": 0.10188216716051102, |
| "learning_rate": 0.00039779806091728656, |
| "loss": -0.8648, |
| "step": 7240 |
| }, |
| { |
| "epoch": 0.7215186724056428, |
| "grad_norm": 0.03693991154432297, |
| "learning_rate": 0.00039517369938014057, |
| "loss": -0.8833, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.7225138705744781, |
| "grad_norm": 0.0405866913497448, |
| "learning_rate": 0.000392555889907371, |
| "loss": -0.8494, |
| "step": 7260 |
| }, |
| { |
| "epoch": 0.7235090687433136, |
| "grad_norm": 0.03353391960263252, |
| "learning_rate": 0.00038994466085762636, |
| "loss": -0.8369, |
| "step": 7270 |
| }, |
| { |
| "epoch": 0.7245042669121489, |
| "grad_norm": 0.03976055979728699, |
| "learning_rate": 0.00038734004051826866, |
| "loss": -0.8975, |
| "step": 7280 |
| }, |
| { |
| "epoch": 0.7254994650809843, |
| "grad_norm": 0.05324774980545044, |
| "learning_rate": 0.0003847420571050687, |
| "loss": -0.8547, |
| "step": 7290 |
| }, |
| { |
| "epoch": 0.7264946632498196, |
| "grad_norm": 0.07501663267612457, |
| "learning_rate": 0.0003821507387618999, |
| "loss": -0.8501, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.727489861418655, |
| "grad_norm": 0.04217078164219856, |
| "learning_rate": 0.0003795661135604319, |
| "loss": -0.8804, |
| "step": 7310 |
| }, |
| { |
| "epoch": 0.7284850595874903, |
| "grad_norm": 0.0331064835190773, |
| "learning_rate": 0.00037698820949982946, |
| "loss": -0.8464, |
| "step": 7320 |
| }, |
| { |
| "epoch": 0.7294802577563257, |
| "grad_norm": 0.030969824641942978, |
| "learning_rate": 0.0003744170545064458, |
| "loss": -0.8597, |
| "step": 7330 |
| }, |
| { |
| "epoch": 0.730475455925161, |
| "grad_norm": 0.0441804938018322, |
| "learning_rate": 0.00037185267643352274, |
| "loss": -0.8352, |
| "step": 7340 |
| }, |
| { |
| "epoch": 0.7314706540939965, |
| "grad_norm": 0.032954998314380646, |
| "learning_rate": 0.00036929510306088796, |
| "loss": -0.8148, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.7324658522628318, |
| "grad_norm": 0.04472510516643524, |
| "learning_rate": 0.0003667443620946531, |
| "loss": -0.8487, |
| "step": 7360 |
| }, |
| { |
| "epoch": 0.7334610504316672, |
| "grad_norm": 0.03529642894864082, |
| "learning_rate": 0.00036420048116691584, |
| "loss": -0.8733, |
| "step": 7370 |
| }, |
| { |
| "epoch": 0.7344562486005026, |
| "grad_norm": 0.06249464303255081, |
| "learning_rate": 0.0003616634878354581, |
| "loss": -0.8785, |
| "step": 7380 |
| }, |
| { |
| "epoch": 0.7354514467693379, |
| "grad_norm": 0.03591424226760864, |
| "learning_rate": 0.00035913340958344933, |
| "loss": -0.8787, |
| "step": 7390 |
| }, |
| { |
| "epoch": 0.7364466449381734, |
| "grad_norm": 0.03739127516746521, |
| "learning_rate": 0.00035661027381914833, |
| "loss": -0.8771, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.7374418431070087, |
| "grad_norm": 0.040590737015008926, |
| "learning_rate": 0.00035409410787560537, |
| "loss": -0.8405, |
| "step": 7410 |
| }, |
| { |
| "epoch": 0.7384370412758441, |
| "grad_norm": 0.040956106036901474, |
| "learning_rate": 0.00035158493901036783, |
| "loss": -0.8661, |
| "step": 7420 |
| }, |
| { |
| "epoch": 0.7394322394446794, |
| "grad_norm": 0.06951002776622772, |
| "learning_rate": 0.00034908279440518277, |
| "loss": -0.8241, |
| "step": 7430 |
| }, |
| { |
| "epoch": 0.7404274376135148, |
| "grad_norm": 0.04111276939511299, |
| "learning_rate": 0.0003465877011657048, |
| "loss": -0.8855, |
| "step": 7440 |
| }, |
| { |
| "epoch": 0.7414226357823501, |
| "grad_norm": 0.06843695789575577, |
| "learning_rate": 0.00034409968632120126, |
| "loss": -0.8462, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.7424178339511855, |
| "grad_norm": 0.04960676655173302, |
| "learning_rate": 0.00034161877682425826, |
| "loss": -0.8506, |
| "step": 7460 |
| }, |
| { |
| "epoch": 0.7434130321200209, |
| "grad_norm": 0.06440480053424835, |
| "learning_rate": 0.00033914499955049125, |
| "loss": -0.8441, |
| "step": 7470 |
| }, |
| { |
| "epoch": 0.7444082302888563, |
| "grad_norm": 0.043779365718364716, |
| "learning_rate": 0.0003366783812982516, |
| "loss": -0.8786, |
| "step": 7480 |
| }, |
| { |
| "epoch": 0.7454034284576916, |
| "grad_norm": 0.042465850710868835, |
| "learning_rate": 0.00033421894878833805, |
| "loss": -0.8727, |
| "step": 7490 |
| }, |
| { |
| "epoch": 0.746398626626527, |
| "grad_norm": 0.042998846620321274, |
| "learning_rate": 0.00033176672866370505, |
| "loss": -0.8465, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.7473938247953624, |
| "grad_norm": 0.03316570073366165, |
| "learning_rate": 0.00032932174748917775, |
| "loss": -0.8432, |
| "step": 7510 |
| }, |
| { |
| "epoch": 0.7483890229641977, |
| "grad_norm": 0.05994047224521637, |
| "learning_rate": 0.00032688403175116, |
| "loss": -0.8284, |
| "step": 7520 |
| }, |
| { |
| "epoch": 0.7493842211330332, |
| "grad_norm": 0.049616336822509766, |
| "learning_rate": 0.0003244536078573497, |
| "loss": -0.8264, |
| "step": 7530 |
| }, |
| { |
| "epoch": 0.7503794193018685, |
| "grad_norm": 0.11972280591726303, |
| "learning_rate": 0.00032203050213645357, |
| "loss": -0.8796, |
| "step": 7540 |
| }, |
| { |
| "epoch": 0.7513746174707039, |
| "grad_norm": 0.0751202329993248, |
| "learning_rate": 0.00031961474083789886, |
| "loss": -0.8651, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.7523698156395392, |
| "grad_norm": 0.15553037822246552, |
| "learning_rate": 0.0003172063501315534, |
| "loss": -0.869, |
| "step": 7560 |
| }, |
| { |
| "epoch": 0.7533650138083746, |
| "grad_norm": 0.09399143606424332, |
| "learning_rate": 0.00031480535610743757, |
| "loss": -0.8175, |
| "step": 7570 |
| }, |
| { |
| "epoch": 0.7543602119772099, |
| "grad_norm": 0.045804236084222794, |
| "learning_rate": 0.00031241178477544473, |
| "loss": -0.8935, |
| "step": 7580 |
| }, |
| { |
| "epoch": 0.7553554101460453, |
| "grad_norm": 0.04258604347705841, |
| "learning_rate": 0.0003100256620650581, |
| "loss": -0.8238, |
| "step": 7590 |
| }, |
| { |
| "epoch": 0.7563506083148807, |
| "grad_norm": 0.13974538445472717, |
| "learning_rate": 0.00030764701382506965, |
| "loss": -0.843, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.7573458064837161, |
| "grad_norm": 0.05728553980588913, |
| "learning_rate": 0.00030527586582330247, |
| "loss": -0.8302, |
| "step": 7610 |
| }, |
| { |
| "epoch": 0.7583410046525514, |
| "grad_norm": 0.05504688248038292, |
| "learning_rate": 0.00030291224374632766, |
| "loss": -0.8729, |
| "step": 7620 |
| }, |
| { |
| "epoch": 0.7593362028213868, |
| "grad_norm": 0.05460618436336517, |
| "learning_rate": 0.0003005561731991898, |
| "loss": -0.8746, |
| "step": 7630 |
| }, |
| { |
| "epoch": 0.7603314009902222, |
| "grad_norm": 0.03898203745484352, |
| "learning_rate": 0.00029820767970512686, |
| "loss": -0.8818, |
| "step": 7640 |
| }, |
| { |
| "epoch": 0.7613265991590575, |
| "grad_norm": 0.043011441826820374, |
| "learning_rate": 0.00029586678870529583, |
| "loss": -0.8876, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.762321797327893, |
| "grad_norm": 0.060498643666505814, |
| "learning_rate": 0.000293533525558495, |
| "loss": -0.8379, |
| "step": 7660 |
| }, |
| { |
| "epoch": 0.7633169954967283, |
| "grad_norm": 0.044412821531295776, |
| "learning_rate": 0.00029120791554089134, |
| "loss": -0.8474, |
| "step": 7670 |
| }, |
| { |
| "epoch": 0.7643121936655637, |
| "grad_norm": 0.028071725741028786, |
| "learning_rate": 0.0002888899838457455, |
| "loss": -0.8469, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.765307391834399, |
| "grad_norm": 0.10462887585163116, |
| "learning_rate": 0.00028657975558313867, |
| "loss": -0.8564, |
| "step": 7690 |
| }, |
| { |
| "epoch": 0.7663025900032344, |
| "grad_norm": 0.041337281465530396, |
| "learning_rate": 0.00028427725577970155, |
| "loss": -0.8903, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.7672977881720697, |
| "grad_norm": 0.13814806938171387, |
| "learning_rate": 0.00028198250937834204, |
| "loss": -0.8536, |
| "step": 7710 |
| }, |
| { |
| "epoch": 0.7682929863409051, |
| "grad_norm": 0.07473795861005783, |
| "learning_rate": 0.00027969554123797615, |
| "loss": -0.8976, |
| "step": 7720 |
| }, |
| { |
| "epoch": 0.7692881845097405, |
| "grad_norm": 0.25231143832206726, |
| "learning_rate": 0.00027741637613325866, |
| "loss": -0.8385, |
| "step": 7730 |
| }, |
| { |
| "epoch": 0.7702833826785759, |
| "grad_norm": 0.08813674002885818, |
| "learning_rate": 0.0002751450387543131, |
| "loss": -0.8863, |
| "step": 7740 |
| }, |
| { |
| "epoch": 0.7712785808474112, |
| "grad_norm": 0.1306094378232956, |
| "learning_rate": 0.00027288155370646663, |
| "loss": -0.8489, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.7722737790162466, |
| "grad_norm": 0.04535212367773056, |
| "learning_rate": 0.00027062594550998154, |
| "loss": -0.8451, |
| "step": 7760 |
| }, |
| { |
| "epoch": 0.773268977185082, |
| "grad_norm": 0.043633297085762024, |
| "learning_rate": 0.0002683782385997909, |
| "loss": -0.8781, |
| "step": 7770 |
| }, |
| { |
| "epoch": 0.7742641753539173, |
| "grad_norm": 0.08653497695922852, |
| "learning_rate": 0.0002661384573252338, |
| "loss": -0.874, |
| "step": 7780 |
| }, |
| { |
| "epoch": 0.7752593735227528, |
| "grad_norm": 0.0407242476940155, |
| "learning_rate": 0.0002639066259497899, |
| "loss": -0.8756, |
| "step": 7790 |
| }, |
| { |
| "epoch": 0.7762545716915881, |
| "grad_norm": 0.03243414685130119, |
| "learning_rate": 0.0002616827686508192, |
| "loss": -0.8929, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.7772497698604235, |
| "grad_norm": 0.08098597824573517, |
| "learning_rate": 0.00025946690951929763, |
| "loss": -0.8485, |
| "step": 7810 |
| }, |
| { |
| "epoch": 0.7782449680292588, |
| "grad_norm": 0.0585702583193779, |
| "learning_rate": 0.00025725907255955805, |
| "loss": -0.8777, |
| "step": 7820 |
| }, |
| { |
| "epoch": 0.7792401661980942, |
| "grad_norm": 0.04518997669219971, |
| "learning_rate": 0.0002550592816890295, |
| "loss": -0.8682, |
| "step": 7830 |
| }, |
| { |
| "epoch": 0.7802353643669295, |
| "grad_norm": 0.04417124390602112, |
| "learning_rate": 0.0002528675607379769, |
| "loss": -0.8164, |
| "step": 7840 |
| }, |
| { |
| "epoch": 0.781230562535765, |
| "grad_norm": 0.11929408460855484, |
| "learning_rate": 0.00025068393344924533, |
| "loss": -0.8469, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.7822257607046003, |
| "grad_norm": 0.04880857467651367, |
| "learning_rate": 0.00024850842347800016, |
| "loss": -0.8689, |
| "step": 7860 |
| }, |
| { |
| "epoch": 0.7832209588734357, |
| "grad_norm": 0.04271296039223671, |
| "learning_rate": 0.0002463410543914734, |
| "loss": -0.8697, |
| "step": 7870 |
| }, |
| { |
| "epoch": 0.784216157042271, |
| "grad_norm": 0.2216760665178299, |
| "learning_rate": 0.0002441818496687064, |
| "loss": -0.8428, |
| "step": 7880 |
| }, |
| { |
| "epoch": 0.7852113552111064, |
| "grad_norm": 0.060294851660728455, |
| "learning_rate": 0.0002420308327002958, |
| "loss": -0.8743, |
| "step": 7890 |
| }, |
| { |
| "epoch": 0.7862065533799418, |
| "grad_norm": 0.02434210106730461, |
| "learning_rate": 0.0002398880267881419, |
| "loss": -0.8757, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.7872017515487771, |
| "grad_norm": 0.017280230298638344, |
| "learning_rate": 0.0002377534551451932, |
| "loss": -0.8565, |
| "step": 7910 |
| }, |
| { |
| "epoch": 0.7881969497176126, |
| "grad_norm": 0.056493718177080154, |
| "learning_rate": 0.0002356271408951982, |
| "loss": -0.8494, |
| "step": 7920 |
| }, |
| { |
| "epoch": 0.7891921478864479, |
| "grad_norm": 0.0454886369407177, |
| "learning_rate": 0.00023350910707245175, |
| "loss": -0.8915, |
| "step": 7930 |
| }, |
| { |
| "epoch": 0.7901873460552833, |
| "grad_norm": 0.07155793905258179, |
| "learning_rate": 0.00023139937662154897, |
| "loss": -0.8625, |
| "step": 7940 |
| }, |
| { |
| "epoch": 0.7911825442241186, |
| "grad_norm": 0.03907801955938339, |
| "learning_rate": 0.00022929797239713324, |
| "loss": -0.8586, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.792177742392954, |
| "grad_norm": 0.03151841461658478, |
| "learning_rate": 0.00022720491716365056, |
| "loss": -0.8832, |
| "step": 7960 |
| }, |
| { |
| "epoch": 0.7931729405617893, |
| "grad_norm": 0.06963200867176056, |
| "learning_rate": 0.00022512023359510302, |
| "loss": -0.8816, |
| "step": 7970 |
| }, |
| { |
| "epoch": 0.7941681387306248, |
| "grad_norm": 0.04865657910704613, |
| "learning_rate": 0.0002230439442748019, |
| "loss": -0.8955, |
| "step": 7980 |
| }, |
| { |
| "epoch": 0.7951633368994601, |
| "grad_norm": 0.05831298232078552, |
| "learning_rate": 0.00022097607169512535, |
| "loss": -0.8827, |
| "step": 7990 |
| }, |
| { |
| "epoch": 0.7961585350682955, |
| "grad_norm": 0.049753572791814804, |
| "learning_rate": 0.0002189166382572715, |
| "loss": -0.8189, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.7971537332371308, |
| "grad_norm": 0.09022746980190277, |
| "learning_rate": 0.00021686566627101888, |
| "loss": -0.8772, |
| "step": 8010 |
| }, |
| { |
| "epoch": 0.7981489314059662, |
| "grad_norm": 0.06951210647821426, |
| "learning_rate": 0.00021482317795448248, |
| "loss": -0.8906, |
| "step": 8020 |
| }, |
| { |
| "epoch": 0.7991441295748016, |
| "grad_norm": 0.044376324862241745, |
| "learning_rate": 0.00021278919543387366, |
| "loss": -0.8528, |
| "step": 8030 |
| }, |
| { |
| "epoch": 0.8001393277436369, |
| "grad_norm": 0.04204118996858597, |
| "learning_rate": 0.00021076374074326253, |
| "loss": -0.8485, |
| "step": 8040 |
| }, |
| { |
| "epoch": 0.8011345259124724, |
| "grad_norm": 0.04041268303990364, |
| "learning_rate": 0.00020874683582433563, |
| "loss": -0.898, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.8021297240813077, |
| "grad_norm": 0.04211512207984924, |
| "learning_rate": 0.0002067385025261611, |
| "loss": -0.8741, |
| "step": 8060 |
| }, |
| { |
| "epoch": 0.8031249222501431, |
| "grad_norm": 0.14570793509483337, |
| "learning_rate": 0.0002047387626049504, |
| "loss": -0.8216, |
| "step": 8070 |
| }, |
| { |
| "epoch": 0.8041201204189784, |
| "grad_norm": 0.04487913101911545, |
| "learning_rate": 0.00020274763772382386, |
| "loss": -0.8405, |
| "step": 8080 |
| }, |
| { |
| "epoch": 0.8051153185878138, |
| "grad_norm": 0.03393075242638588, |
| "learning_rate": 0.00020076514945257441, |
| "loss": -0.8828, |
| "step": 8090 |
| }, |
| { |
| "epoch": 0.8061105167566491, |
| "grad_norm": 0.05002270266413689, |
| "learning_rate": 0.00019879131926743576, |
| "loss": -0.885, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.8071057149254846, |
| "grad_norm": 0.03206557407975197, |
| "learning_rate": 0.00019682616855084878, |
| "loss": -0.8404, |
| "step": 8110 |
| }, |
| { |
| "epoch": 0.8081009130943199, |
| "grad_norm": 0.08117040991783142, |
| "learning_rate": 0.00019486971859122916, |
| "loss": -0.8432, |
| "step": 8120 |
| }, |
| { |
| "epoch": 0.8090961112631553, |
| "grad_norm": 0.032579779624938965, |
| "learning_rate": 0.0001929219905827384, |
| "loss": -0.875, |
| "step": 8130 |
| }, |
| { |
| "epoch": 0.8100913094319906, |
| "grad_norm": 0.03493876755237579, |
| "learning_rate": 0.00019098300562505265, |
| "loss": -0.8912, |
| "step": 8140 |
| }, |
| { |
| "epoch": 0.811086507600826, |
| "grad_norm": 0.061572328209877014, |
| "learning_rate": 0.00018905278472313548, |
| "loss": -0.8395, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.8120817057696614, |
| "grad_norm": 0.040702104568481445, |
| "learning_rate": 0.00018713134878700977, |
| "loss": -0.8944, |
| "step": 8160 |
| }, |
| { |
| "epoch": 0.8130769039384967, |
| "grad_norm": 0.03129720687866211, |
| "learning_rate": 0.00018521871863153017, |
| "loss": -0.8673, |
| "step": 8170 |
| }, |
| { |
| "epoch": 0.8140721021073322, |
| "grad_norm": 0.03429539501667023, |
| "learning_rate": 0.00018331491497616004, |
| "loss": -0.8957, |
| "step": 8180 |
| }, |
| { |
| "epoch": 0.8150673002761675, |
| "grad_norm": 0.04819618538022041, |
| "learning_rate": 0.00018141995844474414, |
| "loss": -0.8567, |
| "step": 8190 |
| }, |
| { |
| "epoch": 0.8160624984450029, |
| "grad_norm": 0.07731886953115463, |
| "learning_rate": 0.0001795338695652874, |
| "loss": -0.8782, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.8170576966138382, |
| "grad_norm": 0.11185674369335175, |
| "learning_rate": 0.00017765666876973197, |
| "loss": -0.8439, |
| "step": 8210 |
| }, |
| { |
| "epoch": 0.8180528947826736, |
| "grad_norm": 0.04613180086016655, |
| "learning_rate": 0.0001757883763937348, |
| "loss": -0.8608, |
| "step": 8220 |
| }, |
| { |
| "epoch": 0.8190480929515089, |
| "grad_norm": 0.07438188791275024, |
| "learning_rate": 0.0001739290126764491, |
| "loss": -0.862, |
| "step": 8230 |
| }, |
| { |
| "epoch": 0.8200432911203444, |
| "grad_norm": 0.03218982741236687, |
| "learning_rate": 0.00017207859776030332, |
| "loss": -0.8423, |
| "step": 8240 |
| }, |
| { |
| "epoch": 0.8210384892891797, |
| "grad_norm": 0.043732915073633194, |
| "learning_rate": 0.00017023715169078458, |
| "loss": -0.8335, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.8220336874580151, |
| "grad_norm": 0.10098463296890259, |
| "learning_rate": 0.00016840469441622085, |
| "loss": -0.8614, |
| "step": 8260 |
| }, |
| { |
| "epoch": 0.8230288856268504, |
| "grad_norm": 0.04664154723286629, |
| "learning_rate": 0.00016658124578756373, |
| "loss": -0.856, |
| "step": 8270 |
| }, |
| { |
| "epoch": 0.8240240837956858, |
| "grad_norm": 0.03871015086770058, |
| "learning_rate": 0.00016476682555817567, |
| "loss": -0.8304, |
| "step": 8280 |
| }, |
| { |
| "epoch": 0.8250192819645212, |
| "grad_norm": 0.04428160935640335, |
| "learning_rate": 0.0001629614533836138, |
| "loss": -0.8413, |
| "step": 8290 |
| }, |
| { |
| "epoch": 0.8260144801333565, |
| "grad_norm": 0.04464396834373474, |
| "learning_rate": 0.00016116514882141852, |
| "loss": -0.874, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.827009678302192, |
| "grad_norm": 0.04489503055810928, |
| "learning_rate": 0.00015937793133090117, |
| "loss": -0.8462, |
| "step": 8310 |
| }, |
| { |
| "epoch": 0.8280048764710273, |
| "grad_norm": 0.055837105959653854, |
| "learning_rate": 0.00015759982027293242, |
| "loss": -0.8428, |
| "step": 8320 |
| }, |
| { |
| "epoch": 0.8290000746398627, |
| "grad_norm": 0.031371161341667175, |
| "learning_rate": 0.00015583083490973404, |
| "loss": -0.8567, |
| "step": 8330 |
| }, |
| { |
| "epoch": 0.829995272808698, |
| "grad_norm": 0.04616473242640495, |
| "learning_rate": 0.00015407099440466876, |
| "loss": -0.8605, |
| "step": 8340 |
| }, |
| { |
| "epoch": 0.8309904709775334, |
| "grad_norm": 0.059836626052856445, |
| "learning_rate": 0.0001523203178220338, |
| "loss": -0.8875, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.8319856691463687, |
| "grad_norm": 0.037584539502859116, |
| "learning_rate": 0.00015057882412685387, |
| "loss": -0.861, |
| "step": 8360 |
| }, |
| { |
| "epoch": 0.8329808673152042, |
| "grad_norm": 0.0409194752573967, |
| "learning_rate": 0.0001488465321846757, |
| "loss": -0.8785, |
| "step": 8370 |
| }, |
| { |
| "epoch": 0.8339760654840395, |
| "grad_norm": 0.06696359813213348, |
| "learning_rate": 0.00014712346076136361, |
| "loss": -0.8812, |
| "step": 8380 |
| }, |
| { |
| "epoch": 0.8349712636528749, |
| "grad_norm": 0.07238131761550903, |
| "learning_rate": 0.00014540962852289607, |
| "loss": -0.8724, |
| "step": 8390 |
| }, |
| { |
| "epoch": 0.8359664618217102, |
| "grad_norm": 0.040291257202625275, |
| "learning_rate": 0.00014370505403516444, |
| "loss": -0.8852, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.8369616599905456, |
| "grad_norm": 0.04370676726102829, |
| "learning_rate": 0.00014200975576377019, |
| "loss": -0.8811, |
| "step": 8410 |
| }, |
| { |
| "epoch": 0.837956858159381, |
| "grad_norm": 0.05080821365118027, |
| "learning_rate": 0.0001403237520738273, |
| "loss": -0.8789, |
| "step": 8420 |
| }, |
| { |
| "epoch": 0.8389520563282163, |
| "grad_norm": 0.02113007754087448, |
| "learning_rate": 0.00013864706122976024, |
| "loss": -0.8439, |
| "step": 8430 |
| }, |
| { |
| "epoch": 0.8399472544970518, |
| "grad_norm": 0.0272366963326931, |
| "learning_rate": 0.00013697970139510895, |
| "loss": -0.8755, |
| "step": 8440 |
| }, |
| { |
| "epoch": 0.8409424526658871, |
| "grad_norm": 0.0732220858335495, |
| "learning_rate": 0.00013532169063233, |
| "loss": -0.8723, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.8419376508347225, |
| "grad_norm": 0.03853273764252663, |
| "learning_rate": 0.00013367304690260163, |
| "loss": -0.8629, |
| "step": 8460 |
| }, |
| { |
| "epoch": 0.8429328490035578, |
| "grad_norm": 0.06368320435285568, |
| "learning_rate": 0.0001320337880656307, |
| "loss": -0.8632, |
| "step": 8470 |
| }, |
| { |
| "epoch": 0.8439280471723932, |
| "grad_norm": 0.052474457770586014, |
| "learning_rate": 0.00013040393187945621, |
| "loss": -0.8442, |
| "step": 8480 |
| }, |
| { |
| "epoch": 0.8449232453412285, |
| "grad_norm": 0.06055787578225136, |
| "learning_rate": 0.00012878349600025952, |
| "loss": -0.8541, |
| "step": 8490 |
| }, |
| { |
| "epoch": 0.845918443510064, |
| "grad_norm": 0.040329884737730026, |
| "learning_rate": 0.00012717249798217134, |
| "loss": -0.8903, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.8469136416788993, |
| "grad_norm": 0.04345664381980896, |
| "learning_rate": 0.00012557095527708306, |
| "loss": -0.8855, |
| "step": 8510 |
| }, |
| { |
| "epoch": 0.8479088398477347, |
| "grad_norm": 0.035441722720861435, |
| "learning_rate": 0.00012397888523445688, |
| "loss": -0.8772, |
| "step": 8520 |
| }, |
| { |
| "epoch": 0.84890403801657, |
| "grad_norm": 0.04323457553982735, |
| "learning_rate": 0.00012239630510113732, |
| "loss": -0.8932, |
| "step": 8530 |
| }, |
| { |
| "epoch": 0.8498992361854054, |
| "grad_norm": 0.06097767502069473, |
| "learning_rate": 0.00012082323202116563, |
| "loss": -0.8563, |
| "step": 8540 |
| }, |
| { |
| "epoch": 0.8508944343542408, |
| "grad_norm": 0.07687395066022873, |
| "learning_rate": 0.0001192596830355931, |
| "loss": -0.85, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.8518896325230761, |
| "grad_norm": 0.04906463623046875, |
| "learning_rate": 0.00011770567508229712, |
| "loss": -0.8315, |
| "step": 8560 |
| }, |
| { |
| "epoch": 0.8528848306919116, |
| "grad_norm": 0.04658054560422897, |
| "learning_rate": 0.00011616122499579684, |
| "loss": -0.8413, |
| "step": 8570 |
| }, |
| { |
| "epoch": 0.8538800288607469, |
| "grad_norm": 0.08086878061294556, |
| "learning_rate": 0.00011462634950707185, |
| "loss": -0.8537, |
| "step": 8580 |
| }, |
| { |
| "epoch": 0.8548752270295823, |
| "grad_norm": 0.05222253501415253, |
| "learning_rate": 0.00011310106524338071, |
| "loss": -0.8909, |
| "step": 8590 |
| }, |
| { |
| "epoch": 0.8558704251984176, |
| "grad_norm": 0.08359741419553757, |
| "learning_rate": 0.00011158538872807933, |
| "loss": -0.8564, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.856865623367253, |
| "grad_norm": 0.10812195390462875, |
| "learning_rate": 0.00011007933638044454, |
| "loss": -0.8798, |
| "step": 8610 |
| }, |
| { |
| "epoch": 0.8578608215360883, |
| "grad_norm": 0.05695914104580879, |
| "learning_rate": 0.0001085829245154939, |
| "loss": -0.8917, |
| "step": 8620 |
| }, |
| { |
| "epoch": 0.8588560197049238, |
| "grad_norm": 0.08079014718532562, |
| "learning_rate": 0.00010709616934381038, |
| "loss": -0.8678, |
| "step": 8630 |
| }, |
| { |
| "epoch": 0.8598512178737591, |
| "grad_norm": 0.04299706593155861, |
| "learning_rate": 0.00010561908697136657, |
| "loss": -0.8425, |
| "step": 8640 |
| }, |
| { |
| "epoch": 0.8608464160425945, |
| "grad_norm": 0.04730239138007164, |
| "learning_rate": 0.00010415169339934894, |
| "loss": -0.8632, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.8618416142114298, |
| "grad_norm": 0.08874726295471191, |
| "learning_rate": 0.00010269400452398659, |
| "loss": -0.8682, |
| "step": 8660 |
| }, |
| { |
| "epoch": 0.8628368123802652, |
| "grad_norm": 0.038999781012535095, |
| "learning_rate": 0.00010124603613637707, |
| "loss": -0.8594, |
| "step": 8670 |
| }, |
| { |
| "epoch": 0.8638320105491006, |
| "grad_norm": 0.04859554022550583, |
| "learning_rate": 9.980780392231692e-05, |
| "loss": -0.8741, |
| "step": 8680 |
| }, |
| { |
| "epoch": 0.864827208717936, |
| "grad_norm": 0.04477314278483391, |
| "learning_rate": 9.837932346213063e-05, |
| "loss": -0.8545, |
| "step": 8690 |
| }, |
| { |
| "epoch": 0.8658224068867714, |
| "grad_norm": 0.033657848834991455, |
| "learning_rate": 9.696061023050207e-05, |
| "loss": -0.8219, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.8668176050556067, |
| "grad_norm": 0.07091525197029114, |
| "learning_rate": 9.555167959630762e-05, |
| "loss": -0.8416, |
| "step": 8710 |
| }, |
| { |
| "epoch": 0.8678128032244421, |
| "grad_norm": 0.04914763569831848, |
| "learning_rate": 9.415254682244834e-05, |
| "loss": -0.8523, |
| "step": 8720 |
| }, |
| { |
| "epoch": 0.8688080013932774, |
| "grad_norm": 0.08185221999883652, |
| "learning_rate": 9.276322706568596e-05, |
| "loss": -0.883, |
| "step": 8730 |
| }, |
| { |
| "epoch": 0.8698031995621128, |
| "grad_norm": 0.07200242578983307, |
| "learning_rate": 9.138373537647804e-05, |
| "loss": -0.8945, |
| "step": 8740 |
| }, |
| { |
| "epoch": 0.8707983977309481, |
| "grad_norm": 0.1187279000878334, |
| "learning_rate": 9.00140866988145e-05, |
| "loss": -0.8538, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.8717935958997836, |
| "grad_norm": 0.10631956160068512, |
| "learning_rate": 8.865429587005702e-05, |
| "loss": -0.8618, |
| "step": 8760 |
| }, |
| { |
| "epoch": 0.8727887940686189, |
| "grad_norm": 0.05690061300992966, |
| "learning_rate": 8.730437762077658e-05, |
| "loss": -0.8398, |
| "step": 8770 |
| }, |
| { |
| "epoch": 0.8737839922374543, |
| "grad_norm": 0.14262869954109192, |
| "learning_rate": 8.596434657459562e-05, |
| "loss": -0.8841, |
| "step": 8780 |
| }, |
| { |
| "epoch": 0.8747791904062896, |
| "grad_norm": 0.06665827333927155, |
| "learning_rate": 8.463421724802845e-05, |
| "loss": -0.8232, |
| "step": 8790 |
| }, |
| { |
| "epoch": 0.875774388575125, |
| "grad_norm": 0.039045125246047974, |
| "learning_rate": 8.331400405032452e-05, |
| "loss": -0.9178, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.8767695867439604, |
| "grad_norm": 0.05045272409915924, |
| "learning_rate": 8.200372128331202e-05, |
| "loss": -0.8279, |
| "step": 8810 |
| }, |
| { |
| "epoch": 0.8777647849127957, |
| "grad_norm": 0.02887490577995777, |
| "learning_rate": 8.070338314124282e-05, |
| "loss": -0.8917, |
| "step": 8820 |
| }, |
| { |
| "epoch": 0.8787599830816312, |
| "grad_norm": 0.20094476640224457, |
| "learning_rate": 7.941300371063954e-05, |
| "loss": -0.8626, |
| "step": 8830 |
| }, |
| { |
| "epoch": 0.8797551812504665, |
| "grad_norm": 0.05032164603471756, |
| "learning_rate": 7.813259697014219e-05, |
| "loss": -0.8431, |
| "step": 8840 |
| }, |
| { |
| "epoch": 0.8807503794193019, |
| "grad_norm": 0.04273492842912674, |
| "learning_rate": 7.686217679035712e-05, |
| "loss": -0.8707, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.8817455775881372, |
| "grad_norm": 0.03904595598578453, |
| "learning_rate": 7.560175693370575e-05, |
| "loss": -0.8689, |
| "step": 8860 |
| }, |
| { |
| "epoch": 0.8827407757569726, |
| "grad_norm": 0.03059651516377926, |
| "learning_rate": 7.43513510542776e-05, |
| "loss": -0.868, |
| "step": 8870 |
| }, |
| { |
| "epoch": 0.8837359739258079, |
| "grad_norm": 0.04011029377579689, |
| "learning_rate": 7.311097269767997e-05, |
| "loss": -0.8794, |
| "step": 8880 |
| }, |
| { |
| "epoch": 0.8847311720946434, |
| "grad_norm": 0.06519157439470291, |
| "learning_rate": 7.188063530089262e-05, |
| "loss": -0.8933, |
| "step": 8890 |
| }, |
| { |
| "epoch": 0.8857263702634787, |
| "grad_norm": 0.0295392032712698, |
| "learning_rate": 7.066035219212264e-05, |
| "loss": -0.8859, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.8867215684323141, |
| "grad_norm": 0.05787239223718643, |
| "learning_rate": 6.945013659065813e-05, |
| "loss": -0.8734, |
| "step": 8910 |
| }, |
| { |
| "epoch": 0.8877167666011494, |
| "grad_norm": 0.026180433109402657, |
| "learning_rate": 6.825000160672734e-05, |
| "loss": -0.8962, |
| "step": 8920 |
| }, |
| { |
| "epoch": 0.8887119647699848, |
| "grad_norm": 0.045901086181402206, |
| "learning_rate": 6.705996024135453e-05, |
| "loss": -0.8262, |
| "step": 8930 |
| }, |
| { |
| "epoch": 0.8897071629388202, |
| "grad_norm": 0.11189127713441849, |
| "learning_rate": 6.588002538622062e-05, |
| "loss": -0.8305, |
| "step": 8940 |
| }, |
| { |
| "epoch": 0.8907023611076555, |
| "grad_norm": 0.03631236031651497, |
| "learning_rate": 6.471020982352338e-05, |
| "loss": -0.8792, |
| "step": 8950 |
| }, |
| { |
| "epoch": 0.891697559276491, |
| "grad_norm": 0.017476355656981468, |
| "learning_rate": 6.355052622583756e-05, |
| "loss": -0.8748, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.8926927574453263, |
| "grad_norm": 0.10378779470920563, |
| "learning_rate": 6.240098715597975e-05, |
| "loss": -0.8606, |
| "step": 8970 |
| }, |
| { |
| "epoch": 0.8936879556141617, |
| "grad_norm": 0.029117906466126442, |
| "learning_rate": 6.12616050668704e-05, |
| "loss": -0.8741, |
| "step": 8980 |
| }, |
| { |
| "epoch": 0.894683153782997, |
| "grad_norm": 0.026523800566792488, |
| "learning_rate": 6.0132392301400105e-05, |
| "loss": -0.838, |
| "step": 8990 |
| }, |
| { |
| "epoch": 0.8956783519518324, |
| "grad_norm": 0.06945938616991043, |
| "learning_rate": 5.901336109229538e-05, |
| "loss": -0.8427, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.8966735501206677, |
| "grad_norm": 0.06329178810119629, |
| "learning_rate": 5.790452356198628e-05, |
| "loss": -0.8554, |
| "step": 9010 |
| }, |
| { |
| "epoch": 0.8976687482895032, |
| "grad_norm": 0.0641210675239563, |
| "learning_rate": 5.680589172247519e-05, |
| "loss": -0.8831, |
| "step": 9020 |
| }, |
| { |
| "epoch": 0.8986639464583385, |
| "grad_norm": 0.09026167541742325, |
| "learning_rate": 5.571747747520617e-05, |
| "loss": -0.873, |
| "step": 9030 |
| }, |
| { |
| "epoch": 0.8996591446271739, |
| "grad_norm": 0.06779684126377106, |
| "learning_rate": 5.463929261093692e-05, |
| "loss": -0.855, |
| "step": 9040 |
| }, |
| { |
| "epoch": 0.9006543427960093, |
| "grad_norm": 0.09357842057943344, |
| "learning_rate": 5.357134880961012e-05, |
| "loss": -0.8481, |
| "step": 9050 |
| }, |
| { |
| "epoch": 0.9016495409648446, |
| "grad_norm": 0.051097650080919266, |
| "learning_rate": 5.251365764022753e-05, |
| "loss": -0.8628, |
| "step": 9060 |
| }, |
| { |
| "epoch": 0.90264473913368, |
| "grad_norm": 0.0375586561858654, |
| "learning_rate": 5.1466230560724746e-05, |
| "loss": -0.8639, |
| "step": 9070 |
| }, |
| { |
| "epoch": 0.9036399373025154, |
| "grad_norm": 0.03898193687200546, |
| "learning_rate": 5.0429078917846204e-05, |
| "loss": -0.872, |
| "step": 9080 |
| }, |
| { |
| "epoch": 0.9046351354713508, |
| "grad_norm": 0.03768244758248329, |
| "learning_rate": 4.940221394702349e-05, |
| "loss": -0.8711, |
| "step": 9090 |
| }, |
| { |
| "epoch": 0.9056303336401861, |
| "grad_norm": 0.03134647756814957, |
| "learning_rate": 4.8385646772252324e-05, |
| "loss": -0.8317, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.9066255318090215, |
| "grad_norm": 0.04638220742344856, |
| "learning_rate": 4.7379388405973225e-05, |
| "loss": -0.8638, |
| "step": 9110 |
| }, |
| { |
| "epoch": 0.9076207299778568, |
| "grad_norm": 0.0377880223095417, |
| "learning_rate": 4.6383449748951703e-05, |
| "loss": -0.8896, |
| "step": 9120 |
| }, |
| { |
| "epoch": 0.9086159281466922, |
| "grad_norm": 0.03936488553881645, |
| "learning_rate": 4.539784159015992e-05, |
| "loss": -0.8538, |
| "step": 9130 |
| }, |
| { |
| "epoch": 0.9096111263155275, |
| "grad_norm": 0.06327816098928452, |
| "learning_rate": 4.4422574606660216e-05, |
| "loss": -0.8563, |
| "step": 9140 |
| }, |
| { |
| "epoch": 0.910606324484363, |
| "grad_norm": 0.08217553049325943, |
| "learning_rate": 4.3457659363489224e-05, |
| "loss": -0.8979, |
| "step": 9150 |
| }, |
| { |
| "epoch": 0.9116015226531983, |
| "grad_norm": 0.06306605041027069, |
| "learning_rate": 4.2503106313543705e-05, |
| "loss": -0.8936, |
| "step": 9160 |
| }, |
| { |
| "epoch": 0.9125967208220337, |
| "grad_norm": 0.036004096269607544, |
| "learning_rate": 4.15589257974669e-05, |
| "loss": -0.8948, |
| "step": 9170 |
| }, |
| { |
| "epoch": 0.9135919189908691, |
| "grad_norm": 0.06238359585404396, |
| "learning_rate": 4.062512804353669e-05, |
| "loss": -0.8376, |
| "step": 9180 |
| }, |
| { |
| "epoch": 0.9145871171597044, |
| "grad_norm": 0.05194539204239845, |
| "learning_rate": 3.9701723167555046e-05, |
| "loss": -0.8677, |
| "step": 9190 |
| }, |
| { |
| "epoch": 0.9155823153285398, |
| "grad_norm": 0.08999146521091461, |
| "learning_rate": 3.87887211727379e-05, |
| "loss": -0.8978, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.9165775134973752, |
| "grad_norm": 0.05085189267992973, |
| "learning_rate": 3.788613194960733e-05, |
| "loss": -0.8942, |
| "step": 9210 |
| }, |
| { |
| "epoch": 0.9175727116662106, |
| "grad_norm": 0.11529362946748734, |
| "learning_rate": 3.699396527588428e-05, |
| "loss": -0.8911, |
| "step": 9220 |
| }, |
| { |
| "epoch": 0.9185679098350459, |
| "grad_norm": 0.09689343720674515, |
| "learning_rate": 3.6112230816382374e-05, |
| "loss": -0.8627, |
| "step": 9230 |
| }, |
| { |
| "epoch": 0.9195631080038813, |
| "grad_norm": 0.05098208412528038, |
| "learning_rate": 3.52409381229033e-05, |
| "loss": -0.8221, |
| "step": 9240 |
| }, |
| { |
| "epoch": 0.9205583061727166, |
| "grad_norm": 0.030068768188357353, |
| "learning_rate": 3.4380096634133326e-05, |
| "loss": -0.8836, |
| "step": 9250 |
| }, |
| { |
| "epoch": 0.921553504341552, |
| "grad_norm": 0.036479201167821884, |
| "learning_rate": 3.352971567554175e-05, |
| "loss": -0.8772, |
| "step": 9260 |
| }, |
| { |
| "epoch": 0.9225487025103873, |
| "grad_norm": 0.06124093383550644, |
| "learning_rate": 3.2689804459278494e-05, |
| "loss": -0.8289, |
| "step": 9270 |
| }, |
| { |
| "epoch": 0.9235439006792228, |
| "grad_norm": 0.04751259461045265, |
| "learning_rate": 3.186037208407588e-05, |
| "loss": -0.8631, |
| "step": 9280 |
| }, |
| { |
| "epoch": 0.9245390988480581, |
| "grad_norm": 0.22392483055591583, |
| "learning_rate": 3.1041427535148495e-05, |
| "loss": -0.8707, |
| "step": 9290 |
| }, |
| { |
| "epoch": 0.9255342970168935, |
| "grad_norm": 0.0312654934823513, |
| "learning_rate": 3.0232979684097218e-05, |
| "loss": -0.8499, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.9265294951857289, |
| "grad_norm": 0.04291137680411339, |
| "learning_rate": 2.943503728881225e-05, |
| "loss": -0.8521, |
| "step": 9310 |
| }, |
| { |
| "epoch": 0.9275246933545642, |
| "grad_norm": 0.03710932657122612, |
| "learning_rate": 2.8647608993378372e-05, |
| "loss": -0.8544, |
| "step": 9320 |
| }, |
| { |
| "epoch": 0.9285198915233996, |
| "grad_norm": 0.039480455219745636, |
| "learning_rate": 2.7870703327981917e-05, |
| "loss": -0.831, |
| "step": 9330 |
| }, |
| { |
| "epoch": 0.929515089692235, |
| "grad_norm": 0.03444267436861992, |
| "learning_rate": 2.7104328708817517e-05, |
| "loss": -0.8909, |
| "step": 9340 |
| }, |
| { |
| "epoch": 0.9305102878610704, |
| "grad_norm": 0.038241058588027954, |
| "learning_rate": 2.63484934379975e-05, |
| "loss": -0.8829, |
| "step": 9350 |
| }, |
| { |
| "epoch": 0.9315054860299057, |
| "grad_norm": 0.031606342643499374, |
| "learning_rate": 2.560320570346164e-05, |
| "loss": -0.9075, |
| "step": 9360 |
| }, |
| { |
| "epoch": 0.9325006841987411, |
| "grad_norm": 0.04962693154811859, |
| "learning_rate": 2.486847357888844e-05, |
| "loss": -0.8927, |
| "step": 9370 |
| }, |
| { |
| "epoch": 0.9334958823675764, |
| "grad_norm": 0.04051986709237099, |
| "learning_rate": 2.4144305023608427e-05, |
| "loss": -0.8797, |
| "step": 9380 |
| }, |
| { |
| "epoch": 0.9344910805364118, |
| "grad_norm": 0.04305783286690712, |
| "learning_rate": 2.3430707882516555e-05, |
| "loss": -0.8594, |
| "step": 9390 |
| }, |
| { |
| "epoch": 0.9354862787052471, |
| "grad_norm": 0.12860926985740662, |
| "learning_rate": 2.2727689885988388e-05, |
| "loss": -0.8479, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.9364814768740826, |
| "grad_norm": 0.11391417682170868, |
| "learning_rate": 2.203525864979583e-05, |
| "loss": -0.8835, |
| "step": 9410 |
| }, |
| { |
| "epoch": 0.9374766750429179, |
| "grad_norm": 0.04358312115073204, |
| "learning_rate": 2.1353421675024854e-05, |
| "loss": -0.8827, |
| "step": 9420 |
| }, |
| { |
| "epoch": 0.9384718732117533, |
| "grad_norm": 0.0354497916996479, |
| "learning_rate": 2.0682186347994127e-05, |
| "loss": -0.8626, |
| "step": 9430 |
| }, |
| { |
| "epoch": 0.9394670713805887, |
| "grad_norm": 0.10072290152311325, |
| "learning_rate": 2.002155994017474e-05, |
| "loss": -0.8557, |
| "step": 9440 |
| }, |
| { |
| "epoch": 0.940462269549424, |
| "grad_norm": 0.10969705134630203, |
| "learning_rate": 1.9371549608112048e-05, |
| "loss": -0.8552, |
| "step": 9450 |
| }, |
| { |
| "epoch": 0.9414574677182594, |
| "grad_norm": 0.04841183125972748, |
| "learning_rate": 1.8732162393347518e-05, |
| "loss": -0.8737, |
| "step": 9460 |
| }, |
| { |
| "epoch": 0.9424526658870948, |
| "grad_norm": 0.14277507364749908, |
| "learning_rate": 1.8103405222342883e-05, |
| "loss": -0.8753, |
| "step": 9470 |
| }, |
| { |
| "epoch": 0.9434478640559302, |
| "grad_norm": 0.03108733333647251, |
| "learning_rate": 1.7485284906404776e-05, |
| "loss": -0.8666, |
| "step": 9480 |
| }, |
| { |
| "epoch": 0.9444430622247655, |
| "grad_norm": 0.07302725315093994, |
| "learning_rate": 1.687780814161144e-05, |
| "loss": -0.8559, |
| "step": 9490 |
| }, |
| { |
| "epoch": 0.9454382603936009, |
| "grad_norm": 0.03906615450978279, |
| "learning_rate": 1.6280981508739467e-05, |
| "loss": -0.9099, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.9464334585624362, |
| "grad_norm": 0.0752185583114624, |
| "learning_rate": 1.569481147319318e-05, |
| "loss": -0.8274, |
| "step": 9510 |
| }, |
| { |
| "epoch": 0.9474286567312716, |
| "grad_norm": 0.045788075774908066, |
| "learning_rate": 1.5119304384934252e-05, |
| "loss": -0.8736, |
| "step": 9520 |
| }, |
| { |
| "epoch": 0.9484238549001069, |
| "grad_norm": 0.027977997437119484, |
| "learning_rate": 1.4554466478412743e-05, |
| "loss": -0.892, |
| "step": 9530 |
| }, |
| { |
| "epoch": 0.9494190530689424, |
| "grad_norm": 0.027789343148469925, |
| "learning_rate": 1.4000303872500286e-05, |
| "loss": -0.8402, |
| "step": 9540 |
| }, |
| { |
| "epoch": 0.9504142512377777, |
| "grad_norm": 0.053874798119068146, |
| "learning_rate": 1.34568225704228e-05, |
| "loss": -0.893, |
| "step": 9550 |
| }, |
| { |
| "epoch": 0.9514094494066131, |
| "grad_norm": 0.06773625314235687, |
| "learning_rate": 1.2924028459696314e-05, |
| "loss": -0.8907, |
| "step": 9560 |
| }, |
| { |
| "epoch": 0.9524046475754485, |
| "grad_norm": 0.03913061320781708, |
| "learning_rate": 1.240192731206291e-05, |
| "loss": -0.8149, |
| "step": 9570 |
| }, |
| { |
| "epoch": 0.9533998457442838, |
| "grad_norm": 0.04107360541820526, |
| "learning_rate": 1.1890524783427559e-05, |
| "loss": -0.8805, |
| "step": 9580 |
| }, |
| { |
| "epoch": 0.9543950439131192, |
| "grad_norm": 0.03134811297059059, |
| "learning_rate": 1.1389826413798265e-05, |
| "loss": -0.8906, |
| "step": 9590 |
| }, |
| { |
| "epoch": 0.9553902420819546, |
| "grad_norm": 0.06669458001852036, |
| "learning_rate": 1.0899837627224685e-05, |
| "loss": -0.8781, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.95638544025079, |
| "grad_norm": 0.07125691324472427, |
| "learning_rate": 1.0420563731739829e-05, |
| "loss": -0.8394, |
| "step": 9610 |
| }, |
| { |
| "epoch": 0.9573806384196253, |
| "grad_norm": 0.04976421222090721, |
| "learning_rate": 9.952009919302896e-06, |
| "loss": -0.8506, |
| "step": 9620 |
| }, |
| { |
| "epoch": 0.9583758365884607, |
| "grad_norm": 0.04743211343884468, |
| "learning_rate": 9.494181265742641e-06, |
| "loss": -0.8757, |
| "step": 9630 |
| }, |
| { |
| "epoch": 0.959371034757296, |
| "grad_norm": 0.037107232958078384, |
| "learning_rate": 9.04708273070265e-06, |
| "loss": -0.8332, |
| "step": 9640 |
| }, |
| { |
| "epoch": 0.9603662329261314, |
| "grad_norm": 0.03232187032699585, |
| "learning_rate": 8.610719157587155e-06, |
| "loss": -0.8973, |
| "step": 9650 |
| }, |
| { |
| "epoch": 0.9613614310949667, |
| "grad_norm": 0.03275300934910774, |
| "learning_rate": 8.185095273509412e-06, |
| "loss": -0.8738, |
| "step": 9660 |
| }, |
| { |
| "epoch": 0.9623566292638022, |
| "grad_norm": 0.045168716460466385, |
| "learning_rate": 7.770215689239301e-06, |
| "loss": -0.8946, |
| "step": 9670 |
| }, |
| { |
| "epoch": 0.9633518274326375, |
| "grad_norm": 0.03318966180086136, |
| "learning_rate": 7.366084899154357e-06, |
| "loss": -0.8539, |
| "step": 9680 |
| }, |
| { |
| "epoch": 0.9643470256014729, |
| "grad_norm": 0.049187105149030685, |
| "learning_rate": 6.972707281191037e-06, |
| "loss": -0.8727, |
| "step": 9690 |
| }, |
| { |
| "epoch": 0.9653422237703083, |
| "grad_norm": 0.10269180685281754, |
| "learning_rate": 6.5900870967965375e-06, |
| "loss": -0.7973, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.9663374219391436, |
| "grad_norm": 0.11406390368938446, |
| "learning_rate": 6.218228490883493e-06, |
| "loss": -0.8991, |
| "step": 9710 |
| }, |
| { |
| "epoch": 0.967332620107979, |
| "grad_norm": 0.03116353042423725, |
| "learning_rate": 5.8571354917844596e-06, |
| "loss": -0.8567, |
| "step": 9720 |
| }, |
| { |
| "epoch": 0.9683278182768144, |
| "grad_norm": 0.04186422377824783, |
| "learning_rate": 5.5068120112086174e-06, |
| "loss": -0.8416, |
| "step": 9730 |
| }, |
| { |
| "epoch": 0.9693230164456498, |
| "grad_norm": 0.05916735902428627, |
| "learning_rate": 5.167261844199134e-06, |
| "loss": -0.8515, |
| "step": 9740 |
| }, |
| { |
| "epoch": 0.9703182146144851, |
| "grad_norm": 0.05601034685969353, |
| "learning_rate": 4.838488669092533e-06, |
| "loss": -0.8607, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.9713134127833205, |
| "grad_norm": 0.05893021821975708, |
| "learning_rate": 4.520496047478284e-06, |
| "loss": -0.8704, |
| "step": 9760 |
| }, |
| { |
| "epoch": 0.9723086109521558, |
| "grad_norm": 0.16313856840133667, |
| "learning_rate": 4.213287424160272e-06, |
| "loss": -0.8473, |
| "step": 9770 |
| }, |
| { |
| "epoch": 0.9733038091209912, |
| "grad_norm": 0.03145838901400566, |
| "learning_rate": 3.916866127120278e-06, |
| "loss": -0.8733, |
| "step": 9780 |
| }, |
| { |
| "epoch": 0.9742990072898265, |
| "grad_norm": 0.05181713029742241, |
| "learning_rate": 3.6312353674805567e-06, |
| "loss": -0.868, |
| "step": 9790 |
| }, |
| { |
| "epoch": 0.975294205458662, |
| "grad_norm": 0.0550004206597805, |
| "learning_rate": 3.3563982394704262e-06, |
| "loss": -0.8923, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.9762894036274973, |
| "grad_norm": 0.13421136140823364, |
| "learning_rate": 3.0923577203918474e-06, |
| "loss": -0.8603, |
| "step": 9810 |
| }, |
| { |
| "epoch": 0.9772846017963327, |
| "grad_norm": 0.04877639561891556, |
| "learning_rate": 2.8391166705874493e-06, |
| "loss": -0.8536, |
| "step": 9820 |
| }, |
| { |
| "epoch": 0.9782797999651681, |
| "grad_norm": 0.055065833032131195, |
| "learning_rate": 2.5966778334096662e-06, |
| "loss": -0.8699, |
| "step": 9830 |
| }, |
| { |
| "epoch": 0.9792749981340034, |
| "grad_norm": 0.04793205112218857, |
| "learning_rate": 2.36504383519065e-06, |
| "loss": -0.8556, |
| "step": 9840 |
| }, |
| { |
| "epoch": 0.9802701963028388, |
| "grad_norm": 0.03042910061776638, |
| "learning_rate": 2.1442171852144032e-06, |
| "loss": -0.856, |
| "step": 9850 |
| }, |
| { |
| "epoch": 0.9812653944716742, |
| "grad_norm": 0.08802367746829987, |
| "learning_rate": 1.9342002756891353e-06, |
| "loss": -0.8797, |
| "step": 9860 |
| }, |
| { |
| "epoch": 0.9822605926405096, |
| "grad_norm": 0.05212515965104103, |
| "learning_rate": 1.7349953817213938e-06, |
| "loss": -0.8264, |
| "step": 9870 |
| }, |
| { |
| "epoch": 0.9832557908093449, |
| "grad_norm": 0.1572272628545761, |
| "learning_rate": 1.5466046612915286e-06, |
| "loss": -0.894, |
| "step": 9880 |
| }, |
| { |
| "epoch": 0.9842509889781803, |
| "grad_norm": 0.05914374813437462, |
| "learning_rate": 1.3690301552303775e-06, |
| "loss": -0.8948, |
| "step": 9890 |
| }, |
| { |
| "epoch": 0.9852461871470156, |
| "grad_norm": 0.04328109323978424, |
| "learning_rate": 1.2022737871969502e-06, |
| "loss": -0.868, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.986241385315851, |
| "grad_norm": 0.037384625524282455, |
| "learning_rate": 1.0463373636578898e-06, |
| "loss": -0.874, |
| "step": 9910 |
| }, |
| { |
| "epoch": 0.9872365834846863, |
| "grad_norm": 0.038554031401872635, |
| "learning_rate": 9.012225738673774e-07, |
| "loss": -0.8484, |
| "step": 9920 |
| }, |
| { |
| "epoch": 0.9882317816535218, |
| "grad_norm": 0.092408187687397, |
| "learning_rate": 7.669309898495902e-07, |
| "loss": -0.864, |
| "step": 9930 |
| }, |
| { |
| "epoch": 0.9892269798223571, |
| "grad_norm": 0.05051958188414574, |
| "learning_rate": 6.434640663808278e-07, |
| "loss": -0.8263, |
| "step": 9940 |
| }, |
| { |
| "epoch": 0.9902221779911925, |
| "grad_norm": 0.03660441190004349, |
| "learning_rate": 5.308231409746345e-07, |
| "loss": -0.848, |
| "step": 9950 |
| }, |
| { |
| "epoch": 0.9912173761600279, |
| "grad_norm": 0.03256652131676674, |
| "learning_rate": 4.290094338664785e-07, |
| "loss": -0.8729, |
| "step": 9960 |
| }, |
| { |
| "epoch": 0.9922125743288632, |
| "grad_norm": 0.05838792771100998, |
| "learning_rate": 3.3802404800120646e-07, |
| "loss": -0.8351, |
| "step": 9970 |
| }, |
| { |
| "epoch": 0.9932077724976986, |
| "grad_norm": 0.05779346823692322, |
| "learning_rate": 2.578679690204977e-07, |
| "loss": -0.8578, |
| "step": 9980 |
| }, |
| { |
| "epoch": 0.994202970666534, |
| "grad_norm": 0.03772381693124771, |
| "learning_rate": 1.8854206525265039e-07, |
| "loss": -0.8928, |
| "step": 9990 |
| }, |
| { |
| "epoch": 0.9951981688353694, |
| "grad_norm": 0.025430290028452873, |
| "learning_rate": 1.3004708770314455e-07, |
| "loss": -0.8629, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.9961933670042047, |
| "grad_norm": 0.04903922230005264, |
| "learning_rate": 8.238367004609337e-08, |
| "loss": -0.8402, |
| "step": 10010 |
| }, |
| { |
| "epoch": 0.9971885651730401, |
| "grad_norm": 0.035914886742830276, |
| "learning_rate": 4.555232861802594e-08, |
| "loss": -0.8761, |
| "step": 10020 |
| }, |
| { |
| "epoch": 0.9981837633418754, |
| "grad_norm": 0.06849048286676407, |
| "learning_rate": 1.9553462411447953e-08, |
| "loss": -0.8929, |
| "step": 10030 |
| }, |
| { |
| "epoch": 0.9991789615107108, |
| "grad_norm": 0.03138072043657303, |
| "learning_rate": 4.387353071400035e-09, |
| "loss": -0.8618, |
| "step": 10040 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 10048, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.2351234362159137e+19, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|