diff --git "a/checkpoint/trainer_state.json" "b/checkpoint/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint/trainer_state.json" @@ -0,0 +1,94653 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.6180380883938197, + "eval_steps": 111100020, + "global_step": 94620, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.00042273467058400794, + "grad_norm": 1.5754607915878296, + "learning_rate": 1e-05, + "loss": 0.9202, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.375, + "step": 10 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.0008454693411680159, + "grad_norm": 0.8984742760658264, + "learning_rate": 2e-05, + "loss": 0.8598, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.375, + "step": 20 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.0012682040117520238, + "grad_norm": 0.5608882308006287, + "learning_rate": 3e-05, + "loss": 0.7582, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.375, + "step": 30 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.0016909386823360318, + "grad_norm": 0.41935500502586365, + "learning_rate": 4e-05, + "loss": 0.6671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.375, + "step": 40 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.00211367335292004, + "grad_norm": 0.38084542751312256, + "learning_rate": 5e-05, + "loss": 0.6111, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.375, + "step": 50 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.0025364080235040476, + "grad_norm": 0.28111326694488525, + "learning_rate": 6e-05, + "loss": 0.5586, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 60 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.002959142694088056, + "grad_norm": 0.24558104574680328, + "learning_rate": 7e-05, + "loss": 0.5453, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 70 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.0033818773646720635, + "grad_norm": 0.1833844631910324, + "learning_rate": 8e-05, + "loss": 0.5243, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 80 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.0038046120352560717, + "grad_norm": 0.1957726776599884, + "learning_rate": 9e-05, + "loss": 0.495, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 90 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.00422734670584008, + "grad_norm": 0.4201236367225647, + "learning_rate": 0.0001, + "loss": 0.4827, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.0046500813764240876, + "grad_norm": 0.5044054985046387, + "learning_rate": 9.999999723819937e-05, + "loss": 0.4681, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.005072816047008095, + "grad_norm": 0.50824373960495, + "learning_rate": 9.99999889527978e-05, + "loss": 0.4602, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.005495550717592103, + "grad_norm": 0.3367324769496918, + "learning_rate": 9.999997514379617e-05, + "loss": 0.4445, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.005918285388176112, + "grad_norm": 0.5926761031150818, + "learning_rate": 9.999995581119605e-05, + "loss": 0.4416, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.006341020058760119, + "grad_norm": 0.8930962681770325, + "learning_rate": 9.999993095499955e-05, + "loss": 0.4359, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.006763754729344127, + "grad_norm": 0.5190911293029785, + "learning_rate": 9.99999005752094e-05, + "loss": 0.4273, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.007186489399928135, + "grad_norm": 0.5804879665374756, + "learning_rate": 9.9999864671829e-05, + "loss": 0.4314, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.007609224070512143, + "grad_norm": 0.5835158824920654, + "learning_rate": 9.999982324486228e-05, + "loss": 0.431, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.00803195874109615, + "grad_norm": 0.533743143081665, + "learning_rate": 9.999977629431383e-05, + "loss": 0.4307, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.00845469341168016, + "grad_norm": 0.720042884349823, + "learning_rate": 9.999972382018885e-05, + "loss": 0.4199, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.008877428082264167, + "grad_norm": 0.4704369902610779, + "learning_rate": 9.99996658224931e-05, + "loss": 0.4133, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.009300162752848175, + "grad_norm": 0.7180871367454529, + "learning_rate": 9.999960230123302e-05, + "loss": 0.4234, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.009722897423432183, + "grad_norm": 0.6877025961875916, + "learning_rate": 9.999953325641562e-05, + "loss": 0.4126, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.01014563209401619, + "grad_norm": 0.6965651512145996, + "learning_rate": 9.999945868804852e-05, + "loss": 0.4156, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.010568366764600198, + "grad_norm": 0.5324245691299438, + "learning_rate": 9.999937859613997e-05, + "loss": 0.4088, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.010991101435184206, + "grad_norm": 0.5420552492141724, + "learning_rate": 9.999929298069881e-05, + "loss": 0.4116, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.011413836105768214, + "grad_norm": 0.6198002696037292, + "learning_rate": 9.999920184173449e-05, + "loss": 0.4004, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.011836570776352223, + "grad_norm": 0.8753314018249512, + "learning_rate": 9.99991051792571e-05, + "loss": 0.4089, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.012259305446936231, + "grad_norm": 0.654732882976532, + "learning_rate": 9.999900299327729e-05, + "loss": 0.4095, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.012682040117520239, + "grad_norm": 0.5857046842575073, + "learning_rate": 9.999889528380637e-05, + "loss": 0.4078, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.013104774788104246, + "grad_norm": 0.680976390838623, + "learning_rate": 9.999878205085623e-05, + "loss": 0.4046, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.013527509458688254, + "grad_norm": 0.5760939717292786, + "learning_rate": 9.99986632944394e-05, + "loss": 0.4063, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.013950244129272262, + "grad_norm": 0.6907309293746948, + "learning_rate": 9.999853901456895e-05, + "loss": 0.4066, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.01437297879985627, + "grad_norm": 0.44848451018333435, + "learning_rate": 9.999840921125866e-05, + "loss": 0.393, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.014795713470440279, + "grad_norm": 0.5177687406539917, + "learning_rate": 9.999827388452285e-05, + "loss": 0.4009, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.015218448141024287, + "grad_norm": 0.6795690655708313, + "learning_rate": 9.999813303437646e-05, + "loss": 0.4002, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.015641182811608294, + "grad_norm": 0.5112910866737366, + "learning_rate": 9.999798666083508e-05, + "loss": 0.3953, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.0160639174821923, + "grad_norm": 0.783458411693573, + "learning_rate": 9.999783476391485e-05, + "loss": 0.3964, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.01648665215277631, + "grad_norm": 0.8602914810180664, + "learning_rate": 9.999767734363254e-05, + "loss": 0.4012, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.01690938682336032, + "grad_norm": 0.4979186952114105, + "learning_rate": 9.999751440000558e-05, + "loss": 0.4029, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.017332121493944325, + "grad_norm": 0.543412446975708, + "learning_rate": 9.999734593305195e-05, + "loss": 0.397, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.017754856164528335, + "grad_norm": 0.4599871039390564, + "learning_rate": 9.999717194279027e-05, + "loss": 0.3993, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.01817759083511234, + "grad_norm": 0.6940227746963501, + "learning_rate": 9.999699242923975e-05, + "loss": 0.3947, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.01860032550569635, + "grad_norm": 0.4701397716999054, + "learning_rate": 9.999680739242022e-05, + "loss": 0.3942, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.019023060176280356, + "grad_norm": 0.5089147686958313, + "learning_rate": 9.999661683235213e-05, + "loss": 0.3904, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.019445794846864366, + "grad_norm": 0.4079025089740753, + "learning_rate": 9.999642074905654e-05, + "loss": 0.3884, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.019868529517448375, + "grad_norm": 0.5033764243125916, + "learning_rate": 9.999621914255508e-05, + "loss": 0.3949, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.02029126418803238, + "grad_norm": 0.6463996171951294, + "learning_rate": 9.999601201287004e-05, + "loss": 0.3972, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.02071399885861639, + "grad_norm": 0.6911121606826782, + "learning_rate": 9.99957993600243e-05, + "loss": 0.3943, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.021136733529200397, + "grad_norm": 0.5224618315696716, + "learning_rate": 9.999558118404137e-05, + "loss": 0.3959, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.021559468199784406, + "grad_norm": 0.5731706619262695, + "learning_rate": 9.999535748494535e-05, + "loss": 0.3893, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.021982202870368412, + "grad_norm": 0.5287659764289856, + "learning_rate": 9.999512826276092e-05, + "loss": 0.3968, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.02240493754095242, + "grad_norm": 0.8613202571868896, + "learning_rate": 9.999489351751343e-05, + "loss": 0.395, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.022827672211536428, + "grad_norm": 0.4532279074192047, + "learning_rate": 9.99946532492288e-05, + "loss": 0.3841, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.023250406882120437, + "grad_norm": 0.674324631690979, + "learning_rate": 9.99944074579336e-05, + "loss": 0.3943, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.023673141552704446, + "grad_norm": 0.7623412609100342, + "learning_rate": 9.999415614365494e-05, + "loss": 0.3845, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.024095876223288452, + "grad_norm": 0.5885199904441833, + "learning_rate": 9.999389930642061e-05, + "loss": 0.3925, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.024518610893872462, + "grad_norm": 0.6440210342407227, + "learning_rate": 9.999363694625899e-05, + "loss": 0.3917, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.024941345564456468, + "grad_norm": 0.5878811478614807, + "learning_rate": 9.999336906319903e-05, + "loss": 0.3836, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.025364080235040477, + "grad_norm": 0.45065879821777344, + "learning_rate": 9.999309565727037e-05, + "loss": 0.39, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.025786814905624483, + "grad_norm": 0.6479048728942871, + "learning_rate": 9.999281672850317e-05, + "loss": 0.3839, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.026209549576208493, + "grad_norm": 0.5250176787376404, + "learning_rate": 9.999253227692826e-05, + "loss": 0.3892, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.026632284246792502, + "grad_norm": 0.4726778566837311, + "learning_rate": 9.999224230257709e-05, + "loss": 0.3848, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.027055018917376508, + "grad_norm": 0.46320950984954834, + "learning_rate": 9.999194680548166e-05, + "loss": 0.3841, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.027477753587960518, + "grad_norm": 0.5241245627403259, + "learning_rate": 9.999164578567461e-05, + "loss": 0.3912, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.027900488258544524, + "grad_norm": 0.5394160747528076, + "learning_rate": 9.999133924318924e-05, + "loss": 0.3892, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.028323222929128533, + "grad_norm": 0.5381983518600464, + "learning_rate": 9.999102717805938e-05, + "loss": 0.3828, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.02874595759971254, + "grad_norm": 0.6301137208938599, + "learning_rate": 9.999070959031948e-05, + "loss": 0.3942, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.02916869227029655, + "grad_norm": 0.5624942183494568, + "learning_rate": 9.999038648000467e-05, + "loss": 0.3832, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.029591426940880558, + "grad_norm": 0.6309047937393188, + "learning_rate": 9.999005784715064e-05, + "loss": 0.3806, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.030014161611464564, + "grad_norm": 0.6600726246833801, + "learning_rate": 9.998972369179365e-05, + "loss": 0.3817, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.030436896282048573, + "grad_norm": 0.6492134928703308, + "learning_rate": 9.998938401397067e-05, + "loss": 0.3877, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.03085963095263258, + "grad_norm": 0.6567886471748352, + "learning_rate": 9.998903881371919e-05, + "loss": 0.3888, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.03128236562321659, + "grad_norm": 0.6828862428665161, + "learning_rate": 9.998868809107738e-05, + "loss": 0.3905, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.031705100293800595, + "grad_norm": 0.5002163648605347, + "learning_rate": 9.998833184608394e-05, + "loss": 0.3825, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.0321278349643846, + "grad_norm": 0.44172027707099915, + "learning_rate": 9.998797007877824e-05, + "loss": 0.3845, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.032550569634968614, + "grad_norm": 0.3929919898509979, + "learning_rate": 9.998760278920029e-05, + "loss": 0.3839, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.03297330430555262, + "grad_norm": 0.5147657990455627, + "learning_rate": 9.99872299773906e-05, + "loss": 0.382, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.033396038976136626, + "grad_norm": 0.6065912842750549, + "learning_rate": 9.998685164339039e-05, + "loss": 0.3848, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.03381877364672064, + "grad_norm": 0.6750016212463379, + "learning_rate": 9.998646778724144e-05, + "loss": 0.3816, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.034241508317304645, + "grad_norm": 0.5763168334960938, + "learning_rate": 9.998607840898617e-05, + "loss": 0.3765, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.03466424298788865, + "grad_norm": 0.46419087052345276, + "learning_rate": 9.998568350866759e-05, + "loss": 0.3845, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.03508697765847266, + "grad_norm": 0.544208288192749, + "learning_rate": 9.998528308632932e-05, + "loss": 0.3816, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.03550971232905667, + "grad_norm": 0.5872101187705994, + "learning_rate": 9.99848771420156e-05, + "loss": 0.3876, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.035932446999640676, + "grad_norm": 0.46997925639152527, + "learning_rate": 9.998446567577128e-05, + "loss": 0.382, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.03635518167022468, + "grad_norm": 0.5652914047241211, + "learning_rate": 9.99840486876418e-05, + "loss": 0.3825, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.036777916340808695, + "grad_norm": 0.4828193783760071, + "learning_rate": 9.998362617767325e-05, + "loss": 0.3844, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.0372006510113927, + "grad_norm": 0.52516770362854, + "learning_rate": 9.998319814591228e-05, + "loss": 0.3915, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.037623385681976707, + "grad_norm": 0.6175445914268494, + "learning_rate": 9.998276459240621e-05, + "loss": 0.3816, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.03804612035256071, + "grad_norm": 0.47972017526626587, + "learning_rate": 9.998232551720289e-05, + "loss": 0.382, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.038468855023144725, + "grad_norm": 0.6079577207565308, + "learning_rate": 9.998188092035086e-05, + "loss": 0.378, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.03889158969372873, + "grad_norm": 0.5997479557991028, + "learning_rate": 9.998143080189922e-05, + "loss": 0.3808, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.03931432436431274, + "grad_norm": 0.5748251676559448, + "learning_rate": 9.99809751618977e-05, + "loss": 0.3896, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.03973705903489675, + "grad_norm": 0.5714238882064819, + "learning_rate": 9.998051400039664e-05, + "loss": 0.3854, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.040159793705480756, + "grad_norm": 0.48763173818588257, + "learning_rate": 9.998004731744697e-05, + "loss": 0.3823, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.04058252837606476, + "grad_norm": 0.5566950440406799, + "learning_rate": 9.997957511310025e-05, + "loss": 0.3838, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.04100526304664877, + "grad_norm": 0.41744205355644226, + "learning_rate": 9.997909738740867e-05, + "loss": 0.383, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.04142799771723278, + "grad_norm": 0.48450562357902527, + "learning_rate": 9.997861414042498e-05, + "loss": 0.3834, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.04185073238781679, + "grad_norm": 0.6103826761245728, + "learning_rate": 9.997812537220257e-05, + "loss": 0.3793, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.04227346705840079, + "grad_norm": 0.580582320690155, + "learning_rate": 9.997763108279543e-05, + "loss": 0.3836, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.042696201728984806, + "grad_norm": 0.5165266394615173, + "learning_rate": 9.997713127225818e-05, + "loss": 0.3803, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.04311893639956881, + "grad_norm": 0.524989902973175, + "learning_rate": 9.997662594064603e-05, + "loss": 0.3814, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.04354167107015282, + "grad_norm": 0.43069642782211304, + "learning_rate": 9.99761150880148e-05, + "loss": 0.3821, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.043964405740736824, + "grad_norm": 0.59302818775177, + "learning_rate": 9.997559871442093e-05, + "loss": 0.3842, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.04438714041132084, + "grad_norm": 0.5538245439529419, + "learning_rate": 9.997507681992144e-05, + "loss": 0.3815, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.04480987508190484, + "grad_norm": 0.7668962478637695, + "learning_rate": 9.997454940457404e-05, + "loss": 0.3691, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.04523260975248885, + "grad_norm": 0.5473869442939758, + "learning_rate": 9.997401646843694e-05, + "loss": 0.3847, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.045655344423072855, + "grad_norm": 0.4700976014137268, + "learning_rate": 9.997347801156905e-05, + "loss": 0.3857, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.04607807909365687, + "grad_norm": 0.6115397810935974, + "learning_rate": 9.997293403402983e-05, + "loss": 0.3768, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.046500813764240874, + "grad_norm": 0.4986879825592041, + "learning_rate": 9.997238453587939e-05, + "loss": 0.3714, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.04692354843482488, + "grad_norm": 0.4612989127635956, + "learning_rate": 9.997182951717841e-05, + "loss": 0.385, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.04734628310540889, + "grad_norm": 0.5636703968048096, + "learning_rate": 9.997126897798825e-05, + "loss": 0.3842, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.0477690177759929, + "grad_norm": 0.5998519659042358, + "learning_rate": 9.997070291837079e-05, + "loss": 0.3796, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.048191752446576905, + "grad_norm": 0.4849514067173004, + "learning_rate": 9.997013133838859e-05, + "loss": 0.3813, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.04861448711716091, + "grad_norm": 0.5015349388122559, + "learning_rate": 9.996955423810478e-05, + "loss": 0.3783, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.049037221787744924, + "grad_norm": 0.6126686334609985, + "learning_rate": 9.996897161758312e-05, + "loss": 0.3832, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.04945995645832893, + "grad_norm": 0.5664612054824829, + "learning_rate": 9.996838347688797e-05, + "loss": 0.3872, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.049882691128912936, + "grad_norm": 0.4876980483531952, + "learning_rate": 9.99677898160843e-05, + "loss": 0.3778, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.05030542579949695, + "grad_norm": 0.619940996170044, + "learning_rate": 9.99671906352377e-05, + "loss": 0.3838, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.050728160470080955, + "grad_norm": 0.4359891414642334, + "learning_rate": 9.996658593441435e-05, + "loss": 0.3812, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.05115089514066496, + "grad_norm": 0.5255918502807617, + "learning_rate": 9.996597571368107e-05, + "loss": 0.3862, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.05157362981124897, + "grad_norm": 0.45523712038993835, + "learning_rate": 9.996535997310527e-05, + "loss": 0.388, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.05199636448183298, + "grad_norm": 0.3960236608982086, + "learning_rate": 9.996473871275495e-05, + "loss": 0.3875, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.052419099152416986, + "grad_norm": 0.5919860005378723, + "learning_rate": 9.99641119326988e-05, + "loss": 0.3833, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.05284183382300099, + "grad_norm": 0.47187161445617676, + "learning_rate": 9.996347963300598e-05, + "loss": 0.3796, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.053264568493585004, + "grad_norm": 0.6943979263305664, + "learning_rate": 9.996284181374639e-05, + "loss": 0.3758, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.05368730316416901, + "grad_norm": 0.5482822060585022, + "learning_rate": 9.996219847499049e-05, + "loss": 0.3819, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.054110037834753016, + "grad_norm": 0.5442325472831726, + "learning_rate": 9.996154961680933e-05, + "loss": 0.3721, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.05453277250533702, + "grad_norm": 0.393838107585907, + "learning_rate": 9.996089523927461e-05, + "loss": 0.3756, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.054955507175921035, + "grad_norm": 0.8175389766693115, + "learning_rate": 9.996023534245861e-05, + "loss": 0.3841, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.05537824184650504, + "grad_norm": 0.42842531204223633, + "learning_rate": 9.995956992643425e-05, + "loss": 0.3854, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.05580097651708905, + "grad_norm": 0.5137342214584351, + "learning_rate": 9.995889899127501e-05, + "loss": 0.38, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.05622371118767306, + "grad_norm": 0.5118895173072815, + "learning_rate": 9.995822253705505e-05, + "loss": 0.3763, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.056646445858257066, + "grad_norm": 0.4384053349494934, + "learning_rate": 9.995754056384905e-05, + "loss": 0.382, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.05706918052884107, + "grad_norm": 0.46432942152023315, + "learning_rate": 9.995685307173237e-05, + "loss": 0.3832, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.05749191519942508, + "grad_norm": 0.5243934392929077, + "learning_rate": 9.995616006078097e-05, + "loss": 0.3723, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.05791464987000909, + "grad_norm": 0.5211062431335449, + "learning_rate": 9.99554615310714e-05, + "loss": 0.384, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.0583373845405931, + "grad_norm": 0.43892043828964233, + "learning_rate": 9.995475748268081e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.0587601192111771, + "grad_norm": 0.8995327353477478, + "learning_rate": 9.995404791568701e-05, + "loss": 0.3824, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.059182853881761116, + "grad_norm": 0.5201218128204346, + "learning_rate": 9.995333283016838e-05, + "loss": 0.3848, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.05960558855234512, + "grad_norm": 0.4167262613773346, + "learning_rate": 9.995261222620392e-05, + "loss": 0.3772, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.06002832322292913, + "grad_norm": 0.3574886620044708, + "learning_rate": 9.99518861038732e-05, + "loss": 0.3772, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.060451057893513134, + "grad_norm": 0.428311824798584, + "learning_rate": 9.995115446325647e-05, + "loss": 0.375, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.06087379256409715, + "grad_norm": 0.5424283742904663, + "learning_rate": 9.995041730443454e-05, + "loss": 0.3729, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.06129652723468115, + "grad_norm": 0.4261209964752197, + "learning_rate": 9.994967462748887e-05, + "loss": 0.3733, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.06171926190526516, + "grad_norm": 0.4913170635700226, + "learning_rate": 9.994892643250147e-05, + "loss": 0.3787, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.062141996575849165, + "grad_norm": 0.3789992332458496, + "learning_rate": 9.994817271955503e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.06256473124643318, + "grad_norm": 0.44887110590934753, + "learning_rate": 9.994741348873279e-05, + "loss": 0.3748, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.06298746591701719, + "grad_norm": 0.44052746891975403, + "learning_rate": 9.994664874011863e-05, + "loss": 0.3762, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.06341020058760119, + "grad_norm": 0.5042121410369873, + "learning_rate": 9.994587847379703e-05, + "loss": 0.3735, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.0638329352581852, + "grad_norm": 0.46043047308921814, + "learning_rate": 9.994510268985309e-05, + "loss": 0.3725, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.0642556699287692, + "grad_norm": 0.5166441202163696, + "learning_rate": 9.994432138837252e-05, + "loss": 0.3701, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.06467840459935321, + "grad_norm": 0.37917783856391907, + "learning_rate": 9.994353456944161e-05, + "loss": 0.3822, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.06510113926993723, + "grad_norm": 0.35627034306526184, + "learning_rate": 9.99427422331473e-05, + "loss": 0.3779, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.06552387394052123, + "grad_norm": 0.5540376901626587, + "learning_rate": 9.994194437957711e-05, + "loss": 0.3708, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.06594660861110524, + "grad_norm": 0.4451822340488434, + "learning_rate": 9.994114100881919e-05, + "loss": 0.3719, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.06636934328168925, + "grad_norm": 0.4918615520000458, + "learning_rate": 9.994033212096228e-05, + "loss": 0.3806, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.06679207795227325, + "grad_norm": 0.4379864037036896, + "learning_rate": 9.993951771609574e-05, + "loss": 0.3781, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.06721481262285726, + "grad_norm": 0.47501134872436523, + "learning_rate": 9.993869779430955e-05, + "loss": 0.3818, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.06763754729344128, + "grad_norm": 0.4866444766521454, + "learning_rate": 9.993787235569428e-05, + "loss": 0.3792, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.06806028196402528, + "grad_norm": 0.47261741757392883, + "learning_rate": 9.99370414003411e-05, + "loss": 0.3771, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.06848301663460929, + "grad_norm": 0.552355170249939, + "learning_rate": 9.993620492834186e-05, + "loss": 0.3766, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.06890575130519329, + "grad_norm": 0.4518074691295624, + "learning_rate": 9.993536293978891e-05, + "loss": 0.3853, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.0693284859757773, + "grad_norm": 0.4832629859447479, + "learning_rate": 9.99345154347753e-05, + "loss": 0.3754, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.06975122064636131, + "grad_norm": 0.4242904484272003, + "learning_rate": 9.993366241339464e-05, + "loss": 0.3747, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.07017395531694531, + "grad_norm": 0.4122840464115143, + "learning_rate": 9.993280387574118e-05, + "loss": 0.375, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.07059668998752933, + "grad_norm": 0.6627521514892578, + "learning_rate": 9.993193982190974e-05, + "loss": 0.3826, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.07101942465811334, + "grad_norm": 0.7066740393638611, + "learning_rate": 9.993107025199579e-05, + "loss": 0.3792, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.07144215932869734, + "grad_norm": 0.5179650783538818, + "learning_rate": 9.99301951660954e-05, + "loss": 0.3707, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.07186489399928135, + "grad_norm": 0.43250009417533875, + "learning_rate": 9.992931456430523e-05, + "loss": 0.3782, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.07228762866986536, + "grad_norm": 0.44840967655181885, + "learning_rate": 9.992842844672257e-05, + "loss": 0.3832, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.07271036334044936, + "grad_norm": 0.45832473039627075, + "learning_rate": 9.99275368134453e-05, + "loss": 0.3709, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.07313309801103338, + "grad_norm": 0.4574330151081085, + "learning_rate": 9.992663966457194e-05, + "loss": 0.383, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.07355583268161739, + "grad_norm": 0.5558742880821228, + "learning_rate": 9.992573700020158e-05, + "loss": 0.3825, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.07397856735220139, + "grad_norm": 0.3577042520046234, + "learning_rate": 9.992482882043393e-05, + "loss": 0.3763, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.0744013020227854, + "grad_norm": 0.45625075697898865, + "learning_rate": 9.992391512536936e-05, + "loss": 0.3768, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.0748240366933694, + "grad_norm": 0.4859465956687927, + "learning_rate": 9.992299591510876e-05, + "loss": 0.3703, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.07524677136395341, + "grad_norm": 0.5242054462432861, + "learning_rate": 9.992207118975371e-05, + "loss": 0.3775, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.07566950603453743, + "grad_norm": 0.5951898694038391, + "learning_rate": 9.992114094940637e-05, + "loss": 0.3775, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.07609224070512143, + "grad_norm": 0.5416415929794312, + "learning_rate": 9.992020519416948e-05, + "loss": 0.3707, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.07651497537570544, + "grad_norm": 0.4221881628036499, + "learning_rate": 9.991926392414643e-05, + "loss": 0.3756, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.07693771004628945, + "grad_norm": 0.40904274582862854, + "learning_rate": 9.99183171394412e-05, + "loss": 0.3871, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.07736044471687345, + "grad_norm": 0.4200010299682617, + "learning_rate": 9.991736484015838e-05, + "loss": 0.3748, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.07778317938745746, + "grad_norm": 0.4883674383163452, + "learning_rate": 9.991640702640317e-05, + "loss": 0.3711, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.07820591405804148, + "grad_norm": 0.5103897452354431, + "learning_rate": 9.99154436982814e-05, + "loss": 0.3762, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.07862864872862547, + "grad_norm": 0.37684234976768494, + "learning_rate": 9.991447485589947e-05, + "loss": 0.3812, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.07905138339920949, + "grad_norm": 0.4920998811721802, + "learning_rate": 9.991350049936442e-05, + "loss": 0.3753, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.0794741180697935, + "grad_norm": 0.43770599365234375, + "learning_rate": 9.991252062878389e-05, + "loss": 0.3699, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.0798968527403775, + "grad_norm": 0.39147403836250305, + "learning_rate": 9.991153524426613e-05, + "loss": 0.3761, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.08031958741096151, + "grad_norm": 0.42293551564216614, + "learning_rate": 9.991054434592e-05, + "loss": 0.3743, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.08074232208154551, + "grad_norm": 0.48516514897346497, + "learning_rate": 9.990954793385493e-05, + "loss": 0.3772, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.08116505675212952, + "grad_norm": 0.45829808712005615, + "learning_rate": 9.990854600818104e-05, + "loss": 0.3765, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.08158779142271354, + "grad_norm": 0.37229228019714355, + "learning_rate": 9.9907538569009e-05, + "loss": 0.3717, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.08201052609329754, + "grad_norm": 0.36379000544548035, + "learning_rate": 9.990652561645012e-05, + "loss": 0.3797, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.08243326076388155, + "grad_norm": 0.3169632852077484, + "learning_rate": 9.990550715061627e-05, + "loss": 0.3815, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.08285599543446556, + "grad_norm": 0.4494742155075073, + "learning_rate": 9.990448317162e-05, + "loss": 0.3734, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.08327873010504956, + "grad_norm": 0.36993643641471863, + "learning_rate": 9.990345367957439e-05, + "loss": 0.3725, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.08370146477563357, + "grad_norm": 0.4072403609752655, + "learning_rate": 9.990241867459318e-05, + "loss": 0.3731, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.08412419944621759, + "grad_norm": 0.4201242923736572, + "learning_rate": 9.990137815679074e-05, + "loss": 0.3811, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 1990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.08454693411680159, + "grad_norm": 0.3965272009372711, + "learning_rate": 9.990033212628199e-05, + "loss": 0.3818, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.0849696687873856, + "grad_norm": 0.42635616660118103, + "learning_rate": 9.98992805831825e-05, + "loss": 0.3826, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.08539240345796961, + "grad_norm": 0.5978305339813232, + "learning_rate": 9.989822352760842e-05, + "loss": 0.3705, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.08581513812855361, + "grad_norm": 0.44878295063972473, + "learning_rate": 9.989716095967655e-05, + "loss": 0.3789, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.08623787279913762, + "grad_norm": 0.43008363246917725, + "learning_rate": 9.989609287950424e-05, + "loss": 0.3684, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.08666060746972162, + "grad_norm": 0.43856799602508545, + "learning_rate": 9.989501928720953e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.08708334214030564, + "grad_norm": 0.4552156329154968, + "learning_rate": 9.989394018291096e-05, + "loss": 0.3718, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.08750607681088965, + "grad_norm": 0.5295701026916504, + "learning_rate": 9.98928555667278e-05, + "loss": 0.3698, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.08792881148147365, + "grad_norm": 0.3852839469909668, + "learning_rate": 9.989176543877983e-05, + "loss": 0.3824, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.08835154615205766, + "grad_norm": 0.34348800778388977, + "learning_rate": 9.98906697991875e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.08877428082264167, + "grad_norm": 0.41687220335006714, + "learning_rate": 9.988956864807185e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.08919701549322567, + "grad_norm": 0.4447932839393616, + "learning_rate": 9.988846198555451e-05, + "loss": 0.3778, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.08961975016380969, + "grad_norm": 0.4299245774745941, + "learning_rate": 9.988734981175774e-05, + "loss": 0.3772, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.0900424848343937, + "grad_norm": 0.3251185417175293, + "learning_rate": 9.988623212680442e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.0904652195049777, + "grad_norm": 0.38975751399993896, + "learning_rate": 9.988510893081799e-05, + "loss": 0.3753, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.09088795417556171, + "grad_norm": 0.48835518956184387, + "learning_rate": 9.988398022392259e-05, + "loss": 0.3719, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.09131068884614571, + "grad_norm": 0.549762487411499, + "learning_rate": 9.988284600624282e-05, + "loss": 0.3717, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.09173342351672972, + "grad_norm": 0.44599807262420654, + "learning_rate": 9.988170627790407e-05, + "loss": 0.372, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.09215615818731374, + "grad_norm": 0.3702348470687866, + "learning_rate": 9.98805610390322e-05, + "loss": 0.3855, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.09257889285789773, + "grad_norm": 0.40003833174705505, + "learning_rate": 9.987941028975373e-05, + "loss": 0.3789, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.09300162752848175, + "grad_norm": 0.427507221698761, + "learning_rate": 9.98782540301958e-05, + "loss": 0.3759, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.09342436219906576, + "grad_norm": 0.44479408860206604, + "learning_rate": 9.987709226048612e-05, + "loss": 0.3814, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.09384709686964976, + "grad_norm": 0.5662469267845154, + "learning_rate": 9.987592498075307e-05, + "loss": 0.3797, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.09426983154023377, + "grad_norm": 0.4375860393047333, + "learning_rate": 9.987475219112556e-05, + "loss": 0.3736, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.09469256621081779, + "grad_norm": 0.4219077527523041, + "learning_rate": 9.987357389173319e-05, + "loss": 0.3736, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.09511530088140178, + "grad_norm": 0.44563838839530945, + "learning_rate": 9.987239008270611e-05, + "loss": 0.3736, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.0955380355519858, + "grad_norm": 0.4578148424625397, + "learning_rate": 9.98712007641751e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.09596077022256981, + "grad_norm": 0.39621227979660034, + "learning_rate": 9.987000593627153e-05, + "loss": 0.3739, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.09638350489315381, + "grad_norm": 0.4463416039943695, + "learning_rate": 9.986880559912742e-05, + "loss": 0.3781, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.09680623956373782, + "grad_norm": 0.3230365812778473, + "learning_rate": 9.986759975287536e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.09722897423432182, + "grad_norm": 0.38139641284942627, + "learning_rate": 9.986638839764857e-05, + "loss": 0.3807, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.09765170890490583, + "grad_norm": 0.38402536511421204, + "learning_rate": 9.986517153358086e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.09807444357548985, + "grad_norm": 0.43068501353263855, + "learning_rate": 9.986394916080666e-05, + "loss": 0.3719, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.09849717824607385, + "grad_norm": 0.4785890281200409, + "learning_rate": 9.986272127946103e-05, + "loss": 0.3755, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.09891991291665786, + "grad_norm": 0.42531269788742065, + "learning_rate": 9.98614878896796e-05, + "loss": 0.3753, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.09934264758724187, + "grad_norm": 0.3281669318675995, + "learning_rate": 9.986024899159863e-05, + "loss": 0.3743, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.09976538225782587, + "grad_norm": 0.3672811985015869, + "learning_rate": 9.985900458535497e-05, + "loss": 0.3697, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.10018811692840988, + "grad_norm": 0.4108651280403137, + "learning_rate": 9.98577546710861e-05, + "loss": 0.3743, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1006108515989939, + "grad_norm": 0.41944172978401184, + "learning_rate": 9.985649924893011e-05, + "loss": 0.383, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1010335862695779, + "grad_norm": 0.3885670602321625, + "learning_rate": 9.985523831902567e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.10145632094016191, + "grad_norm": 0.3705422878265381, + "learning_rate": 9.98539718815121e-05, + "loss": 0.3714, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.10187905561074592, + "grad_norm": 0.4333280920982361, + "learning_rate": 9.985269993652929e-05, + "loss": 0.375, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.10230179028132992, + "grad_norm": 0.4459168314933777, + "learning_rate": 9.985142248421775e-05, + "loss": 0.3773, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.10272452495191393, + "grad_norm": 0.3721534311771393, + "learning_rate": 9.985013952471862e-05, + "loss": 0.3744, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.10314725962249793, + "grad_norm": 0.39690783619880676, + "learning_rate": 9.984885105817364e-05, + "loss": 0.3774, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.10356999429308195, + "grad_norm": 0.43417179584503174, + "learning_rate": 9.98475570847251e-05, + "loss": 0.3724, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.10399272896366596, + "grad_norm": 0.5115193724632263, + "learning_rate": 9.9846257604516e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.10441546363424996, + "grad_norm": 0.3761536180973053, + "learning_rate": 9.984495261768987e-05, + "loss": 0.3724, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.10483819830483397, + "grad_norm": 0.5119365453720093, + "learning_rate": 9.984364212439088e-05, + "loss": 0.3697, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.10526093297541798, + "grad_norm": 0.45339593291282654, + "learning_rate": 9.98423261247638e-05, + "loss": 0.3742, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.10568366764600198, + "grad_norm": 0.42223218083381653, + "learning_rate": 9.984100461895403e-05, + "loss": 0.3712, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.106106402316586, + "grad_norm": 0.45894742012023926, + "learning_rate": 9.983967760710754e-05, + "loss": 0.3784, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.10652913698717001, + "grad_norm": 0.4379785358905792, + "learning_rate": 9.983834508937093e-05, + "loss": 0.3729, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.10695187165775401, + "grad_norm": 0.49081653356552124, + "learning_rate": 9.983700706589141e-05, + "loss": 0.3732, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.10737460632833802, + "grad_norm": 0.3791351914405823, + "learning_rate": 9.98356635368168e-05, + "loss": 0.3781, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.10779734099892202, + "grad_norm": 0.501582682132721, + "learning_rate": 9.983431450229548e-05, + "loss": 0.3721, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.10822007566950603, + "grad_norm": 0.3954605460166931, + "learning_rate": 9.983295996247655e-05, + "loss": 0.375, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.10864281034009005, + "grad_norm": 0.4125593304634094, + "learning_rate": 9.983159991750959e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.10906554501067404, + "grad_norm": 0.38281190395355225, + "learning_rate": 9.983023436754489e-05, + "loss": 0.3693, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.10948827968125806, + "grad_norm": 0.39266642928123474, + "learning_rate": 9.982886331273328e-05, + "loss": 0.3789, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.10991101435184207, + "grad_norm": 0.5181350708007812, + "learning_rate": 9.982748675322622e-05, + "loss": 0.3785, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.11033374902242607, + "grad_norm": 0.46125757694244385, + "learning_rate": 9.98261046891758e-05, + "loss": 0.3732, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.11075648369301008, + "grad_norm": 0.4075755774974823, + "learning_rate": 9.982471712073469e-05, + "loss": 0.3716, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1111792183635941, + "grad_norm": 0.415294349193573, + "learning_rate": 9.982332404805617e-05, + "loss": 0.3759, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1116019530341781, + "grad_norm": 0.3474576771259308, + "learning_rate": 9.982192547129414e-05, + "loss": 0.3711, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.11202468770476211, + "grad_norm": 0.3842771053314209, + "learning_rate": 9.982052139060311e-05, + "loss": 0.3738, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.11244742237534612, + "grad_norm": 0.3526894152164459, + "learning_rate": 9.981911180613821e-05, + "loss": 0.3704, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.11287015704593012, + "grad_norm": 0.429877370595932, + "learning_rate": 9.981769671805513e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.11329289171651413, + "grad_norm": 0.5078182220458984, + "learning_rate": 9.981627612651017e-05, + "loss": 0.3702, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.11371562638709813, + "grad_norm": 0.396127313375473, + "learning_rate": 9.981485003166034e-05, + "loss": 0.377, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.11413836105768214, + "grad_norm": 0.39165905117988586, + "learning_rate": 9.981341843366311e-05, + "loss": 0.3719, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.11456109572826616, + "grad_norm": 0.4042210280895233, + "learning_rate": 9.981198133267671e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.11498383039885016, + "grad_norm": 0.3514523506164551, + "learning_rate": 9.981053872885983e-05, + "loss": 0.3721, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.11540656506943417, + "grad_norm": 0.3894653022289276, + "learning_rate": 9.980909062237186e-05, + "loss": 0.3734, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.11582929974001818, + "grad_norm": 0.41008639335632324, + "learning_rate": 9.980763701337278e-05, + "loss": 0.3788, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.11625203441060218, + "grad_norm": 0.49372169375419617, + "learning_rate": 9.980617790202318e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1166747690811862, + "grad_norm": 0.3864573836326599, + "learning_rate": 9.980471328848424e-05, + "loss": 0.3782, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.11709750375177021, + "grad_norm": 0.3956460952758789, + "learning_rate": 9.980324317291775e-05, + "loss": 0.3717, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1175202384223542, + "grad_norm": 0.3903290927410126, + "learning_rate": 9.980176755548613e-05, + "loss": 0.3752, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.11794297309293822, + "grad_norm": 0.3626936674118042, + "learning_rate": 9.980028643635239e-05, + "loss": 0.379, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.11836570776352223, + "grad_norm": 0.3574053645133972, + "learning_rate": 9.979879981568017e-05, + "loss": 0.3755, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.11878844243410623, + "grad_norm": 0.40763410925865173, + "learning_rate": 9.979730769363368e-05, + "loss": 0.3713, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.11921117710469024, + "grad_norm": 0.516631543636322, + "learning_rate": 9.979581007037776e-05, + "loss": 0.3784, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.11963391177527424, + "grad_norm": 0.40867993235588074, + "learning_rate": 9.979430694607785e-05, + "loss": 0.3708, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.12005664644585826, + "grad_norm": 0.34989121556282043, + "learning_rate": 9.979279832090002e-05, + "loss": 0.3734, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.12047938111644227, + "grad_norm": 0.3620661795139313, + "learning_rate": 9.979128419501092e-05, + "loss": 0.3743, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.12090211578702627, + "grad_norm": 0.420282781124115, + "learning_rate": 9.978976456857783e-05, + "loss": 0.3705, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.12132485045761028, + "grad_norm": 0.3829314410686493, + "learning_rate": 9.978823944176859e-05, + "loss": 0.376, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1217475851281943, + "grad_norm": 0.28633221983909607, + "learning_rate": 9.978670881475172e-05, + "loss": 0.376, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.12217031979877829, + "grad_norm": 0.39408859610557556, + "learning_rate": 9.978517268769632e-05, + "loss": 0.3742, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1225930544693623, + "grad_norm": 0.42843031883239746, + "learning_rate": 9.978363106077207e-05, + "loss": 0.3693, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.12301578913994632, + "grad_norm": 0.40901970863342285, + "learning_rate": 9.978208393414925e-05, + "loss": 0.3699, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.12343852381053032, + "grad_norm": 0.39132583141326904, + "learning_rate": 9.978053130799883e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.12386125848111433, + "grad_norm": 0.3506423234939575, + "learning_rate": 9.977897318249228e-05, + "loss": 0.3696, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.12428399315169833, + "grad_norm": 0.4445333778858185, + "learning_rate": 9.977740955780177e-05, + "loss": 0.3731, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.12470672782228234, + "grad_norm": 0.4596666991710663, + "learning_rate": 9.977584043410001e-05, + "loss": 0.3809, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.12512946249286636, + "grad_norm": 0.4121941328048706, + "learning_rate": 9.977426581156035e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.12555219716345037, + "grad_norm": 0.44364845752716064, + "learning_rate": 9.977268569035675e-05, + "loss": 0.3756, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.12597493183403438, + "grad_norm": 0.42326274514198303, + "learning_rate": 9.977110007066377e-05, + "loss": 0.3755, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.12639766650461837, + "grad_norm": 0.4230799376964569, + "learning_rate": 9.976950895265658e-05, + "loss": 0.3745, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 2990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.12682040117520238, + "grad_norm": 0.34855833649635315, + "learning_rate": 9.976791233651093e-05, + "loss": 0.377, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1272431358457864, + "grad_norm": 0.2869081199169159, + "learning_rate": 9.976631022240322e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1276658705163704, + "grad_norm": 0.3107573986053467, + "learning_rate": 9.976470261051042e-05, + "loss": 0.3698, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.12808860518695442, + "grad_norm": 0.37276336550712585, + "learning_rate": 9.976308950101016e-05, + "loss": 0.3748, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1285113398575384, + "grad_norm": 0.36662399768829346, + "learning_rate": 9.976147089408063e-05, + "loss": 0.3714, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.12893407452812242, + "grad_norm": 0.37542811036109924, + "learning_rate": 9.975984678990064e-05, + "loss": 0.3721, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.12935680919870643, + "grad_norm": 0.3802073299884796, + "learning_rate": 9.97582171886496e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.12977954386929044, + "grad_norm": 0.3874414563179016, + "learning_rate": 9.975658209050752e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.13020227853987446, + "grad_norm": 0.34914112091064453, + "learning_rate": 9.975494149565506e-05, + "loss": 0.3757, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.13062501321045847, + "grad_norm": 0.3894507586956024, + "learning_rate": 9.975329540427346e-05, + "loss": 0.3717, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.13104774788104245, + "grad_norm": 0.41183900833129883, + "learning_rate": 9.975164381654457e-05, + "loss": 0.3803, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.13147048255162647, + "grad_norm": 0.3494017422199249, + "learning_rate": 9.974998673265081e-05, + "loss": 0.3758, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.13189321722221048, + "grad_norm": 0.3302718997001648, + "learning_rate": 9.974832415277527e-05, + "loss": 0.3719, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1323159518927945, + "grad_norm": 0.2855677306652069, + "learning_rate": 9.974665607710161e-05, + "loss": 0.3735, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1327386865633785, + "grad_norm": 0.36685052514076233, + "learning_rate": 9.974498250581412e-05, + "loss": 0.3731, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1331614212339625, + "grad_norm": 0.4018462300300598, + "learning_rate": 9.974330343909767e-05, + "loss": 0.3785, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1335841559045465, + "grad_norm": 0.3625712990760803, + "learning_rate": 9.974161887713775e-05, + "loss": 0.3774, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.13400689057513052, + "grad_norm": 0.4217083156108856, + "learning_rate": 9.973992882012045e-05, + "loss": 0.3756, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.13442962524571453, + "grad_norm": 0.48154520988464355, + "learning_rate": 9.973823326823249e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.13485235991629854, + "grad_norm": 0.403174489736557, + "learning_rate": 9.973653222166117e-05, + "loss": 0.3762, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.13527509458688255, + "grad_norm": 0.3221226930618286, + "learning_rate": 9.973482568059443e-05, + "loss": 0.3716, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.13569782925746654, + "grad_norm": 0.35904166102409363, + "learning_rate": 9.973311364522076e-05, + "loss": 0.371, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.13612056392805055, + "grad_norm": 0.37680402398109436, + "learning_rate": 9.97313961157293e-05, + "loss": 0.3696, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.13654329859863457, + "grad_norm": 0.4115135967731476, + "learning_rate": 9.97296730923098e-05, + "loss": 0.3723, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.13696603326921858, + "grad_norm": 0.34188392758369446, + "learning_rate": 9.972794457515262e-05, + "loss": 0.3711, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1373887679398026, + "grad_norm": 0.3198833167552948, + "learning_rate": 9.972621056444869e-05, + "loss": 0.3763, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.13781150261038658, + "grad_norm": 0.4039269983768463, + "learning_rate": 9.972447106038957e-05, + "loss": 0.375, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1382342372809706, + "grad_norm": 0.39218267798423767, + "learning_rate": 9.972272606316744e-05, + "loss": 0.3771, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1386569719515546, + "grad_norm": 0.3440128266811371, + "learning_rate": 9.972097557297507e-05, + "loss": 0.3737, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.13907970662213862, + "grad_norm": 0.3171690106391907, + "learning_rate": 9.97192195900058e-05, + "loss": 0.3752, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.13950244129272263, + "grad_norm": 0.3252759575843811, + "learning_rate": 9.97174581144537e-05, + "loss": 0.3712, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.13992517596330664, + "grad_norm": 0.28817737102508545, + "learning_rate": 9.971569114651329e-05, + "loss": 0.3746, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.14034791063389063, + "grad_norm": 0.3335595726966858, + "learning_rate": 9.97139186863798e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.14077064530447464, + "grad_norm": 0.35422056913375854, + "learning_rate": 9.971214073424905e-05, + "loss": 0.3708, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.14119337997505865, + "grad_norm": 0.33759742975234985, + "learning_rate": 9.971035729031743e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.14161611464564267, + "grad_norm": 0.32206785678863525, + "learning_rate": 9.970856835478197e-05, + "loss": 0.368, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.14203884931622668, + "grad_norm": 0.29024723172187805, + "learning_rate": 9.97067739278403e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1424615839868107, + "grad_norm": 0.35258811712265015, + "learning_rate": 9.970497400969063e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.14288431865739468, + "grad_norm": 0.36694902181625366, + "learning_rate": 9.970316860053184e-05, + "loss": 0.3685, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1433070533279787, + "grad_norm": 0.3411976099014282, + "learning_rate": 9.970135770056334e-05, + "loss": 0.3761, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1437297879985627, + "grad_norm": 0.3484957218170166, + "learning_rate": 9.969954130998523e-05, + "loss": 0.3728, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.14415252266914672, + "grad_norm": 0.34768611192703247, + "learning_rate": 9.969771942899812e-05, + "loss": 0.375, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.14457525733973073, + "grad_norm": 0.2909049987792969, + "learning_rate": 9.969589205780332e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1449979920103147, + "grad_norm": 0.3651459217071533, + "learning_rate": 9.969405919660267e-05, + "loss": 0.3746, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.14542072668089873, + "grad_norm": 0.35090869665145874, + "learning_rate": 9.969222084559867e-05, + "loss": 0.3697, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.14584346135148274, + "grad_norm": 0.35221967101097107, + "learning_rate": 9.969037700499439e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.14626619602206675, + "grad_norm": 0.36162465810775757, + "learning_rate": 9.968852767499354e-05, + "loss": 0.3728, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.14668893069265077, + "grad_norm": 0.33175188302993774, + "learning_rate": 9.96866728558004e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.14711166536323478, + "grad_norm": 0.43245822191238403, + "learning_rate": 9.96848125476199e-05, + "loss": 0.3756, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.14753440003381876, + "grad_norm": 0.29240161180496216, + "learning_rate": 9.968294675065752e-05, + "loss": 0.375, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.14795713470440278, + "grad_norm": 0.3187413811683655, + "learning_rate": 9.968107546511942e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1483798693749868, + "grad_norm": 0.3465772271156311, + "learning_rate": 9.967919869121229e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1488026040455708, + "grad_norm": 0.36339762806892395, + "learning_rate": 9.967731642914347e-05, + "loss": 0.3745, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.14922533871615482, + "grad_norm": 0.3722393810749054, + "learning_rate": 9.967542867912091e-05, + "loss": 0.3717, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1496480733867388, + "grad_norm": 0.3705660402774811, + "learning_rate": 9.967353544135314e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1500708080573228, + "grad_norm": 0.3620944619178772, + "learning_rate": 9.967163671604931e-05, + "loss": 0.3691, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.15049354272790683, + "grad_norm": 0.35326331853866577, + "learning_rate": 9.966973250341918e-05, + "loss": 0.37, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.15091627739849084, + "grad_norm": 0.37920811772346497, + "learning_rate": 9.96678228036731e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.15133901206907485, + "grad_norm": 0.32309281826019287, + "learning_rate": 9.966590761702207e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.15176174673965886, + "grad_norm": 0.31654083728790283, + "learning_rate": 9.966398694367765e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.15218448141024285, + "grad_norm": 0.4941222369670868, + "learning_rate": 9.966206078385199e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.15260721608082686, + "grad_norm": 0.30994418263435364, + "learning_rate": 9.966012913775792e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.15302995075141088, + "grad_norm": 0.38143959641456604, + "learning_rate": 9.965819200560881e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1534526854219949, + "grad_norm": 0.40366026759147644, + "learning_rate": 9.965624938761867e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1538754200925789, + "grad_norm": 0.33808547258377075, + "learning_rate": 9.965430128400208e-05, + "loss": 0.3767, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.15429815476316291, + "grad_norm": 0.3471464514732361, + "learning_rate": 9.96523476949743e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1547208894337469, + "grad_norm": 0.3172456920146942, + "learning_rate": 9.96503886207511e-05, + "loss": 0.3707, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1551436241043309, + "grad_norm": 0.3332526385784149, + "learning_rate": 9.96484240615489e-05, + "loss": 0.3694, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.15556635877491493, + "grad_norm": 0.3314318358898163, + "learning_rate": 9.964645401758477e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.15598909344549894, + "grad_norm": 0.29616549611091614, + "learning_rate": 9.964447848907632e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.15641182811608295, + "grad_norm": 0.3450833857059479, + "learning_rate": 9.964249747624179e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.15683456278666694, + "grad_norm": 0.3139835596084595, + "learning_rate": 9.964051097930005e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.15725729745725095, + "grad_norm": 0.39813393354415894, + "learning_rate": 9.963851899847053e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.15768003212783496, + "grad_norm": 0.4093906581401825, + "learning_rate": 9.963652153397325e-05, + "loss": 0.3766, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.15810276679841898, + "grad_norm": 0.2914859652519226, + "learning_rate": 9.963451858602895e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.158525501469003, + "grad_norm": 0.46909236907958984, + "learning_rate": 9.963251015485887e-05, + "loss": 0.3732, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.158948236139587, + "grad_norm": 0.3512389659881592, + "learning_rate": 9.963049624068486e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.159370970810171, + "grad_norm": 0.3240947127342224, + "learning_rate": 9.962847684372942e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.159793705480755, + "grad_norm": 0.32013750076293945, + "learning_rate": 9.962645196421566e-05, + "loss": 0.3727, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.160216440151339, + "grad_norm": 0.3417747914791107, + "learning_rate": 9.962442160236723e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.16063917482192303, + "grad_norm": 0.3912096917629242, + "learning_rate": 9.962238575840847e-05, + "loss": 0.3743, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.16106190949250704, + "grad_norm": 0.43111467361450195, + "learning_rate": 9.962034443256426e-05, + "loss": 0.3702, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.16148464416309102, + "grad_norm": 0.3466228246688843, + "learning_rate": 9.961829762506009e-05, + "loss": 0.3736, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.16190737883367504, + "grad_norm": 0.41003668308258057, + "learning_rate": 9.961624533612211e-05, + "loss": 0.3706, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.16233011350425905, + "grad_norm": 0.3276771605014801, + "learning_rate": 9.961418756597702e-05, + "loss": 0.37, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.16275284817484306, + "grad_norm": 0.3247922658920288, + "learning_rate": 9.961212431485216e-05, + "loss": 0.3826, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.16317558284542708, + "grad_norm": 0.3428981602191925, + "learning_rate": 9.961005558297545e-05, + "loss": 0.3707, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1635983175160111, + "grad_norm": 0.34923723340034485, + "learning_rate": 9.960798137057544e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.16402105218659507, + "grad_norm": 0.36241087317466736, + "learning_rate": 9.960590167788126e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1644437868571791, + "grad_norm": 0.2714046537876129, + "learning_rate": 9.960381650512267e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1648665215277631, + "grad_norm": 0.35071805119514465, + "learning_rate": 9.960172585253e-05, + "loss": 0.3701, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1652892561983471, + "grad_norm": 0.35849741101264954, + "learning_rate": 9.959962972033423e-05, + "loss": 0.3711, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.16571199086893112, + "grad_norm": 0.3085476756095886, + "learning_rate": 9.959752810876692e-05, + "loss": 0.3705, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1661347255395151, + "grad_norm": 0.44853153824806213, + "learning_rate": 9.959542101806026e-05, + "loss": 0.3722, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.16655746021009912, + "grad_norm": 0.3724740445613861, + "learning_rate": 9.959330844844697e-05, + "loss": 0.3763, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.16698019488068314, + "grad_norm": 0.3317045271396637, + "learning_rate": 9.959119040016047e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.16740292955126715, + "grad_norm": 0.333893746137619, + "learning_rate": 9.958906687343474e-05, + "loss": 0.3706, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.16782566422185116, + "grad_norm": 0.3398449122905731, + "learning_rate": 9.958693786850437e-05, + "loss": 0.3699, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.16824839889243517, + "grad_norm": 0.33153781294822693, + "learning_rate": 9.958480338560454e-05, + "loss": 0.3709, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.16867113356301916, + "grad_norm": 0.3153848648071289, + "learning_rate": 9.958266342497108e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 3990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.16909386823360317, + "grad_norm": 0.3541620671749115, + "learning_rate": 9.958051798684037e-05, + "loss": 0.3701, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.16951660290418719, + "grad_norm": 0.3397897183895111, + "learning_rate": 9.957836707144944e-05, + "loss": 0.3697, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1699393375747712, + "grad_norm": 0.3467687964439392, + "learning_rate": 9.957621067903589e-05, + "loss": 0.3726, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1703620722453552, + "grad_norm": 0.27241402864456177, + "learning_rate": 9.957404880983795e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.17078480691593922, + "grad_norm": 0.22731398046016693, + "learning_rate": 9.957188146409442e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1712075415865232, + "grad_norm": 0.36170700192451477, + "learning_rate": 9.956970864204478e-05, + "loss": 0.3697, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.17163027625710722, + "grad_norm": 0.3170284330844879, + "learning_rate": 9.956753034392904e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.17205301092769124, + "grad_norm": 0.32717517018318176, + "learning_rate": 9.956534656998784e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.17247574559827525, + "grad_norm": 0.3659948408603668, + "learning_rate": 9.956315732046243e-05, + "loss": 0.3709, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.17289848026885926, + "grad_norm": 0.3723655045032501, + "learning_rate": 9.956096259559463e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.17332121493944325, + "grad_norm": 0.320584237575531, + "learning_rate": 9.955876239562695e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.17374394961002726, + "grad_norm": 0.3363623023033142, + "learning_rate": 9.955655672080241e-05, + "loss": 0.378, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.17416668428061127, + "grad_norm": 0.29168403148651123, + "learning_rate": 9.95543455713647e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.17458941895119529, + "grad_norm": 0.3680339753627777, + "learning_rate": 9.955212894755807e-05, + "loss": 0.3735, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1750121536217793, + "grad_norm": 0.39286333322525024, + "learning_rate": 9.954990684962742e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1754348882923633, + "grad_norm": 0.37998440861701965, + "learning_rate": 9.954767927781821e-05, + "loss": 0.37, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1758576229629473, + "grad_norm": 0.28728610277175903, + "learning_rate": 9.954544623237653e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1762803576335313, + "grad_norm": 0.3091057240962982, + "learning_rate": 9.954320771354906e-05, + "loss": 0.3702, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.17670309230411532, + "grad_norm": 0.3205787241458893, + "learning_rate": 9.95409637215831e-05, + "loss": 0.3703, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.17712582697469934, + "grad_norm": 0.3171769380569458, + "learning_rate": 9.953871425672657e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.17754856164528335, + "grad_norm": 0.42323023080825806, + "learning_rate": 9.953645931922792e-05, + "loss": 0.3726, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.17797129631586733, + "grad_norm": 0.33009710907936096, + "learning_rate": 9.953419890933632e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.17839403098645135, + "grad_norm": 0.399915486574173, + "learning_rate": 9.953193302730144e-05, + "loss": 0.3716, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.17881676565703536, + "grad_norm": 0.35240820050239563, + "learning_rate": 9.95296616733736e-05, + "loss": 0.3698, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.17923950032761937, + "grad_norm": 0.44713103771209717, + "learning_rate": 9.952738484780376e-05, + "loss": 0.3747, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.17966223499820339, + "grad_norm": 0.3555920720100403, + "learning_rate": 9.952510255084338e-05, + "loss": 0.3712, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1800849696687874, + "grad_norm": 0.3715249300003052, + "learning_rate": 9.952281478274465e-05, + "loss": 0.3735, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.18050770433937138, + "grad_norm": 0.49289989471435547, + "learning_rate": 9.952052154376026e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1809304390099554, + "grad_norm": 0.31295114755630493, + "learning_rate": 9.951822283414358e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1813531736805394, + "grad_norm": 0.3171769976615906, + "learning_rate": 9.951591865414855e-05, + "loss": 0.3693, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.18177590835112342, + "grad_norm": 0.5297925472259521, + "learning_rate": 9.95136090040297e-05, + "loss": 0.3735, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.18219864302170743, + "grad_norm": 0.3064420819282532, + "learning_rate": 9.951129388404219e-05, + "loss": 0.368, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.18262137769229142, + "grad_norm": 0.3169324994087219, + "learning_rate": 9.950897329444177e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.18304411236287543, + "grad_norm": 0.40489691495895386, + "learning_rate": 9.950664723548482e-05, + "loss": 0.3721, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.18346684703345945, + "grad_norm": 0.31811535358428955, + "learning_rate": 9.950431570742829e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.18388958170404346, + "grad_norm": 0.3823894262313843, + "learning_rate": 9.950197871052974e-05, + "loss": 0.3798, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.18431231637462747, + "grad_norm": 0.35948964953422546, + "learning_rate": 9.949963624504737e-05, + "loss": 0.3743, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.18473505104521148, + "grad_norm": 0.3079812824726105, + "learning_rate": 9.949728831123993e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.18515778571579547, + "grad_norm": 0.3705575168132782, + "learning_rate": 9.949493490936681e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.18558052038637948, + "grad_norm": 0.2923615574836731, + "learning_rate": 9.949257603968798e-05, + "loss": 0.3724, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1860032550569635, + "grad_norm": 0.29001665115356445, + "learning_rate": 9.949021170246407e-05, + "loss": 0.3694, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1864259897275475, + "grad_norm": 0.3250223994255066, + "learning_rate": 9.948784189795623e-05, + "loss": 0.3749, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.18684872439813152, + "grad_norm": 0.3513059914112091, + "learning_rate": 9.94854666264263e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.18727145906871553, + "grad_norm": 0.37375393509864807, + "learning_rate": 9.948308588813665e-05, + "loss": 0.3702, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.18769419373929952, + "grad_norm": 0.4029262363910675, + "learning_rate": 9.948069968335027e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.18811692840988353, + "grad_norm": 0.23709559440612793, + "learning_rate": 9.94783080123308e-05, + "loss": 0.3692, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.18853966308046755, + "grad_norm": 0.293226420879364, + "learning_rate": 9.947591087534244e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.18896239775105156, + "grad_norm": 0.25836747884750366, + "learning_rate": 9.947350827265003e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.18938513242163557, + "grad_norm": 0.3088396191596985, + "learning_rate": 9.947110020451895e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.18980786709221956, + "grad_norm": 0.34934407472610474, + "learning_rate": 9.946868667121525e-05, + "loss": 0.3754, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.19023060176280357, + "grad_norm": 0.4338078498840332, + "learning_rate": 9.946626767300556e-05, + "loss": 0.3723, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.19065333643338758, + "grad_norm": 0.3142784833908081, + "learning_rate": 9.946384321015709e-05, + "loss": 0.3721, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1910760711039716, + "grad_norm": 0.3630223870277405, + "learning_rate": 9.94614132829377e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1914988057745556, + "grad_norm": 0.34624168276786804, + "learning_rate": 9.945897789161581e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.19192154044513962, + "grad_norm": 0.4198145270347595, + "learning_rate": 9.945653703646047e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1923442751157236, + "grad_norm": 0.25219252705574036, + "learning_rate": 9.945409071774133e-05, + "loss": 0.3691, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.19276700978630762, + "grad_norm": 0.37824782729148865, + "learning_rate": 9.945163893572865e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.19318974445689163, + "grad_norm": 0.33213314414024353, + "learning_rate": 9.944918169069326e-05, + "loss": 0.3831, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.19361247912747565, + "grad_norm": 0.2731019854545593, + "learning_rate": 9.94467189829066e-05, + "loss": 0.3729, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.19403521379805966, + "grad_norm": 0.3181804120540619, + "learning_rate": 9.944425081264079e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.19445794846864364, + "grad_norm": 0.352775901556015, + "learning_rate": 9.944177718016845e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.19488068313922766, + "grad_norm": 0.3134710490703583, + "learning_rate": 9.943929808576287e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.19530341780981167, + "grad_norm": 0.2670317590236664, + "learning_rate": 9.943681352969789e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.19572615248039568, + "grad_norm": 0.3988470733165741, + "learning_rate": 9.943432351224801e-05, + "loss": 0.3707, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1961488871509797, + "grad_norm": 0.3216482102870941, + "learning_rate": 9.943182803368829e-05, + "loss": 0.3721, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1965716218215637, + "grad_norm": 0.3827167749404907, + "learning_rate": 9.942932709429444e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1969943564921477, + "grad_norm": 0.3243727684020996, + "learning_rate": 9.942682069434272e-05, + "loss": 0.3693, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.1974170911627317, + "grad_norm": 0.29885315895080566, + "learning_rate": 9.942430883411001e-05, + "loss": 0.374, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.19783982583331572, + "grad_norm": 0.31837576627731323, + "learning_rate": 9.942179151387381e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.19826256050389973, + "grad_norm": 0.45830047130584717, + "learning_rate": 9.941926873391223e-05, + "loss": 0.3684, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.19868529517448374, + "grad_norm": 0.2927664816379547, + "learning_rate": 9.941674049450393e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.19910802984506773, + "grad_norm": 0.3339039385318756, + "learning_rate": 9.941420679592825e-05, + "loss": 0.3705, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.19953076451565174, + "grad_norm": 0.26133981347084045, + "learning_rate": 9.941166763846508e-05, + "loss": 0.3685, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.19995349918623576, + "grad_norm": 0.3379649221897125, + "learning_rate": 9.940912302239491e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.20037623385681977, + "grad_norm": 0.3078257441520691, + "learning_rate": 9.940657294799885e-05, + "loss": 0.3743, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.20079896852740378, + "grad_norm": 0.3279525339603424, + "learning_rate": 9.940401741555863e-05, + "loss": 0.3694, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2012217031979878, + "grad_norm": 0.31777504086494446, + "learning_rate": 9.940145642535657e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.20164443786857178, + "grad_norm": 0.36387500166893005, + "learning_rate": 9.939888997767555e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2020671725391558, + "grad_norm": 0.3728339970111847, + "learning_rate": 9.939631807279912e-05, + "loss": 0.3749, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2024899072097398, + "grad_norm": 0.29449307918548584, + "learning_rate": 9.93937407110114e-05, + "loss": 0.3712, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.20291264188032382, + "grad_norm": 0.3560914397239685, + "learning_rate": 9.93911578925971e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.20333537655090783, + "grad_norm": 0.300055593252182, + "learning_rate": 9.938856961784159e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.20375811122149184, + "grad_norm": 0.3730282485485077, + "learning_rate": 9.938597588703076e-05, + "loss": 0.3746, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.20418084589207583, + "grad_norm": 0.29869571328163147, + "learning_rate": 9.938337670045117e-05, + "loss": 0.3684, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.20460358056265984, + "grad_norm": 0.32646504044532776, + "learning_rate": 9.938077205838993e-05, + "loss": 0.3744, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.20502631523324386, + "grad_norm": 0.358453631401062, + "learning_rate": 9.937816196113481e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.20544904990382787, + "grad_norm": 0.3021109998226166, + "learning_rate": 9.937554640897413e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.20587178457441188, + "grad_norm": 0.29870015382766724, + "learning_rate": 9.937292540219686e-05, + "loss": 0.3714, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.20629451924499587, + "grad_norm": 0.29040610790252686, + "learning_rate": 9.937029894109252e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.20671725391557988, + "grad_norm": 0.3761932849884033, + "learning_rate": 9.936766702595128e-05, + "loss": 0.3735, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2071399885861639, + "grad_norm": 0.30568650364875793, + "learning_rate": 9.93650296570639e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2075627232567479, + "grad_norm": 0.29320210218429565, + "learning_rate": 9.93623868347217e-05, + "loss": 0.3762, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.20798545792733192, + "grad_norm": 0.28238368034362793, + "learning_rate": 9.935973855921667e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.20840819259791593, + "grad_norm": 0.27401310205459595, + "learning_rate": 9.935708483084136e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.20883092726849992, + "grad_norm": 0.3683825433254242, + "learning_rate": 9.935442564988892e-05, + "loss": 0.3691, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.20925366193908393, + "grad_norm": 0.28900885581970215, + "learning_rate": 9.935176101665316e-05, + "loss": 0.3691, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.20967639660966794, + "grad_norm": 0.34743884205818176, + "learning_rate": 9.934909093142839e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.21009913128025196, + "grad_norm": 0.38701438903808594, + "learning_rate": 9.934641539450961e-05, + "loss": 0.3684, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.21052186595083597, + "grad_norm": 0.3102369010448456, + "learning_rate": 9.934373440619238e-05, + "loss": 0.3722, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.21094460062141995, + "grad_norm": 0.28667306900024414, + "learning_rate": 9.934104796677291e-05, + "loss": 0.3771, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 4990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.21136733529200397, + "grad_norm": 0.2843165099620819, + "learning_rate": 9.933835607654792e-05, + "loss": 0.3764, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.21179006996258798, + "grad_norm": 0.3274308741092682, + "learning_rate": 9.933565873581483e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.212212804633172, + "grad_norm": 0.34651151299476624, + "learning_rate": 9.933295594487159e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.212635539303756, + "grad_norm": 0.33205491304397583, + "learning_rate": 9.933024770401682e-05, + "loss": 0.3692, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.21305827397434002, + "grad_norm": 0.2990874946117401, + "learning_rate": 9.932753401354968e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.213481008644924, + "grad_norm": 0.2735016345977783, + "learning_rate": 9.932481487376997e-05, + "loss": 0.3761, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.21390374331550802, + "grad_norm": 0.27834948897361755, + "learning_rate": 9.932209028497806e-05, + "loss": 0.3727, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.21432647798609203, + "grad_norm": 0.4598560631275177, + "learning_rate": 9.931936024747495e-05, + "loss": 0.3723, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.21474921265667604, + "grad_norm": 0.35691672563552856, + "learning_rate": 9.931662476156224e-05, + "loss": 0.3784, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.21517194732726005, + "grad_norm": 0.3425796627998352, + "learning_rate": 9.931388382754212e-05, + "loss": 0.3707, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.21559468199784404, + "grad_norm": 0.3163090646266937, + "learning_rate": 9.931113744571739e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.21601741666842805, + "grad_norm": 0.28969883918762207, + "learning_rate": 9.930838561639142e-05, + "loss": 0.3716, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.21644015133901207, + "grad_norm": 0.3263460695743561, + "learning_rate": 9.930562833986825e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.21686288600959608, + "grad_norm": 0.2825050950050354, + "learning_rate": 9.930286561645248e-05, + "loss": 0.3692, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2172856206801801, + "grad_norm": 0.28846216201782227, + "learning_rate": 9.930009744644928e-05, + "loss": 0.3713, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2177083553507641, + "grad_norm": 0.27928784489631653, + "learning_rate": 9.929732383016447e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2181310900213481, + "grad_norm": 0.29804128408432007, + "learning_rate": 9.92945447679045e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2185538246919321, + "grad_norm": 0.2990981936454773, + "learning_rate": 9.92917602599763e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.21897655936251612, + "grad_norm": 0.42071375250816345, + "learning_rate": 9.928897030668754e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.21939929403310013, + "grad_norm": 0.342602401971817, + "learning_rate": 9.92861749083464e-05, + "loss": 0.3709, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.21982202870368414, + "grad_norm": 0.3220101296901703, + "learning_rate": 9.928337406526172e-05, + "loss": 0.3776, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.22024476337426815, + "grad_norm": 0.33327916264533997, + "learning_rate": 9.928056777774291e-05, + "loss": 0.37, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.22066749804485214, + "grad_norm": 0.4209468364715576, + "learning_rate": 9.927775604609994e-05, + "loss": 0.3728, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.22109023271543615, + "grad_norm": 0.3477969467639923, + "learning_rate": 9.92749388706435e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.22151296738602017, + "grad_norm": 0.2833927869796753, + "learning_rate": 9.927211625168476e-05, + "loss": 0.3711, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.22193570205660418, + "grad_norm": 0.25705254077911377, + "learning_rate": 9.926928818953556e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2223584367271882, + "grad_norm": 0.2696680724620819, + "learning_rate": 9.92664546845083e-05, + "loss": 0.3691, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.22278117139777218, + "grad_norm": 0.3077714443206787, + "learning_rate": 9.926361573691603e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2232039060683562, + "grad_norm": 0.3576662242412567, + "learning_rate": 9.926077134707236e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2236266407389402, + "grad_norm": 0.26451051235198975, + "learning_rate": 9.925792151529154e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.22404937540952422, + "grad_norm": 0.3434135615825653, + "learning_rate": 9.925506624188836e-05, + "loss": 0.3778, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.22447211008010823, + "grad_norm": 0.31630373001098633, + "learning_rate": 9.925220552717826e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.22489484475069224, + "grad_norm": 0.26238343119621277, + "learning_rate": 9.924933937147726e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.22531757942127623, + "grad_norm": 0.26873451471328735, + "learning_rate": 9.924646777510202e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.22574031409186024, + "grad_norm": 0.3215339779853821, + "learning_rate": 9.924359073836976e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.22616304876244425, + "grad_norm": 0.33430215716362, + "learning_rate": 9.92407082615983e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.22658578343302826, + "grad_norm": 0.35590219497680664, + "learning_rate": 9.923782034510607e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.22700851810361228, + "grad_norm": 0.28185099363327026, + "learning_rate": 9.923492698921214e-05, + "loss": 0.3722, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.22743125277419626, + "grad_norm": 0.30604037642478943, + "learning_rate": 9.923202819423608e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.22785398744478028, + "grad_norm": 0.29311704635620117, + "learning_rate": 9.922912396049817e-05, + "loss": 0.3678, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2282767221153643, + "grad_norm": 0.2782231867313385, + "learning_rate": 9.922621428831925e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2286994567859483, + "grad_norm": 0.270018994808197, + "learning_rate": 9.922329917802076e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.22912219145653231, + "grad_norm": 0.23649287223815918, + "learning_rate": 9.92203786299247e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.22954492612711633, + "grad_norm": 0.3847610354423523, + "learning_rate": 9.921745264435373e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2299676607977003, + "grad_norm": 0.3118453621864319, + "learning_rate": 9.921452122163113e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.23039039546828433, + "grad_norm": 0.25769007205963135, + "learning_rate": 9.921158436208068e-05, + "loss": 0.3691, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.23081313013886834, + "grad_norm": 0.3470607101917267, + "learning_rate": 9.920864206602684e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.23123586480945235, + "grad_norm": 0.2645820379257202, + "learning_rate": 9.920569433379468e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.23165859948003636, + "grad_norm": 0.35822415351867676, + "learning_rate": 9.92027411657098e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.23208133415062035, + "grad_norm": 0.2900504171848297, + "learning_rate": 9.919978256209845e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.23250406882120436, + "grad_norm": 0.3309238851070404, + "learning_rate": 9.919681852328751e-05, + "loss": 0.3711, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.23292680349178838, + "grad_norm": 0.3280176520347595, + "learning_rate": 9.919384904960437e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2333495381623724, + "grad_norm": 0.3034656047821045, + "learning_rate": 9.919087414137711e-05, + "loss": 0.374, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2337722728329564, + "grad_norm": 0.28276920318603516, + "learning_rate": 9.918789379893437e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.23419500750354041, + "grad_norm": 0.27182644605636597, + "learning_rate": 9.918490802260538e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2346177421741244, + "grad_norm": 0.27875614166259766, + "learning_rate": 9.918191681272e-05, + "loss": 0.3719, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2350404768447084, + "grad_norm": 0.31888729333877563, + "learning_rate": 9.917892016960869e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.23546321151529243, + "grad_norm": 0.29894840717315674, + "learning_rate": 9.917591809360244e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.23588594618587644, + "grad_norm": 0.3510309159755707, + "learning_rate": 9.917291058503295e-05, + "loss": 0.3691, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.23630868085646045, + "grad_norm": 0.35647472739219666, + "learning_rate": 9.916989764423244e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.23673141552704446, + "grad_norm": 0.26439762115478516, + "learning_rate": 9.916687927153376e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.23715415019762845, + "grad_norm": 0.2790079414844513, + "learning_rate": 9.916385546727036e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.23757688486821246, + "grad_norm": 0.3031173646450043, + "learning_rate": 9.916082623177627e-05, + "loss": 0.3697, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.23799961953879648, + "grad_norm": 0.35308918356895447, + "learning_rate": 9.915779156538615e-05, + "loss": 0.3715, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2384223542093805, + "grad_norm": 0.27427732944488525, + "learning_rate": 9.915475146843527e-05, + "loss": 0.3715, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2388450888799645, + "grad_norm": 0.30625995993614197, + "learning_rate": 9.915170594125941e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2392678235505485, + "grad_norm": 0.28917935490608215, + "learning_rate": 9.91486549841951e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2396905582211325, + "grad_norm": 0.3109765648841858, + "learning_rate": 9.91455985975793e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2401132928917165, + "grad_norm": 0.32658371329307556, + "learning_rate": 9.914253678174971e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.24053602756230053, + "grad_norm": 0.3816923499107361, + "learning_rate": 9.913946953704458e-05, + "loss": 0.3762, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.24095876223288454, + "grad_norm": 0.2245430201292038, + "learning_rate": 9.913639686380272e-05, + "loss": 0.3696, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.24138149690346855, + "grad_norm": 0.32036635279655457, + "learning_rate": 9.913331876236358e-05, + "loss": 0.3763, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.24180423157405254, + "grad_norm": 0.36646655201911926, + "learning_rate": 9.913023523306723e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.24222696624463655, + "grad_norm": 0.24143271148204803, + "learning_rate": 9.91271462762543e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.24264970091522056, + "grad_norm": 0.32148897647857666, + "learning_rate": 9.912405189226602e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.24307243558580457, + "grad_norm": 0.3327990770339966, + "learning_rate": 9.912095208144424e-05, + "loss": 0.3759, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2434951702563886, + "grad_norm": 0.33844858407974243, + "learning_rate": 9.91178468441314e-05, + "loss": 0.3735, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.24391790492697257, + "grad_norm": 0.2503212094306946, + "learning_rate": 9.911473618067057e-05, + "loss": 0.3764, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.24434063959755659, + "grad_norm": 0.3005054295063019, + "learning_rate": 9.911162009140537e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2447633742681406, + "grad_norm": 0.3048080503940582, + "learning_rate": 9.910849857668004e-05, + "loss": 0.3696, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2451861089387246, + "grad_norm": 0.2989408075809479, + "learning_rate": 9.910537163683939e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.24560884360930862, + "grad_norm": 0.35985973477363586, + "learning_rate": 9.910223927222892e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.24603157827989264, + "grad_norm": 0.2286856472492218, + "learning_rate": 9.909910148319462e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.24645431295047662, + "grad_norm": 0.27875959873199463, + "learning_rate": 9.909595827008316e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.24687704762106064, + "grad_norm": 0.21971681714057922, + "learning_rate": 9.909280963324176e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.24729978229164465, + "grad_norm": 0.3611120581626892, + "learning_rate": 9.908965557301826e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.24772251696222866, + "grad_norm": 0.30712229013442993, + "learning_rate": 9.908649608976109e-05, + "loss": 0.3772, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.24814525163281267, + "grad_norm": 0.2710953652858734, + "learning_rate": 9.90833311838193e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.24856798630339666, + "grad_norm": 0.2589755654335022, + "learning_rate": 9.90801608555425e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.24899072097398067, + "grad_norm": 0.2748330533504486, + "learning_rate": 9.907698510528094e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.24941345564456469, + "grad_norm": 0.3943820595741272, + "learning_rate": 9.907380393338544e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2498361903151487, + "grad_norm": 0.25345736742019653, + "learning_rate": 9.907061734020745e-05, + "loss": 0.3702, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2502589249857327, + "grad_norm": 0.34278690814971924, + "learning_rate": 9.906742532609899e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2506816596563167, + "grad_norm": 0.28218165040016174, + "learning_rate": 9.906422789141268e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.25110439432690074, + "grad_norm": 0.21986664831638336, + "learning_rate": 9.906102503650174e-05, + "loss": 0.3716, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.25152712899748475, + "grad_norm": 0.24733497202396393, + "learning_rate": 9.905781676172002e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.25194986366806876, + "grad_norm": 0.2556326985359192, + "learning_rate": 9.905460306742193e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2523725983386527, + "grad_norm": 0.2738790214061737, + "learning_rate": 9.905138395396251e-05, + "loss": 0.3744, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.25279533300923673, + "grad_norm": 0.23880572617053986, + "learning_rate": 9.904815942169736e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.25321806767982075, + "grad_norm": 0.2600707411766052, + "learning_rate": 9.904492947098269e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 5990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.25364080235040476, + "grad_norm": 0.29769840836524963, + "learning_rate": 9.904169410217537e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.25406353702098877, + "grad_norm": 0.30044373869895935, + "learning_rate": 9.903845331563278e-05, + "loss": 0.3699, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2544862716915728, + "grad_norm": 0.24489979445934296, + "learning_rate": 9.903520711171293e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2549090063621568, + "grad_norm": 0.2743474245071411, + "learning_rate": 9.903195549077444e-05, + "loss": 0.3709, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2553317410327408, + "grad_norm": 0.25198668241500854, + "learning_rate": 9.902869845317652e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2557544757033248, + "grad_norm": 0.42758288979530334, + "learning_rate": 9.902543599927902e-05, + "loss": 0.3694, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.25617721037390884, + "grad_norm": 0.30598175525665283, + "learning_rate": 9.902216812944232e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.25659994504449285, + "grad_norm": 0.29804834723472595, + "learning_rate": 9.901889484402742e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2570226797150768, + "grad_norm": 0.24450832605361938, + "learning_rate": 9.901561614339593e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2574454143856608, + "grad_norm": 0.2909616529941559, + "learning_rate": 9.901266068311677e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.25786814905624483, + "grad_norm": 0.309601753950119, + "learning_rate": 9.900937169457215e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.25829088372682885, + "grad_norm": 0.32557442784309387, + "learning_rate": 9.900607729186298e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.25871361839741286, + "grad_norm": 0.2670527994632721, + "learning_rate": 9.900277747535322e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.25913635306799687, + "grad_norm": 0.29782310128211975, + "learning_rate": 9.899947224540738e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2595590877385809, + "grad_norm": 0.35445940494537354, + "learning_rate": 9.899616160239061e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2599818224091649, + "grad_norm": 0.3145839273929596, + "learning_rate": 9.899284554666866e-05, + "loss": 0.3725, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2604045570797489, + "grad_norm": 0.3287108838558197, + "learning_rate": 9.898952407860782e-05, + "loss": 0.3691, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2608272917503329, + "grad_norm": 0.26978686451911926, + "learning_rate": 9.898619719857507e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.26125002642091694, + "grad_norm": 0.32764723896980286, + "learning_rate": 9.89828649069379e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2616727610915009, + "grad_norm": 0.2719414532184601, + "learning_rate": 9.897952720406444e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2620954957620849, + "grad_norm": 0.24891772866249084, + "learning_rate": 9.897618409032343e-05, + "loss": 0.371, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2625182304326689, + "grad_norm": 0.2508772611618042, + "learning_rate": 9.897283556608416e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.26294096510325293, + "grad_norm": 0.27356716990470886, + "learning_rate": 9.896948163171659e-05, + "loss": 0.3569, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.26336369977383695, + "grad_norm": 0.2934323847293854, + "learning_rate": 9.89661222875912e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.26378643444442096, + "grad_norm": 0.3489281237125397, + "learning_rate": 9.896275753407912e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.26420916911500497, + "grad_norm": 0.26084935665130615, + "learning_rate": 9.895938737155206e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.264631903785589, + "grad_norm": 0.3025212585926056, + "learning_rate": 9.895601180038233e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.265054638456173, + "grad_norm": 0.2697588801383972, + "learning_rate": 9.895263082094283e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.265477373126757, + "grad_norm": 0.23360101878643036, + "learning_rate": 9.894924443360707e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.265900107797341, + "grad_norm": 0.29226014018058777, + "learning_rate": 9.894585263874914e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.266322842467925, + "grad_norm": 0.32515406608581543, + "learning_rate": 9.894245543674375e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.266745577138509, + "grad_norm": 0.34937164187431335, + "learning_rate": 9.89390528279662e-05, + "loss": 0.3763, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.267168311809093, + "grad_norm": 0.23752422630786896, + "learning_rate": 9.893564481279235e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.267591046479677, + "grad_norm": 0.27129802107810974, + "learning_rate": 9.893223139159875e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.26801378115026103, + "grad_norm": 0.32095202803611755, + "learning_rate": 9.892881256476244e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.26843651582084505, + "grad_norm": 0.45490849018096924, + "learning_rate": 9.892538833266112e-05, + "loss": 0.37, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.26885925049142906, + "grad_norm": 0.3035055100917816, + "learning_rate": 9.892195869567307e-05, + "loss": 0.3741, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.26928198516201307, + "grad_norm": 0.29048824310302734, + "learning_rate": 9.891852365417715e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2697047198325971, + "grad_norm": 0.3297345042228699, + "learning_rate": 9.891508320855288e-05, + "loss": 0.3694, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2701274545031811, + "grad_norm": 0.24968573451042175, + "learning_rate": 9.891163735918029e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2705501891737651, + "grad_norm": 0.24985958635807037, + "learning_rate": 9.890818610644008e-05, + "loss": 0.3707, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.27097292384434907, + "grad_norm": 0.3130433261394501, + "learning_rate": 9.89047294507135e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2713956585149331, + "grad_norm": 0.3095228672027588, + "learning_rate": 9.890126739238241e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2718183931855171, + "grad_norm": 0.3014909625053406, + "learning_rate": 9.889779993182928e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2722411278561011, + "grad_norm": 0.26843729615211487, + "learning_rate": 9.889432706943717e-05, + "loss": 0.3698, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2726638625266851, + "grad_norm": 0.2751530706882477, + "learning_rate": 9.889084880558974e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.27308659719726913, + "grad_norm": 0.26305070519447327, + "learning_rate": 9.888736514067123e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.27350933186785314, + "grad_norm": 0.36979013681411743, + "learning_rate": 9.888387607506648e-05, + "loss": 0.38, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.27393206653843716, + "grad_norm": 0.2646428048610687, + "learning_rate": 9.888038160916093e-05, + "loss": 0.3691, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.27435480120902117, + "grad_norm": 0.23370973765850067, + "learning_rate": 9.887688174334066e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2747775358796052, + "grad_norm": 0.33312514424324036, + "learning_rate": 9.887337647799227e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2752002705501892, + "grad_norm": 0.2772720158100128, + "learning_rate": 9.886986581350301e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.27562300522077315, + "grad_norm": 0.2822073996067047, + "learning_rate": 9.88663497502607e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.27604573989135717, + "grad_norm": 0.40307214856147766, + "learning_rate": 9.886282828865376e-05, + "loss": 0.3693, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2764684745619412, + "grad_norm": 0.3816313147544861, + "learning_rate": 9.885930142907123e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2768912092325252, + "grad_norm": 0.2707776129245758, + "learning_rate": 9.885576917190274e-05, + "loss": 0.3678, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2773139439031092, + "grad_norm": 0.24381022155284882, + "learning_rate": 9.885223151753848e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2777366785736932, + "grad_norm": 0.19797243177890778, + "learning_rate": 9.884868846636927e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.27815941324427723, + "grad_norm": 0.2897120714187622, + "learning_rate": 9.88451400187865e-05, + "loss": 0.3743, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.27858214791486124, + "grad_norm": 0.2674011290073395, + "learning_rate": 9.884158617518222e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.27900488258544526, + "grad_norm": 0.29988306760787964, + "learning_rate": 9.8838026935949e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.27942761725602927, + "grad_norm": 0.2436377853155136, + "learning_rate": 9.883446230148004e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2798503519266133, + "grad_norm": 0.2724027633666992, + "learning_rate": 9.883089227216913e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2802730865971973, + "grad_norm": 0.2931690812110901, + "learning_rate": 9.882731684841066e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.28069582126778125, + "grad_norm": 0.2844920754432678, + "learning_rate": 9.882373603059961e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.28111855593836527, + "grad_norm": 0.28703904151916504, + "learning_rate": 9.882014981913157e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2815412906089493, + "grad_norm": 0.2646797299385071, + "learning_rate": 9.881655821440272e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2819640252795333, + "grad_norm": 0.30833256244659424, + "learning_rate": 9.88129612168098e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2823867599501173, + "grad_norm": 0.43559834361076355, + "learning_rate": 9.880935882675022e-05, + "loss": 0.374, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2828094946207013, + "grad_norm": 0.2552347779273987, + "learning_rate": 9.880575104462193e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.28323222929128533, + "grad_norm": 0.29106155037879944, + "learning_rate": 9.880213787082348e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.28365496396186934, + "grad_norm": 0.2706899046897888, + "learning_rate": 9.879851930575401e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.28407769863245336, + "grad_norm": 0.21382154524326324, + "learning_rate": 9.87948953498133e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.28450043330303737, + "grad_norm": 0.26409029960632324, + "learning_rate": 9.879126600340169e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2849231679736214, + "grad_norm": 0.3160908818244934, + "learning_rate": 9.87876312669201e-05, + "loss": 0.3711, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.28534590264420534, + "grad_norm": 0.2946678102016449, + "learning_rate": 9.878399114077009e-05, + "loss": 0.3694, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.28576863731478935, + "grad_norm": 0.32904860377311707, + "learning_rate": 9.878034562535379e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.28619137198537337, + "grad_norm": 0.2580677270889282, + "learning_rate": 9.877669472107391e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2866141066559574, + "grad_norm": 0.2713833153247833, + "learning_rate": 9.877303842833378e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2870368413265414, + "grad_norm": 0.2502497136592865, + "learning_rate": 9.876937674753734e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2874595759971254, + "grad_norm": 0.27786093950271606, + "learning_rate": 9.876570967908908e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2878823106677094, + "grad_norm": 0.33783242106437683, + "learning_rate": 9.876203722339411e-05, + "loss": 0.3773, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.28830504533829343, + "grad_norm": 0.28177371621131897, + "learning_rate": 9.875835938085814e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.28872778000887744, + "grad_norm": 0.3329963684082031, + "learning_rate": 9.875467615188747e-05, + "loss": 0.3732, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.28915051467946146, + "grad_norm": 0.24774563312530518, + "learning_rate": 9.875098753688899e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.28957324935004547, + "grad_norm": 0.31690749526023865, + "learning_rate": 9.874729353627017e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2899959840206294, + "grad_norm": 0.2609540522098541, + "learning_rate": 9.874359415043913e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.29041871869121344, + "grad_norm": 0.2917507588863373, + "learning_rate": 9.873988937980454e-05, + "loss": 0.3694, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.29084145336179745, + "grad_norm": 0.2746194303035736, + "learning_rate": 9.873617922477564e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.29126418803238147, + "grad_norm": 0.24422763288021088, + "learning_rate": 9.873246368576234e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2916869227029655, + "grad_norm": 0.25962430238723755, + "learning_rate": 9.872874276317507e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2921096573735495, + "grad_norm": 0.24126902222633362, + "learning_rate": 9.872501645742493e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2925323920441335, + "grad_norm": 0.23193836212158203, + "learning_rate": 9.872128476892352e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2929551267147175, + "grad_norm": 0.24376173317432404, + "learning_rate": 9.871754769808313e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.29337786138530153, + "grad_norm": 0.2667694389820099, + "learning_rate": 9.871380524531658e-05, + "loss": 0.3705, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.29380059605588554, + "grad_norm": 0.2815886437892914, + "learning_rate": 9.871005741103732e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.29422333072646956, + "grad_norm": 0.31123071908950806, + "learning_rate": 9.870630419565936e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2946460653970535, + "grad_norm": 0.29784929752349854, + "learning_rate": 9.870254559959735e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2950688000676375, + "grad_norm": 0.31549879908561707, + "learning_rate": 9.869878162326649e-05, + "loss": 0.3696, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.29549153473822154, + "grad_norm": 0.23860172927379608, + "learning_rate": 9.86950122670826e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 6990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.29591426940880555, + "grad_norm": 0.28266990184783936, + "learning_rate": 9.86912375314621e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.29633700407938957, + "grad_norm": 0.24431101977825165, + "learning_rate": 9.868745741682197e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2967597387499736, + "grad_norm": 0.24138173460960388, + "learning_rate": 9.868367192357984e-05, + "loss": 0.3703, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2971824734205576, + "grad_norm": 0.28111982345581055, + "learning_rate": 9.867988105215386e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2976052080911416, + "grad_norm": 0.268535315990448, + "learning_rate": 9.867608480296284e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2980279427617256, + "grad_norm": 0.297137051820755, + "learning_rate": 9.867228317642616e-05, + "loss": 0.3699, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.29845067743230963, + "grad_norm": 0.3327830135822296, + "learning_rate": 9.86684761729638e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.29887341210289364, + "grad_norm": 0.28386157751083374, + "learning_rate": 9.86646637929963e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2992961467734776, + "grad_norm": 0.2289627343416214, + "learning_rate": 9.866084603694483e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.2997188814440616, + "grad_norm": 0.2771284580230713, + "learning_rate": 9.865702290523118e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3001416161146456, + "grad_norm": 0.2669488787651062, + "learning_rate": 9.865319439827765e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.30056435078522964, + "grad_norm": 0.2855728268623352, + "learning_rate": 9.864936051650721e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.30098708545581365, + "grad_norm": 0.345621794462204, + "learning_rate": 9.864552126034339e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.30140982012639767, + "grad_norm": 0.29778939485549927, + "learning_rate": 9.864167663021034e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3018325547969817, + "grad_norm": 0.23339036107063293, + "learning_rate": 9.863782662653274e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3022552894675657, + "grad_norm": 0.27732208371162415, + "learning_rate": 9.863397124973594e-05, + "loss": 0.3711, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3026780241381497, + "grad_norm": 0.2399568259716034, + "learning_rate": 9.863011050024586e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3031007588087337, + "grad_norm": 0.24685083329677582, + "learning_rate": 9.862624437848898e-05, + "loss": 0.3744, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.30352349347931773, + "grad_norm": 0.33310195803642273, + "learning_rate": 9.86223728848924e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3039462281499017, + "grad_norm": 0.3806230425834656, + "learning_rate": 9.861849601988383e-05, + "loss": 0.3745, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3043689628204857, + "grad_norm": 0.26553142070770264, + "learning_rate": 9.861461378389154e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3047916974910697, + "grad_norm": 0.24446162581443787, + "learning_rate": 9.861072617734443e-05, + "loss": 0.3699, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3052144321616537, + "grad_norm": 0.3398219048976898, + "learning_rate": 9.860683320067195e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.30563716683223774, + "grad_norm": 0.2480510175228119, + "learning_rate": 9.860293485430415e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.30605990150282175, + "grad_norm": 0.2137630432844162, + "learning_rate": 9.859903113867173e-05, + "loss": 0.3705, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.30648263617340576, + "grad_norm": 0.27766233682632446, + "learning_rate": 9.859512205420591e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3069053708439898, + "grad_norm": 0.21472546458244324, + "learning_rate": 9.859120760133854e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3073281055145738, + "grad_norm": 0.28008535504341125, + "learning_rate": 9.858728778050206e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3077508401851578, + "grad_norm": 0.2870387136936188, + "learning_rate": 9.858336259212951e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3081735748557418, + "grad_norm": 0.3071660101413727, + "learning_rate": 9.85794320366545e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.30859630952632583, + "grad_norm": 0.25238001346588135, + "learning_rate": 9.857549611451127e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3090190441969098, + "grad_norm": 0.3024027347564697, + "learning_rate": 9.85715548261346e-05, + "loss": 0.3712, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3094417788674938, + "grad_norm": 0.2968749403953552, + "learning_rate": 9.856760817195989e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3098645135380778, + "grad_norm": 0.2819089889526367, + "learning_rate": 9.856365615242318e-05, + "loss": 0.3714, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3102872482086618, + "grad_norm": 0.27896490693092346, + "learning_rate": 9.8559698767961e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.31070998287924584, + "grad_norm": 0.3012271523475647, + "learning_rate": 9.855573601901056e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.31113271754982985, + "grad_norm": 0.32530540227890015, + "learning_rate": 9.855176790600964e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.31155545222041386, + "grad_norm": 0.22729282081127167, + "learning_rate": 9.854779442939659e-05, + "loss": 0.3705, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3119781868909979, + "grad_norm": 0.23806335031986237, + "learning_rate": 9.854381558961037e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3124009215615819, + "grad_norm": 0.2207084745168686, + "learning_rate": 9.853983138709053e-05, + "loss": 0.3707, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3128236562321659, + "grad_norm": 0.3229181468486786, + "learning_rate": 9.853584182227721e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3132463909027499, + "grad_norm": 0.3199625611305237, + "learning_rate": 9.853184689561118e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3136691255733339, + "grad_norm": 0.2056804597377777, + "learning_rate": 9.852784660753371e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3140918602439179, + "grad_norm": 0.25236526131629944, + "learning_rate": 9.852384095848677e-05, + "loss": 0.3728, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3145145949145019, + "grad_norm": 0.23903554677963257, + "learning_rate": 9.851982994891284e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3149373295850859, + "grad_norm": 0.2436927706003189, + "learning_rate": 9.851581357925505e-05, + "loss": 0.368, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3153600642556699, + "grad_norm": 0.25209107995033264, + "learning_rate": 9.851179184995707e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.31578279892625394, + "grad_norm": 0.2796352207660675, + "learning_rate": 9.850776476146319e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.31620553359683795, + "grad_norm": 0.24718710780143738, + "learning_rate": 9.850373231421833e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.31662826826742196, + "grad_norm": 0.30236127972602844, + "learning_rate": 9.849969450866791e-05, + "loss": 0.3736, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.317051002938006, + "grad_norm": 0.2610178589820862, + "learning_rate": 9.849565134525803e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.31747373760859, + "grad_norm": 0.2434697151184082, + "learning_rate": 9.849160282443532e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.317896472279174, + "grad_norm": 0.28123772144317627, + "learning_rate": 9.848754894664706e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.31831920694975796, + "grad_norm": 0.22319208085536957, + "learning_rate": 9.848348971234107e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.318741941620342, + "grad_norm": 0.250559002161026, + "learning_rate": 9.847942512196578e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.319164676290926, + "grad_norm": 0.25896644592285156, + "learning_rate": 9.847535517597021e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.31958741096151, + "grad_norm": 0.24291685223579407, + "learning_rate": 9.8471279874804e-05, + "loss": 0.3697, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.320010145632094, + "grad_norm": 0.2955228090286255, + "learning_rate": 9.846719921891734e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.320432880302678, + "grad_norm": 0.2479138970375061, + "learning_rate": 9.846311320876103e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.32085561497326204, + "grad_norm": 0.23512329161167145, + "learning_rate": 9.845902184478645e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.32127834964384605, + "grad_norm": 0.22126024961471558, + "learning_rate": 9.845492512744559e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.32170108431443006, + "grad_norm": 0.30782508850097656, + "learning_rate": 9.845082305719103e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3221238189850141, + "grad_norm": 0.22336198389530182, + "learning_rate": 9.844671563447591e-05, + "loss": 0.3707, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3225465536555981, + "grad_norm": 0.244611918926239, + "learning_rate": 9.844260285975402e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.32296928832618205, + "grad_norm": 0.28173843026161194, + "learning_rate": 9.84384847334797e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.32339202299676606, + "grad_norm": 0.3095923662185669, + "learning_rate": 9.843436125610786e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3238147576673501, + "grad_norm": 0.2134583741426468, + "learning_rate": 9.843023242809404e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3242374923379341, + "grad_norm": 0.2581598162651062, + "learning_rate": 9.842609824989437e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3246602270085181, + "grad_norm": 0.2299111932516098, + "learning_rate": 9.842195872196556e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3250829616791021, + "grad_norm": 0.2247466742992401, + "learning_rate": 9.84178138447649e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3255056963496861, + "grad_norm": 0.2719270586967468, + "learning_rate": 9.84136636187503e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.32592843102027014, + "grad_norm": 0.23614567518234253, + "learning_rate": 9.840950804438023e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.32635116569085415, + "grad_norm": 0.2230408936738968, + "learning_rate": 9.840534712211377e-05, + "loss": 0.3694, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.32677390036143816, + "grad_norm": 0.26722466945648193, + "learning_rate": 9.840118085241058e-05, + "loss": 0.3702, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3271966350320222, + "grad_norm": 0.21206092834472656, + "learning_rate": 9.839700923573094e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.32761936970260613, + "grad_norm": 0.20931878685951233, + "learning_rate": 9.839283227253567e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.32804210437319015, + "grad_norm": 0.22873342037200928, + "learning_rate": 9.83886499632862e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.32846483904377416, + "grad_norm": 0.31818607449531555, + "learning_rate": 9.83844623084446e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3288875737143582, + "grad_norm": 0.26538243889808655, + "learning_rate": 9.838026930847346e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3293103083849422, + "grad_norm": 0.25369539856910706, + "learning_rate": 9.837607096383597e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3297330430555262, + "grad_norm": 0.2990206778049469, + "learning_rate": 9.837186727499597e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3301557777261102, + "grad_norm": 0.28181716799736023, + "learning_rate": 9.836765824241782e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3305785123966942, + "grad_norm": 0.25225916504859924, + "learning_rate": 9.836344386656653e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.33100124706727824, + "grad_norm": 0.3200971782207489, + "learning_rate": 9.835922414790763e-05, + "loss": 0.3704, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.33142398173786225, + "grad_norm": 0.2310212403535843, + "learning_rate": 9.835499908690734e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.33184671640844626, + "grad_norm": 0.22035916149616241, + "learning_rate": 9.835076868403235e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3322694510790302, + "grad_norm": 0.23471814393997192, + "learning_rate": 9.834653293975003e-05, + "loss": 0.3717, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.33269218574961423, + "grad_norm": 0.2929060757160187, + "learning_rate": 9.83422918545283e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.33311492042019825, + "grad_norm": 0.30678683519363403, + "learning_rate": 9.83380454288357e-05, + "loss": 0.3706, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.33353765509078226, + "grad_norm": 0.24544572830200195, + "learning_rate": 9.83337936631413e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.33396038976136627, + "grad_norm": 0.2488473504781723, + "learning_rate": 9.832953655791485e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3343831244319503, + "grad_norm": 0.24865828454494476, + "learning_rate": 9.832527411362665e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3348058591025343, + "grad_norm": 0.3912889063358307, + "learning_rate": 9.832100633074753e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3352285937731183, + "grad_norm": 0.19853812456130981, + "learning_rate": 9.831673320974896e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3356513284437023, + "grad_norm": 0.2744801938533783, + "learning_rate": 9.831245475110306e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.33607406311428634, + "grad_norm": 0.2519948482513428, + "learning_rate": 9.830817095528244e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.33649679778487035, + "grad_norm": 0.2905103862285614, + "learning_rate": 9.830388182276032e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3369195324554543, + "grad_norm": 0.3093611001968384, + "learning_rate": 9.829958735401056e-05, + "loss": 0.368, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3373422671260383, + "grad_norm": 0.2457486093044281, + "learning_rate": 9.829528754950758e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.33776500179662233, + "grad_norm": 0.3635554611682892, + "learning_rate": 9.829098240972639e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 7990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.33818773646720635, + "grad_norm": 0.2576887011528015, + "learning_rate": 9.828667193514256e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.33861047113779036, + "grad_norm": 0.2433357983827591, + "learning_rate": 9.828235612623228e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.33903320580837437, + "grad_norm": 0.25116589665412903, + "learning_rate": 9.827803498347236e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3394559404789584, + "grad_norm": 0.3040851056575775, + "learning_rate": 9.827370850734014e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3398786751495424, + "grad_norm": 0.2519017457962036, + "learning_rate": 9.826937669831359e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3403014098201264, + "grad_norm": 0.2576379179954529, + "learning_rate": 9.826503955687123e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3407241444907104, + "grad_norm": 0.26142600178718567, + "learning_rate": 9.826069708349222e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.34114687916129444, + "grad_norm": 0.20519156754016876, + "learning_rate": 9.825634927865625e-05, + "loss": 0.3563, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.34156961383187845, + "grad_norm": 0.2132216840982437, + "learning_rate": 9.825199614284366e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3419923485024624, + "grad_norm": 0.23162990808486938, + "learning_rate": 9.824763767653534e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3424150831730464, + "grad_norm": 0.23499038815498352, + "learning_rate": 9.824327388021277e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.34283781784363043, + "grad_norm": 0.21673831343650818, + "learning_rate": 9.823890475435804e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.34326055251421445, + "grad_norm": 0.28828153014183044, + "learning_rate": 9.823453029945383e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.34368328718479846, + "grad_norm": 0.21572069823741913, + "learning_rate": 9.823015051598334e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.34410602185538247, + "grad_norm": 0.21658547222614288, + "learning_rate": 9.822576540443047e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3445287565259665, + "grad_norm": 0.3007248044013977, + "learning_rate": 9.822137496527962e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3449514911965505, + "grad_norm": 0.2166329026222229, + "learning_rate": 9.821697919901583e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3453742258671345, + "grad_norm": 0.2877567410469055, + "learning_rate": 9.82125781061247e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3457969605377185, + "grad_norm": 0.2204838991165161, + "learning_rate": 9.820817168709242e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.34621969520830254, + "grad_norm": 0.28193429112434387, + "learning_rate": 9.82037599424058e-05, + "loss": 0.3699, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3466424298788865, + "grad_norm": 0.337773859500885, + "learning_rate": 9.81993428725522e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3470651645494705, + "grad_norm": 0.22555501759052277, + "learning_rate": 9.819492047801957e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3474878992200545, + "grad_norm": 0.25079187750816345, + "learning_rate": 9.819049275929648e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.34791063389063853, + "grad_norm": 0.23132915794849396, + "learning_rate": 9.818605971687206e-05, + "loss": 0.3705, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.34833336856122254, + "grad_norm": 0.33426105976104736, + "learning_rate": 9.818162135123603e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.34875610323180656, + "grad_norm": 0.2018255591392517, + "learning_rate": 9.817717766287873e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.34917883790239057, + "grad_norm": 0.2548637390136719, + "learning_rate": 9.817272865229103e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3496015725729746, + "grad_norm": 0.20794861018657684, + "learning_rate": 9.816871999266136e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3500243072435586, + "grad_norm": 0.29489994049072266, + "learning_rate": 9.816426087119048e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3504470419141426, + "grad_norm": 0.2708848714828491, + "learning_rate": 9.815979642891618e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3508697765847266, + "grad_norm": 0.2392212599515915, + "learning_rate": 9.815532666633162e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3512925112553106, + "grad_norm": 0.19437748193740845, + "learning_rate": 9.81508515839306e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3517152459258946, + "grad_norm": 0.2017318606376648, + "learning_rate": 9.814637118220751e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3521379805964786, + "grad_norm": 0.22220578789710999, + "learning_rate": 9.814188546165729e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3525607152670626, + "grad_norm": 0.20897535979747772, + "learning_rate": 9.813739442277549e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.35298344993764663, + "grad_norm": 0.21742504835128784, + "learning_rate": 9.813289806605823e-05, + "loss": 0.3739, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.35340618460823064, + "grad_norm": 0.23422864079475403, + "learning_rate": 9.812839639200225e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.35382891927881466, + "grad_norm": 0.2176007777452469, + "learning_rate": 9.812388940110486e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.35425165394939867, + "grad_norm": 0.20152902603149414, + "learning_rate": 9.811937709386393e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3546743886199827, + "grad_norm": 0.25975990295410156, + "learning_rate": 9.811485947077798e-05, + "loss": 0.371, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3550971232905667, + "grad_norm": 0.2761105000972748, + "learning_rate": 9.811033653234607e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3555198579611507, + "grad_norm": 0.2686358094215393, + "learning_rate": 9.810580827906785e-05, + "loss": 0.3704, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.35594259263173467, + "grad_norm": 0.20046506822109222, + "learning_rate": 9.810127471144356e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3563653273023187, + "grad_norm": 0.27093997597694397, + "learning_rate": 9.809673582997404e-05, + "loss": 0.3759, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3567880619729027, + "grad_norm": 0.2794387638568878, + "learning_rate": 9.809219163516071e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3572107966434867, + "grad_norm": 0.21916869282722473, + "learning_rate": 9.808764212750558e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3576335313140707, + "grad_norm": 0.2523277997970581, + "learning_rate": 9.808308730751124e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.35805626598465473, + "grad_norm": 0.19965322315692902, + "learning_rate": 9.807852717568087e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.35847900065523874, + "grad_norm": 0.23017120361328125, + "learning_rate": 9.807396173251824e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.35890173532582276, + "grad_norm": 0.28299516439437866, + "learning_rate": 9.80693909785277e-05, + "loss": 0.3706, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.35932446999640677, + "grad_norm": 0.2158713936805725, + "learning_rate": 9.806481491421418e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3597472046669908, + "grad_norm": 0.2322736233472824, + "learning_rate": 9.806023354008322e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3601699393375748, + "grad_norm": 0.24263758957386017, + "learning_rate": 9.805564685664095e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.36059267400815875, + "grad_norm": 0.2243765890598297, + "learning_rate": 9.805105486439403e-05, + "loss": 0.3734, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.36101540867874277, + "grad_norm": 0.2790693938732147, + "learning_rate": 9.804645756384978e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3614381433493268, + "grad_norm": 0.2391006052494049, + "learning_rate": 9.804185495551606e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3618608780199108, + "grad_norm": 0.23779208958148956, + "learning_rate": 9.803724703990134e-05, + "loss": 0.3743, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3622836126904948, + "grad_norm": 0.21384571492671967, + "learning_rate": 9.803263381751464e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3627063473610788, + "grad_norm": 0.23705458641052246, + "learning_rate": 9.802801528886561e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.36312908203166283, + "grad_norm": 0.21498359739780426, + "learning_rate": 9.802339145446447e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.36355181670224684, + "grad_norm": 0.28936365246772766, + "learning_rate": 9.801876231482203e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.36397455137283086, + "grad_norm": 0.26594552397727966, + "learning_rate": 9.801412787044966e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.36439728604341487, + "grad_norm": 0.26514363288879395, + "learning_rate": 9.800948812185937e-05, + "loss": 0.3763, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3648200207139989, + "grad_norm": 0.3176560699939728, + "learning_rate": 9.800484306956368e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.36524275538458284, + "grad_norm": 0.25201523303985596, + "learning_rate": 9.800019271407577e-05, + "loss": 0.3703, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.36566549005516685, + "grad_norm": 0.25037917494773865, + "learning_rate": 9.799553705590936e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.36608822472575087, + "grad_norm": 0.2941805124282837, + "learning_rate": 9.799087609557878e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3665109593963349, + "grad_norm": 0.22764688730239868, + "learning_rate": 9.798620983359891e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3669336940669189, + "grad_norm": 0.24955663084983826, + "learning_rate": 9.798153827048527e-05, + "loss": 0.3684, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3673564287375029, + "grad_norm": 0.2802809476852417, + "learning_rate": 9.797686140675392e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3677791634080869, + "grad_norm": 0.2639462947845459, + "learning_rate": 9.797217924292155e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.36820189807867093, + "grad_norm": 0.2438303381204605, + "learning_rate": 9.796749177950539e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.36862463274925494, + "grad_norm": 0.27241644263267517, + "learning_rate": 9.796279901702325e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.36904736741983896, + "grad_norm": 0.23555874824523926, + "learning_rate": 9.795810095599358e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.36947010209042297, + "grad_norm": 0.2804928123950958, + "learning_rate": 9.795339759693539e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3698928367610069, + "grad_norm": 0.2272065281867981, + "learning_rate": 9.794868894036823e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.37031557143159094, + "grad_norm": 0.19635282456874847, + "learning_rate": 9.794397498681231e-05, + "loss": 0.3731, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.37073830610217495, + "grad_norm": 0.1916578710079193, + "learning_rate": 9.793925573678837e-05, + "loss": 0.3703, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.37116104077275897, + "grad_norm": 0.24849377572536469, + "learning_rate": 9.793453119081777e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.371583775443343, + "grad_norm": 0.2479267120361328, + "learning_rate": 9.792980134942245e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.372006510113927, + "grad_norm": 0.2485342174768448, + "learning_rate": 9.792506621312489e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.372429244784511, + "grad_norm": 0.26631468534469604, + "learning_rate": 9.792032578244823e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.372851979455095, + "grad_norm": 0.20237834751605988, + "learning_rate": 9.791558005791613e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.37327471412567903, + "grad_norm": 0.21135053038597107, + "learning_rate": 9.791082904005284e-05, + "loss": 0.3692, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.37369744879626304, + "grad_norm": 0.3268027901649475, + "learning_rate": 9.790607272938327e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.37412018346684706, + "grad_norm": 0.2612670063972473, + "learning_rate": 9.790131112643281e-05, + "loss": 0.37, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.37454291813743107, + "grad_norm": 0.2896520495414734, + "learning_rate": 9.789654423172751e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.374965652808015, + "grad_norm": 0.24703249335289001, + "learning_rate": 9.789177204579398e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.37538838747859904, + "grad_norm": 0.28104111552238464, + "learning_rate": 9.78869945691594e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.37581112214918305, + "grad_norm": 0.1849287748336792, + "learning_rate": 9.788221180235153e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.37623385681976707, + "grad_norm": 0.22819362580776215, + "learning_rate": 9.787742374589877e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3766565914903511, + "grad_norm": 0.26588308811187744, + "learning_rate": 9.787263040033006e-05, + "loss": 0.3702, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3770793261609351, + "grad_norm": 0.18149937689304352, + "learning_rate": 9.786783176617491e-05, + "loss": 0.3693, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3775020608315191, + "grad_norm": 0.22806958854198456, + "learning_rate": 9.786302784396344e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3779247955021031, + "grad_norm": 0.19506597518920898, + "learning_rate": 9.785821863422638e-05, + "loss": 0.3711, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.37834753017268713, + "grad_norm": 0.25508901476860046, + "learning_rate": 9.785340413749495e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.37877026484327114, + "grad_norm": 0.2131553441286087, + "learning_rate": 9.784858435430107e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.37919299951385516, + "grad_norm": 0.19894713163375854, + "learning_rate": 9.784375928517718e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3796157341844391, + "grad_norm": 0.21777455508708954, + "learning_rate": 9.783892893065632e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3800384688550231, + "grad_norm": 0.2200314998626709, + "learning_rate": 9.783409329127209e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 8990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.38046120352560714, + "grad_norm": 0.1922096610069275, + "learning_rate": 9.78292523675587e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.38088393819619115, + "grad_norm": 0.2663286030292511, + "learning_rate": 9.782440616005094e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.38130667286677516, + "grad_norm": 0.26105281710624695, + "learning_rate": 9.781955466928418e-05, + "loss": 0.3732, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3817294075373592, + "grad_norm": 0.2547523081302643, + "learning_rate": 9.781469789579439e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3821521422079432, + "grad_norm": 0.3301612138748169, + "learning_rate": 9.780983584011806e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3825748768785272, + "grad_norm": 0.2539929151535034, + "learning_rate": 9.780496850279237e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3829976115491112, + "grad_norm": 0.20533424615859985, + "learning_rate": 9.780009588435499e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.38342034621969523, + "grad_norm": 0.2808014750480652, + "learning_rate": 9.77952179853442e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.38384308089027924, + "grad_norm": 0.27162840962409973, + "learning_rate": 9.77903348062989e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3842658155608632, + "grad_norm": 0.2757715582847595, + "learning_rate": 9.778544634775854e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3846885502314472, + "grad_norm": 0.22679242491722107, + "learning_rate": 9.778055261026315e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3851112849020312, + "grad_norm": 0.2519906461238861, + "learning_rate": 9.777565359435334e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.38553401957261524, + "grad_norm": 0.18308532238006592, + "learning_rate": 9.777074930057032e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.38595675424319925, + "grad_norm": 0.2980770468711853, + "learning_rate": 9.776583972945588e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.38637948891378326, + "grad_norm": 0.22036048769950867, + "learning_rate": 9.77609248815524e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3868022235843673, + "grad_norm": 0.2376008778810501, + "learning_rate": 9.775600475740284e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3872249582549513, + "grad_norm": 0.24028605222702026, + "learning_rate": 9.77510793575507e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3876476929255353, + "grad_norm": 0.2907998859882355, + "learning_rate": 9.774614868254013e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3880704275961193, + "grad_norm": 0.1902952343225479, + "learning_rate": 9.774121273291581e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.38849316226670333, + "grad_norm": 0.23247523605823517, + "learning_rate": 9.773627150922305e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3889158969372873, + "grad_norm": 0.23612502217292786, + "learning_rate": 9.773132501200771e-05, + "loss": 0.3569, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3893386316078713, + "grad_norm": 0.21535906195640564, + "learning_rate": 9.772637324181622e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3897613662784553, + "grad_norm": 0.23430025577545166, + "learning_rate": 9.772141619919561e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3901841009490393, + "grad_norm": 0.26375094056129456, + "learning_rate": 9.771645388469354e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.39060683561962334, + "grad_norm": 0.22493205964565277, + "learning_rate": 9.771148629885816e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.39102957029020735, + "grad_norm": 0.20513707399368286, + "learning_rate": 9.770651344223827e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.39145230496079136, + "grad_norm": 0.2521432936191559, + "learning_rate": 9.770153531538321e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3918750396313754, + "grad_norm": 0.24702438712120056, + "learning_rate": 9.769655191884297e-05, + "loss": 0.3718, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3922977743019594, + "grad_norm": 0.24996189773082733, + "learning_rate": 9.769156325316803e-05, + "loss": 0.3693, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3927205089725434, + "grad_norm": 0.24848753213882446, + "learning_rate": 9.768656931890952e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3931432436431274, + "grad_norm": 0.23773635923862457, + "learning_rate": 9.768157011661913e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3935659783137114, + "grad_norm": 0.1793491244316101, + "learning_rate": 9.767656564684912e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3939887129842954, + "grad_norm": 0.19026412069797516, + "learning_rate": 9.767155591015235e-05, + "loss": 0.3696, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3944114476548794, + "grad_norm": 0.24461321532726288, + "learning_rate": 9.766654090708226e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3948341823254634, + "grad_norm": 0.22568878531455994, + "learning_rate": 9.766152063819286e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3952569169960474, + "grad_norm": 0.2016621232032776, + "learning_rate": 9.765649510403876e-05, + "loss": 0.3724, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.39567965166663144, + "grad_norm": 0.21873046457767487, + "learning_rate": 9.765146430517514e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.39610238633721545, + "grad_norm": 0.22642914950847626, + "learning_rate": 9.764642824215772e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.39652512100779946, + "grad_norm": 0.21555684506893158, + "learning_rate": 9.764138691554292e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3969478556783835, + "grad_norm": 0.21587920188903809, + "learning_rate": 9.763634032588763e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3973705903489675, + "grad_norm": 0.24722005426883698, + "learning_rate": 9.763128847374932e-05, + "loss": 0.371, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3977933250195515, + "grad_norm": 0.23495447635650635, + "learning_rate": 9.762623135968613e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.39821605969013546, + "grad_norm": 0.22984762489795685, + "learning_rate": 9.762116898425673e-05, + "loss": 0.3703, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3986387943607195, + "grad_norm": 0.3103366196155548, + "learning_rate": 9.761610134802033e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3990615290313035, + "grad_norm": 0.2567926347255707, + "learning_rate": 9.761102845153678e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3994842637018875, + "grad_norm": 0.26591169834136963, + "learning_rate": 9.760595029536651e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.3999069983724715, + "grad_norm": 0.22688286006450653, + "learning_rate": 9.76008668800705e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4003297330430555, + "grad_norm": 0.23041173815727234, + "learning_rate": 9.759577820621033e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.40075246771363954, + "grad_norm": 0.2557883560657501, + "learning_rate": 9.759068427434814e-05, + "loss": 0.3728, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.40117520238422355, + "grad_norm": 0.25147172808647156, + "learning_rate": 9.758558508504669e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.40159793705480756, + "grad_norm": 0.20662228763103485, + "learning_rate": 9.758048063886929e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4020206717253916, + "grad_norm": 0.22599086165428162, + "learning_rate": 9.757537093637982e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4024434063959756, + "grad_norm": 0.1895257532596588, + "learning_rate": 9.75702559781428e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.40286614106655955, + "grad_norm": 0.1982758343219757, + "learning_rate": 9.756513576472325e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.40328887573714356, + "grad_norm": 0.25700950622558594, + "learning_rate": 9.756001029668682e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4037116104077276, + "grad_norm": 0.22963708639144897, + "learning_rate": 9.755487957459974e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4041343450783116, + "grad_norm": 0.21430498361587524, + "learning_rate": 9.754974359902883e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4045570797488956, + "grad_norm": 0.20528197288513184, + "learning_rate": 9.754460237054143e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4049798144194796, + "grad_norm": 0.23372389376163483, + "learning_rate": 9.753945588970552e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4054025490900636, + "grad_norm": 0.18928970396518707, + "learning_rate": 9.753430415708964e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.40582528376064764, + "grad_norm": 0.2100914865732193, + "learning_rate": 9.752914717326294e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.40624801843123165, + "grad_norm": 0.25520339608192444, + "learning_rate": 9.752398493879507e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.40667075310181566, + "grad_norm": 0.25289326906204224, + "learning_rate": 9.751881745425636e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4070934877723997, + "grad_norm": 0.2632318139076233, + "learning_rate": 9.751364472021764e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4075162224429837, + "grad_norm": 0.2723275125026703, + "learning_rate": 9.750846673725037e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.40793895711356765, + "grad_norm": 0.19466248154640198, + "learning_rate": 9.750328350592657e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.40836169178415166, + "grad_norm": 0.22363948822021484, + "learning_rate": 9.749809502681883e-05, + "loss": 0.3691, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.40878442645473567, + "grad_norm": 0.26775240898132324, + "learning_rate": 9.749290130050035e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4092071611253197, + "grad_norm": 0.20843690633773804, + "learning_rate": 9.74877023275449e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4096298957959037, + "grad_norm": 0.23161658644676208, + "learning_rate": 9.748249810852678e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4100526304664877, + "grad_norm": 0.20654229819774628, + "learning_rate": 9.747728864402093e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4104753651370717, + "grad_norm": 0.21360869705677032, + "learning_rate": 9.747207393460287e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.41089809980765574, + "grad_norm": 0.21699440479278564, + "learning_rate": 9.746685398084867e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.41132083447823975, + "grad_norm": 0.24811533093452454, + "learning_rate": 9.746162878333496e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.41174356914882376, + "grad_norm": 0.2401786893606186, + "learning_rate": 9.745639834263902e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4121663038194078, + "grad_norm": 0.22057972848415375, + "learning_rate": 9.745116265933865e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.41258903848999173, + "grad_norm": 0.21363788843154907, + "learning_rate": 9.744592173401224e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.41301177316057575, + "grad_norm": 0.18708862364292145, + "learning_rate": 9.744067556723878e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.41343450783115976, + "grad_norm": 0.283969521522522, + "learning_rate": 9.74354241595978e-05, + "loss": 0.3702, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.41385724250174377, + "grad_norm": 0.2343134880065918, + "learning_rate": 9.743016751166946e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4142799771723278, + "grad_norm": 0.25323253870010376, + "learning_rate": 9.742490562403446e-05, + "loss": 0.3704, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4147027118429118, + "grad_norm": 0.2321898341178894, + "learning_rate": 9.741963849727409e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4151254465134958, + "grad_norm": 0.226340189576149, + "learning_rate": 9.741436613197023e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4155481811840798, + "grad_norm": 0.20697996020317078, + "learning_rate": 9.740908852870531e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.41597091585466384, + "grad_norm": 0.230067178606987, + "learning_rate": 9.740380568806237e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.41639365052524785, + "grad_norm": 0.23676352202892303, + "learning_rate": 9.739851761062503e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.41681638519583186, + "grad_norm": 0.2596682608127594, + "learning_rate": 9.739322429697746e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4172391198664158, + "grad_norm": 0.24097390472888947, + "learning_rate": 9.738792574770441e-05, + "loss": 0.3694, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.41766185453699983, + "grad_norm": 0.25224509835243225, + "learning_rate": 9.738262196339124e-05, + "loss": 0.37, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.41808458920758385, + "grad_norm": 0.20875753462314606, + "learning_rate": 9.737731294462387e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.41850732387816786, + "grad_norm": 0.19956901669502258, + "learning_rate": 9.737199869198878e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.41893005854875187, + "grad_norm": 0.2078605741262436, + "learning_rate": 9.736667920607307e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4193527932193359, + "grad_norm": 0.3606480360031128, + "learning_rate": 9.73613544874644e-05, + "loss": 0.3706, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4197755278899199, + "grad_norm": 0.21628861129283905, + "learning_rate": 9.735602453675096e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4201982625605039, + "grad_norm": 0.22885753214359283, + "learning_rate": 9.73506893545216e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4206209972310879, + "grad_norm": 0.2510856091976166, + "learning_rate": 9.73453489413657e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.42104373190167194, + "grad_norm": 0.19776605069637299, + "learning_rate": 9.73400032978732e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.42146646657225595, + "grad_norm": 0.21521241962909698, + "learning_rate": 9.733465242463468e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4218892012428399, + "grad_norm": 0.23773911595344543, + "learning_rate": 9.732929632224124e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4223119359134239, + "grad_norm": 0.23396220803260803, + "learning_rate": 9.732393499128458e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 9990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.42273467058400793, + "grad_norm": 0.2621050179004669, + "learning_rate": 9.7318568432357e-05, + "loss": 0.3715, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.42315740525459195, + "grad_norm": 0.20024971663951874, + "learning_rate": 9.731319664605134e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.42358013992517596, + "grad_norm": 0.2575528621673584, + "learning_rate": 9.730781963296101e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.42400287459575997, + "grad_norm": 0.2086641788482666, + "learning_rate": 9.730243739368006e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.424425609266344, + "grad_norm": 0.22837533056735992, + "learning_rate": 9.729704992880304e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.424848343936928, + "grad_norm": 0.23178130388259888, + "learning_rate": 9.729165723892515e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.425271078607512, + "grad_norm": 0.24160560965538025, + "learning_rate": 9.72862593246421e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.425693813278096, + "grad_norm": 0.2260974794626236, + "learning_rate": 9.728085618655022e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.42611654794868004, + "grad_norm": 0.2820652723312378, + "learning_rate": 9.72754478252464e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.426539282619264, + "grad_norm": 0.26406511664390564, + "learning_rate": 9.727003424132814e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.426962017289848, + "grad_norm": 0.1933652013540268, + "learning_rate": 9.726461543539345e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.427384751960432, + "grad_norm": 0.21673592925071716, + "learning_rate": 9.725919140804099e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.42780748663101603, + "grad_norm": 0.22939415276050568, + "learning_rate": 9.725376215986994e-05, + "loss": 0.3558, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.42823022130160004, + "grad_norm": 0.1944686472415924, + "learning_rate": 9.724832769148009e-05, + "loss": 0.3722, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.42865295597218406, + "grad_norm": 0.19308704137802124, + "learning_rate": 9.724288800347178e-05, + "loss": 0.3554, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.42907569064276807, + "grad_norm": 0.2527170479297638, + "learning_rate": 9.723744309644597e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4294984253133521, + "grad_norm": 0.23602983355522156, + "learning_rate": 9.723199297100416e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4299211599839361, + "grad_norm": 0.23528562486171722, + "learning_rate": 9.722653762774844e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4303438946545201, + "grad_norm": 0.21322618424892426, + "learning_rate": 9.722107706728145e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4307666293251041, + "grad_norm": 0.2246430218219757, + "learning_rate": 9.721561129020647e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4311893639956881, + "grad_norm": 0.20566107332706451, + "learning_rate": 9.721014029712727e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4316120986662721, + "grad_norm": 0.1902344673871994, + "learning_rate": 9.720466408864828e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4320348333368561, + "grad_norm": 0.1986648291349411, + "learning_rate": 9.719918266537445e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4324575680074401, + "grad_norm": 0.21230150759220123, + "learning_rate": 9.719369602791132e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.43288030267802413, + "grad_norm": 0.23784029483795166, + "learning_rate": 9.718820417686502e-05, + "loss": 0.3693, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.43330303734860814, + "grad_norm": 0.21719640493392944, + "learning_rate": 9.718270711284223e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.43372577201919216, + "grad_norm": 0.2189306616783142, + "learning_rate": 9.717720483645026e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.43414850668977617, + "grad_norm": 0.2204916924238205, + "learning_rate": 9.71716973482969e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4345712413603602, + "grad_norm": 0.18582825362682343, + "learning_rate": 9.716618464899061e-05, + "loss": 0.3729, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4349939760309442, + "grad_norm": 0.2246483415365219, + "learning_rate": 9.716066673914039e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4354167107015282, + "grad_norm": 0.2132016122341156, + "learning_rate": 9.715514361935582e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.43583944537211217, + "grad_norm": 0.2136061191558838, + "learning_rate": 9.714961529024702e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4362621800426962, + "grad_norm": 0.2032659351825714, + "learning_rate": 9.714408175242474e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4366849147132802, + "grad_norm": 0.2506222724914551, + "learning_rate": 9.713854300650027e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4371076493838642, + "grad_norm": 0.24101436138153076, + "learning_rate": 9.71329990530855e-05, + "loss": 0.3724, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4375303840544482, + "grad_norm": 0.2456894963979721, + "learning_rate": 9.712744989279288e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.43795311872503223, + "grad_norm": 0.23331362009048462, + "learning_rate": 9.712189552623542e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.43837585339561624, + "grad_norm": 0.22385476529598236, + "learning_rate": 9.711633595402673e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.43879858806620026, + "grad_norm": 0.21616771817207336, + "learning_rate": 9.711077117678099e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.43922132273678427, + "grad_norm": 0.174973264336586, + "learning_rate": 9.710520119511295e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4396440574073683, + "grad_norm": 0.23807978630065918, + "learning_rate": 9.709962600963795e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4400667920779523, + "grad_norm": 0.20934441685676575, + "learning_rate": 9.709404562097187e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4404895267485363, + "grad_norm": 0.21337923407554626, + "learning_rate": 9.70884600297312e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.44091226141912027, + "grad_norm": 0.22991161048412323, + "learning_rate": 9.7082869236533e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4413349960897043, + "grad_norm": 0.2259412407875061, + "learning_rate": 9.707727324199487e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4417577307602883, + "grad_norm": 0.18807969987392426, + "learning_rate": 9.707167204673504e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4421804654308723, + "grad_norm": 0.2274651974439621, + "learning_rate": 9.706606565137226e-05, + "loss": 0.3678, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4426032001014563, + "grad_norm": 0.2703275978565216, + "learning_rate": 9.706045405652591e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.44302593477204033, + "grad_norm": 0.2099880427122116, + "learning_rate": 9.705483726281588e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.44344866944262434, + "grad_norm": 0.2643686830997467, + "learning_rate": 9.704921527086268e-05, + "loss": 0.3554, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.44387140411320836, + "grad_norm": 0.22200019657611847, + "learning_rate": 9.70435880812874e-05, + "loss": 0.3718, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.44429413878379237, + "grad_norm": 0.24250812828540802, + "learning_rate": 9.703795569471167e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4447168734543764, + "grad_norm": 0.22793953120708466, + "learning_rate": 9.703231811175771e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4451396081249604, + "grad_norm": 0.1718393862247467, + "learning_rate": 9.702667533304833e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.44556234279554435, + "grad_norm": 0.23842135071754456, + "learning_rate": 9.702102735920688e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.44598507746612837, + "grad_norm": 0.21875496208667755, + "learning_rate": 9.701537419085733e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4464078121367124, + "grad_norm": 0.24570997059345245, + "learning_rate": 9.700971582862416e-05, + "loss": 0.3714, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4468305468072964, + "grad_norm": 0.2378559708595276, + "learning_rate": 9.70040522731325e-05, + "loss": 0.3696, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4472532814778804, + "grad_norm": 0.2129359096288681, + "learning_rate": 9.699838352500798e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4476760161484644, + "grad_norm": 0.18015888333320618, + "learning_rate": 9.699270958487687e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.44809875081904843, + "grad_norm": 0.1887054592370987, + "learning_rate": 9.698703045336594e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.44852148548963244, + "grad_norm": 0.28081417083740234, + "learning_rate": 9.69813461311026e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.44894422016021646, + "grad_norm": 0.21740929782390594, + "learning_rate": 9.697565661871484e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.44936695483080047, + "grad_norm": 0.20179533958435059, + "learning_rate": 9.696996191683114e-05, + "loss": 0.3736, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4497896895013845, + "grad_norm": 0.3131153881549835, + "learning_rate": 9.696426202608063e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.45021242417196844, + "grad_norm": 0.24566461145877838, + "learning_rate": 9.6958556947093e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.45063515884255245, + "grad_norm": 0.19505362212657928, + "learning_rate": 9.695284668049846e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.45105789351313647, + "grad_norm": 0.19916236400604248, + "learning_rate": 9.694713122692786e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4514806281837205, + "grad_norm": 0.20062240958213806, + "learning_rate": 9.694141058701261e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4519033628543045, + "grad_norm": 0.20407447218894958, + "learning_rate": 9.693568476138467e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4523260975248885, + "grad_norm": 0.2146925926208496, + "learning_rate": 9.692995375067659e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4527488321954725, + "grad_norm": 0.25913524627685547, + "learning_rate": 9.692421755552146e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.45317156686605653, + "grad_norm": 0.22472119331359863, + "learning_rate": 9.6918476176553e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.45359430153664054, + "grad_norm": 0.221579447388649, + "learning_rate": 9.691272961440546e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.45401703620722456, + "grad_norm": 0.17538678646087646, + "learning_rate": 9.690697786971368e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.45443977087780857, + "grad_norm": 0.2670307159423828, + "learning_rate": 9.690122094311305e-05, + "loss": 0.3731, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4548625055483925, + "grad_norm": 0.23955312371253967, + "learning_rate": 9.689545883523956e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.45528524021897654, + "grad_norm": 0.2116796225309372, + "learning_rate": 9.688969154672975e-05, + "loss": 0.3723, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.45570797488956055, + "grad_norm": 0.21097564697265625, + "learning_rate": 9.688391907822078e-05, + "loss": 0.3704, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.45613070956014456, + "grad_norm": 0.21189092099666595, + "learning_rate": 9.687814143035032e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4565534442307286, + "grad_norm": 0.2012442797422409, + "learning_rate": 9.687235860375662e-05, + "loss": 0.3731, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4569761789013126, + "grad_norm": 0.23691622912883759, + "learning_rate": 9.686657059907857e-05, + "loss": 0.3548, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4573989135718966, + "grad_norm": 0.21186082065105438, + "learning_rate": 9.686077741695554e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4578216482424806, + "grad_norm": 0.20432014763355255, + "learning_rate": 9.685497905802754e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.45824438291306463, + "grad_norm": 0.1915806233882904, + "learning_rate": 9.68491755229351e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.45866711758364864, + "grad_norm": 0.2917667329311371, + "learning_rate": 9.684336681231936e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.45908985225423266, + "grad_norm": 0.20697945356369019, + "learning_rate": 9.683755292682204e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4595125869248166, + "grad_norm": 0.21533119678497314, + "learning_rate": 9.683173386708538e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4599353215954006, + "grad_norm": 0.202070415019989, + "learning_rate": 9.682590963375226e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.46035805626598464, + "grad_norm": 0.25010058283805847, + "learning_rate": 9.682008022746605e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.46078079093656865, + "grad_norm": 0.22421178221702576, + "learning_rate": 9.681424564887078e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.46120352560715266, + "grad_norm": 0.23007512092590332, + "learning_rate": 9.680840589861097e-05, + "loss": 0.3716, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4616262602777367, + "grad_norm": 0.3392086625099182, + "learning_rate": 9.680256097733177e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4620489949483207, + "grad_norm": 0.1990291327238083, + "learning_rate": 9.679671088567888e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4624717296189047, + "grad_norm": 0.23429353535175323, + "learning_rate": 9.679085562429857e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4628944642894887, + "grad_norm": 0.18995985388755798, + "learning_rate": 9.678499519383768e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.46331719896007273, + "grad_norm": 0.2524673342704773, + "learning_rate": 9.677912959494361e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.46373993363065674, + "grad_norm": 0.16395512223243713, + "learning_rate": 9.677325882826438e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4641626683012407, + "grad_norm": 0.23175068199634552, + "learning_rate": 9.67673828944485e-05, + "loss": 0.3708, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4645854029718247, + "grad_norm": 0.25546813011169434, + "learning_rate": 9.676150179414515e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 10990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4650081376424087, + "grad_norm": 0.19441547989845276, + "learning_rate": 9.675561552800398e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.46543087231299274, + "grad_norm": 0.24472801387310028, + "learning_rate": 9.674972409667528e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.46585360698357675, + "grad_norm": 0.17579054832458496, + "learning_rate": 9.674382750080989e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.46627634165416076, + "grad_norm": 0.19884848594665527, + "learning_rate": 9.673792574105921e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4666990763247448, + "grad_norm": 0.2196606546640396, + "learning_rate": 9.673201881807523e-05, + "loss": 0.3702, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4671218109953288, + "grad_norm": 0.20688439905643463, + "learning_rate": 9.672610673251047e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4675445456659128, + "grad_norm": 0.22062909603118896, + "learning_rate": 9.672018948501809e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4679672803364968, + "grad_norm": 0.27289682626724243, + "learning_rate": 9.671426707625175e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.46839001500708083, + "grad_norm": 0.23843888938426971, + "learning_rate": 9.670833950686573e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4688127496776648, + "grad_norm": 0.2244870811700821, + "learning_rate": 9.670240677751485e-05, + "loss": 0.3551, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4692354843482488, + "grad_norm": 0.26602134108543396, + "learning_rate": 9.669646888885451e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4696582190188328, + "grad_norm": 0.19046220183372498, + "learning_rate": 9.669052584154069e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4700809536894168, + "grad_norm": 0.18161241710186005, + "learning_rate": 9.668457763622993e-05, + "loss": 0.3563, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.47050368836000084, + "grad_norm": 0.2130180299282074, + "learning_rate": 9.667862427357933e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.47092642303058485, + "grad_norm": 0.23081596195697784, + "learning_rate": 9.667266575424658e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.47134915770116886, + "grad_norm": 0.2210111916065216, + "learning_rate": 9.666670207888991e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4717718923717529, + "grad_norm": 0.23308539390563965, + "learning_rate": 9.666073324816818e-05, + "loss": 0.3725, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4721946270423369, + "grad_norm": 0.22300438582897186, + "learning_rate": 9.665475926274072e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4726173617129209, + "grad_norm": 0.1772969365119934, + "learning_rate": 9.664878012326754e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4730400963835049, + "grad_norm": 0.2358759641647339, + "learning_rate": 9.664279583040916e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.47346283105408893, + "grad_norm": 0.242350772023201, + "learning_rate": 9.663680638482666e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4738855657246729, + "grad_norm": 0.23785339295864105, + "learning_rate": 9.66308117871817e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4743083003952569, + "grad_norm": 0.21272200345993042, + "learning_rate": 9.662481203813654e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4747310350658409, + "grad_norm": 0.1792801320552826, + "learning_rate": 9.66194078600965e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4751537697364249, + "grad_norm": 0.2045346200466156, + "learning_rate": 9.661339832521743e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.47557650440700894, + "grad_norm": 0.2759554088115692, + "learning_rate": 9.660738364086185e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.47599923907759295, + "grad_norm": 0.2212691456079483, + "learning_rate": 9.66013638076942e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.47642197374817696, + "grad_norm": 0.2442902773618698, + "learning_rate": 9.659533882637952e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.476844708418761, + "grad_norm": 0.25824686884880066, + "learning_rate": 9.65893086975834e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.477267443089345, + "grad_norm": 0.21272648870944977, + "learning_rate": 9.658327342197201e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.477690177759929, + "grad_norm": 0.2129853367805481, + "learning_rate": 9.657723300021205e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.478112912430513, + "grad_norm": 0.2771369218826294, + "learning_rate": 9.657118743297084e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.478535647101097, + "grad_norm": 0.19074834883213043, + "learning_rate": 9.656513672091625e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.478958381771681, + "grad_norm": 0.18461407721042633, + "learning_rate": 9.65590808647167e-05, + "loss": 0.3702, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.479381116442265, + "grad_norm": 0.23033803701400757, + "learning_rate": 9.65530198650412e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.479803851112849, + "grad_norm": 0.2045537531375885, + "learning_rate": 9.654695372255931e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.480226585783433, + "grad_norm": 0.21143308281898499, + "learning_rate": 9.654088243794117e-05, + "loss": 0.3566, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.48064932045401704, + "grad_norm": 0.2075282484292984, + "learning_rate": 9.653480601185751e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.48107205512460105, + "grad_norm": 0.2275225967168808, + "learning_rate": 9.652872444497959e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.48149478979518506, + "grad_norm": 0.19404946267604828, + "learning_rate": 9.652263773797924e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4819175244657691, + "grad_norm": 0.18149276077747345, + "learning_rate": 9.65165458915289e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4823402591363531, + "grad_norm": 0.1824837625026703, + "learning_rate": 9.651044890630152e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4827629938069371, + "grad_norm": 0.21694940328598022, + "learning_rate": 9.650434678297066e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.48318572847752106, + "grad_norm": 0.21912747621536255, + "learning_rate": 9.649823952221044e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.48360846314810507, + "grad_norm": 0.22813470661640167, + "learning_rate": 9.649212712469553e-05, + "loss": 0.3716, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4840311978186891, + "grad_norm": 0.20699870586395264, + "learning_rate": 9.648600959110119e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4844539324892731, + "grad_norm": 0.22606922686100006, + "learning_rate": 9.64798869221032e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4848766671598571, + "grad_norm": 0.15329203009605408, + "learning_rate": 9.647375911837802e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4852994018304411, + "grad_norm": 0.2132069319486618, + "learning_rate": 9.646762618060252e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.48572213650102514, + "grad_norm": 0.20041705667972565, + "learning_rate": 9.646148810945427e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.48614487117160915, + "grad_norm": 0.20689642429351807, + "learning_rate": 9.645534490561133e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.48656760584219316, + "grad_norm": 0.22115319967269897, + "learning_rate": 9.644919656975235e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4869903405127772, + "grad_norm": 0.23917004466056824, + "learning_rate": 9.644304310255656e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4874130751833612, + "grad_norm": 0.21833665668964386, + "learning_rate": 9.643688450470376e-05, + "loss": 0.3553, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.48783580985394515, + "grad_norm": 0.19958029687404633, + "learning_rate": 9.643072077687429e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.48825854452452916, + "grad_norm": 0.22850047051906586, + "learning_rate": 9.642455191974904e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.48868127919511317, + "grad_norm": 0.23630423843860626, + "learning_rate": 9.641837793400954e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4891040138656972, + "grad_norm": 0.17530380189418793, + "learning_rate": 9.641219882033782e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4895267485362812, + "grad_norm": 0.19058901071548462, + "learning_rate": 9.640601457941652e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4899494832068652, + "grad_norm": 0.24192412197589874, + "learning_rate": 9.63998252119288e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4903722178774492, + "grad_norm": 0.18252550065517426, + "learning_rate": 9.639363071855842e-05, + "loss": 0.3703, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.49079495254803324, + "grad_norm": 0.1743532419204712, + "learning_rate": 9.63874310999897e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.49121768721861725, + "grad_norm": 0.21043944358825684, + "learning_rate": 9.638122635690753e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.49164042188920126, + "grad_norm": 0.2056455761194229, + "learning_rate": 9.637501648999735e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4920631565597853, + "grad_norm": 0.20600742101669312, + "learning_rate": 9.636880149994519e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.49248589123036923, + "grad_norm": 0.24145682156085968, + "learning_rate": 9.63625813874376e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.49290862590095325, + "grad_norm": 0.20418977737426758, + "learning_rate": 9.635635615316178e-05, + "loss": 0.3703, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.49333136057153726, + "grad_norm": 0.1713538020849228, + "learning_rate": 9.635012579780541e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.49375409524212127, + "grad_norm": 0.2029453068971634, + "learning_rate": 9.634389032205677e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4941768299127053, + "grad_norm": 0.20467451214790344, + "learning_rate": 9.633764972660473e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4945995645832893, + "grad_norm": 0.2055107057094574, + "learning_rate": 9.633140401213867e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4950222992538733, + "grad_norm": 0.2171732485294342, + "learning_rate": 9.632515317934858e-05, + "loss": 0.3693, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4954450339244573, + "grad_norm": 0.18927907943725586, + "learning_rate": 9.631889722892502e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.49586776859504134, + "grad_norm": 0.22383445501327515, + "learning_rate": 9.631263616155905e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.49629050326562535, + "grad_norm": 0.1930072009563446, + "learning_rate": 9.63063699779424e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.49671323793620936, + "grad_norm": 0.2184622585773468, + "learning_rate": 9.630009867876727e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4971359726067933, + "grad_norm": 0.18693263828754425, + "learning_rate": 9.629382226472648e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.49755870727737733, + "grad_norm": 0.17952308058738708, + "learning_rate": 9.62875407365134e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.49798144194796135, + "grad_norm": 0.23151594400405884, + "learning_rate": 9.628125409482193e-05, + "loss": 0.3744, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.49840417661854536, + "grad_norm": 0.20030078291893005, + "learning_rate": 9.62749623403466e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.49882691128912937, + "grad_norm": 0.21435809135437012, + "learning_rate": 9.626866547378248e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4992496459597134, + "grad_norm": 0.20338377356529236, + "learning_rate": 9.626236349582519e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.4996723806302974, + "grad_norm": 0.2392309159040451, + "learning_rate": 9.625605640717091e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5000951153008814, + "grad_norm": 0.19162926077842712, + "learning_rate": 9.62497442085164e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5005178499714654, + "grad_norm": 0.20979590713977814, + "learning_rate": 9.624342690055899e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5009405846420494, + "grad_norm": 0.2028031349182129, + "learning_rate": 9.623710448399655e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5013633193126334, + "grad_norm": 0.20504340529441833, + "learning_rate": 9.623077695952754e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5017860539832174, + "grad_norm": 0.2146553099155426, + "learning_rate": 9.622444432785098e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5022087886538015, + "grad_norm": 0.22940389811992645, + "learning_rate": 9.621810658966645e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5026315233243854, + "grad_norm": 0.18635182082653046, + "learning_rate": 9.621176374567406e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5030542579949695, + "grad_norm": 0.24061566591262817, + "learning_rate": 9.620541579657458e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5034769926655535, + "grad_norm": 0.18461842834949493, + "learning_rate": 9.619906274306922e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5038997273361375, + "grad_norm": 0.1894863098859787, + "learning_rate": 9.619270458585985e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5043224620067215, + "grad_norm": 0.19231359660625458, + "learning_rate": 9.618634132564886e-05, + "loss": 0.3685, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5047451966773054, + "grad_norm": 0.2051105946302414, + "learning_rate": 9.61799729631392e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5051679313478895, + "grad_norm": 0.2011992633342743, + "learning_rate": 9.617359949903442e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5055906660184735, + "grad_norm": 0.19059208035469055, + "learning_rate": 9.616722093403858e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5060134006890575, + "grad_norm": 0.24021196365356445, + "learning_rate": 9.616083726885634e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5064361353596415, + "grad_norm": 0.1647019386291504, + "learning_rate": 9.615444850419295e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5068588700302256, + "grad_norm": 0.18505112826824188, + "learning_rate": 9.614805464075414e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 11990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5072816047008095, + "grad_norm": 0.19882570207118988, + "learning_rate": 9.614165567924629e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5077043393713936, + "grad_norm": 0.22791042923927307, + "learning_rate": 9.613525162037628e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5081270740419775, + "grad_norm": 0.22000567615032196, + "learning_rate": 9.612884246485162e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5085498087125616, + "grad_norm": 0.21946455538272858, + "learning_rate": 9.61224282133803e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5089725433831456, + "grad_norm": 0.21274884045124054, + "learning_rate": 9.611600886667092e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5093952780537295, + "grad_norm": 0.18361110985279083, + "learning_rate": 9.610958442543267e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5098180127243136, + "grad_norm": 0.20018818974494934, + "learning_rate": 9.610315489037524e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5102407473948976, + "grad_norm": 0.1769731342792511, + "learning_rate": 9.609672026220892e-05, + "loss": 0.3548, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5106634820654816, + "grad_norm": 0.20699332654476166, + "learning_rate": 9.609028054164454e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5110862167360656, + "grad_norm": 0.2457803189754486, + "learning_rate": 9.608383572939356e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5115089514066496, + "grad_norm": 0.2137855589389801, + "learning_rate": 9.607738582616793e-05, + "loss": 0.374, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5119316860772336, + "grad_norm": 0.2077208161354065, + "learning_rate": 9.607093083268015e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5123544207478177, + "grad_norm": 0.19758087396621704, + "learning_rate": 9.606447074964335e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5127771554184016, + "grad_norm": 0.22500759363174438, + "learning_rate": 9.605800557777116e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5131998900889857, + "grad_norm": 0.20040248334407806, + "learning_rate": 9.605153531777784e-05, + "loss": 0.3707, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5136226247595697, + "grad_norm": 0.19374194741249084, + "learning_rate": 9.604505997037814e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5140453594301536, + "grad_norm": 0.19826428592205048, + "learning_rate": 9.603857953628743e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5144680941007377, + "grad_norm": 0.2037767767906189, + "learning_rate": 9.60320940162216e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5148908287713216, + "grad_norm": 0.17829342186450958, + "learning_rate": 9.602560341089713e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5153135634419057, + "grad_norm": 0.20653146505355835, + "learning_rate": 9.601910772103102e-05, + "loss": 0.3702, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5157362981124897, + "grad_norm": 0.1911090761423111, + "learning_rate": 9.601260694734089e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5161590327830737, + "grad_norm": 0.20907513797283173, + "learning_rate": 9.600610109054491e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5165817674536577, + "grad_norm": 0.1931779533624649, + "learning_rate": 9.599959015136175e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5170045021242418, + "grad_norm": 0.1743597686290741, + "learning_rate": 9.599307413051072e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5174272367948257, + "grad_norm": 0.18980051577091217, + "learning_rate": 9.598655302871165e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5178499714654098, + "grad_norm": 0.24403193593025208, + "learning_rate": 9.598002684668492e-05, + "loss": 0.3693, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5182727061359937, + "grad_norm": 0.19568020105361938, + "learning_rate": 9.597349558515153e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5186954408065777, + "grad_norm": 0.1763996183872223, + "learning_rate": 9.596695924483296e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5191181754771618, + "grad_norm": 0.19197949767112732, + "learning_rate": 9.596041782645133e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5195409101477457, + "grad_norm": 0.18677952885627747, + "learning_rate": 9.595387133072926e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5199636448183298, + "grad_norm": 0.20186470448970795, + "learning_rate": 9.594731975838997e-05, + "loss": 0.3703, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5203863794889138, + "grad_norm": 0.19092339277267456, + "learning_rate": 9.594076311015721e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5208091141594978, + "grad_norm": 0.2253124862909317, + "learning_rate": 9.59342013867553e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5212318488300818, + "grad_norm": 0.22820687294006348, + "learning_rate": 9.592763458890915e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5216545835006658, + "grad_norm": 0.1799338012933731, + "learning_rate": 9.59210627173442e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5220773181712498, + "grad_norm": 0.19860225915908813, + "learning_rate": 9.591448577278643e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5225000528418339, + "grad_norm": 0.2579852342605591, + "learning_rate": 9.590790375596246e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5229227875124178, + "grad_norm": 0.20779399573802948, + "learning_rate": 9.590131666759938e-05, + "loss": 0.3692, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5233455221830018, + "grad_norm": 0.19604183733463287, + "learning_rate": 9.58947245084249e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5237682568535859, + "grad_norm": 0.20630598068237305, + "learning_rate": 9.588812727916725e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5241909915241698, + "grad_norm": 0.19963769614696503, + "learning_rate": 9.588152498055526e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5246137261947539, + "grad_norm": 0.20133978128433228, + "learning_rate": 9.587491761331828e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5250364608653378, + "grad_norm": 0.1777358204126358, + "learning_rate": 9.586830517818625e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5254591955359219, + "grad_norm": 0.21273547410964966, + "learning_rate": 9.586168767588966e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5258819302065059, + "grad_norm": 0.1853322684764862, + "learning_rate": 9.585506510715954e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5263046648770899, + "grad_norm": 0.17746450006961823, + "learning_rate": 9.584843747272754e-05, + "loss": 0.3701, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5267273995476739, + "grad_norm": 0.1927810162305832, + "learning_rate": 9.584180477332579e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.527150134218258, + "grad_norm": 0.17888811230659485, + "learning_rate": 9.583516700968702e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5275728688888419, + "grad_norm": 0.19572827219963074, + "learning_rate": 9.582852418254454e-05, + "loss": 0.3703, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5279956035594259, + "grad_norm": 0.19500665366649628, + "learning_rate": 9.582187629263218e-05, + "loss": 0.3701, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5284183382300099, + "grad_norm": 0.18620096147060394, + "learning_rate": 9.581522334068436e-05, + "loss": 0.3722, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5288410729005939, + "grad_norm": 0.18900392949581146, + "learning_rate": 9.580856532743603e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.529263807571178, + "grad_norm": 0.20776870846748352, + "learning_rate": 9.580190225362271e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5296865422417619, + "grad_norm": 0.26729997992515564, + "learning_rate": 9.57952341199805e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.530109276912346, + "grad_norm": 0.18522551655769348, + "learning_rate": 9.578856092724603e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.53053201158293, + "grad_norm": 0.18002758920192719, + "learning_rate": 9.578188267615651e-05, + "loss": 0.3555, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.530954746253514, + "grad_norm": 0.19054776430130005, + "learning_rate": 9.577519936744968e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.531377480924098, + "grad_norm": 0.21291740238666534, + "learning_rate": 9.57685110018639e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.531800215594682, + "grad_norm": 0.21923348307609558, + "learning_rate": 9.5761817580138e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.532222950265266, + "grad_norm": 0.21016332507133484, + "learning_rate": 9.575511910301145e-05, + "loss": 0.3709, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.53264568493585, + "grad_norm": 0.21826370060443878, + "learning_rate": 9.574841557122422e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.533068419606434, + "grad_norm": 0.24784386157989502, + "learning_rate": 9.57417069855169e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.533491154277018, + "grad_norm": 0.19868743419647217, + "learning_rate": 9.573499334663055e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5339138889476021, + "grad_norm": 0.189010351896286, + "learning_rate": 9.572827465530687e-05, + "loss": 0.3712, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.534336623618186, + "grad_norm": 0.18294134736061096, + "learning_rate": 9.572155091228809e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5347593582887701, + "grad_norm": 0.24483995139598846, + "learning_rate": 9.5714822118317e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.535182092959354, + "grad_norm": 0.18776100873947144, + "learning_rate": 9.570808827413691e-05, + "loss": 0.3729, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5356048276299381, + "grad_norm": 0.22558192908763885, + "learning_rate": 9.570134938049174e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5360275623005221, + "grad_norm": 0.2008778601884842, + "learning_rate": 9.569460543812597e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5364502969711061, + "grad_norm": 0.1988404393196106, + "learning_rate": 9.568785644778458e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5368730316416901, + "grad_norm": 0.1840236634016037, + "learning_rate": 9.568110241021317e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.537295766312274, + "grad_norm": 0.18268108367919922, + "learning_rate": 9.567434332615787e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5377185009828581, + "grad_norm": 0.19013182818889618, + "learning_rate": 9.566757919636537e-05, + "loss": 0.3727, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5381412356534421, + "grad_norm": 0.21360746026039124, + "learning_rate": 9.566081002158289e-05, + "loss": 0.3552, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5385639703240261, + "grad_norm": 0.19604314863681793, + "learning_rate": 9.565403580255828e-05, + "loss": 0.3697, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5389867049946101, + "grad_norm": 0.20862199366092682, + "learning_rate": 9.564725654003988e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5394094396651942, + "grad_norm": 0.1809539645910263, + "learning_rate": 9.564047223477659e-05, + "loss": 0.3691, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5398321743357781, + "grad_norm": 0.18420305848121643, + "learning_rate": 9.563368288751792e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5402549090063622, + "grad_norm": 0.2033836841583252, + "learning_rate": 9.562688849901387e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5406776436769462, + "grad_norm": 0.17831693589687347, + "learning_rate": 9.562008907001506e-05, + "loss": 0.3544, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5411003783475302, + "grad_norm": 0.21228055655956268, + "learning_rate": 9.561328460127261e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5415231130181142, + "grad_norm": 0.19066952168941498, + "learning_rate": 9.560647509353826e-05, + "loss": 0.3713, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5419458476886981, + "grad_norm": 0.19016119837760925, + "learning_rate": 9.559966054756423e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5423685823592822, + "grad_norm": 0.22589746117591858, + "learning_rate": 9.559284096410334e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5427913170298662, + "grad_norm": 0.19247937202453613, + "learning_rate": 9.5586016343909e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5432140517004502, + "grad_norm": 0.22270944714546204, + "learning_rate": 9.557918668773511e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5436367863710342, + "grad_norm": 0.19629359245300293, + "learning_rate": 9.557235199633616e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5440595210416183, + "grad_norm": 0.22681385278701782, + "learning_rate": 9.55655122704672e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5444822557122022, + "grad_norm": 0.16625064611434937, + "learning_rate": 9.55586675108838e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5449049903827863, + "grad_norm": 0.18160100281238556, + "learning_rate": 9.555181771834216e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5453277250533702, + "grad_norm": 0.1956731677055359, + "learning_rate": 9.554496289359897e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5457504597239543, + "grad_norm": 0.18409115076065063, + "learning_rate": 9.553810303741148e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5461731943945383, + "grad_norm": 0.21630427241325378, + "learning_rate": 9.553123815053753e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5465959290651222, + "grad_norm": 0.22720500826835632, + "learning_rate": 9.55243682337355e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5470186637357063, + "grad_norm": 0.19064660370349884, + "learning_rate": 9.55174932877643e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5474413984062902, + "grad_norm": 0.21762709319591522, + "learning_rate": 9.551061331338345e-05, + "loss": 0.3706, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5478641330768743, + "grad_norm": 0.17867860198020935, + "learning_rate": 9.5503728311353e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5482868677474583, + "grad_norm": 0.19221161305904388, + "learning_rate": 9.54968382824335e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5487096024180423, + "grad_norm": 0.20748938620090485, + "learning_rate": 9.548994322738615e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5491323370886263, + "grad_norm": 0.20924051105976105, + "learning_rate": 9.548304314697266e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 12990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5495550717592104, + "grad_norm": 0.20203037559986115, + "learning_rate": 9.547613804195526e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5499778064297943, + "grad_norm": 0.22888073325157166, + "learning_rate": 9.54692279130968e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5504005411003784, + "grad_norm": 0.18988902866840363, + "learning_rate": 9.546231276116065e-05, + "loss": 0.3744, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5508232757709624, + "grad_norm": 0.18444906175136566, + "learning_rate": 9.545539258691075e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5512460104415463, + "grad_norm": 0.19896525144577026, + "learning_rate": 9.544846739111157e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5516687451121304, + "grad_norm": 0.22055573761463165, + "learning_rate": 9.544153717452818e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5520914797827143, + "grad_norm": 0.1874997615814209, + "learning_rate": 9.543460193792612e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5525142144532984, + "grad_norm": 0.18512339890003204, + "learning_rate": 9.54276616820716e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5529369491238824, + "grad_norm": 0.25395727157592773, + "learning_rate": 9.542071640773127e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5533596837944664, + "grad_norm": 0.21181254088878632, + "learning_rate": 9.541376611567244e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5537824184650504, + "grad_norm": 0.1740039736032486, + "learning_rate": 9.540681080666287e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5542051531356345, + "grad_norm": 0.16140532493591309, + "learning_rate": 9.539985048147097e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5546278878062184, + "grad_norm": 0.1906859129667282, + "learning_rate": 9.539288514086564e-05, + "loss": 0.3711, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5550506224768025, + "grad_norm": 0.2033836990594864, + "learning_rate": 9.538591478561638e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5554733571473864, + "grad_norm": 0.2008925825357437, + "learning_rate": 9.537893941649318e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5558960918179705, + "grad_norm": 0.1892184615135193, + "learning_rate": 9.537195903426665e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5563188264885545, + "grad_norm": 0.16894683241844177, + "learning_rate": 9.536497363970792e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5567415611591384, + "grad_norm": 0.18248498439788818, + "learning_rate": 9.535798323358869e-05, + "loss": 0.3755, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5571642958297225, + "grad_norm": 0.19140248000621796, + "learning_rate": 9.535098781668118e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5575870305003064, + "grad_norm": 0.23520712554454803, + "learning_rate": 9.534398738975821e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5580097651708905, + "grad_norm": 0.2121291160583496, + "learning_rate": 9.533698195359313e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5584324998414745, + "grad_norm": 0.1695408970117569, + "learning_rate": 9.532997150895984e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5588552345120585, + "grad_norm": 0.2166776806116104, + "learning_rate": 9.53229560566328e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5592779691826425, + "grad_norm": 0.20181049406528473, + "learning_rate": 9.5315935597387e-05, + "loss": 0.3751, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5597007038532266, + "grad_norm": 0.20222751796245575, + "learning_rate": 9.530891013199804e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5601234385238105, + "grad_norm": 0.21222788095474243, + "learning_rate": 9.530187966124202e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5605461731943946, + "grad_norm": 0.17034217715263367, + "learning_rate": 9.52948441858956e-05, + "loss": 0.3747, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5609689078649785, + "grad_norm": 0.20839989185333252, + "learning_rate": 9.528780370673602e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5613916425355625, + "grad_norm": 0.19396227598190308, + "learning_rate": 9.528075822454105e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5618143772061466, + "grad_norm": 0.2057632952928543, + "learning_rate": 9.527370774008901e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5622371118767305, + "grad_norm": 0.24256882071495056, + "learning_rate": 9.526665225415881e-05, + "loss": 0.3678, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5626598465473146, + "grad_norm": 0.1635393351316452, + "learning_rate": 9.525959176752985e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5630825812178986, + "grad_norm": 0.18837235867977142, + "learning_rate": 9.525252628098213e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5635053158884826, + "grad_norm": 0.1939820498228073, + "learning_rate": 9.524545579529619e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5639280505590666, + "grad_norm": 0.1878986954689026, + "learning_rate": 9.523838031125312e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5643507852296507, + "grad_norm": 0.19968904554843903, + "learning_rate": 9.523129982963457e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5647735199002346, + "grad_norm": 0.18338371813297272, + "learning_rate": 9.522421435122272e-05, + "loss": 0.368, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5651962545708187, + "grad_norm": 0.16680116951465607, + "learning_rate": 9.521712387680033e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5656189892414026, + "grad_norm": 0.1515718251466751, + "learning_rate": 9.521002840715067e-05, + "loss": 0.3715, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5660417239119866, + "grad_norm": 0.17318212985992432, + "learning_rate": 9.520292794305765e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5664644585825707, + "grad_norm": 0.17909307777881622, + "learning_rate": 9.519582248530562e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5668871932531546, + "grad_norm": 0.19554804265499115, + "learning_rate": 9.518871203467956e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5673099279237387, + "grad_norm": 0.20735621452331543, + "learning_rate": 9.518159659196495e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5677326625943226, + "grad_norm": 0.20403486490249634, + "learning_rate": 9.517447615794788e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5681553972649067, + "grad_norm": 0.203049436211586, + "learning_rate": 9.516735073341495e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5685781319354907, + "grad_norm": 0.17673009634017944, + "learning_rate": 9.51602203191533e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5690008666060747, + "grad_norm": 0.2012271285057068, + "learning_rate": 9.515308491595066e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5694236012766587, + "grad_norm": 0.20451384782791138, + "learning_rate": 9.51459445245953e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5698463359472428, + "grad_norm": 0.17601759731769562, + "learning_rate": 9.5138799145876e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5702690706178267, + "grad_norm": 0.19093888998031616, + "learning_rate": 9.513164878058215e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5706918052884107, + "grad_norm": 0.179508775472641, + "learning_rate": 9.512449342950367e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5711145399589947, + "grad_norm": 0.175676167011261, + "learning_rate": 9.5117333093431e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5715372746295787, + "grad_norm": 0.17921531200408936, + "learning_rate": 9.51101677731552e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5719600093001628, + "grad_norm": 0.21372218430042267, + "learning_rate": 9.51029974694678e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5723827439707467, + "grad_norm": 0.15531377494335175, + "learning_rate": 9.509582218316092e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5728054786413308, + "grad_norm": 0.19364750385284424, + "learning_rate": 9.508864191502724e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5732282133119148, + "grad_norm": 0.18257613480091095, + "learning_rate": 9.508145666585999e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5736509479824988, + "grad_norm": 0.15000589191913605, + "learning_rate": 9.507426643645292e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5740736826530828, + "grad_norm": 0.16902996599674225, + "learning_rate": 9.506707122760035e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5744964173236669, + "grad_norm": 0.19705748558044434, + "learning_rate": 9.505987104009715e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5749191519942508, + "grad_norm": 0.21916094422340393, + "learning_rate": 9.505266587473874e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5753418866648348, + "grad_norm": 0.15644967555999756, + "learning_rate": 9.504545573232111e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5757646213354188, + "grad_norm": 0.171131432056427, + "learning_rate": 9.503824061364075e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5761873560060028, + "grad_norm": 0.17193485796451569, + "learning_rate": 9.503102051949475e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5766100906765869, + "grad_norm": 0.1922212690114975, + "learning_rate": 9.50237954506807e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5770328253471708, + "grad_norm": 0.20256853103637695, + "learning_rate": 9.501656540799679e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5774555600177549, + "grad_norm": 0.19717912375926971, + "learning_rate": 9.500933039224176e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5778782946883388, + "grad_norm": 0.18368108570575714, + "learning_rate": 9.500209040421483e-05, + "loss": 0.3562, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5783010293589229, + "grad_norm": 0.1804526299238205, + "learning_rate": 9.499484544471584e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5787237640295069, + "grad_norm": 0.1796707808971405, + "learning_rate": 9.498759551454515e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5791464987000909, + "grad_norm": 0.17042358219623566, + "learning_rate": 9.498034061450369e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5795692333706749, + "grad_norm": 0.17904022336006165, + "learning_rate": 9.497308074539289e-05, + "loss": 0.3684, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5799919680412589, + "grad_norm": 0.19944031536579132, + "learning_rate": 9.496581590801479e-05, + "loss": 0.3699, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5804147027118429, + "grad_norm": 0.20197473466396332, + "learning_rate": 9.495854610317196e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5808374373824269, + "grad_norm": 0.18054302036762238, + "learning_rate": 9.495127133166748e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.581260172053011, + "grad_norm": 0.18990656733512878, + "learning_rate": 9.494399159430503e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5816829067235949, + "grad_norm": 0.18552953004837036, + "learning_rate": 9.493670689188879e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.582105641394179, + "grad_norm": 0.16003400087356567, + "learning_rate": 9.492941722522355e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5825283760647629, + "grad_norm": 0.18862947821617126, + "learning_rate": 9.492212259511461e-05, + "loss": 0.3558, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.582951110735347, + "grad_norm": 0.1980922371149063, + "learning_rate": 9.49148230023678e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.583373845405931, + "grad_norm": 0.24393828213214874, + "learning_rate": 9.490751844778953e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.583796580076515, + "grad_norm": 0.23557788133621216, + "learning_rate": 9.490020893218677e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.584219314747099, + "grad_norm": 0.2066054493188858, + "learning_rate": 9.4892894456367e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5846420494176829, + "grad_norm": 0.1869368702173233, + "learning_rate": 9.488557502113825e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.585064784088267, + "grad_norm": 0.1979234665632248, + "learning_rate": 9.487825062730913e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.585487518758851, + "grad_norm": 0.18960845470428467, + "learning_rate": 9.487092127568878e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.585910253429435, + "grad_norm": 0.23855020105838776, + "learning_rate": 9.486358696708689e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.586332988100019, + "grad_norm": 0.19087092578411102, + "learning_rate": 9.485698185179561e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5867557227706031, + "grad_norm": 0.2155318558216095, + "learning_rate": 9.484963812716145e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.587178457441187, + "grad_norm": 0.15590235590934753, + "learning_rate": 9.484228944789692e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5876011921117711, + "grad_norm": 0.1801251918077469, + "learning_rate": 9.483493581481386e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.588023926782355, + "grad_norm": 0.2420959174633026, + "learning_rate": 9.482757722872466e-05, + "loss": 0.3714, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5884466614529391, + "grad_norm": 0.19211138784885406, + "learning_rate": 9.48202136904422e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5888693961235231, + "grad_norm": 0.1768791228532791, + "learning_rate": 9.481284520077998e-05, + "loss": 0.3712, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.589292130794107, + "grad_norm": 0.22089192271232605, + "learning_rate": 9.480547176055201e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5897148654646911, + "grad_norm": 0.2140570431947708, + "learning_rate": 9.479809337057281e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.590137600135275, + "grad_norm": 0.21330046653747559, + "learning_rate": 9.479071003165754e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5905603348058591, + "grad_norm": 0.17839555442333221, + "learning_rate": 9.478332174462181e-05, + "loss": 0.3549, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5909830694764431, + "grad_norm": 0.1954537034034729, + "learning_rate": 9.477592851028183e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5914058041470271, + "grad_norm": 0.2131800800561905, + "learning_rate": 9.476853032945437e-05, + "loss": 0.3685, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 13990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5918285388176111, + "grad_norm": 0.1931106597185135, + "learning_rate": 9.476112720295667e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5922512734881952, + "grad_norm": 0.19409088790416718, + "learning_rate": 9.475371913160662e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5926740081587791, + "grad_norm": 0.15970778465270996, + "learning_rate": 9.474630611622258e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5930967428293632, + "grad_norm": 0.20411179959774017, + "learning_rate": 9.473888815762348e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5935194774999472, + "grad_norm": 0.16581985354423523, + "learning_rate": 9.473146525662882e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5939422121705311, + "grad_norm": 0.2130800485610962, + "learning_rate": 9.47240374140586e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5943649468411152, + "grad_norm": 0.18768766522407532, + "learning_rate": 9.471660463073337e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5947876815116991, + "grad_norm": 0.1820385605096817, + "learning_rate": 9.47091669074743e-05, + "loss": 0.3678, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5952104161822832, + "grad_norm": 0.18931207060813904, + "learning_rate": 9.470172424510299e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5956331508528672, + "grad_norm": 0.2107534259557724, + "learning_rate": 9.46942766444417e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5960558855234512, + "grad_norm": 0.17755164206027985, + "learning_rate": 9.468682410631316e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5964786201940352, + "grad_norm": 0.1897057443857193, + "learning_rate": 9.467936663154064e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5969013548646193, + "grad_norm": 0.2167850285768509, + "learning_rate": 9.467190422094802e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5973240895352032, + "grad_norm": 0.1515897959470749, + "learning_rate": 9.466443687535966e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5977468242057873, + "grad_norm": 0.21184810996055603, + "learning_rate": 9.465696459560053e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5981695588763712, + "grad_norm": 0.15668217837810516, + "learning_rate": 9.464948738249606e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5985922935469552, + "grad_norm": 0.16837044060230255, + "learning_rate": 9.464200523687232e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5990150282175393, + "grad_norm": 0.19391438364982605, + "learning_rate": 9.463451815955585e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5994377628881232, + "grad_norm": 0.25367632508277893, + "learning_rate": 9.462702615137375e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.5998604975587073, + "grad_norm": 0.16653546690940857, + "learning_rate": 9.46195292131537e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6002832322292913, + "grad_norm": 0.22145292162895203, + "learning_rate": 9.461202734572389e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6007059668998753, + "grad_norm": 0.2432311326265335, + "learning_rate": 9.46045205499131e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6011287015704593, + "grad_norm": 0.17736324667930603, + "learning_rate": 9.459700882655058e-05, + "loss": 0.3684, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6015514362410433, + "grad_norm": 0.16668257117271423, + "learning_rate": 9.458949217646618e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6019741709116273, + "grad_norm": 0.16263549029827118, + "learning_rate": 9.458197060049027e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6023969055822114, + "grad_norm": 0.17305079102516174, + "learning_rate": 9.457444409945379e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6028196402527953, + "grad_norm": 0.17476993799209595, + "learning_rate": 9.456691267418821e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6032423749233793, + "grad_norm": 0.1542273908853531, + "learning_rate": 9.455937632552551e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6036651095939634, + "grad_norm": 0.18534603714942932, + "learning_rate": 9.455183505429829e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6040878442645473, + "grad_norm": 0.18297776579856873, + "learning_rate": 9.454428886133961e-05, + "loss": 0.3532, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6045105789351314, + "grad_norm": 0.24641339480876923, + "learning_rate": 9.453673774748314e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6049333136057153, + "grad_norm": 0.18649922311306, + "learning_rate": 9.452918171356306e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6053560482762994, + "grad_norm": 0.20270194113254547, + "learning_rate": 9.452162076041409e-05, + "loss": 0.3556, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6057787829468834, + "grad_norm": 0.19003432989120483, + "learning_rate": 9.451405488887152e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6062015176174674, + "grad_norm": 0.17065392434597015, + "learning_rate": 9.450648409977115e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6066242522880514, + "grad_norm": 0.17680789530277252, + "learning_rate": 9.449890839394936e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6070469869586355, + "grad_norm": 0.2127448469400406, + "learning_rate": 9.449132777224304e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6074697216292194, + "grad_norm": 0.17033278942108154, + "learning_rate": 9.448374223548963e-05, + "loss": 0.3736, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6078924562998034, + "grad_norm": 0.17127923667430878, + "learning_rate": 9.447615178452713e-05, + "loss": 0.3678, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6083151909703874, + "grad_norm": 0.22616232931613922, + "learning_rate": 9.446855642019408e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6087379256409714, + "grad_norm": 0.18117384612560272, + "learning_rate": 9.446095614332955e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6091606603115555, + "grad_norm": 0.17458172142505646, + "learning_rate": 9.445335095477315e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6095833949821394, + "grad_norm": 0.16281390190124512, + "learning_rate": 9.444574085536503e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6100061296527235, + "grad_norm": 0.1893586963415146, + "learning_rate": 9.443812584594593e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6104288643233075, + "grad_norm": 0.20248852670192719, + "learning_rate": 9.443050592735707e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6108515989938915, + "grad_norm": 0.18789134919643402, + "learning_rate": 9.442288110044025e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6112743336644755, + "grad_norm": 0.15900124609470367, + "learning_rate": 9.441525136603778e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6116970683350595, + "grad_norm": 0.20148754119873047, + "learning_rate": 9.440761672499253e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6121198030056435, + "grad_norm": 0.22931747138500214, + "learning_rate": 9.439997717814794e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6125425376762275, + "grad_norm": 0.2065892219543457, + "learning_rate": 9.439233272634795e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6129652723468115, + "grad_norm": 0.1823994219303131, + "learning_rate": 9.438468337043708e-05, + "loss": 0.3737, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6133880070173955, + "grad_norm": 0.18089735507965088, + "learning_rate": 9.437702911126034e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6138107416879796, + "grad_norm": 0.15217716991901398, + "learning_rate": 9.436936994966333e-05, + "loss": 0.3566, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6142334763585635, + "grad_norm": 0.17669057846069336, + "learning_rate": 9.436170588649216e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6146562110291476, + "grad_norm": 0.2200007140636444, + "learning_rate": 9.435403692259351e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6150789456997315, + "grad_norm": 0.1733706146478653, + "learning_rate": 9.434636305881457e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6155016803703156, + "grad_norm": 0.15672095119953156, + "learning_rate": 9.43386842960031e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6159244150408996, + "grad_norm": 0.17138436436653137, + "learning_rate": 9.433100063500739e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6163471497114836, + "grad_norm": 0.16867947578430176, + "learning_rate": 9.432331207667624e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6167698843820676, + "grad_norm": 0.181827574968338, + "learning_rate": 9.431561862185907e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6171926190526517, + "grad_norm": 0.19489766657352448, + "learning_rate": 9.430792027140574e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6176153537232356, + "grad_norm": 0.18291707336902618, + "learning_rate": 9.430021702616675e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6180380883938196, + "grad_norm": 0.16169050335884094, + "learning_rate": 9.429250888699306e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6184608230644036, + "grad_norm": 0.15340586006641388, + "learning_rate": 9.428479585473622e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6188835577349876, + "grad_norm": 0.20025157928466797, + "learning_rate": 9.427707793024829e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6193062924055717, + "grad_norm": 0.16804851591587067, + "learning_rate": 9.42693551143819e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6197290270761556, + "grad_norm": 0.16742485761642456, + "learning_rate": 9.42616274079902e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6201517617467397, + "grad_norm": 0.20491963624954224, + "learning_rate": 9.425389481192687e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6205744964173237, + "grad_norm": 0.19772061705589294, + "learning_rate": 9.424615732704619e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6209972310879077, + "grad_norm": 0.24099940061569214, + "learning_rate": 9.423841495420286e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6214199657584917, + "grad_norm": 0.20339785516262054, + "learning_rate": 9.423066769425227e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6218427004290757, + "grad_norm": 0.17577548325061798, + "learning_rate": 9.422291554805025e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6222654350996597, + "grad_norm": 0.17478814721107483, + "learning_rate": 9.421515851645317e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6226881697702437, + "grad_norm": 0.19201050698757172, + "learning_rate": 9.4207396600318e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6231109044408277, + "grad_norm": 0.18201161921024323, + "learning_rate": 9.41996298005022e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6235336391114117, + "grad_norm": 0.1986188441514969, + "learning_rate": 9.41918581178638e-05, + "loss": 0.3716, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6239563737819958, + "grad_norm": 0.19804272055625916, + "learning_rate": 9.418408155326131e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6243791084525797, + "grad_norm": 0.1795254796743393, + "learning_rate": 9.417630010755387e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6248018431231638, + "grad_norm": 0.16575391590595245, + "learning_rate": 9.41685137816011e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6252245777937477, + "grad_norm": 0.20458152890205383, + "learning_rate": 9.416072257626315e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6256473124643318, + "grad_norm": 0.15121738612651825, + "learning_rate": 9.415292649240075e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6260700471349158, + "grad_norm": 0.21010997891426086, + "learning_rate": 9.414512553087514e-05, + "loss": 0.3561, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6264927818054998, + "grad_norm": 0.18170802295207977, + "learning_rate": 9.41373196925481e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6269155164760838, + "grad_norm": 0.17866435647010803, + "learning_rate": 9.4129508978282e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6273382511466677, + "grad_norm": 0.18021976947784424, + "learning_rate": 9.412169338893965e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6277609858172518, + "grad_norm": 0.1566489338874817, + "learning_rate": 9.41138729253845e-05, + "loss": 0.3569, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6281837204878358, + "grad_norm": 0.16732943058013916, + "learning_rate": 9.410604758848045e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6286064551584198, + "grad_norm": 0.2116256207227707, + "learning_rate": 9.409821737909201e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6290291898290038, + "grad_norm": 0.18037298321723938, + "learning_rate": 9.40903822980842e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6294519244995879, + "grad_norm": 0.1967741847038269, + "learning_rate": 9.408254234632254e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6298746591701718, + "grad_norm": 0.16489069163799286, + "learning_rate": 9.407469752467319e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6302973938407559, + "grad_norm": 0.1801309883594513, + "learning_rate": 9.406684783400273e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6307201285113399, + "grad_norm": 0.1827356219291687, + "learning_rate": 9.405899327517833e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6311428631819239, + "grad_norm": 0.23621557652950287, + "learning_rate": 9.405113384906775e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6315655978525079, + "grad_norm": 0.17657719552516937, + "learning_rate": 9.404326955653917e-05, + "loss": 0.3552, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6319883325230918, + "grad_norm": 0.20024608075618744, + "learning_rate": 9.403540039846143e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6324110671936759, + "grad_norm": 0.23626649379730225, + "learning_rate": 9.402752637570382e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6328338018642599, + "grad_norm": 0.1905760020017624, + "learning_rate": 9.401964748913622e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6332565365348439, + "grad_norm": 0.1509421467781067, + "learning_rate": 9.401176373962901e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6336792712054279, + "grad_norm": 0.18266624212265015, + "learning_rate": 9.400387512805313e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 14990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.634102005876012, + "grad_norm": 0.1997155249118805, + "learning_rate": 9.399598165528004e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6345247405465959, + "grad_norm": 0.16626988351345062, + "learning_rate": 9.398808332218176e-05, + "loss": 0.3706, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.63494747521718, + "grad_norm": 0.20939688384532928, + "learning_rate": 9.398018012963085e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6353702098877639, + "grad_norm": 0.20309291779994965, + "learning_rate": 9.397227207850037e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.635792944558348, + "grad_norm": 0.16508708894252777, + "learning_rate": 9.396435916966392e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.636215679228932, + "grad_norm": 0.15987887978553772, + "learning_rate": 9.39564414039957e-05, + "loss": 0.3707, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6366384138995159, + "grad_norm": 0.22820498049259186, + "learning_rate": 9.394851878237039e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6370611485701, + "grad_norm": 0.22370317578315735, + "learning_rate": 9.394059130566318e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.637483883240684, + "grad_norm": 0.2311404049396515, + "learning_rate": 9.393265897474987e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.637906617911268, + "grad_norm": 0.20120257139205933, + "learning_rate": 9.392472179050678e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.638329352581852, + "grad_norm": 0.15694981813430786, + "learning_rate": 9.391677975381069e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.638752087252436, + "grad_norm": 0.16767945885658264, + "learning_rate": 9.390883286553901e-05, + "loss": 0.3706, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.63917482192302, + "grad_norm": 0.15651817619800568, + "learning_rate": 9.390088112656964e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6395975565936041, + "grad_norm": 0.240166574716568, + "learning_rate": 9.389292453778102e-05, + "loss": 0.3693, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.640020291264188, + "grad_norm": 0.13746953010559082, + "learning_rate": 9.388496310005215e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6404430259347721, + "grad_norm": 0.20631594955921173, + "learning_rate": 9.387699681426253e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.640865760605356, + "grad_norm": 0.16683673858642578, + "learning_rate": 9.386902568129221e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.64128849527594, + "grad_norm": 0.2201196849346161, + "learning_rate": 9.386104970202178e-05, + "loss": 0.3678, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6417112299465241, + "grad_norm": 0.16757643222808838, + "learning_rate": 9.385306887733238e-05, + "loss": 0.357, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.642133964617108, + "grad_norm": 0.15820354223251343, + "learning_rate": 9.384508320810563e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6425566992876921, + "grad_norm": 0.21457920968532562, + "learning_rate": 9.383709269522376e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6429794339582761, + "grad_norm": 0.17668968439102173, + "learning_rate": 9.382909733956948e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6434021686288601, + "grad_norm": 0.1993025690317154, + "learning_rate": 9.382109714202605e-05, + "loss": 0.3714, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6438249032994441, + "grad_norm": 0.16978511214256287, + "learning_rate": 9.381309210347729e-05, + "loss": 0.3545, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6442476379700282, + "grad_norm": 0.22131787240505219, + "learning_rate": 9.38050822248075e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6446703726406121, + "grad_norm": 0.17997796833515167, + "learning_rate": 9.379706750690158e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6450931073111962, + "grad_norm": 0.1839887499809265, + "learning_rate": 9.378904795064491e-05, + "loss": 0.371, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6455158419817801, + "grad_norm": 0.14387157559394836, + "learning_rate": 9.378102355692344e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6459385766523641, + "grad_norm": 0.16288724541664124, + "learning_rate": 9.377299432662362e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6463613113229482, + "grad_norm": 0.19110482931137085, + "learning_rate": 9.376496026063248e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6467840459935321, + "grad_norm": 0.18446744978427887, + "learning_rate": 9.375692135983753e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6472067806641162, + "grad_norm": 0.16248928010463715, + "learning_rate": 9.374887762512689e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6476295153347001, + "grad_norm": 0.2197078913450241, + "learning_rate": 9.374082905738913e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6480522500052842, + "grad_norm": 0.16689161956310272, + "learning_rate": 9.37327756575134e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6484749846758682, + "grad_norm": 0.18884021043777466, + "learning_rate": 9.372471742638939e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6488977193464522, + "grad_norm": 0.1814882904291153, + "learning_rate": 9.371665436490728e-05, + "loss": 0.368, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6493204540170362, + "grad_norm": 0.1908995509147644, + "learning_rate": 9.370858647395784e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6497431886876203, + "grad_norm": 0.24320141971111298, + "learning_rate": 9.370051375443233e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6501659233582042, + "grad_norm": 0.16903850436210632, + "learning_rate": 9.369243620722256e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6505886580287882, + "grad_norm": 0.1929364949464798, + "learning_rate": 9.36843538332209e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6510113926993722, + "grad_norm": 0.17236922681331635, + "learning_rate": 9.367626663332019e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6514341273699562, + "grad_norm": 0.1544257551431656, + "learning_rate": 9.366817460841387e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6518568620405403, + "grad_norm": 0.18631556630134583, + "learning_rate": 9.366007775939585e-05, + "loss": 0.3724, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6522795967111242, + "grad_norm": 0.1676522195339203, + "learning_rate": 9.36519760871606e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6527023313817083, + "grad_norm": 0.17733784019947052, + "learning_rate": 9.364386959260318e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6531250660522923, + "grad_norm": 0.16568008065223694, + "learning_rate": 9.36357582766191e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6535478007228763, + "grad_norm": 0.23636160790920258, + "learning_rate": 9.362764214010443e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6539705353934603, + "grad_norm": 0.16837367415428162, + "learning_rate": 9.361952118395579e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6543932700640444, + "grad_norm": 0.15755969285964966, + "learning_rate": 9.36113954090703e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6548160047346283, + "grad_norm": 0.18693573772907257, + "learning_rate": 9.360326481634563e-05, + "loss": 0.3716, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6552387394052123, + "grad_norm": 0.15331397950649261, + "learning_rate": 9.359512940668001e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6556614740757963, + "grad_norm": 0.13874609768390656, + "learning_rate": 9.358698918097214e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6560842087463803, + "grad_norm": 0.20930881798267365, + "learning_rate": 9.357884414012132e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6565069434169644, + "grad_norm": 0.1803918033838272, + "learning_rate": 9.357069428502731e-05, + "loss": 0.3545, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6569296780875483, + "grad_norm": 0.18346497416496277, + "learning_rate": 9.356253961659049e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6573524127581324, + "grad_norm": 0.17786507308483124, + "learning_rate": 9.355438013571169e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6577751474287163, + "grad_norm": 0.18249864876270294, + "learning_rate": 9.354621584329232e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6581978820993004, + "grad_norm": 0.1624058187007904, + "learning_rate": 9.35380467402343e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6586206167698844, + "grad_norm": 0.1825282722711563, + "learning_rate": 9.352987282744008e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6590433514404684, + "grad_norm": 0.17413344979286194, + "learning_rate": 9.352169410581264e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6594660861110524, + "grad_norm": 0.18360310792922974, + "learning_rate": 9.351351057625552e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6598888207816364, + "grad_norm": 0.15522487461566925, + "learning_rate": 9.350532223967278e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6603115554522204, + "grad_norm": 0.27923107147216797, + "learning_rate": 9.349712909696897e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6607342901228044, + "grad_norm": 0.17609870433807373, + "learning_rate": 9.348893114904925e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6611570247933884, + "grad_norm": 0.20469218492507935, + "learning_rate": 9.348072839681921e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6615797594639724, + "grad_norm": 0.16826170682907104, + "learning_rate": 9.347252084118506e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6620024941345565, + "grad_norm": 0.16059784591197968, + "learning_rate": 9.346430848305347e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6624252288051404, + "grad_norm": 0.17234016954898834, + "learning_rate": 9.345609132333172e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6628479634757245, + "grad_norm": 0.20594002306461334, + "learning_rate": 9.344786936292756e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6632706981463085, + "grad_norm": 0.16466572880744934, + "learning_rate": 9.343964260274926e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6636934328168925, + "grad_norm": 0.17499157786369324, + "learning_rate": 9.34314110437057e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6641161674874765, + "grad_norm": 0.1520400047302246, + "learning_rate": 9.34231746867062e-05, + "loss": 0.3698, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6645389021580604, + "grad_norm": 0.1660318225622177, + "learning_rate": 9.341493353266064e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6649616368286445, + "grad_norm": 0.1680949330329895, + "learning_rate": 9.340668758247946e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6653843714992285, + "grad_norm": 0.1797112673521042, + "learning_rate": 9.339843683707358e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6658071061698125, + "grad_norm": 0.1829652637243271, + "learning_rate": 9.339018129735453e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6662298408403965, + "grad_norm": 0.16152261197566986, + "learning_rate": 9.338192096423426e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6666525755109806, + "grad_norm": 0.2029210478067398, + "learning_rate": 9.337365583862531e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6670753101815645, + "grad_norm": 0.1426897943019867, + "learning_rate": 9.33653859214408e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6674980448521486, + "grad_norm": 0.21547558903694153, + "learning_rate": 9.335711121359425e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6679207795227325, + "grad_norm": 0.19747743010520935, + "learning_rate": 9.334883171599984e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6683435141933166, + "grad_norm": 0.1835537552833557, + "learning_rate": 9.33405474295722e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6687662488639006, + "grad_norm": 0.23173075914382935, + "learning_rate": 9.333225835522652e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6691889835344845, + "grad_norm": 0.1942657083272934, + "learning_rate": 9.33239644938785e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6696117182050686, + "grad_norm": 0.16394464671611786, + "learning_rate": 9.331566584644438e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6700344528756526, + "grad_norm": 0.1859986037015915, + "learning_rate": 9.330736241384093e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6704571875462366, + "grad_norm": 0.16082988679409027, + "learning_rate": 9.329905419698546e-05, + "loss": 0.3541, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6708799222168206, + "grad_norm": 0.17776557803153992, + "learning_rate": 9.329074119679578e-05, + "loss": 0.3697, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6713026568874046, + "grad_norm": 0.18288525938987732, + "learning_rate": 9.328242341419024e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6717253915579886, + "grad_norm": 0.21244622766971588, + "learning_rate": 9.327410085008775e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6721481262285727, + "grad_norm": 0.21970608830451965, + "learning_rate": 9.32657735054077e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6725708608991566, + "grad_norm": 0.17044483125209808, + "learning_rate": 9.325744138107002e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6729935955697407, + "grad_norm": 0.18386219441890717, + "learning_rate": 9.32491044779952e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6734163302403247, + "grad_norm": 0.1733333319425583, + "learning_rate": 9.324076279710422e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6738390649109086, + "grad_norm": 0.1553667187690735, + "learning_rate": 9.32324163393186e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6742617995814927, + "grad_norm": 0.1776440441608429, + "learning_rate": 9.32240651055604e-05, + "loss": 0.372, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6746845342520766, + "grad_norm": 0.16473746299743652, + "learning_rate": 9.321570909675219e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6751072689226607, + "grad_norm": 0.18019041419029236, + "learning_rate": 9.320734831381708e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6755300035932447, + "grad_norm": 0.15962977707386017, + "learning_rate": 9.319898275767869e-05, + "loss": 0.3678, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6759527382638287, + "grad_norm": 0.1746193915605545, + "learning_rate": 9.31906124292612e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 15990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6763754729344127, + "grad_norm": 0.2185192108154297, + "learning_rate": 9.31822373294893e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6767982076049968, + "grad_norm": 0.20818574726581573, + "learning_rate": 9.317385745928817e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6772209422755807, + "grad_norm": 0.16877615451812744, + "learning_rate": 9.316547281958358e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6776436769461648, + "grad_norm": 0.1393337845802307, + "learning_rate": 9.315708341130178e-05, + "loss": 0.3541, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6780664116167487, + "grad_norm": 0.18636219203472137, + "learning_rate": 9.314868923536961e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6784891462873327, + "grad_norm": 0.17153307795524597, + "learning_rate": 9.314029029271432e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6789118809579168, + "grad_norm": 0.1727413386106491, + "learning_rate": 9.313188658426382e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6793346156285007, + "grad_norm": 0.15489561855793, + "learning_rate": 9.312347811094646e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6797573502990848, + "grad_norm": 0.1779477894306183, + "learning_rate": 9.311506487369113e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6801800849696688, + "grad_norm": 0.1783541589975357, + "learning_rate": 9.310664687342727e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6806028196402528, + "grad_norm": 0.20526298880577087, + "learning_rate": 9.309822411108483e-05, + "loss": 0.3718, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6810255543108368, + "grad_norm": 0.2341020554304123, + "learning_rate": 9.30897965875943e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6814482889814208, + "grad_norm": 0.18823190033435822, + "learning_rate": 9.308136430388667e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6818710236520048, + "grad_norm": 0.17189131677150726, + "learning_rate": 9.30729272608935e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6822937583225889, + "grad_norm": 0.19551749527454376, + "learning_rate": 9.30644854595468e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6827164929931728, + "grad_norm": 0.16198168694972992, + "learning_rate": 9.30560389007792e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6831392276637569, + "grad_norm": 0.15398818254470825, + "learning_rate": 9.304758758552378e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6835619623343409, + "grad_norm": 0.18541452288627625, + "learning_rate": 9.303913151471417e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6839846970049248, + "grad_norm": 0.18782630562782288, + "learning_rate": 9.303067068928455e-05, + "loss": 0.37, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6844074316755089, + "grad_norm": 0.21558599174022675, + "learning_rate": 9.30222051101696e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6848301663460928, + "grad_norm": 0.15627022087574005, + "learning_rate": 9.301373477830452e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6852529010166769, + "grad_norm": 0.1861966997385025, + "learning_rate": 9.300525969462505e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6856756356872609, + "grad_norm": 0.20868080854415894, + "learning_rate": 9.299677986006745e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6860983703578449, + "grad_norm": 0.16151078045368195, + "learning_rate": 9.298829527556852e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6865211050284289, + "grad_norm": 0.17910130321979523, + "learning_rate": 9.297980594206553e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.686943839699013, + "grad_norm": 0.16396988928318024, + "learning_rate": 9.297131186049635e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6873665743695969, + "grad_norm": 0.15563908219337463, + "learning_rate": 9.296281303179932e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.687789309040181, + "grad_norm": 0.20021747052669525, + "learning_rate": 9.295430945691332e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6882120437107649, + "grad_norm": 0.16517701745033264, + "learning_rate": 9.294580113677778e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6886347783813489, + "grad_norm": 0.18780308961868286, + "learning_rate": 9.293728807233261e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.689057513051933, + "grad_norm": 0.18311062455177307, + "learning_rate": 9.292877026451827e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6894802477225169, + "grad_norm": 0.1467227190732956, + "learning_rate": 9.292024771427575e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.689902982393101, + "grad_norm": 0.173720121383667, + "learning_rate": 9.291172042254655e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.690325717063685, + "grad_norm": 0.17011195421218872, + "learning_rate": 9.290318839027268e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.690748451734269, + "grad_norm": 0.14142554998397827, + "learning_rate": 9.28946516183967e-05, + "loss": 0.3685, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.691171186404853, + "grad_norm": 0.18103879690170288, + "learning_rate": 9.28861101078617e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.691593921075437, + "grad_norm": 0.17501525580883026, + "learning_rate": 9.287756385961126e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.692016655746021, + "grad_norm": 0.17482751607894897, + "learning_rate": 9.28690128745895e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6924393904166051, + "grad_norm": 0.1499638557434082, + "learning_rate": 9.286045715374108e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.692862125087189, + "grad_norm": 0.15446650981903076, + "learning_rate": 9.285189669801115e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.693284859757773, + "grad_norm": 0.17093788087368011, + "learning_rate": 9.284333150834544e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.693707594428357, + "grad_norm": 0.15086746215820312, + "learning_rate": 9.28347615856901e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.694130329098941, + "grad_norm": 0.20453046262264252, + "learning_rate": 9.282618693099192e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6945530637695251, + "grad_norm": 0.1775849461555481, + "learning_rate": 9.281760754519813e-05, + "loss": 0.3697, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.694975798440109, + "grad_norm": 0.1629277765750885, + "learning_rate": 9.280902342925653e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6953985331106931, + "grad_norm": 0.1637033224105835, + "learning_rate": 9.28004345841154e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6958212677812771, + "grad_norm": 0.15175163745880127, + "learning_rate": 9.27918410107236e-05, + "loss": 0.3549, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6962440024518611, + "grad_norm": 0.16190724074840546, + "learning_rate": 9.278324271003047e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6966667371224451, + "grad_norm": 0.17956334352493286, + "learning_rate": 9.277463968298585e-05, + "loss": 0.3678, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6970894717930292, + "grad_norm": 0.2084360420703888, + "learning_rate": 9.276603193054019e-05, + "loss": 0.3694, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6975122064636131, + "grad_norm": 0.16302300989627838, + "learning_rate": 9.275741945364435e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6979349411341971, + "grad_norm": 0.18155698478221893, + "learning_rate": 9.274880225324981e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6983576758047811, + "grad_norm": 0.17337359488010406, + "learning_rate": 9.274018033030852e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6987804104753651, + "grad_norm": 0.15734092891216278, + "learning_rate": 9.273155368577293e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6992031451459492, + "grad_norm": 0.17157913744449615, + "learning_rate": 9.272292232059607e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.6996258798165331, + "grad_norm": 0.19067303836345673, + "learning_rate": 9.271428623573147e-05, + "loss": 0.3705, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7000486144871172, + "grad_norm": 0.16257622838020325, + "learning_rate": 9.270564543213316e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7004713491577012, + "grad_norm": 0.1423061490058899, + "learning_rate": 9.269699991075572e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7008940838282852, + "grad_norm": 0.16716238856315613, + "learning_rate": 9.268834967255425e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7013168184988692, + "grad_norm": 0.19830194115638733, + "learning_rate": 9.26796947184843e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7017395531694532, + "grad_norm": 0.14905095100402832, + "learning_rate": 9.267103504950207e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7021622878400372, + "grad_norm": 0.1556502878665924, + "learning_rate": 9.266237066656418e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7025850225106212, + "grad_norm": 0.1623748391866684, + "learning_rate": 9.265370157062779e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7030077571812052, + "grad_norm": 0.16706974804401398, + "learning_rate": 9.264502776265062e-05, + "loss": 0.3509, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7034304918517892, + "grad_norm": 0.1730130910873413, + "learning_rate": 9.263634924359089e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7038532265223733, + "grad_norm": 0.16442915797233582, + "learning_rate": 9.262766601440727e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7042759611929572, + "grad_norm": 0.16391530632972717, + "learning_rate": 9.261897807605908e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7046986958635413, + "grad_norm": 0.17423772811889648, + "learning_rate": 9.261028542950608e-05, + "loss": 0.3552, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7051214305341252, + "grad_norm": 0.15849609673023224, + "learning_rate": 9.260158807570856e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7055441652047093, + "grad_norm": 0.18916133046150208, + "learning_rate": 9.259288601562732e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7059668998752933, + "grad_norm": 0.163535013794899, + "learning_rate": 9.258417925022369e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7063896345458773, + "grad_norm": 0.1703462153673172, + "learning_rate": 9.257546778045956e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7068123692164613, + "grad_norm": 0.1833663135766983, + "learning_rate": 9.256675160729728e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7072351038870452, + "grad_norm": 0.1550992727279663, + "learning_rate": 9.255890303084162e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7076578385576293, + "grad_norm": 0.16249877214431763, + "learning_rate": 9.255017792387607e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7080805732282133, + "grad_norm": 0.17006517946720123, + "learning_rate": 9.254144811630618e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7085033078987973, + "grad_norm": 0.18336176872253418, + "learning_rate": 9.253271360909636e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7089260425693813, + "grad_norm": 0.20026953518390656, + "learning_rate": 9.252397440321154e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7093487772399654, + "grad_norm": 0.17093396186828613, + "learning_rate": 9.251523049961716e-05, + "loss": 0.3706, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7097715119105493, + "grad_norm": 0.23290327191352844, + "learning_rate": 9.250648189927915e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7101942465811334, + "grad_norm": 0.1815282255411148, + "learning_rate": 9.249772860316401e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7106169812517174, + "grad_norm": 0.16916494071483612, + "learning_rate": 9.248897061223873e-05, + "loss": 0.3549, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7110397159223014, + "grad_norm": 0.24161294102668762, + "learning_rate": 9.248020792747081e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7114624505928854, + "grad_norm": 0.1384093463420868, + "learning_rate": 9.24714405498283e-05, + "loss": 0.3705, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7118851852634693, + "grad_norm": 0.14905039966106415, + "learning_rate": 9.246266848027974e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7123079199340534, + "grad_norm": 0.2028067409992218, + "learning_rate": 9.24538917197942e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7127306546046374, + "grad_norm": 0.18673573434352875, + "learning_rate": 9.244511026934127e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7131533892752214, + "grad_norm": 0.16834111511707306, + "learning_rate": 9.243632412989103e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7135761239458054, + "grad_norm": 0.1586921364068985, + "learning_rate": 9.242753330241415e-05, + "loss": 0.3559, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7139988586163895, + "grad_norm": 0.15274237096309662, + "learning_rate": 9.241873778788173e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7144215932869734, + "grad_norm": 0.16212409734725952, + "learning_rate": 9.240993758726544e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7148443279575575, + "grad_norm": 0.14300265908241272, + "learning_rate": 9.240113270153747e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7152670626281414, + "grad_norm": 0.18325026333332062, + "learning_rate": 9.23923231316705e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7156897972987255, + "grad_norm": 0.16940684616565704, + "learning_rate": 9.238350887863774e-05, + "loss": 0.3698, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7161125319693095, + "grad_norm": 0.1980118304491043, + "learning_rate": 9.237468994341291e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7165352666398934, + "grad_norm": 0.20734168589115143, + "learning_rate": 9.236586632697029e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7169580013104775, + "grad_norm": 0.16802473366260529, + "learning_rate": 9.235703803028459e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7173807359810614, + "grad_norm": 0.18122993409633636, + "learning_rate": 9.234820505433114e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7178034706516455, + "grad_norm": 0.1771591305732727, + "learning_rate": 9.233936740008571e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7182262053222295, + "grad_norm": 0.17647096514701843, + "learning_rate": 9.233052506852463e-05, + "loss": 0.3696, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 16990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7186489399928135, + "grad_norm": 0.16904090344905853, + "learning_rate": 9.232167806062471e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7190716746633975, + "grad_norm": 0.18522042036056519, + "learning_rate": 9.231282637736331e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7194944093339816, + "grad_norm": 0.24991348385810852, + "learning_rate": 9.230397001971829e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7199171440045655, + "grad_norm": 0.1793164759874344, + "learning_rate": 9.229510898866802e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7203398786751496, + "grad_norm": 0.19538886845111847, + "learning_rate": 9.228624328519142e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7207626133457335, + "grad_norm": 0.22215212881565094, + "learning_rate": 9.227737291026789e-05, + "loss": 0.3708, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7211853480163175, + "grad_norm": 0.19208653271198273, + "learning_rate": 9.226849786487734e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7216080826869016, + "grad_norm": 0.1716376543045044, + "learning_rate": 9.225961815000025e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7220308173574855, + "grad_norm": 0.14795683324337006, + "learning_rate": 9.225073376661755e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7224535520280696, + "grad_norm": 0.17479947209358215, + "learning_rate": 9.224184471571074e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7228762866986536, + "grad_norm": 0.18638430535793304, + "learning_rate": 9.223295099826178e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7232990213692376, + "grad_norm": 0.17338944971561432, + "learning_rate": 9.22240526152532e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7237217560398216, + "grad_norm": 0.16468387842178345, + "learning_rate": 9.221514956766802e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7241444907104057, + "grad_norm": 0.1629614681005478, + "learning_rate": 9.220624185648978e-05, + "loss": 0.3537, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7245672253809896, + "grad_norm": 0.17699775099754333, + "learning_rate": 9.219732948270253e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7249899600515737, + "grad_norm": 0.23311813175678253, + "learning_rate": 9.218841244729083e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7254126947221576, + "grad_norm": 0.1893596649169922, + "learning_rate": 9.217949075123978e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7258354293927416, + "grad_norm": 0.17212064564228058, + "learning_rate": 9.217056439553495e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7262581640633257, + "grad_norm": 0.20164251327514648, + "learning_rate": 9.216163338116247e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7266808987339096, + "grad_norm": 0.16990575194358826, + "learning_rate": 9.215269770910897e-05, + "loss": 0.3706, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7271036334044937, + "grad_norm": 0.16054989397525787, + "learning_rate": 9.21437573803616e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7275263680750776, + "grad_norm": 0.17113801836967468, + "learning_rate": 9.2134812395908e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7279491027456617, + "grad_norm": 0.17778822779655457, + "learning_rate": 9.212586275673634e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7283718374162457, + "grad_norm": 0.1692388504743576, + "learning_rate": 9.211690846383531e-05, + "loss": 0.368, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7287945720868297, + "grad_norm": 0.139482781291008, + "learning_rate": 9.21079495181941e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7292173067574137, + "grad_norm": 0.13982060551643372, + "learning_rate": 9.209898592080245e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7296400414279978, + "grad_norm": 0.14917878806591034, + "learning_rate": 9.209001767265057e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7300627760985817, + "grad_norm": 0.14179889857769012, + "learning_rate": 9.208104477472919e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7304855107691657, + "grad_norm": 0.18173974752426147, + "learning_rate": 9.207206722802956e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7309082454397497, + "grad_norm": 0.1770969182252884, + "learning_rate": 9.206308503354348e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7313309801103337, + "grad_norm": 0.17040897905826569, + "learning_rate": 9.205409819226321e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7317537147809178, + "grad_norm": 0.13553164899349213, + "learning_rate": 9.204510670518153e-05, + "loss": 0.357, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7321764494515017, + "grad_norm": 0.15462128818035126, + "learning_rate": 9.20361105732918e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7325991841220858, + "grad_norm": 0.2552527189254761, + "learning_rate": 9.202710979758777e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7330219187926698, + "grad_norm": 0.24277780950069427, + "learning_rate": 9.201810437906384e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7334446534632538, + "grad_norm": 0.18297719955444336, + "learning_rate": 9.20090943187148e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7338673881338378, + "grad_norm": 0.15833504498004913, + "learning_rate": 9.200007961753605e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7342901228044219, + "grad_norm": 0.16389691829681396, + "learning_rate": 9.199106027652344e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7347128574750058, + "grad_norm": 0.16455958783626556, + "learning_rate": 9.198203629667336e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7351355921455898, + "grad_norm": 0.1469174027442932, + "learning_rate": 9.197300767898274e-05, + "loss": 0.3701, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7355583268161738, + "grad_norm": 0.1589014232158661, + "learning_rate": 9.196397442444893e-05, + "loss": 0.3547, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7359810614867578, + "grad_norm": 0.19266380369663239, + "learning_rate": 9.19549365340699e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7364037961573419, + "grad_norm": 0.1484575718641281, + "learning_rate": 9.194589400884406e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7368265308279258, + "grad_norm": 0.15809530019760132, + "learning_rate": 9.193684684977036e-05, + "loss": 0.3684, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7372492654985099, + "grad_norm": 0.16511918604373932, + "learning_rate": 9.192779505784825e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7376720001690938, + "grad_norm": 0.16542008519172668, + "learning_rate": 9.191873863407771e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7380947348396779, + "grad_norm": 0.1732998490333557, + "learning_rate": 9.190967757945925e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7385174695102619, + "grad_norm": 0.14407671988010406, + "learning_rate": 9.19006118949938e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7389402041808459, + "grad_norm": 0.20628932118415833, + "learning_rate": 9.189154158168292e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7393629388514299, + "grad_norm": 0.19997437298297882, + "learning_rate": 9.18824666405286e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7397856735220139, + "grad_norm": 0.18688206374645233, + "learning_rate": 9.187338707253337e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7402084081925979, + "grad_norm": 0.1458885371685028, + "learning_rate": 9.186430287870027e-05, + "loss": 0.3698, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7406311428631819, + "grad_norm": 0.17127540707588196, + "learning_rate": 9.185521406003286e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.741053877533766, + "grad_norm": 0.15606620907783508, + "learning_rate": 9.184612061753517e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7414766122043499, + "grad_norm": 0.15599331259727478, + "learning_rate": 9.18370225522118e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.741899346874934, + "grad_norm": 0.16759440302848816, + "learning_rate": 9.182791986506784e-05, + "loss": 0.3542, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7423220815455179, + "grad_norm": 0.17137831449508667, + "learning_rate": 9.181881255710885e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.742744816216102, + "grad_norm": 0.15843388438224792, + "learning_rate": 9.180970062934094e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.743167550886686, + "grad_norm": 0.1330062747001648, + "learning_rate": 9.180058408277072e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.74359028555727, + "grad_norm": 0.21184471249580383, + "learning_rate": 9.179146291840535e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.744013020227854, + "grad_norm": 0.1743486076593399, + "learning_rate": 9.178233713725244e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7444357548984379, + "grad_norm": 0.19147731363773346, + "learning_rate": 9.177320674032011e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.744858489569022, + "grad_norm": 0.1582171767950058, + "learning_rate": 9.176407172861705e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.745281224239606, + "grad_norm": 0.1467825174331665, + "learning_rate": 9.17549321031524e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.74570395891019, + "grad_norm": 0.19057917594909668, + "learning_rate": 9.174578786493585e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.746126693580774, + "grad_norm": 0.1634766310453415, + "learning_rate": 9.173663901497756e-05, + "loss": 0.3777, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7465494282513581, + "grad_norm": 0.1727713644504547, + "learning_rate": 9.172748555428823e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.746972162921942, + "grad_norm": 0.16961269080638885, + "learning_rate": 9.171832748387909e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7473948975925261, + "grad_norm": 0.15405355393886566, + "learning_rate": 9.170916480476181e-05, + "loss": 0.3685, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.74781763226311, + "grad_norm": 0.16651012003421783, + "learning_rate": 9.169999751794862e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7482403669336941, + "grad_norm": 0.18768174946308136, + "learning_rate": 9.169082562445228e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7486631016042781, + "grad_norm": 0.2027789205312729, + "learning_rate": 9.168164912528599e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7490858362748621, + "grad_norm": 0.18371953070163727, + "learning_rate": 9.16724680214635e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7495085709454461, + "grad_norm": 0.18447770178318024, + "learning_rate": 9.16632823139991e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.74993130561603, + "grad_norm": 0.1711161881685257, + "learning_rate": 9.16540920039075e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7503540402866141, + "grad_norm": 0.15186884999275208, + "learning_rate": 9.164489709220402e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7507767749571981, + "grad_norm": 0.13977020978927612, + "learning_rate": 9.163569757990442e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7511995096277821, + "grad_norm": 0.2112639993429184, + "learning_rate": 9.162649346802498e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7516222442983661, + "grad_norm": 0.17720019817352295, + "learning_rate": 9.16172847575825e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7520449789689502, + "grad_norm": 0.18819104135036469, + "learning_rate": 9.160807144959431e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7524677136395341, + "grad_norm": 0.1308777928352356, + "learning_rate": 9.15988535450782e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7528904483101182, + "grad_norm": 0.15562200546264648, + "learning_rate": 9.15896310450525e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7533131829807022, + "grad_norm": 0.16839911043643951, + "learning_rate": 9.158040395053603e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7537359176512862, + "grad_norm": 0.171206533908844, + "learning_rate": 9.15711722625481e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7541586523218702, + "grad_norm": 0.1782035231590271, + "learning_rate": 9.156193598210862e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7545813869924541, + "grad_norm": 0.18944619596004486, + "learning_rate": 9.155269511023789e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7550041216630382, + "grad_norm": 0.13701152801513672, + "learning_rate": 9.154344964795678e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7554268563336222, + "grad_norm": 0.17107385396957397, + "learning_rate": 9.153419959628665e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7558495910042062, + "grad_norm": 0.15148049592971802, + "learning_rate": 9.15249449562494e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7562723256747902, + "grad_norm": 0.1706438660621643, + "learning_rate": 9.151568572886735e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7566950603453743, + "grad_norm": 0.12500141561031342, + "learning_rate": 9.150642191516345e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7571177950159582, + "grad_norm": 0.16568435728549957, + "learning_rate": 9.149715351616105e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7575405296865423, + "grad_norm": 0.16256234049797058, + "learning_rate": 9.148788053288409e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7579632643571262, + "grad_norm": 0.1812009960412979, + "learning_rate": 9.147860296635692e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7583859990277103, + "grad_norm": 0.16397355496883392, + "learning_rate": 9.146932081760448e-05, + "loss": 0.3696, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7588087336982943, + "grad_norm": 0.14508473873138428, + "learning_rate": 9.14600340876522e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7592314683688782, + "grad_norm": 0.16456781327724457, + "learning_rate": 9.145074277752598e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7596542030394623, + "grad_norm": 0.15180301666259766, + "learning_rate": 9.144144688825228e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7600769377100463, + "grad_norm": 0.17306385934352875, + "learning_rate": 9.1432146420858e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7604996723806303, + "grad_norm": 0.15803274512290955, + "learning_rate": 9.14228413763706e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 17990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7609224070512143, + "grad_norm": 0.1796748787164688, + "learning_rate": 9.141353175581804e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7613451417217983, + "grad_norm": 0.18817192316055298, + "learning_rate": 9.140421756022874e-05, + "loss": 0.3692, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7617678763923823, + "grad_norm": 0.18911202251911163, + "learning_rate": 9.13948987906317e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7621906110629664, + "grad_norm": 0.14844898879528046, + "learning_rate": 9.138557544805635e-05, + "loss": 0.3529, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7626133457335503, + "grad_norm": 0.16960369050502777, + "learning_rate": 9.137624753353267e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7630360804041344, + "grad_norm": 0.13289637863636017, + "learning_rate": 9.136691504809111e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7634588150747184, + "grad_norm": 0.15401454269886017, + "learning_rate": 9.13575779927627e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7638815497453023, + "grad_norm": 0.14320716261863708, + "learning_rate": 9.134823636857888e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7643042844158864, + "grad_norm": 0.15446153283119202, + "learning_rate": 9.133889017657164e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7647270190864703, + "grad_norm": 0.16152387857437134, + "learning_rate": 9.13295394177735e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7651497537570544, + "grad_norm": 0.15756845474243164, + "learning_rate": 9.132018409321744e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7655724884276384, + "grad_norm": 0.1968408077955246, + "learning_rate": 9.131082420393697e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7659952230982224, + "grad_norm": 0.15469029545783997, + "learning_rate": 9.130145975096608e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7664179577688064, + "grad_norm": 0.1492508500814438, + "learning_rate": 9.129209073533929e-05, + "loss": 0.3702, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7668406924393905, + "grad_norm": 0.1634460836648941, + "learning_rate": 9.128271715809162e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7672634271099744, + "grad_norm": 0.22790735960006714, + "learning_rate": 9.127333902025858e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7676861617805585, + "grad_norm": 0.16657525300979614, + "learning_rate": 9.126395632287619e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7681088964511424, + "grad_norm": 0.16572783887386322, + "learning_rate": 9.125456906698097e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7685316311217264, + "grad_norm": 0.13806071877479553, + "learning_rate": 9.124517725360998e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7689543657923105, + "grad_norm": 0.16764478385448456, + "learning_rate": 9.123578088380071e-05, + "loss": 0.3694, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7693771004628944, + "grad_norm": 0.19791623950004578, + "learning_rate": 9.122637995859125e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7697998351334785, + "grad_norm": 0.18245813250541687, + "learning_rate": 9.121697447902006e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7702225698040625, + "grad_norm": 0.17222879827022552, + "learning_rate": 9.120756444612627e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7706453044746465, + "grad_norm": 0.19252042472362518, + "learning_rate": 9.119814986094936e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7710680391452305, + "grad_norm": 0.1676987111568451, + "learning_rate": 9.118873072452942e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7714907738158145, + "grad_norm": 0.1553054004907608, + "learning_rate": 9.117930703790698e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7719135084863985, + "grad_norm": 0.15317219495773315, + "learning_rate": 9.11698788021231e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7723362431569826, + "grad_norm": 0.15951599180698395, + "learning_rate": 9.116044601821932e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7727589778275665, + "grad_norm": 0.16995370388031006, + "learning_rate": 9.11510086872377e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7731817124981505, + "grad_norm": 0.15942449867725372, + "learning_rate": 9.114156681022083e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7736044471687346, + "grad_norm": 0.12532013654708862, + "learning_rate": 9.113212038821178e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7740271818393185, + "grad_norm": 0.21298427879810333, + "learning_rate": 9.112266942225407e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7744499165099026, + "grad_norm": 0.15625497698783875, + "learning_rate": 9.111321391339178e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7748726511804865, + "grad_norm": 0.23081818222999573, + "learning_rate": 9.11037538626695e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7752953858510706, + "grad_norm": 0.23371154069900513, + "learning_rate": 9.109428927113228e-05, + "loss": 0.3678, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7757181205216546, + "grad_norm": 0.14819097518920898, + "learning_rate": 9.10848201398257e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7761408551922386, + "grad_norm": 0.15730786323547363, + "learning_rate": 9.107534646979585e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7765635898628226, + "grad_norm": 0.16486401855945587, + "learning_rate": 9.10658682620893e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7769863245334067, + "grad_norm": 0.14977635443210602, + "learning_rate": 9.10563855177531e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7774090592039906, + "grad_norm": 0.18806889653205872, + "learning_rate": 9.104689823783486e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7778317938745746, + "grad_norm": 0.15643849968910217, + "learning_rate": 9.103740642338264e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7782545285451586, + "grad_norm": 0.16516338288784027, + "learning_rate": 9.102791007544503e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7786772632157426, + "grad_norm": 0.15622670948505402, + "learning_rate": 9.101840919507109e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7790999978863267, + "grad_norm": 0.15083196759223938, + "learning_rate": 9.100890378331042e-05, + "loss": 0.3566, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7795227325569106, + "grad_norm": 0.16533644497394562, + "learning_rate": 9.099939384121312e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7799454672274947, + "grad_norm": 0.15415407717227936, + "learning_rate": 9.098987936982974e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7803682018980787, + "grad_norm": 0.15864001214504242, + "learning_rate": 9.098036037021137e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7807909365686627, + "grad_norm": 0.19377557933330536, + "learning_rate": 9.097083684340961e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7812136712392467, + "grad_norm": 0.13477754592895508, + "learning_rate": 9.096130879047653e-05, + "loss": 0.3538, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7816364059098307, + "grad_norm": 0.15882815420627594, + "learning_rate": 9.09517762124647e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7820591405804147, + "grad_norm": 0.15425506234169006, + "learning_rate": 9.094223911042723e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7824818752509987, + "grad_norm": 0.1535678505897522, + "learning_rate": 9.09326974854177e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7829046099215827, + "grad_norm": 0.1635875552892685, + "learning_rate": 9.092315133849017e-05, + "loss": 0.3696, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7833273445921667, + "grad_norm": 0.1615748107433319, + "learning_rate": 9.091360067069924e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7837500792627508, + "grad_norm": 0.1723988950252533, + "learning_rate": 9.090404548309999e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7841728139333347, + "grad_norm": 0.20080284774303436, + "learning_rate": 9.0894485776748e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7845955486039188, + "grad_norm": 0.2679409086704254, + "learning_rate": 9.088492155269934e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7850182832745027, + "grad_norm": 0.1686026006937027, + "learning_rate": 9.087535281201062e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7854410179450868, + "grad_norm": 0.17903785407543182, + "learning_rate": 9.086577955573887e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7858637526156708, + "grad_norm": 0.1738983392715454, + "learning_rate": 9.085620178494171e-05, + "loss": 0.3704, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7862864872862548, + "grad_norm": 0.17482620477676392, + "learning_rate": 9.08466195006772e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7867092219568388, + "grad_norm": 0.13490696251392365, + "learning_rate": 9.08370327040039e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7871319566274227, + "grad_norm": 0.13876250386238098, + "learning_rate": 9.08274413959809e-05, + "loss": 0.3546, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7875546912980068, + "grad_norm": 0.15357260406017303, + "learning_rate": 9.081784557766778e-05, + "loss": 0.3563, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7879774259685908, + "grad_norm": 0.1759643852710724, + "learning_rate": 9.080824525012459e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7884001606391748, + "grad_norm": 0.16617028415203094, + "learning_rate": 9.07986404144119e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7888228953097588, + "grad_norm": 0.16966083645820618, + "learning_rate": 9.078903107159078e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7892456299803429, + "grad_norm": 0.1750127077102661, + "learning_rate": 9.077941722272278e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7896683646509268, + "grad_norm": 0.17044320702552795, + "learning_rate": 9.076979886887e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7900910993215109, + "grad_norm": 0.19097596406936646, + "learning_rate": 9.076017601109497e-05, + "loss": 0.368, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7905138339920948, + "grad_norm": 0.15469816327095032, + "learning_rate": 9.075054865046074e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7909365686626789, + "grad_norm": 0.1678457260131836, + "learning_rate": 9.074091678803088e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7913593033332629, + "grad_norm": 0.1566983163356781, + "learning_rate": 9.073128042486945e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7917820380038468, + "grad_norm": 0.23058664798736572, + "learning_rate": 9.072163956204096e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7922047726744309, + "grad_norm": 0.16064706444740295, + "learning_rate": 9.071199420061049e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7926275073450149, + "grad_norm": 0.1576828807592392, + "learning_rate": 9.070234434164358e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7930502420155989, + "grad_norm": 0.16671548783779144, + "learning_rate": 9.069268998620626e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7934729766861829, + "grad_norm": 0.15629857778549194, + "learning_rate": 9.068303113536506e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.793895711356767, + "grad_norm": 0.15498001873493195, + "learning_rate": 9.067336779018702e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7943184460273509, + "grad_norm": 0.1793157011270523, + "learning_rate": 9.066369995173967e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.794741180697935, + "grad_norm": 0.16659201681613922, + "learning_rate": 9.065402762109106e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7951639153685189, + "grad_norm": 0.16984206438064575, + "learning_rate": 9.064435079930967e-05, + "loss": 0.3685, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.795586650039103, + "grad_norm": 0.16837862133979797, + "learning_rate": 9.063466948746453e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.796009384709687, + "grad_norm": 0.17992742359638214, + "learning_rate": 9.062498368662518e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7964321193802709, + "grad_norm": 0.17276941239833832, + "learning_rate": 9.061529339786159e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.796854854050855, + "grad_norm": 0.16826891899108887, + "learning_rate": 9.06055986222443e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.797277588721439, + "grad_norm": 0.14354857802391052, + "learning_rate": 9.05958993608443e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.797700323392023, + "grad_norm": 0.1481567919254303, + "learning_rate": 9.058619561473307e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.798123058062607, + "grad_norm": 0.1416446566581726, + "learning_rate": 9.057648738498261e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.798545792733191, + "grad_norm": 0.15620338916778564, + "learning_rate": 9.056677467266544e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.798968527403775, + "grad_norm": 0.14677539467811584, + "learning_rate": 9.05570574788545e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.7993912620743591, + "grad_norm": 0.1629975140094757, + "learning_rate": 9.05473358046233e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.799813996744943, + "grad_norm": 0.17085592448711395, + "learning_rate": 9.05376096510458e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8002367314155271, + "grad_norm": 0.16687741875648499, + "learning_rate": 9.052787901919646e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.800659466086111, + "grad_norm": 0.13557545840740204, + "learning_rate": 9.051814391015025e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.801082200756695, + "grad_norm": 0.17648153007030487, + "learning_rate": 9.050840432498261e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8015049354272791, + "grad_norm": 0.1480809450149536, + "learning_rate": 9.049866026476953e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.801927670097863, + "grad_norm": 0.19413310289382935, + "learning_rate": 9.048891173058745e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8023504047684471, + "grad_norm": 0.18356971442699432, + "learning_rate": 9.047915872351327e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8027731394390311, + "grad_norm": 0.15257228910923004, + "learning_rate": 9.046940124462446e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 18990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8031958741096151, + "grad_norm": 0.164007306098938, + "learning_rate": 9.045963929499893e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8036186087801991, + "grad_norm": 0.16508705914020538, + "learning_rate": 9.044987287571511e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8040413434507832, + "grad_norm": 0.1556263566017151, + "learning_rate": 9.044010198785193e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8044640781213671, + "grad_norm": 0.16480344533920288, + "learning_rate": 9.043032663248878e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8048868127919512, + "grad_norm": 0.14813232421875, + "learning_rate": 9.042054681070555e-05, + "loss": 0.3522, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8053095474625351, + "grad_norm": 0.16159065067768097, + "learning_rate": 9.041076252358268e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8057322821331191, + "grad_norm": 0.18624331057071686, + "learning_rate": 9.040097377220102e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8061550168037032, + "grad_norm": 0.1621377021074295, + "learning_rate": 9.0391180557642e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8065777514742871, + "grad_norm": 0.1916872262954712, + "learning_rate": 9.038236284941634e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8070004861448712, + "grad_norm": 0.15901613235473633, + "learning_rate": 9.037256115780124e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8074232208154551, + "grad_norm": 0.17380642890930176, + "learning_rate": 9.036275500614755e-05, + "loss": 0.3739, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8078459554860392, + "grad_norm": 0.16125836968421936, + "learning_rate": 9.035294439553856e-05, + "loss": 0.3718, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8082686901566232, + "grad_norm": 0.19128313660621643, + "learning_rate": 9.034312932705808e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8086914248272072, + "grad_norm": 0.18765434622764587, + "learning_rate": 9.033330980179041e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8091141594977912, + "grad_norm": 0.17130303382873535, + "learning_rate": 9.032348582082034e-05, + "loss": 0.3712, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8095368941683753, + "grad_norm": 0.18035347759723663, + "learning_rate": 9.03136573852331e-05, + "loss": 0.3703, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8099596288389592, + "grad_norm": 0.16743646562099457, + "learning_rate": 9.03038244961145e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8103823635095432, + "grad_norm": 0.14604145288467407, + "learning_rate": 9.02939871545508e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8108050981801272, + "grad_norm": 0.14592592418193817, + "learning_rate": 9.028414536162873e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8112278328507112, + "grad_norm": 0.14813759922981262, + "learning_rate": 9.027429911843553e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8116505675212953, + "grad_norm": 0.24476434290409088, + "learning_rate": 9.026444842605894e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8120733021918792, + "grad_norm": 0.18185386061668396, + "learning_rate": 9.025459328558721e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8124960368624633, + "grad_norm": 0.18839503824710846, + "learning_rate": 9.024473369810903e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8129187715330473, + "grad_norm": 0.16993831098079681, + "learning_rate": 9.023486966471362e-05, + "loss": 0.3704, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8133415062036313, + "grad_norm": 0.1311447024345398, + "learning_rate": 9.022500118649068e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8137642408742153, + "grad_norm": 0.1690080761909485, + "learning_rate": 9.021512826453039e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8141869755447994, + "grad_norm": 0.17100581526756287, + "learning_rate": 9.020525089992344e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8146097102153833, + "grad_norm": 0.14509011805057526, + "learning_rate": 9.0195369093761e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8150324448859674, + "grad_norm": 0.14983369410037994, + "learning_rate": 9.018548284713474e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8154551795565513, + "grad_norm": 0.1793508529663086, + "learning_rate": 9.017559216113681e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8158779142271353, + "grad_norm": 0.22548291087150574, + "learning_rate": 9.016569703685985e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8163006488977194, + "grad_norm": 0.14229686558246613, + "learning_rate": 9.015579747539699e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8167233835683033, + "grad_norm": 0.15266580879688263, + "learning_rate": 9.014589347784187e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8171461182388874, + "grad_norm": 0.15344975888729095, + "learning_rate": 9.01359850452886e-05, + "loss": 0.3734, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8175688529094713, + "grad_norm": 0.13244196772575378, + "learning_rate": 9.012607217883177e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8179915875800554, + "grad_norm": 0.16874898970127106, + "learning_rate": 9.011615487956648e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8184143222506394, + "grad_norm": 0.1764620691537857, + "learning_rate": 9.010623314858833e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8188370569212234, + "grad_norm": 0.144644096493721, + "learning_rate": 9.009630698699339e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8192597915918074, + "grad_norm": 0.16224224865436554, + "learning_rate": 9.00863763958782e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8196825262623915, + "grad_norm": 0.16877985000610352, + "learning_rate": 9.007644137633984e-05, + "loss": 0.3748, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8201052609329754, + "grad_norm": 0.16110104322433472, + "learning_rate": 9.006650192947583e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8205279956035594, + "grad_norm": 0.17735455930233002, + "learning_rate": 9.005655805638422e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8209507302741434, + "grad_norm": 0.16817976534366608, + "learning_rate": 9.004660975816353e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8213734649447274, + "grad_norm": 0.15122373402118683, + "learning_rate": 9.003665703591274e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8217961996153115, + "grad_norm": 0.1477198302745819, + "learning_rate": 9.002669989073138e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8222189342858954, + "grad_norm": 0.16485168039798737, + "learning_rate": 9.001673832371942e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8226416689564795, + "grad_norm": 0.1393616497516632, + "learning_rate": 9.000677233597732e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8230644036270635, + "grad_norm": 0.1584545075893402, + "learning_rate": 8.999680192860609e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8234871382976475, + "grad_norm": 0.13590691983699799, + "learning_rate": 8.998682710270714e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8239098729682315, + "grad_norm": 0.1702461838722229, + "learning_rate": 8.99768478593824e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8243326076388156, + "grad_norm": 0.17346253991127014, + "learning_rate": 8.996686419973434e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8247553423093995, + "grad_norm": 0.14650434255599976, + "learning_rate": 8.995687612486586e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8251780769799835, + "grad_norm": 0.16629314422607422, + "learning_rate": 8.994688363588035e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8256008116505675, + "grad_norm": 0.16926543414592743, + "learning_rate": 8.99368867338817e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8260235463211515, + "grad_norm": 0.19602930545806885, + "learning_rate": 8.99268854199743e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8264462809917356, + "grad_norm": 0.16904504597187042, + "learning_rate": 8.9916879695263e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8268690156623195, + "grad_norm": 0.14950747787952423, + "learning_rate": 8.990686956085316e-05, + "loss": 0.3552, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8272917503329036, + "grad_norm": 0.14634554088115692, + "learning_rate": 8.989685501785064e-05, + "loss": 0.3694, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8277144850034875, + "grad_norm": 0.17242179811000824, + "learning_rate": 8.988683606736175e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8281372196740716, + "grad_norm": 0.13755643367767334, + "learning_rate": 8.987681271049328e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8285599543446556, + "grad_norm": 0.16780851781368256, + "learning_rate": 8.986678494835257e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8289826890152396, + "grad_norm": 0.18855097889900208, + "learning_rate": 8.985675278204739e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8294054236858236, + "grad_norm": 0.14866678416728973, + "learning_rate": 8.984671621268601e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8298281583564076, + "grad_norm": 0.19573698937892914, + "learning_rate": 8.98366752413772e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8302508930269916, + "grad_norm": 0.1547822207212448, + "learning_rate": 8.982662986923018e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8306736276975756, + "grad_norm": 0.19209803640842438, + "learning_rate": 8.981658009735474e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8310963623681596, + "grad_norm": 0.17928743362426758, + "learning_rate": 8.980652592686104e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8315190970387436, + "grad_norm": 0.18846456706523895, + "learning_rate": 8.97964673588598e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8319418317093277, + "grad_norm": 0.14549559354782104, + "learning_rate": 8.978640439446222e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8323645663799116, + "grad_norm": 0.1810663789510727, + "learning_rate": 8.977633703477998e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8327873010504957, + "grad_norm": 0.1698169708251953, + "learning_rate": 8.976626528092525e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8332100357210797, + "grad_norm": 0.156008780002594, + "learning_rate": 8.975618913401064e-05, + "loss": 0.3703, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8336327703916637, + "grad_norm": 0.1779133379459381, + "learning_rate": 8.974610859514932e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8340555050622477, + "grad_norm": 0.13051672279834747, + "learning_rate": 8.973602366545487e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8344782397328316, + "grad_norm": 0.14918188750743866, + "learning_rate": 8.972593434604141e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8349009744034157, + "grad_norm": 0.1743229180574417, + "learning_rate": 8.971584063802356e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8353237090739997, + "grad_norm": 0.1704939752817154, + "learning_rate": 8.970574254251636e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8357464437445837, + "grad_norm": 0.17789891362190247, + "learning_rate": 8.969564006063535e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8361691784151677, + "grad_norm": 0.13411468267440796, + "learning_rate": 8.968553319349662e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8365919130857518, + "grad_norm": 0.15231828391551971, + "learning_rate": 8.967542194221665e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8370146477563357, + "grad_norm": 0.15075571835041046, + "learning_rate": 8.966530630791248e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8374373824269198, + "grad_norm": 0.16992248594760895, + "learning_rate": 8.965518629170158e-05, + "loss": 0.3685, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8378601170975037, + "grad_norm": 0.18053822219371796, + "learning_rate": 8.964506189470197e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8382828517680878, + "grad_norm": 0.1655890941619873, + "learning_rate": 8.963493311803206e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8387055864386718, + "grad_norm": 0.1420520842075348, + "learning_rate": 8.962479996281082e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8391283211092557, + "grad_norm": 0.179177924990654, + "learning_rate": 8.96146624301577e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8395510557798398, + "grad_norm": 0.19239388406276703, + "learning_rate": 8.960452052119259e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8399737904504238, + "grad_norm": 0.14405885338783264, + "learning_rate": 8.959437423703589e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8403965251210078, + "grad_norm": 0.15010802447795868, + "learning_rate": 8.958422357880848e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8408192597915918, + "grad_norm": 0.1548919528722763, + "learning_rate": 8.957406854763173e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8412419944621758, + "grad_norm": 0.16045883297920227, + "learning_rate": 8.956390914462748e-05, + "loss": 0.3711, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8416647291327598, + "grad_norm": 0.1588050127029419, + "learning_rate": 8.955374537091808e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8420874638033439, + "grad_norm": 0.14595037698745728, + "learning_rate": 8.954357722762631e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8425101984739278, + "grad_norm": 0.14648394286632538, + "learning_rate": 8.953340471587548e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8429329331445119, + "grad_norm": 0.16590207815170288, + "learning_rate": 8.95232278367894e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8433556678150959, + "grad_norm": 0.18676656484603882, + "learning_rate": 8.951304659149227e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8437784024856798, + "grad_norm": 0.15403875708580017, + "learning_rate": 8.950286098110887e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8442011371562639, + "grad_norm": 0.13681897521018982, + "learning_rate": 8.949267100676441e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8446238718268478, + "grad_norm": 0.21795345842838287, + "learning_rate": 8.948247666958463e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8450466064974319, + "grad_norm": 0.17484261095523834, + "learning_rate": 8.947227797069566e-05, + "loss": 0.3558, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 19990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8454693411680159, + "grad_norm": 0.15124207735061646, + "learning_rate": 8.946207491122424e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8458920758385999, + "grad_norm": 0.1869671493768692, + "learning_rate": 8.945186749229747e-05, + "loss": 0.3724, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8463148105091839, + "grad_norm": 0.157602921128273, + "learning_rate": 8.9441655715043e-05, + "loss": 0.3554, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.846737545179768, + "grad_norm": 0.17065081000328064, + "learning_rate": 8.943143958058895e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 0.8471602798503519, + "grad_norm": 0.13068121671676636, + "learning_rate": 8.942121909006392e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.000317051002938, + "grad_norm": 0.31600263714790344, + "learning_rate": 8.941099424459698e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.000739785673522, + "grad_norm": 0.2514656186103821, + "learning_rate": 8.94007650453177e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0011625203441061, + "grad_norm": 0.3408504128456116, + "learning_rate": 8.93905314933561e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.00158525501469, + "grad_norm": 0.44023558497428894, + "learning_rate": 8.938029358984272e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.002007989685274, + "grad_norm": 0.4145229458808899, + "learning_rate": 8.937005133590857e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0024307243558581, + "grad_norm": 0.39907458424568176, + "learning_rate": 8.935980473268511e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.002853459026442, + "grad_norm": 0.2848278880119324, + "learning_rate": 8.93495537813043e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.003276193697026, + "grad_norm": 0.33733195066452026, + "learning_rate": 8.93392984828986e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.00369892836761, + "grad_norm": 0.2996121048927307, + "learning_rate": 8.932903883860093e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0041216630381942, + "grad_norm": 0.35906729102134705, + "learning_rate": 8.931877484954469e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.004544397708778, + "grad_norm": 0.3925764560699463, + "learning_rate": 8.930850651686378e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.004967132379362, + "grad_norm": 0.34081727266311646, + "learning_rate": 8.929823384169254e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0053898670499462, + "grad_norm": 0.4235617220401764, + "learning_rate": 8.928795682516583e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0058126017205302, + "grad_norm": 0.4632740318775177, + "learning_rate": 8.927767546841896e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.006235336391114, + "grad_norm": 0.24165527522563934, + "learning_rate": 8.926738977258772e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0066580710616981, + "grad_norm": 0.27294135093688965, + "learning_rate": 8.925709973880844e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0070808057322822, + "grad_norm": 0.2380644977092743, + "learning_rate": 8.924680536821784e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.007503540402866, + "grad_norm": 0.2707332968711853, + "learning_rate": 8.923650666195315e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0079262750734501, + "grad_norm": 0.218769833445549, + "learning_rate": 8.922620362115214e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0083490097440342, + "grad_norm": 0.3018561005592346, + "learning_rate": 8.921589624695296e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0087717444146183, + "grad_norm": 0.30343660712242126, + "learning_rate": 8.920558454049431e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.009194479085202, + "grad_norm": 0.2821163833141327, + "learning_rate": 8.919526850291532e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0096172137557862, + "grad_norm": 0.29758965969085693, + "learning_rate": 8.918494813535567e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0100399484263702, + "grad_norm": 0.3128913640975952, + "learning_rate": 8.917462343895543e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0104626830969543, + "grad_norm": 0.34825271368026733, + "learning_rate": 8.916429441485521e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0108854177675382, + "grad_norm": 0.27028000354766846, + "learning_rate": 8.915396106419607e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0113081524381222, + "grad_norm": 0.2911047339439392, + "learning_rate": 8.914362338811955e-05, + "loss": 0.3797, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0117308871087063, + "grad_norm": 0.413491427898407, + "learning_rate": 8.913328138776769e-05, + "loss": 0.37, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0121536217792901, + "grad_norm": 0.3381093442440033, + "learning_rate": 8.912293506428298e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0125763564498742, + "grad_norm": 0.30061694979667664, + "learning_rate": 8.911258441880841e-05, + "loss": 0.372, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0129990911204583, + "grad_norm": 0.300001323223114, + "learning_rate": 8.910222945248743e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0134218257910423, + "grad_norm": 0.29286107420921326, + "learning_rate": 8.909187016646398e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0138445604616262, + "grad_norm": 0.3079686164855957, + "learning_rate": 8.908150656188246e-05, + "loss": 0.3705, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0142672951322103, + "grad_norm": 0.2909557819366455, + "learning_rate": 8.907113863988777e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0146900298027943, + "grad_norm": 0.2632535696029663, + "learning_rate": 8.906076640162525e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0151127644733784, + "grad_norm": 0.34169095754623413, + "learning_rate": 8.905038984824078e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0155354991439622, + "grad_norm": 0.24368317425251007, + "learning_rate": 8.904000898088066e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0159582338145463, + "grad_norm": 0.4868500530719757, + "learning_rate": 8.902962380069166e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0163809684851304, + "grad_norm": 0.37839454412460327, + "learning_rate": 8.901923430882111e-05, + "loss": 0.3558, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0168037031557142, + "grad_norm": 0.22965498268604279, + "learning_rate": 8.900884050641672e-05, + "loss": 0.3739, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0172264378262983, + "grad_norm": 0.264087051153183, + "learning_rate": 8.899844239462671e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0176491724968824, + "grad_norm": 0.2636336088180542, + "learning_rate": 8.89880399745998e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0180719071674664, + "grad_norm": 0.28412318229675293, + "learning_rate": 8.897763324748516e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0184946418380503, + "grad_norm": 0.3845331370830536, + "learning_rate": 8.896722221443243e-05, + "loss": 0.3746, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0189173765086343, + "grad_norm": 0.3326224982738495, + "learning_rate": 8.895680687659175e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0193401111792184, + "grad_norm": 0.24526092410087585, + "learning_rate": 8.894638723511372e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0197628458498025, + "grad_norm": 0.30149099230766296, + "learning_rate": 8.893596329114942e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0201855805203863, + "grad_norm": 0.3321683406829834, + "learning_rate": 8.892553504585042e-05, + "loss": 0.3734, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0206083151909704, + "grad_norm": 0.2605040967464447, + "learning_rate": 8.891510250036872e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0210310498615545, + "grad_norm": 0.3773607015609741, + "learning_rate": 8.890466565585684e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0214537845321383, + "grad_norm": 0.3219296932220459, + "learning_rate": 8.889422451346775e-05, + "loss": 0.3685, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0218765192027224, + "grad_norm": 0.24565336108207703, + "learning_rate": 8.888377907435492e-05, + "loss": 0.3722, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0222992538733064, + "grad_norm": 0.36848944425582886, + "learning_rate": 8.887332933967226e-05, + "loss": 0.3748, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0227219885438905, + "grad_norm": 0.34838876128196716, + "learning_rate": 8.88628753105742e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0231447232144744, + "grad_norm": 0.22722966969013214, + "learning_rate": 8.885241698821559e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0235674578850584, + "grad_norm": 0.30467483401298523, + "learning_rate": 8.884195437375179e-05, + "loss": 0.3684, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0239901925556425, + "grad_norm": 0.33453258872032166, + "learning_rate": 8.883148746833863e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0244129272262266, + "grad_norm": 0.28566327691078186, + "learning_rate": 8.882101627313241e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0248356618968104, + "grad_norm": 0.34725379943847656, + "learning_rate": 8.881054078928992e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0252583965673945, + "grad_norm": 0.3692667782306671, + "learning_rate": 8.880006101796838e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0256811312379785, + "grad_norm": 0.24282050132751465, + "learning_rate": 8.878957696032552e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0261038659085624, + "grad_norm": 0.3057636022567749, + "learning_rate": 8.877908861751956e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0265266005791465, + "grad_norm": 0.27893826365470886, + "learning_rate": 8.876859599070912e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0269493352497305, + "grad_norm": 0.24936456978321075, + "learning_rate": 8.875809908105339e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0273720699203146, + "grad_norm": 0.2957306504249573, + "learning_rate": 8.874759788971194e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0277948045908984, + "grad_norm": 0.2804791331291199, + "learning_rate": 8.873709241784489e-05, + "loss": 0.3724, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0282175392614825, + "grad_norm": 0.2943199574947357, + "learning_rate": 8.87265826666128e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0286402739320666, + "grad_norm": 0.23800145089626312, + "learning_rate": 8.87160686371767e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0290630086026507, + "grad_norm": 0.2624867260456085, + "learning_rate": 8.870555033069807e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0294857432732345, + "grad_norm": 0.2894609868526459, + "learning_rate": 8.869502774833894e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0299084779438186, + "grad_norm": 0.35819846391677856, + "learning_rate": 8.86845008912617e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0303312126144026, + "grad_norm": 0.2657470107078552, + "learning_rate": 8.867396976062933e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0307539472849865, + "grad_norm": 0.22951258718967438, + "learning_rate": 8.86634343576052e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0311766819555706, + "grad_norm": 0.363552451133728, + "learning_rate": 8.865289468335316e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0315994166261546, + "grad_norm": 0.2381111979484558, + "learning_rate": 8.864235073903757e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0320221512967387, + "grad_norm": 0.22332188487052917, + "learning_rate": 8.863180252582323e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0324448859673225, + "grad_norm": 0.2873195707798004, + "learning_rate": 8.862125004487545e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0328676206379066, + "grad_norm": 0.2713780701160431, + "learning_rate": 8.861069329735996e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0332903553084907, + "grad_norm": 0.34985262155532837, + "learning_rate": 8.860013228444299e-05, + "loss": 0.37, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0337130899790747, + "grad_norm": 0.22515855729579926, + "learning_rate": 8.858956700729122e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0341358246496586, + "grad_norm": 0.22361665964126587, + "learning_rate": 8.857899746707185e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0345585593202427, + "grad_norm": 0.21226036548614502, + "learning_rate": 8.85684236649525e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0349812939908267, + "grad_norm": 0.24781720340251923, + "learning_rate": 8.855784560210128e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0354040286614106, + "grad_norm": 0.22264429926872253, + "learning_rate": 8.854726327968675e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0358267633319946, + "grad_norm": 0.22319132089614868, + "learning_rate": 8.8536676698878e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0362494980025787, + "grad_norm": 0.29789018630981445, + "learning_rate": 8.852608586084453e-05, + "loss": 0.3556, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0366722326731628, + "grad_norm": 0.3271600902080536, + "learning_rate": 8.851549076675634e-05, + "loss": 0.3741, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0370949673437466, + "grad_norm": 0.22392664849758148, + "learning_rate": 8.850489141778389e-05, + "loss": 0.3754, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0375177020143307, + "grad_norm": 0.30824658274650574, + "learning_rate": 8.849428781509809e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0379404366849148, + "grad_norm": 0.2957247793674469, + "learning_rate": 8.848367995987036e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0383631713554988, + "grad_norm": 0.241367369890213, + "learning_rate": 8.847306785327258e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0387859060260827, + "grad_norm": 0.2758485972881317, + "learning_rate": 8.846245149647707e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0392086406966667, + "grad_norm": 0.19209028780460358, + "learning_rate": 8.845183089065666e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0396313753672508, + "grad_norm": 0.23388119041919708, + "learning_rate": 8.844120603698461e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0400541100378347, + "grad_norm": 0.22992350161075592, + "learning_rate": 8.84305769366347e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 20990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0404768447084187, + "grad_norm": 0.24512311816215515, + "learning_rate": 8.841994359078111e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0408995793790028, + "grad_norm": 0.28884315490722656, + "learning_rate": 8.840930600059856e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0413223140495869, + "grad_norm": 0.19899237155914307, + "learning_rate": 8.839866416726217e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0417450487201707, + "grad_norm": 0.24522802233695984, + "learning_rate": 8.838801809194761e-05, + "loss": 0.3678, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0421677833907548, + "grad_norm": 0.2049858570098877, + "learning_rate": 8.837736777583093e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0425905180613388, + "grad_norm": 0.2350621074438095, + "learning_rate": 8.836671322008873e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.043013252731923, + "grad_norm": 0.3138928711414337, + "learning_rate": 8.8356054425898e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0434359874025068, + "grad_norm": 0.2857434153556824, + "learning_rate": 8.834539139443627e-05, + "loss": 0.3744, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0438587220730908, + "grad_norm": 0.2735729515552521, + "learning_rate": 8.833472412688152e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.044281456743675, + "grad_norm": 0.2764889895915985, + "learning_rate": 8.832405262441213e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0447041914142587, + "grad_norm": 0.28915268182754517, + "learning_rate": 8.831337688820706e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0451269260848428, + "grad_norm": 0.23798416554927826, + "learning_rate": 8.830269691944564e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0455496607554269, + "grad_norm": 0.27450376749038696, + "learning_rate": 8.829201271930773e-05, + "loss": 0.3701, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.045972395426011, + "grad_norm": 0.23599621653556824, + "learning_rate": 8.828132428897362e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0463951300965948, + "grad_norm": 0.2764001488685608, + "learning_rate": 8.82706316296241e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0468178647671789, + "grad_norm": 0.39217332005500793, + "learning_rate": 8.825993474244039e-05, + "loss": 0.3699, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.047240599437763, + "grad_norm": 0.2654752731323242, + "learning_rate": 8.824923362860422e-05, + "loss": 0.3699, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.047663334108347, + "grad_norm": 0.2682451903820038, + "learning_rate": 8.823852828929774e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0480860687789308, + "grad_norm": 0.2133459597826004, + "learning_rate": 8.82278187257036e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.048508803449515, + "grad_norm": 0.29729723930358887, + "learning_rate": 8.821710493900491e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.048931538120099, + "grad_norm": 0.2273814082145691, + "learning_rate": 8.820638693038525e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0493542727906828, + "grad_norm": 0.2602522075176239, + "learning_rate": 8.819566470102864e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.049777007461267, + "grad_norm": 0.23510102927684784, + "learning_rate": 8.818493825211961e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.050199742131851, + "grad_norm": 0.2803293764591217, + "learning_rate": 8.817420758484311e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.050622476802435, + "grad_norm": 0.31188076734542847, + "learning_rate": 8.81634727003846e-05, + "loss": 0.3736, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0510452114730189, + "grad_norm": 0.28241270780563354, + "learning_rate": 8.815273359992997e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.051467946143603, + "grad_norm": 0.34278151392936707, + "learning_rate": 8.81419902846656e-05, + "loss": 0.3724, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.051890680814187, + "grad_norm": 0.22264523804187775, + "learning_rate": 8.813124275577832e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.052313415484771, + "grad_norm": 0.27564331889152527, + "learning_rate": 8.812049101445542e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.052736150155355, + "grad_norm": 0.2354641556739807, + "learning_rate": 8.810973506188468e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.053158884825939, + "grad_norm": 0.28425219655036926, + "learning_rate": 8.809897489925434e-05, + "loss": 0.3709, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.053581619496523, + "grad_norm": 0.36276692152023315, + "learning_rate": 8.808821052775308e-05, + "loss": 0.3733, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.054004354167107, + "grad_norm": 0.22907894849777222, + "learning_rate": 8.807744194857006e-05, + "loss": 0.3702, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.054427088837691, + "grad_norm": 0.36543869972229004, + "learning_rate": 8.806666916289493e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.054849823508275, + "grad_norm": 0.2922789454460144, + "learning_rate": 8.805589217191776e-05, + "loss": 0.3714, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0552725581788591, + "grad_norm": 0.3043711185455322, + "learning_rate": 8.804511097682911e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.055695292849443, + "grad_norm": 0.2394549399614334, + "learning_rate": 8.803432557882001e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.056118027520027, + "grad_norm": 0.2444113940000534, + "learning_rate": 8.802353597908194e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.056540762190611, + "grad_norm": 0.289099782705307, + "learning_rate": 8.801274217880684e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0569634968611952, + "grad_norm": 0.2537568509578705, + "learning_rate": 8.800194417918713e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.057386231531779, + "grad_norm": 0.3494996130466461, + "learning_rate": 8.79911419814157e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.057808966202363, + "grad_norm": 0.20036177337169647, + "learning_rate": 8.798033558668586e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0582317008729472, + "grad_norm": 0.270785927772522, + "learning_rate": 8.796952499619144e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.058654435543531, + "grad_norm": 0.2935084104537964, + "learning_rate": 8.79587102111267e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.059077170214115, + "grad_norm": 0.21400193870067596, + "learning_rate": 8.794789123268637e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0594999048846991, + "grad_norm": 0.1790635585784912, + "learning_rate": 8.793706806206565e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0599226395552832, + "grad_norm": 0.32222482562065125, + "learning_rate": 8.792624070046019e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.060345374225867, + "grad_norm": 0.22178521752357483, + "learning_rate": 8.791540914906611e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0607681088964511, + "grad_norm": 0.21019595861434937, + "learning_rate": 8.790457340908e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0611908435670352, + "grad_norm": 0.231445774435997, + "learning_rate": 8.78937334816989e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0616135782376193, + "grad_norm": 0.301031231880188, + "learning_rate": 8.788288936812032e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.062036312908203, + "grad_norm": 0.3428511321544647, + "learning_rate": 8.787204106954226e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0624590475787872, + "grad_norm": 0.23962751030921936, + "learning_rate": 8.786118858716309e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0628817822493712, + "grad_norm": 0.2781759798526764, + "learning_rate": 8.785033192218175e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.063304516919955, + "grad_norm": 0.2628547251224518, + "learning_rate": 8.78394710757976e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0637272515905392, + "grad_norm": 0.27013733983039856, + "learning_rate": 8.782860604921045e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0641499862611232, + "grad_norm": 0.28938809037208557, + "learning_rate": 8.781773684362057e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0645727209317073, + "grad_norm": 0.27166879177093506, + "learning_rate": 8.780686346022872e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0649954556022911, + "grad_norm": 0.3046624958515167, + "learning_rate": 8.779598590023609e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0654181902728752, + "grad_norm": 0.2654518783092499, + "learning_rate": 8.778510416484438e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0658409249434593, + "grad_norm": 0.23430001735687256, + "learning_rate": 8.77742182552557e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0662636596140433, + "grad_norm": 0.25209829211235046, + "learning_rate": 8.776332817267258e-05, + "loss": 0.3722, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0666863942846272, + "grad_norm": 0.26443034410476685, + "learning_rate": 8.775243391829817e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0671091289552113, + "grad_norm": 0.30372706055641174, + "learning_rate": 8.77415354933359e-05, + "loss": 0.3706, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0675318636257953, + "grad_norm": 0.27471041679382324, + "learning_rate": 8.773063289898978e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0679545982963792, + "grad_norm": 0.27243900299072266, + "learning_rate": 8.771972613646424e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0683773329669632, + "grad_norm": 0.3003925681114197, + "learning_rate": 8.770881520696417e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0688000676375473, + "grad_norm": 0.31079211831092834, + "learning_rate": 8.76979001116949e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0692228023081314, + "grad_norm": 0.26512256264686584, + "learning_rate": 8.768698085186227e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0696455369787152, + "grad_norm": 0.2730177342891693, + "learning_rate": 8.767605742867255e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0700682716492993, + "grad_norm": 0.27977845072746277, + "learning_rate": 8.766512984333246e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0704910063198834, + "grad_norm": 0.28712064027786255, + "learning_rate": 8.76541980970492e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0709137409904674, + "grad_norm": 0.32339566946029663, + "learning_rate": 8.764326219103042e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0713364756610513, + "grad_norm": 0.22047801315784454, + "learning_rate": 8.763232212648422e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0717592103316353, + "grad_norm": 0.28998473286628723, + "learning_rate": 8.76213779046192e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0721819450022194, + "grad_norm": 0.23670214414596558, + "learning_rate": 8.761042952664438e-05, + "loss": 0.3742, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0726046796728033, + "grad_norm": 0.2243502289056778, + "learning_rate": 8.759947699376923e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0730274143433873, + "grad_norm": 0.22754327952861786, + "learning_rate": 8.758852030720371e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0734501490139714, + "grad_norm": 0.20629975199699402, + "learning_rate": 8.757755946815825e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0738728836845555, + "grad_norm": 0.24834124743938446, + "learning_rate": 8.756659447784368e-05, + "loss": 0.3724, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0742956183551393, + "grad_norm": 0.17839287221431732, + "learning_rate": 8.755562533747132e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0747183530257234, + "grad_norm": 0.34368476271629333, + "learning_rate": 8.754465204825301e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0751410876963075, + "grad_norm": 0.25856223702430725, + "learning_rate": 8.753367461140094e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0755638223668915, + "grad_norm": 0.28968435525894165, + "learning_rate": 8.752269302812781e-05, + "loss": 0.371, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0759865570374754, + "grad_norm": 0.220441997051239, + "learning_rate": 8.751170729964681e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0764092917080594, + "grad_norm": 0.25726547837257385, + "learning_rate": 8.750071742717153e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0768320263786435, + "grad_norm": 0.26150304079055786, + "learning_rate": 8.748972341191605e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0772547610492273, + "grad_norm": 0.21282455325126648, + "learning_rate": 8.74787252550949e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0776774957198114, + "grad_norm": 0.2348686307668686, + "learning_rate": 8.74677229579231e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0781002303903955, + "grad_norm": 0.2598373293876648, + "learning_rate": 8.745671652161604e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0785229650609796, + "grad_norm": 0.29769545793533325, + "learning_rate": 8.744570594738966e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0789456997315634, + "grad_norm": 0.2733001410961151, + "learning_rate": 8.743469123646031e-05, + "loss": 0.3707, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0793684344021475, + "grad_norm": 0.2912085950374603, + "learning_rate": 8.742367239004479e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0797911690727315, + "grad_norm": 0.24061249196529388, + "learning_rate": 8.741264940936042e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0802139037433156, + "grad_norm": 0.26503726840019226, + "learning_rate": 8.740162229562487e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0806366384138995, + "grad_norm": 0.3111477494239807, + "learning_rate": 8.73905910500564e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0810593730844835, + "grad_norm": 0.25783368945121765, + "learning_rate": 8.737955567387359e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0814821077550676, + "grad_norm": 0.2816965579986572, + "learning_rate": 8.736851616829557e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0819048424256514, + "grad_norm": 0.42872193455696106, + "learning_rate": 8.73574725345419e-05, + "loss": 0.3757, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0823275770962355, + "grad_norm": 0.23284964263439178, + "learning_rate": 8.734642477383258e-05, + "loss": 0.3698, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 21990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0827503117668196, + "grad_norm": 0.20869645476341248, + "learning_rate": 8.733537288738808e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0831730464374036, + "grad_norm": 0.3301194906234741, + "learning_rate": 8.732431687642932e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0835957811079875, + "grad_norm": 0.21367792785167694, + "learning_rate": 8.731325674217771e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0840185157785716, + "grad_norm": 0.2146732211112976, + "learning_rate": 8.730219248585505e-05, + "loss": 0.371, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0844412504491556, + "grad_norm": 0.24483755230903625, + "learning_rate": 8.729112410868366e-05, + "loss": 0.3723, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0848639851197397, + "grad_norm": 0.22931064665317535, + "learning_rate": 8.728005161188624e-05, + "loss": 0.3685, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0852867197903235, + "grad_norm": 0.28586822748184204, + "learning_rate": 8.726897499668604e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0857094544609076, + "grad_norm": 0.2183891236782074, + "learning_rate": 8.725789426430668e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0861321891314917, + "grad_norm": 0.17135745286941528, + "learning_rate": 8.724680941597231e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0865549238020757, + "grad_norm": 0.21738116443157196, + "learning_rate": 8.723572045290747e-05, + "loss": 0.368, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0869776584726596, + "grad_norm": 0.30572447180747986, + "learning_rate": 8.722462737633716e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0874003931432437, + "grad_norm": 0.21945342421531677, + "learning_rate": 8.72135301874869e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0878231278138277, + "grad_norm": 0.2061932235956192, + "learning_rate": 8.720242888758259e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0882458624844116, + "grad_norm": 0.25666704773902893, + "learning_rate": 8.719132347785062e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0886685971549956, + "grad_norm": 0.21559686958789825, + "learning_rate": 8.718021395951783e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0890913318255797, + "grad_norm": 0.2687456011772156, + "learning_rate": 8.716910033381151e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0895140664961638, + "grad_norm": 0.2789709270000458, + "learning_rate": 8.71579826019594e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0899368011667476, + "grad_norm": 0.2424291968345642, + "learning_rate": 8.714686076518971e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0903595358373317, + "grad_norm": 0.21544356644153595, + "learning_rate": 8.713573482473108e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0907822705079158, + "grad_norm": 0.23350384831428528, + "learning_rate": 8.712460478181262e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0912050051784998, + "grad_norm": 0.3381618559360504, + "learning_rate": 8.711347063766387e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0916277398490837, + "grad_norm": 0.28608638048171997, + "learning_rate": 8.710233239351486e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0920504745196677, + "grad_norm": 0.2293860763311386, + "learning_rate": 8.709119005059607e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0924732091902518, + "grad_norm": 0.19962479174137115, + "learning_rate": 8.70800436101384e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0928959438608357, + "grad_norm": 0.3619195818901062, + "learning_rate": 8.706889307337322e-05, + "loss": 0.372, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0933186785314197, + "grad_norm": 0.3051026463508606, + "learning_rate": 8.705773844153233e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0937414132020038, + "grad_norm": 0.2861962616443634, + "learning_rate": 8.704657971584806e-05, + "loss": 0.3748, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0941641478725879, + "grad_norm": 0.20374305546283722, + "learning_rate": 8.703541689755308e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0945868825431717, + "grad_norm": 0.23980621993541718, + "learning_rate": 8.702424998788059e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0950096172137558, + "grad_norm": 0.29238757491111755, + "learning_rate": 8.701307898806423e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0954323518843398, + "grad_norm": 0.202943816781044, + "learning_rate": 8.700190389933809e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.095855086554924, + "grad_norm": 0.37346214056015015, + "learning_rate": 8.699072472293668e-05, + "loss": 0.3744, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0962778212255078, + "grad_norm": 0.25127214193344116, + "learning_rate": 8.697954146009499e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0967005558960918, + "grad_norm": 0.21935491263866425, + "learning_rate": 8.696835411204849e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.097123290566676, + "grad_norm": 0.17451803386211395, + "learning_rate": 8.695716268003302e-05, + "loss": 0.3734, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0975460252372597, + "grad_norm": 0.2796311676502228, + "learning_rate": 8.694596716528497e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0979687599078438, + "grad_norm": 0.3725579082965851, + "learning_rate": 8.693476756904109e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0983914945784279, + "grad_norm": 0.2082560509443283, + "learning_rate": 8.692356389253864e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.098814229249012, + "grad_norm": 0.23078025877475739, + "learning_rate": 8.691235613701532e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0992369639195958, + "grad_norm": 0.3020284175872803, + "learning_rate": 8.690114430370924e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0996596985901799, + "grad_norm": 0.25837260484695435, + "learning_rate": 8.688992839385905e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.100082433260764, + "grad_norm": 0.1998516321182251, + "learning_rate": 8.687870840870373e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.100505167931348, + "grad_norm": 0.21673625707626343, + "learning_rate": 8.686748434948284e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1009279026019319, + "grad_norm": 0.2513512670993805, + "learning_rate": 8.685625621743627e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.101350637272516, + "grad_norm": 0.2320239096879959, + "learning_rate": 8.684502401380445e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1017733719431, + "grad_norm": 0.2387266904115677, + "learning_rate": 8.68337877398282e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1021961066136838, + "grad_norm": 0.24107220768928528, + "learning_rate": 8.682254739674884e-05, + "loss": 0.3734, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.102618841284268, + "grad_norm": 0.23907552659511566, + "learning_rate": 8.681130298580808e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.103041575954852, + "grad_norm": 0.1582423597574234, + "learning_rate": 8.680005450824813e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.103464310625436, + "grad_norm": 0.20987583696842194, + "learning_rate": 8.678880196531165e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1038870452960199, + "grad_norm": 0.2759341299533844, + "learning_rate": 8.677754535824169e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.104309779966604, + "grad_norm": 0.2556970715522766, + "learning_rate": 8.676628468828184e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.104732514637188, + "grad_norm": 0.20676954090595245, + "learning_rate": 8.675501995667605e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.105155249307772, + "grad_norm": 0.21880924701690674, + "learning_rate": 8.674375116466878e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.105577983978356, + "grad_norm": 0.2684710919857025, + "learning_rate": 8.673247831350489e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.10600071864894, + "grad_norm": 0.202826589345932, + "learning_rate": 8.672120140442974e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.106423453319524, + "grad_norm": 0.2751288115978241, + "learning_rate": 8.67099204386891e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.106846187990108, + "grad_norm": 0.21071039140224457, + "learning_rate": 8.669863541752922e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.107268922660692, + "grad_norm": 0.27080610394477844, + "learning_rate": 8.668734634219676e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.107691657331276, + "grad_norm": 0.21902020275592804, + "learning_rate": 8.667605321393885e-05, + "loss": 0.373, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1081143920018601, + "grad_norm": 0.27531471848487854, + "learning_rate": 8.666475603400307e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.108537126672444, + "grad_norm": 0.2589554190635681, + "learning_rate": 8.665345480363743e-05, + "loss": 0.3702, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.108959861343028, + "grad_norm": 0.19111602008342743, + "learning_rate": 8.664214952409042e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.109382596013612, + "grad_norm": 0.19202496111392975, + "learning_rate": 8.663084019661094e-05, + "loss": 0.3699, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1098053306841962, + "grad_norm": 0.250652015209198, + "learning_rate": 8.661952682244837e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.11022806535478, + "grad_norm": 0.2201901227235794, + "learning_rate": 8.66082094028525e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.110650800025364, + "grad_norm": 0.2127203345298767, + "learning_rate": 8.659688793907361e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1110735346959482, + "grad_norm": 0.271756112575531, + "learning_rate": 8.65855624323624e-05, + "loss": 0.3714, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.111496269366532, + "grad_norm": 0.28506070375442505, + "learning_rate": 8.657423288396999e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.111919004037116, + "grad_norm": 0.2005506157875061, + "learning_rate": 8.656289929514802e-05, + "loss": 0.3678, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1123417387077001, + "grad_norm": 0.22274991869926453, + "learning_rate": 8.655156166714853e-05, + "loss": 0.3678, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1127644733782842, + "grad_norm": 0.27576449513435364, + "learning_rate": 8.654022000122399e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.113187208048868, + "grad_norm": 0.2468879073858261, + "learning_rate": 8.652887429862735e-05, + "loss": 0.3704, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1136099427194521, + "grad_norm": 0.25885194540023804, + "learning_rate": 8.6517524560612e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1140326773900362, + "grad_norm": 0.24771974980831146, + "learning_rate": 8.650617078843174e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1144554120606203, + "grad_norm": 0.26335737109184265, + "learning_rate": 8.649481298334087e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1148781467312041, + "grad_norm": 0.41233381628990173, + "learning_rate": 8.648345114659411e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1153008814017882, + "grad_norm": 0.1817072629928589, + "learning_rate": 8.64720852794466e-05, + "loss": 0.3744, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1157236160723722, + "grad_norm": 0.27164074778556824, + "learning_rate": 8.646071538315398e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.116146350742956, + "grad_norm": 0.27874755859375, + "learning_rate": 8.644934145897229e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1165690854135402, + "grad_norm": 0.26721999049186707, + "learning_rate": 8.643796350815804e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1169918200841242, + "grad_norm": 0.23437203466892242, + "learning_rate": 8.642658153196817e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1174145547547083, + "grad_norm": 0.2287806272506714, + "learning_rate": 8.641519553166006e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1178372894252921, + "grad_norm": 0.29786941409111023, + "learning_rate": 8.640380550849154e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1182600240958762, + "grad_norm": 0.2625124752521515, + "learning_rate": 8.639241146372092e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1186827587664603, + "grad_norm": 0.19618725776672363, + "learning_rate": 8.638101339860692e-05, + "loss": 0.368, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1191054934370444, + "grad_norm": 0.23938676714897156, + "learning_rate": 8.636961131440867e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1195282281076282, + "grad_norm": 0.22838053107261658, + "learning_rate": 8.635820521238581e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1199509627782123, + "grad_norm": 0.1952606588602066, + "learning_rate": 8.63467950937984e-05, + "loss": 0.3709, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1203736974487963, + "grad_norm": 0.2186988741159439, + "learning_rate": 8.633538095990692e-05, + "loss": 0.3706, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1207964321193802, + "grad_norm": 0.20217843353748322, + "learning_rate": 8.63239628119723e-05, + "loss": 0.3726, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1212191667899642, + "grad_norm": 0.2415478527545929, + "learning_rate": 8.631254065125598e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1216419014605483, + "grad_norm": 0.2520994246006012, + "learning_rate": 8.630111447901974e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1220646361311324, + "grad_norm": 0.28548479080200195, + "learning_rate": 8.628968429652587e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1224873708017162, + "grad_norm": 0.22803650796413422, + "learning_rate": 8.627825010503708e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1229101054723003, + "grad_norm": 0.22013454139232635, + "learning_rate": 8.626681190581653e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1233328401428844, + "grad_norm": 0.3307306170463562, + "learning_rate": 8.625536970012785e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1237555748134684, + "grad_norm": 0.27428504824638367, + "learning_rate": 8.624392348923503e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1241783094840523, + "grad_norm": 0.2209569215774536, + "learning_rate": 8.623361847602706e-05, + "loss": 0.371, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1246010441546364, + "grad_norm": 0.22667156159877777, + "learning_rate": 8.622216465873047e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 22990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1250237788252204, + "grad_norm": 0.21510794758796692, + "learning_rate": 8.621070683989798e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1254465134958043, + "grad_norm": 0.24971966445446014, + "learning_rate": 8.619924502079537e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1258692481663883, + "grad_norm": 0.1941734254360199, + "learning_rate": 8.618777920268886e-05, + "loss": 0.3706, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1262919828369724, + "grad_norm": 0.3157132565975189, + "learning_rate": 8.61763093868451e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1267147175075565, + "grad_norm": 0.19631682336330414, + "learning_rate": 8.616483557453117e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1271374521781403, + "grad_norm": 0.25467854738235474, + "learning_rate": 8.615335776701461e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1275601868487244, + "grad_norm": 0.23583953082561493, + "learning_rate": 8.614187596556342e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1279829215193085, + "grad_norm": 0.3344886004924774, + "learning_rate": 8.613039017144599e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1284056561898925, + "grad_norm": 0.28689783811569214, + "learning_rate": 8.61189003859312e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1288283908604764, + "grad_norm": 0.24278029799461365, + "learning_rate": 8.610740661028831e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1292511255310604, + "grad_norm": 0.3073451817035675, + "learning_rate": 8.609590884578712e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1296738602016445, + "grad_norm": 0.22529354691505432, + "learning_rate": 8.608440709369775e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1300965948722284, + "grad_norm": 0.22247959673404694, + "learning_rate": 8.607290135529087e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1305193295428124, + "grad_norm": 0.22181425988674164, + "learning_rate": 8.606139163183753e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1309420642133965, + "grad_norm": 0.27997586131095886, + "learning_rate": 8.60498779246092e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1313647988839806, + "grad_norm": 0.3298487067222595, + "learning_rate": 8.603836023487788e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1317875335545644, + "grad_norm": 0.2911455035209656, + "learning_rate": 8.602683856391592e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1322102682251485, + "grad_norm": 0.2914423942565918, + "learning_rate": 8.601531291299611e-05, + "loss": 0.3709, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1326330028957325, + "grad_norm": 0.2611761689186096, + "learning_rate": 8.600378328339178e-05, + "loss": 0.3684, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1330557375663166, + "grad_norm": 0.26118606328964233, + "learning_rate": 8.59922496763766e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1334784722369005, + "grad_norm": 0.26921162009239197, + "learning_rate": 8.598071209322469e-05, + "loss": 0.3701, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1339012069074845, + "grad_norm": 0.17667533457279205, + "learning_rate": 8.596917053521066e-05, + "loss": 0.3692, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1343239415780686, + "grad_norm": 0.2292431741952896, + "learning_rate": 8.595762500360951e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1347466762486524, + "grad_norm": 0.23875713348388672, + "learning_rate": 8.594607549969671e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1351694109192365, + "grad_norm": 0.26144295930862427, + "learning_rate": 8.593452202474814e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1355921455898206, + "grad_norm": 0.18260517716407776, + "learning_rate": 8.592296458004017e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1360148802604046, + "grad_norm": 0.19545161724090576, + "learning_rate": 8.591140316684955e-05, + "loss": 0.37, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1364376149309885, + "grad_norm": 0.18231293559074402, + "learning_rate": 8.589983778645349e-05, + "loss": 0.3703, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1368603496015726, + "grad_norm": 0.23659780621528625, + "learning_rate": 8.588826844012965e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1372830842721566, + "grad_norm": 0.2227717638015747, + "learning_rate": 8.587669512915612e-05, + "loss": 0.3702, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1377058189427407, + "grad_norm": 0.22401049733161926, + "learning_rate": 8.586511785481142e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1381285536133245, + "grad_norm": 0.25963565707206726, + "learning_rate": 8.585353661837451e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1385512882839086, + "grad_norm": 0.2634326219558716, + "learning_rate": 8.58419514211248e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1389740229544927, + "grad_norm": 0.2595807611942291, + "learning_rate": 8.583036226434215e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1393967576250765, + "grad_norm": 0.3059310019016266, + "learning_rate": 8.58187691493068e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1398194922956606, + "grad_norm": 0.2198801338672638, + "learning_rate": 8.58071720772995e-05, + "loss": 0.3702, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1402422269662447, + "grad_norm": 0.24364222586154938, + "learning_rate": 8.579557104960138e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1406649616368287, + "grad_norm": 0.22828055918216705, + "learning_rate": 8.578396606749404e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1410876963074126, + "grad_norm": 0.25138968229293823, + "learning_rate": 8.577235713225948e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1415104309779966, + "grad_norm": 0.22816012799739838, + "learning_rate": 8.576074424518022e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1419331656485807, + "grad_norm": 0.26521036028862, + "learning_rate": 8.574912740753909e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1423559003191648, + "grad_norm": 0.23888829350471497, + "learning_rate": 8.573750662061946e-05, + "loss": 0.3705, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1427786349897486, + "grad_norm": 0.2239394634962082, + "learning_rate": 8.572588188570511e-05, + "loss": 0.3549, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1432013696603327, + "grad_norm": 0.1986001878976822, + "learning_rate": 8.571425320408023e-05, + "loss": 0.3713, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1436241043309168, + "grad_norm": 0.2550206780433655, + "learning_rate": 8.570262057702948e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1440468390015006, + "grad_norm": 0.2764301598072052, + "learning_rate": 8.569098400583793e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1444695736720847, + "grad_norm": 0.2700566053390503, + "learning_rate": 8.56793434917911e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1448923083426688, + "grad_norm": 0.27509742975234985, + "learning_rate": 8.566769903617492e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1453150430132528, + "grad_norm": 0.24956074357032776, + "learning_rate": 8.565605064027582e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1457377776838367, + "grad_norm": 0.1668338179588318, + "learning_rate": 8.564439830538058e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1461605123544207, + "grad_norm": 0.21655569970607758, + "learning_rate": 8.563274203277649e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1465832470250048, + "grad_norm": 0.2219054102897644, + "learning_rate": 8.562108182375121e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1470059816955889, + "grad_norm": 0.17648781836032867, + "learning_rate": 8.56094176795929e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1474287163661727, + "grad_norm": 0.335479199886322, + "learning_rate": 8.559774960159008e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1478514510367568, + "grad_norm": 0.2537664771080017, + "learning_rate": 8.558607759103179e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1482741857073409, + "grad_norm": 0.22317783534526825, + "learning_rate": 8.557440164920744e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1486969203779247, + "grad_norm": 0.29342833161354065, + "learning_rate": 8.556272177740689e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1491196550485088, + "grad_norm": 0.28310924768447876, + "learning_rate": 8.555103797692046e-05, + "loss": 0.3712, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1495423897190928, + "grad_norm": 0.18726783990859985, + "learning_rate": 8.553935024903885e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.149965124389677, + "grad_norm": 0.28360575437545776, + "learning_rate": 8.552765859505325e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1503878590602608, + "grad_norm": 0.24884304404258728, + "learning_rate": 8.551596301625526e-05, + "loss": 0.3707, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1508105937308448, + "grad_norm": 0.20931705832481384, + "learning_rate": 8.550426351393691e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.151233328401429, + "grad_norm": 0.26963016390800476, + "learning_rate": 8.549256008939067e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.151656063072013, + "grad_norm": 0.21667884290218353, + "learning_rate": 8.548085274390944e-05, + "loss": 0.3563, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1520787977425968, + "grad_norm": 0.19810894131660461, + "learning_rate": 8.546914147878654e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1525015324131809, + "grad_norm": 0.20647138357162476, + "learning_rate": 8.545742629531577e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.152924267083765, + "grad_norm": 0.2547857165336609, + "learning_rate": 8.54457071947913e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1533470017543488, + "grad_norm": 0.3928770422935486, + "learning_rate": 8.543398417850777e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1537697364249329, + "grad_norm": 0.23178763687610626, + "learning_rate": 8.542225724776025e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.154192471095517, + "grad_norm": 0.2280016541481018, + "learning_rate": 8.541052640384423e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.154615205766101, + "grad_norm": 0.1720692366361618, + "learning_rate": 8.539879164805567e-05, + "loss": 0.3517, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1550379404366848, + "grad_norm": 0.2336922287940979, + "learning_rate": 8.53870529816909e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.155460675107269, + "grad_norm": 0.29054176807403564, + "learning_rate": 8.537531040604672e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.155883409777853, + "grad_norm": 0.18200966715812683, + "learning_rate": 8.536356392242036e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.156306144448437, + "grad_norm": 0.22406703233718872, + "learning_rate": 8.535181353210947e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.156728879119021, + "grad_norm": 0.255812406539917, + "learning_rate": 8.534005923641215e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.157151613789605, + "grad_norm": 0.19928503036499023, + "learning_rate": 8.532830103662692e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.157574348460189, + "grad_norm": 0.22669146955013275, + "learning_rate": 8.531653893405272e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1579970831307729, + "grad_norm": 0.2575737237930298, + "learning_rate": 8.530477292998897e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.158419817801357, + "grad_norm": 0.2149282693862915, + "learning_rate": 8.529300302573544e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.158842552471941, + "grad_norm": 0.34283486008644104, + "learning_rate": 8.528122922259238e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.159265287142525, + "grad_norm": 0.19989265501499176, + "learning_rate": 8.526945152186048e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.159688021813109, + "grad_norm": 0.31380683183670044, + "learning_rate": 8.525766992484085e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.160110756483693, + "grad_norm": 0.20847652852535248, + "learning_rate": 8.524588443283503e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.160533491154277, + "grad_norm": 0.2331671416759491, + "learning_rate": 8.523409504714496e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1609562258248611, + "grad_norm": 0.2660086154937744, + "learning_rate": 8.522230176907309e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.161378960495445, + "grad_norm": 0.1830943077802658, + "learning_rate": 8.521050459992218e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.161801695166029, + "grad_norm": 0.19297580420970917, + "learning_rate": 8.519870354099554e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1622244298366131, + "grad_norm": 0.382402628660202, + "learning_rate": 8.518689859359684e-05, + "loss": 0.3702, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.162647164507197, + "grad_norm": 0.22138646245002747, + "learning_rate": 8.517508975903017e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.163069899177781, + "grad_norm": 0.24474410712718964, + "learning_rate": 8.51632770386001e-05, + "loss": 0.3702, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.163492633848365, + "grad_norm": 0.17091520130634308, + "learning_rate": 8.515146043361162e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1639153685189492, + "grad_norm": 0.4253753125667572, + "learning_rate": 8.513963994537013e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.164338103189533, + "grad_norm": 0.24152208864688873, + "learning_rate": 8.512781557518143e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.164760837860117, + "grad_norm": 0.19621287286281586, + "learning_rate": 8.511598732435182e-05, + "loss": 0.3706, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1651835725307012, + "grad_norm": 0.24163362383842468, + "learning_rate": 8.510415519418796e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1656063072012852, + "grad_norm": 0.2647132873535156, + "learning_rate": 8.5092319185997e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.166029041871869, + "grad_norm": 0.16907969117164612, + "learning_rate": 8.508047930108647e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1664517765424531, + "grad_norm": 0.2576303482055664, + "learning_rate": 8.506863554076434e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1668745112130372, + "grad_norm": 0.22143369913101196, + "learning_rate": 8.505678790633902e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 23990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.167297245883621, + "grad_norm": 0.17984971404075623, + "learning_rate": 8.504493639911934e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1677199805542051, + "grad_norm": 0.3418881595134735, + "learning_rate": 8.503308102041459e-05, + "loss": 0.3749, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1681427152247892, + "grad_norm": 0.20188109576702118, + "learning_rate": 8.50212217715344e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1685654498953733, + "grad_norm": 0.28946980834007263, + "learning_rate": 8.500935865378894e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.168988184565957, + "grad_norm": 0.2537732422351837, + "learning_rate": 8.499749166848873e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1694109192365412, + "grad_norm": 0.21617871522903442, + "learning_rate": 8.498562081694474e-05, + "loss": 0.3746, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1698336539071252, + "grad_norm": 0.19907216727733612, + "learning_rate": 8.497374610046837e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1702563885777093, + "grad_norm": 0.2797276973724365, + "learning_rate": 8.496186752037143e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1706791232482932, + "grad_norm": 0.2331697791814804, + "learning_rate": 8.494998507796618e-05, + "loss": 0.3541, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1711018579188772, + "grad_norm": 0.1675199419260025, + "learning_rate": 8.493809877456531e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1715245925894613, + "grad_norm": 0.22035950422286987, + "learning_rate": 8.492620861148192e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1719473272600451, + "grad_norm": 0.24797217547893524, + "learning_rate": 8.491431459002952e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1723700619306292, + "grad_norm": 0.18068020045757294, + "learning_rate": 8.490241671152208e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1727927966012133, + "grad_norm": 0.2686847150325775, + "learning_rate": 8.489051497727398e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1732155312717973, + "grad_norm": 0.20182639360427856, + "learning_rate": 8.487860938860006e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1736382659423812, + "grad_norm": 0.25368213653564453, + "learning_rate": 8.48666999468155e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1740610006129653, + "grad_norm": 0.24423561990261078, + "learning_rate": 8.485478665323598e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1744837352835493, + "grad_norm": 0.21727514266967773, + "learning_rate": 8.484286950917762e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1749064699541334, + "grad_norm": 0.26019302010536194, + "learning_rate": 8.483094851595687e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1753292046247172, + "grad_norm": 0.20895452797412872, + "learning_rate": 8.481902367489074e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1757519392953013, + "grad_norm": 0.185626819729805, + "learning_rate": 8.480709498729651e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1761746739658854, + "grad_norm": 0.23675411939620972, + "learning_rate": 8.479516245449204e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1765974086364692, + "grad_norm": 0.22956474125385284, + "learning_rate": 8.47832260777955e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1770201433070533, + "grad_norm": 0.309598445892334, + "learning_rate": 8.477128585852552e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1774428779776374, + "grad_norm": 0.29135578870773315, + "learning_rate": 8.475934179800118e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1778656126482214, + "grad_norm": 0.22781439125537872, + "learning_rate": 8.474739389754196e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1782883473188053, + "grad_norm": 0.2050650417804718, + "learning_rate": 8.473544215846778e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1787110819893893, + "grad_norm": 0.48789164423942566, + "learning_rate": 8.472348658209895e-05, + "loss": 0.3678, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1791338166599734, + "grad_norm": 0.25804951786994934, + "learning_rate": 8.471152716975624e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1795565513305575, + "grad_norm": 0.23175744712352753, + "learning_rate": 8.469956392276082e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1799792860011413, + "grad_norm": 0.20627613365650177, + "learning_rate": 8.46875968424343e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1804020206717254, + "grad_norm": 0.21617895364761353, + "learning_rate": 8.467562593009873e-05, + "loss": 0.3705, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1808247553423095, + "grad_norm": 0.21336112916469574, + "learning_rate": 8.466365118707652e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1812474900128933, + "grad_norm": 0.2692069113254547, + "learning_rate": 8.465167261469056e-05, + "loss": 0.3727, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1816702246834774, + "grad_norm": 0.2027571201324463, + "learning_rate": 8.463969021426416e-05, + "loss": 0.3703, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1820929593540614, + "grad_norm": 0.2683245539665222, + "learning_rate": 8.462770398712103e-05, + "loss": 0.3693, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1825156940246455, + "grad_norm": 0.1879616677761078, + "learning_rate": 8.461571393458532e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1829384286952294, + "grad_norm": 0.2755313813686371, + "learning_rate": 8.46037200579816e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1833611633658134, + "grad_norm": 0.25791627168655396, + "learning_rate": 8.459172235863483e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1837838980363975, + "grad_norm": 0.19449429214000702, + "learning_rate": 8.457972083787045e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1842066327069816, + "grad_norm": 0.2512854039669037, + "learning_rate": 8.456771549701425e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1846293673775654, + "grad_norm": 0.20618936419487, + "learning_rate": 8.455570633739255e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1850521020481495, + "grad_norm": 0.21159295737743378, + "learning_rate": 8.454369336033198e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1854748367187335, + "grad_norm": 0.18416135013103485, + "learning_rate": 8.453167656715963e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1858975713893174, + "grad_norm": 0.23775532841682434, + "learning_rate": 8.451965595920306e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1863203060599015, + "grad_norm": 0.26616615056991577, + "learning_rate": 8.450763153779019e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1867430407304855, + "grad_norm": 0.2865838408470154, + "learning_rate": 8.449560330424937e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1871657754010696, + "grad_norm": 0.3085343539714813, + "learning_rate": 8.44835712599094e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1875885100716534, + "grad_norm": 0.29165762662887573, + "learning_rate": 8.447153540609947e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1880112447422375, + "grad_norm": 0.24853289127349854, + "learning_rate": 8.445949574414922e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1884339794128216, + "grad_norm": 0.2725174129009247, + "learning_rate": 8.444745227538869e-05, + "loss": 0.371, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1888567140834057, + "grad_norm": 0.21129079163074493, + "learning_rate": 8.443540500114835e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1892794487539895, + "grad_norm": 0.2165479212999344, + "learning_rate": 8.442335392275908e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1897021834245736, + "grad_norm": 0.20422452688217163, + "learning_rate": 8.441129904155218e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1901249180951576, + "grad_norm": 0.18252766132354736, + "learning_rate": 8.43992403588594e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1905476527657415, + "grad_norm": 0.23882660269737244, + "learning_rate": 8.438717787601287e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1909703874363256, + "grad_norm": 0.19677798449993134, + "learning_rate": 8.437511159434515e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1913931221069096, + "grad_norm": 0.2036718726158142, + "learning_rate": 8.436304151518925e-05, + "loss": 0.37, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1918158567774937, + "grad_norm": 0.24367976188659668, + "learning_rate": 8.435096763987855e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1922385914480775, + "grad_norm": 0.30348023772239685, + "learning_rate": 8.433888996974691e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1926613261186616, + "grad_norm": 0.22553467750549316, + "learning_rate": 8.432680850612853e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1930840607892457, + "grad_norm": 0.19906918704509735, + "learning_rate": 8.431472325035811e-05, + "loss": 0.3703, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1935067954598297, + "grad_norm": 0.25439921021461487, + "learning_rate": 8.430263420377074e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1939295301304136, + "grad_norm": 0.22301620244979858, + "learning_rate": 8.429054136770188e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1943522648009977, + "grad_norm": 0.2775941789150238, + "learning_rate": 8.427844474348748e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1947749994715817, + "grad_norm": 0.2013201266527176, + "learning_rate": 8.426634433246385e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1951977341421656, + "grad_norm": 0.20837609469890594, + "learning_rate": 8.425424013596779e-05, + "loss": 0.3701, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1956204688127496, + "grad_norm": 0.2831878364086151, + "learning_rate": 8.424213215533642e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1960432034833337, + "grad_norm": 0.3018951416015625, + "learning_rate": 8.423002039190741e-05, + "loss": 0.3557, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1964659381539178, + "grad_norm": 0.20775051414966583, + "learning_rate": 8.42179048470187e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1968886728245016, + "grad_norm": 0.19409477710723877, + "learning_rate": 8.420578552200875e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1973114074950857, + "grad_norm": 0.21389298141002655, + "learning_rate": 8.41936624182164e-05, + "loss": 0.3696, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1977341421656698, + "grad_norm": 0.15417805314064026, + "learning_rate": 8.418153553698093e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1981568768362538, + "grad_norm": 0.1968441754579544, + "learning_rate": 8.4169404879642e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1985796115068377, + "grad_norm": 0.22624902427196503, + "learning_rate": 8.415727044753969e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1990023461774217, + "grad_norm": 0.28157806396484375, + "learning_rate": 8.414513224201457e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1994250808480058, + "grad_norm": 0.23530031740665436, + "learning_rate": 8.413299026440753e-05, + "loss": 0.3684, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1998478155185897, + "grad_norm": 0.24606861174106598, + "learning_rate": 8.412084451605992e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2002705501891737, + "grad_norm": 0.3025953471660614, + "learning_rate": 8.410869499831354e-05, + "loss": 0.3693, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2006932848597578, + "grad_norm": 0.17503267526626587, + "learning_rate": 8.409654171251053e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2011160195303419, + "grad_norm": 0.17825673520565033, + "learning_rate": 8.408438465999352e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2015387542009257, + "grad_norm": 0.20947562158107758, + "learning_rate": 8.407222384210551e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2019614888715098, + "grad_norm": 0.24348226189613342, + "learning_rate": 8.406005926018991e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2023842235420938, + "grad_norm": 0.33469048142433167, + "learning_rate": 8.404789091559063e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.202806958212678, + "grad_norm": 0.18535897135734558, + "learning_rate": 8.403571880965185e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2032296928832618, + "grad_norm": 0.2063058763742447, + "learning_rate": 8.402354294371828e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2036524275538458, + "grad_norm": 0.24529282748699188, + "learning_rate": 8.401136331913505e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.20407516222443, + "grad_norm": 0.19443297386169434, + "learning_rate": 8.399917993724762e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2044978968950137, + "grad_norm": 0.22631752490997314, + "learning_rate": 8.398699279940193e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2049206315655978, + "grad_norm": 0.26289770007133484, + "learning_rate": 8.397480190694431e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2053433662361819, + "grad_norm": 0.2302701473236084, + "learning_rate": 8.396260726122154e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.205766100906766, + "grad_norm": 0.21211282908916473, + "learning_rate": 8.395040886358075e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2061888355773498, + "grad_norm": 0.22165031731128693, + "learning_rate": 8.393820671536953e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2066115702479339, + "grad_norm": 0.20826639235019684, + "learning_rate": 8.39260008179359e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.207034304918518, + "grad_norm": 0.32546910643577576, + "learning_rate": 8.391379117262825e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.207457039589102, + "grad_norm": 0.23104873299598694, + "learning_rate": 8.390157778079541e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2078797742596858, + "grad_norm": 0.19840107858181, + "learning_rate": 8.388936064378661e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.20830250893027, + "grad_norm": 0.24026066064834595, + "learning_rate": 8.387713976295152e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.208725243600854, + "grad_norm": 0.18032829463481903, + "learning_rate": 8.386491513964018e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2091479782714378, + "grad_norm": 0.20732703804969788, + "learning_rate": 8.385268677520311e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 24990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.209570712942022, + "grad_norm": 0.25261926651000977, + "learning_rate": 8.384045467099115e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.209993447612606, + "grad_norm": 0.24506551027297974, + "learning_rate": 8.382821882835563e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.21041618228319, + "grad_norm": 0.18538720905780792, + "learning_rate": 8.381597924864828e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2108389169537739, + "grad_norm": 0.23380564153194427, + "learning_rate": 8.380373593322121e-05, + "loss": 0.3723, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.211261651624358, + "grad_norm": 0.30442512035369873, + "learning_rate": 8.379148888342698e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.211684386294942, + "grad_norm": 0.24571697413921356, + "learning_rate": 8.377923810061854e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.212107120965526, + "grad_norm": 0.21084906160831451, + "learning_rate": 8.376698358614927e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.21252985563611, + "grad_norm": 0.18121498823165894, + "learning_rate": 8.375472534137291e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.212952590306694, + "grad_norm": 0.27970588207244873, + "learning_rate": 8.37424633676437e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.213375324977278, + "grad_norm": 0.20693513751029968, + "learning_rate": 8.373019766631624e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.213798059647862, + "grad_norm": 0.3050481081008911, + "learning_rate": 8.371792823874554e-05, + "loss": 0.3696, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.214220794318446, + "grad_norm": 0.24458537995815277, + "learning_rate": 8.3705655086287e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.21464352898903, + "grad_norm": 0.29557085037231445, + "learning_rate": 8.36933782102965e-05, + "loss": 0.3701, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2150662636596141, + "grad_norm": 0.17567993700504303, + "learning_rate": 8.368109761213025e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.215488998330198, + "grad_norm": 0.19042450189590454, + "learning_rate": 8.366881329314496e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.215911733000782, + "grad_norm": 0.2945632040500641, + "learning_rate": 8.365652525469769e-05, + "loss": 0.3733, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.216334467671366, + "grad_norm": 0.1871052384376526, + "learning_rate": 8.36442334981459e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2167572023419502, + "grad_norm": 0.18965467810630798, + "learning_rate": 8.363193802484751e-05, + "loss": 0.3691, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.217179937012534, + "grad_norm": 0.2268313765525818, + "learning_rate": 8.36196388361608e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.217602671683118, + "grad_norm": 0.27094602584838867, + "learning_rate": 8.360733593344452e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2180254063537022, + "grad_norm": 0.216855987906456, + "learning_rate": 8.359502931805776e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.218448141024286, + "grad_norm": 0.18492691218852997, + "learning_rate": 8.35827189913601e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.21887087569487, + "grad_norm": 0.2344633787870407, + "learning_rate": 8.357040495471145e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2192936103654541, + "grad_norm": 0.20528855919837952, + "learning_rate": 8.355808720947218e-05, + "loss": 0.3696, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2197163450360382, + "grad_norm": 0.2209005206823349, + "learning_rate": 8.354576575700306e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.220139079706622, + "grad_norm": 0.214651957154274, + "learning_rate": 8.353344059866525e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2205618143772061, + "grad_norm": 0.34473344683647156, + "learning_rate": 8.352111173582036e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2209845490477902, + "grad_norm": 0.28614291548728943, + "learning_rate": 8.350877916983037e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2214072837183743, + "grad_norm": 0.2317659705877304, + "learning_rate": 8.349644290205768e-05, + "loss": 0.3721, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.221830018388958, + "grad_norm": 0.18661947548389435, + "learning_rate": 8.34841029338651e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2222527530595422, + "grad_norm": 0.21701350808143616, + "learning_rate": 8.347175926661588e-05, + "loss": 0.3701, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2226754877301262, + "grad_norm": 0.25783300399780273, + "learning_rate": 8.34594119016736e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.22309822240071, + "grad_norm": 0.2084406018257141, + "learning_rate": 8.344706084040235e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2235209570712942, + "grad_norm": 0.2323237657546997, + "learning_rate": 8.343470608416656e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2239436917418782, + "grad_norm": 0.26512575149536133, + "learning_rate": 8.342234763433106e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2243664264124623, + "grad_norm": 0.28127989172935486, + "learning_rate": 8.340998549226115e-05, + "loss": 0.3526, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2247891610830461, + "grad_norm": 0.18312034010887146, + "learning_rate": 8.339761965932247e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2252118957536302, + "grad_norm": 0.21859276294708252, + "learning_rate": 8.33852501368811e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2256346304242143, + "grad_norm": 0.16680267453193665, + "learning_rate": 8.337287692630356e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2260573650947983, + "grad_norm": 0.20622974634170532, + "learning_rate": 8.33605000289567e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2264800997653822, + "grad_norm": 0.36207833886146545, + "learning_rate": 8.334811944620785e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2269028344359663, + "grad_norm": 0.2744225263595581, + "learning_rate": 8.333573517942472e-05, + "loss": 0.3704, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2273255691065503, + "grad_norm": 0.2453402429819107, + "learning_rate": 8.33233472299754e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2277483037771342, + "grad_norm": 0.29478684067726135, + "learning_rate": 8.331095559922844e-05, + "loss": 0.3719, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2281710384477182, + "grad_norm": 0.2654857337474823, + "learning_rate": 8.329856028855274e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2285937731183023, + "grad_norm": 0.18268950283527374, + "learning_rate": 8.328616129931765e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2290165077888864, + "grad_norm": 0.18590524792671204, + "learning_rate": 8.327375863289293e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2294392424594702, + "grad_norm": 0.25760698318481445, + "learning_rate": 8.326135229064871e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2298619771300543, + "grad_norm": 0.2549433708190918, + "learning_rate": 8.324894227395553e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2302847118006384, + "grad_norm": 0.28303542733192444, + "learning_rate": 8.323652858418436e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2307074464712224, + "grad_norm": 0.31122881174087524, + "learning_rate": 8.322411122270658e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2311301811418063, + "grad_norm": 0.17819420993328094, + "learning_rate": 8.321169019089394e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2315529158123903, + "grad_norm": 0.27155426144599915, + "learning_rate": 8.319926549011863e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2319756504829744, + "grad_norm": 0.1872043013572693, + "learning_rate": 8.318683712175324e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2323983851535583, + "grad_norm": 0.2524847388267517, + "learning_rate": 8.317440508717073e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2328211198241423, + "grad_norm": 0.22271724045276642, + "learning_rate": 8.31619693877445e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2332438544947264, + "grad_norm": 0.2308654487133026, + "learning_rate": 8.314953002484838e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2336665891653105, + "grad_norm": 0.39999306201934814, + "learning_rate": 8.313708699985651e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2340893238358943, + "grad_norm": 0.22829604148864746, + "learning_rate": 8.312464031414356e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2345120585064784, + "grad_norm": 0.19826561212539673, + "learning_rate": 8.31121899690845e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2349347931770625, + "grad_norm": 0.16108277440071106, + "learning_rate": 8.309973596605475e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2353575278476465, + "grad_norm": Infinity, + "learning_rate": 8.308852423690016e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2357802625182304, + "grad_norm": 0.2087729126214981, + "learning_rate": 8.307606328751684e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2362029971888144, + "grad_norm": 0.1976931393146515, + "learning_rate": 8.306359868415383e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2366257318593985, + "grad_norm": 0.20564904808998108, + "learning_rate": 8.305113042818811e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2370484665299823, + "grad_norm": 0.20856821537017822, + "learning_rate": 8.303865852099706e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2374712012005664, + "grad_norm": 0.24046868085861206, + "learning_rate": 8.302618296395852e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2378939358711505, + "grad_norm": 0.2673070430755615, + "learning_rate": 8.301370375845066e-05, + "loss": 0.3696, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2383166705417346, + "grad_norm": 0.20270191133022308, + "learning_rate": 8.300122090585208e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2387394052123184, + "grad_norm": 0.23033249378204346, + "learning_rate": 8.29887344075418e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2391621398829025, + "grad_norm": 0.23740987479686737, + "learning_rate": 8.297624426489923e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2395848745534865, + "grad_norm": 0.19344154000282288, + "learning_rate": 8.296375047930416e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2400076092240706, + "grad_norm": 0.1962519735097885, + "learning_rate": 8.295125305213682e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2404303438946545, + "grad_norm": 0.21101585030555725, + "learning_rate": 8.293875198477783e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2408530785652385, + "grad_norm": 0.19173073768615723, + "learning_rate": 8.292624727860819e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2412758132358226, + "grad_norm": 0.28354549407958984, + "learning_rate": 8.291373893500934e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2416985479064064, + "grad_norm": 0.21791338920593262, + "learning_rate": 8.290122695536311e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2421212825769905, + "grad_norm": 0.25507837533950806, + "learning_rate": 8.288871134105169e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2425440172475746, + "grad_norm": 0.24642235040664673, + "learning_rate": 8.287619209345773e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2429667519181586, + "grad_norm": 0.2464262843132019, + "learning_rate": 8.286366921396424e-05, + "loss": 0.3685, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2433894865887425, + "grad_norm": 0.24755772948265076, + "learning_rate": 8.285114270395466e-05, + "loss": 0.371, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2438122212593266, + "grad_norm": 0.2645062804222107, + "learning_rate": 8.283861256481285e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2442349559299106, + "grad_norm": 0.19318415224552155, + "learning_rate": 8.282607879792296e-05, + "loss": 0.3685, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2446576906004947, + "grad_norm": 0.19131141901016235, + "learning_rate": 8.281354140466971e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2450804252710785, + "grad_norm": 0.2547811269760132, + "learning_rate": 8.280100038643806e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2455031599416626, + "grad_norm": 0.2653515338897705, + "learning_rate": 8.278845574461347e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2459258946122467, + "grad_norm": 0.30107560753822327, + "learning_rate": 8.277590748058179e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2463486292828305, + "grad_norm": 0.18273812532424927, + "learning_rate": 8.276335559572922e-05, + "loss": 0.3694, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2467713639534146, + "grad_norm": 0.1882835328578949, + "learning_rate": 8.27508000914424e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2471940986239987, + "grad_norm": 0.16166335344314575, + "learning_rate": 8.273824096910839e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2476168332945827, + "grad_norm": 0.23106324672698975, + "learning_rate": 8.272567823011459e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2480395679651666, + "grad_norm": 0.17514857649803162, + "learning_rate": 8.271311187584884e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2484623026357506, + "grad_norm": 0.17753849923610687, + "learning_rate": 8.270054190769938e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2488850373063347, + "grad_norm": 0.2066282480955124, + "learning_rate": 8.268796832705483e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2493077719769188, + "grad_norm": 0.17709092795848846, + "learning_rate": 8.267539113530422e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2497305066475026, + "grad_norm": 0.22603853046894073, + "learning_rate": 8.266281033383695e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2501532413180867, + "grad_norm": 0.23758798837661743, + "learning_rate": 8.265022592404292e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2505759759886708, + "grad_norm": 0.19856029748916626, + "learning_rate": 8.263763790731229e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2509987106592546, + "grad_norm": 0.21525390446186066, + "learning_rate": 8.26250462850357e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2514214453298387, + "grad_norm": 0.21758168935775757, + "learning_rate": 8.261245105860419e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 25990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2518441800004227, + "grad_norm": 0.24597103893756866, + "learning_rate": 8.259985222940915e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2522669146710068, + "grad_norm": 0.18457598984241486, + "learning_rate": 8.258724979884242e-05, + "loss": 0.3711, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2526896493415909, + "grad_norm": 0.2744332253932953, + "learning_rate": 8.257464376829623e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2531123840121747, + "grad_norm": 0.1889159232378006, + "learning_rate": 8.256203413916316e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2535351186827588, + "grad_norm": 0.1671445220708847, + "learning_rate": 8.254942091283623e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2539578533533429, + "grad_norm": 0.3246941864490509, + "learning_rate": 8.253680409070885e-05, + "loss": 0.3699, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2543805880239267, + "grad_norm": 0.17375382781028748, + "learning_rate": 8.252418367417485e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2548033226945108, + "grad_norm": 0.24793270230293274, + "learning_rate": 8.25115596646284e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2552260573650948, + "grad_norm": 0.21614965796470642, + "learning_rate": 8.249893206346412e-05, + "loss": 0.37, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2556487920356787, + "grad_norm": 0.2684423327445984, + "learning_rate": 8.248630087207699e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2560715267062628, + "grad_norm": 0.2953091263771057, + "learning_rate": 8.247366609186242e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2564942613768468, + "grad_norm": 0.23127327859401703, + "learning_rate": 8.246102772421619e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.256916996047431, + "grad_norm": 0.2449747771024704, + "learning_rate": 8.244838577053449e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.257339730718015, + "grad_norm": 0.21265104413032532, + "learning_rate": 8.24357402322139e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2577624653885988, + "grad_norm": 0.19193042814731598, + "learning_rate": 8.24230911106514e-05, + "loss": 0.368, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2581852000591829, + "grad_norm": 0.18046261370182037, + "learning_rate": 8.241043840724436e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.258607934729767, + "grad_norm": 0.2450888752937317, + "learning_rate": 8.239778212339058e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2590306694003508, + "grad_norm": 0.17323368787765503, + "learning_rate": 8.238512226048816e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2594534040709349, + "grad_norm": 0.16113954782485962, + "learning_rate": 8.237245881993572e-05, + "loss": 0.3563, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.259876138741519, + "grad_norm": 0.23095180094242096, + "learning_rate": 8.235979180313218e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2602988734121028, + "grad_norm": 0.3073817491531372, + "learning_rate": 8.234712121147692e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2607216080826869, + "grad_norm": 0.2121291607618332, + "learning_rate": 8.233444704636968e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.261144342753271, + "grad_norm": 0.2057884931564331, + "learning_rate": 8.232176930921057e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.261567077423855, + "grad_norm": 0.20418307185173035, + "learning_rate": 8.230908800140015e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.261989812094439, + "grad_norm": 0.2370065152645111, + "learning_rate": 8.229640312433937e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.262412546765023, + "grad_norm": 0.24309048056602478, + "learning_rate": 8.228371467942953e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.262835281435607, + "grad_norm": 0.21565215289592743, + "learning_rate": 8.227102266807233e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.263258016106191, + "grad_norm": 0.271422415971756, + "learning_rate": 8.225832709166992e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2636807507767749, + "grad_norm": 0.16019994020462036, + "learning_rate": 8.224562795162478e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.264103485447359, + "grad_norm": 0.20042872428894043, + "learning_rate": 8.223292524933981e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.264526220117943, + "grad_norm": 0.19952039420604706, + "learning_rate": 8.222021898621833e-05, + "loss": 0.3561, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2649489547885269, + "grad_norm": 0.24947479367256165, + "learning_rate": 8.220750916366397e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.265371689459111, + "grad_norm": 0.2121507078409195, + "learning_rate": 8.219479578308089e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.265794424129695, + "grad_norm": 0.1952965408563614, + "learning_rate": 8.21820788458735e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.266217158800279, + "grad_norm": 0.1861983686685562, + "learning_rate": 8.216935835344671e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2666398934708631, + "grad_norm": 0.21725419163703918, + "learning_rate": 8.215663430720572e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.267062628141447, + "grad_norm": 0.22187374532222748, + "learning_rate": 8.214390670855623e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.267485362812031, + "grad_norm": 0.2385069876909256, + "learning_rate": 8.213117555890428e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2679080974826151, + "grad_norm": 0.1737077236175537, + "learning_rate": 8.211844085965628e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.268330832153199, + "grad_norm": 0.2057843655347824, + "learning_rate": 8.210570261221908e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.268753566823783, + "grad_norm": 0.24823294579982758, + "learning_rate": 8.209296081799991e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.269176301494367, + "grad_norm": 0.23194678127765656, + "learning_rate": 8.208021547840634e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.269599036164951, + "grad_norm": 0.19376522302627563, + "learning_rate": 8.20674665948464e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.270021770835535, + "grad_norm": 0.23028507828712463, + "learning_rate": 8.20547141687285e-05, + "loss": 0.3726, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.270444505506119, + "grad_norm": 0.2556999623775482, + "learning_rate": 8.204195820146141e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2708672401767032, + "grad_norm": 0.26424282789230347, + "learning_rate": 8.20291986944543e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2712899748472872, + "grad_norm": 0.22345396876335144, + "learning_rate": 8.201643564911674e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.271712709517871, + "grad_norm": 0.18168853223323822, + "learning_rate": 8.20036690668587e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2721354441884551, + "grad_norm": 0.21601518988609314, + "learning_rate": 8.199089894909055e-05, + "loss": 0.3692, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2725581788590392, + "grad_norm": 0.19498197734355927, + "learning_rate": 8.197812529722298e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.272980913529623, + "grad_norm": 0.2419855147600174, + "learning_rate": 8.196534811266716e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2734036482002071, + "grad_norm": 0.25798293948173523, + "learning_rate": 8.195256739683459e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2738263828707912, + "grad_norm": 0.23863644897937775, + "learning_rate": 8.193978315113719e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.274249117541375, + "grad_norm": 0.21784166991710663, + "learning_rate": 8.192699537698727e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2746718522119591, + "grad_norm": 0.2247927337884903, + "learning_rate": 8.191420407579751e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2750945868825432, + "grad_norm": 0.2124001830816269, + "learning_rate": 8.1901409248981e-05, + "loss": 0.3707, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2755173215531272, + "grad_norm": 0.21077099442481995, + "learning_rate": 8.188861089795119e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2759400562237113, + "grad_norm": 0.19225168228149414, + "learning_rate": 8.187580902412197e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2763627908942952, + "grad_norm": 0.20789213478565216, + "learning_rate": 8.186300362890757e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2767855255648792, + "grad_norm": 0.24421198666095734, + "learning_rate": 8.185019471372264e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2772082602354633, + "grad_norm": 0.173319011926651, + "learning_rate": 8.183738227998219e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2776309949060471, + "grad_norm": 0.20499727129936218, + "learning_rate": 8.182456632910165e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2780537295766312, + "grad_norm": 0.19195227324962616, + "learning_rate": 8.18117468624968e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2784764642472153, + "grad_norm": 0.27645763754844666, + "learning_rate": 8.179892388158387e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2788991989177991, + "grad_norm": 0.27505558729171753, + "learning_rate": 8.17860973877794e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2793219335883832, + "grad_norm": 0.1974731832742691, + "learning_rate": 8.17732673825004e-05, + "loss": 0.3562, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2797446682589673, + "grad_norm": 0.1772654950618744, + "learning_rate": 8.17604338671642e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2801674029295513, + "grad_norm": 0.17847907543182373, + "learning_rate": 8.174759684318856e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2805901376001354, + "grad_norm": 0.19842563569545746, + "learning_rate": 8.173475631199159e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2810128722707192, + "grad_norm": 0.24004888534545898, + "learning_rate": 8.172191227499184e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2814356069413033, + "grad_norm": 0.19585742056369781, + "learning_rate": 8.170906473360818e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2818583416118874, + "grad_norm": 0.18900766968727112, + "learning_rate": 8.169621368925993e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2822810762824712, + "grad_norm": 0.19870607554912567, + "learning_rate": 8.168335914336676e-05, + "loss": 0.3703, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2827038109530553, + "grad_norm": 0.19296756386756897, + "learning_rate": 8.167050109734875e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2831265456236394, + "grad_norm": 0.2608616352081299, + "learning_rate": 8.165763955262635e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2835492802942232, + "grad_norm": 0.15783914923667908, + "learning_rate": 8.164477451062039e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2839720149648073, + "grad_norm": 0.2484789341688156, + "learning_rate": 8.163190597275209e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2843947496353914, + "grad_norm": 0.31635916233062744, + "learning_rate": 8.16190339404431e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2848174843059754, + "grad_norm": 0.29019051790237427, + "learning_rate": 8.160615841511538e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2852402189765595, + "grad_norm": 0.28475961089134216, + "learning_rate": 8.159327939819135e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2856629536471433, + "grad_norm": 0.23488181829452515, + "learning_rate": 8.158039689109376e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2860856883177274, + "grad_norm": 0.26329708099365234, + "learning_rate": 8.156751089524577e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2865084229883115, + "grad_norm": 0.18277758359909058, + "learning_rate": 8.155462141207091e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2869311576588953, + "grad_norm": 0.18710444867610931, + "learning_rate": 8.154172844299314e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2873538923294794, + "grad_norm": 0.21739055216312408, + "learning_rate": 8.152883198943675e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2877766270000635, + "grad_norm": 0.22413744032382965, + "learning_rate": 8.151593205282642e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2881993616706473, + "grad_norm": 0.23481786251068115, + "learning_rate": 8.150302863458726e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2886220963412314, + "grad_norm": 0.19237716495990753, + "learning_rate": 8.149012173614473e-05, + "loss": 0.3555, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2890448310118154, + "grad_norm": 0.18763558566570282, + "learning_rate": 8.147721135892469e-05, + "loss": 0.3778, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2894675656823995, + "grad_norm": 0.20669838786125183, + "learning_rate": 8.146429750435336e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2898903003529836, + "grad_norm": 0.214552640914917, + "learning_rate": 8.145138017385736e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2903130350235674, + "grad_norm": 0.17259790003299713, + "learning_rate": 8.14384593688637e-05, + "loss": 0.3705, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2907357696941515, + "grad_norm": 0.17571358382701874, + "learning_rate": 8.142553509079977e-05, + "loss": 0.3705, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2911585043647356, + "grad_norm": 0.19794446229934692, + "learning_rate": 8.141260734109332e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2915812390353194, + "grad_norm": 0.19787882268428802, + "learning_rate": 8.139967612117254e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2920039737059035, + "grad_norm": 0.2070438265800476, + "learning_rate": 8.138674143246594e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2924267083764875, + "grad_norm": 0.1917712241411209, + "learning_rate": 8.137380327640245e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2928494430470714, + "grad_norm": 0.1832377314567566, + "learning_rate": 8.136086165441139e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2932721777176555, + "grad_norm": 0.22470350563526154, + "learning_rate": 8.134791656792242e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2936949123882395, + "grad_norm": 0.2846519351005554, + "learning_rate": 8.133496801836564e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 26990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2941176470588236, + "grad_norm": 0.19530409574508667, + "learning_rate": 8.132201600717146e-05, + "loss": 0.3543, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2945403817294077, + "grad_norm": 0.15901432931423187, + "learning_rate": 8.130906053577076e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2949631163999915, + "grad_norm": 0.2121022492647171, + "learning_rate": 8.129610160559472e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2953858510705756, + "grad_norm": 0.21406616270542145, + "learning_rate": 8.128313921807496e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2958085857411596, + "grad_norm": 0.21262742578983307, + "learning_rate": 8.127017337464347e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2962313204117435, + "grad_norm": 0.23671092092990875, + "learning_rate": 8.125720407673259e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2966540550823276, + "grad_norm": 0.2303512990474701, + "learning_rate": 8.124423132577507e-05, + "loss": 0.3543, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2970767897529116, + "grad_norm": 0.19928567111492157, + "learning_rate": 8.123125512320407e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2974995244234955, + "grad_norm": 0.1990634799003601, + "learning_rate": 8.121827547045304e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2979222590940795, + "grad_norm": 0.19071029126644135, + "learning_rate": 8.12052923689559e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2983449937646636, + "grad_norm": 0.21691180765628815, + "learning_rate": 8.119230582014693e-05, + "loss": 0.3546, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2987677284352477, + "grad_norm": 0.25581151247024536, + "learning_rate": 8.117931582546076e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2991904631058317, + "grad_norm": 0.21773606538772583, + "learning_rate": 8.116632238633242e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2996131977764156, + "grad_norm": 0.2021005004644394, + "learning_rate": 8.115332550419733e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3000359324469997, + "grad_norm": 0.21950827538967133, + "learning_rate": 8.114032518049128e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3004586671175837, + "grad_norm": 0.20335645973682404, + "learning_rate": 8.112732141665047e-05, + "loss": 0.3704, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3008814017881676, + "grad_norm": 0.28324732184410095, + "learning_rate": 8.111431421411139e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3013041364587516, + "grad_norm": 0.17733284831047058, + "learning_rate": 8.110130357431104e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3017268711293357, + "grad_norm": 0.2419797033071518, + "learning_rate": 8.108828949868668e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3021496057999196, + "grad_norm": 0.264126181602478, + "learning_rate": 8.107527198867603e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3025723404705036, + "grad_norm": 0.20481590926647186, + "learning_rate": 8.106225104571714e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3029950751410877, + "grad_norm": 0.1719725877046585, + "learning_rate": 8.104922667124848e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3034178098116718, + "grad_norm": 0.1816101223230362, + "learning_rate": 8.103619886670887e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3038405444822558, + "grad_norm": 0.20095853507518768, + "learning_rate": 8.102316763353752e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3042632791528397, + "grad_norm": 0.1714000552892685, + "learning_rate": 8.101013297317402e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3046860138234238, + "grad_norm": 0.1638164222240448, + "learning_rate": 8.099709488705832e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3051087484940078, + "grad_norm": 0.21466492116451263, + "learning_rate": 8.098405337663076e-05, + "loss": 0.3697, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3055314831645917, + "grad_norm": 0.19750255346298218, + "learning_rate": 8.09710084433321e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3059542178351757, + "grad_norm": 0.20344507694244385, + "learning_rate": 8.095796008860342e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3063769525057598, + "grad_norm": 0.20606635510921478, + "learning_rate": 8.094490831388617e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3067996871763436, + "grad_norm": 0.20357412099838257, + "learning_rate": 8.093185312062223e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3072224218469277, + "grad_norm": 0.23092971742153168, + "learning_rate": 8.091879451025387e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3076451565175118, + "grad_norm": 0.20071591436862946, + "learning_rate": 8.090573248422362e-05, + "loss": 0.3696, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3080678911880959, + "grad_norm": 0.21365861594676971, + "learning_rate": 8.089266704397455e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.30849062585868, + "grad_norm": 0.2180056869983673, + "learning_rate": 8.087959819094996e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3089133605292638, + "grad_norm": 0.26418718695640564, + "learning_rate": 8.086652592659365e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3093360951998478, + "grad_norm": 0.266743540763855, + "learning_rate": 8.085345025234969e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.309758829870432, + "grad_norm": 0.15699784457683563, + "learning_rate": 8.084037116966262e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3101815645410158, + "grad_norm": 0.16410724818706512, + "learning_rate": 8.082728867997728e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3106042992115998, + "grad_norm": 0.18251633644104004, + "learning_rate": 8.081420278473893e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.311027033882184, + "grad_norm": 0.24508167803287506, + "learning_rate": 8.080111348539319e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3114497685527677, + "grad_norm": 0.21168547868728638, + "learning_rate": 8.078802078338607e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3118725032233518, + "grad_norm": 0.19519369304180145, + "learning_rate": 8.077492468016395e-05, + "loss": 0.3718, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3122952378939359, + "grad_norm": 0.24420584738254547, + "learning_rate": 8.076182517717356e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.31271797256452, + "grad_norm": 0.25406986474990845, + "learning_rate": 8.074872227586205e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.313140707235104, + "grad_norm": 0.17267563939094543, + "learning_rate": 8.073561597767692e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3135634419056879, + "grad_norm": 0.15127278864383698, + "learning_rate": 8.072250628406605e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.313986176576272, + "grad_norm": 0.25310033559799194, + "learning_rate": 8.070939319647771e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.314408911246856, + "grad_norm": 0.23532502353191376, + "learning_rate": 8.06962767163605e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3148316459174398, + "grad_norm": 0.250034362077713, + "learning_rate": 8.068315684516343e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.315254380588024, + "grad_norm": 0.18033726513385773, + "learning_rate": 8.067003358433589e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.315677115258608, + "grad_norm": 0.20874015986919403, + "learning_rate": 8.065690693532764e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3160998499291918, + "grad_norm": 0.1988876312971115, + "learning_rate": 8.064377689958879e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.316522584599776, + "grad_norm": 0.21011072397232056, + "learning_rate": 8.063064347856983e-05, + "loss": 0.3569, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.31694531927036, + "grad_norm": 0.22900529205799103, + "learning_rate": 8.061750667372167e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.317368053940944, + "grad_norm": 0.19839315116405487, + "learning_rate": 8.060436648649555e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.317790788611528, + "grad_norm": 0.2518600523471832, + "learning_rate": 8.059122291834307e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.318213523282112, + "grad_norm": 0.1841365545988083, + "learning_rate": 8.057807597071625e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.318636257952696, + "grad_norm": 0.23710696399211884, + "learning_rate": 8.056492564506744e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.31905899262328, + "grad_norm": 0.2469640076160431, + "learning_rate": 8.055177194284941e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.319481727293864, + "grad_norm": 0.18988077342510223, + "learning_rate": 8.053861486551527e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.319904461964448, + "grad_norm": 0.1580560952425003, + "learning_rate": 8.052545441451848e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.320327196635032, + "grad_norm": 0.23420017957687378, + "learning_rate": 8.051229059131294e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.320749931305616, + "grad_norm": 0.2972300946712494, + "learning_rate": 8.049912339735283e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3211726659762, + "grad_norm": 0.18743100762367249, + "learning_rate": 8.048595283409284e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.321595400646784, + "grad_norm": 0.20953017473220825, + "learning_rate": 8.047277890298788e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3220181353173681, + "grad_norm": 0.29030540585517883, + "learning_rate": 8.045960160549332e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3224408699879522, + "grad_norm": 0.2558341324329376, + "learning_rate": 8.044642094306489e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.322863604658536, + "grad_norm": 0.16698715090751648, + "learning_rate": 8.043323691715867e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.32328633932912, + "grad_norm": 0.20657779276371002, + "learning_rate": 8.042004952923117e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3237090739997042, + "grad_norm": 0.19838795065879822, + "learning_rate": 8.040685878073916e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.324131808670288, + "grad_norm": 0.3387894928455353, + "learning_rate": 8.039366467313989e-05, + "loss": 0.3692, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.324554543340872, + "grad_norm": 0.2313077449798584, + "learning_rate": 8.038046720789093e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3249772780114561, + "grad_norm": 0.19436082243919373, + "learning_rate": 8.036726638645025e-05, + "loss": 0.3548, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.32540001268204, + "grad_norm": 0.21932080388069153, + "learning_rate": 8.035406221027613e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.325822747352624, + "grad_norm": 0.18321119248867035, + "learning_rate": 8.034217558462796e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3262454820232081, + "grad_norm": 0.19988323748111725, + "learning_rate": 8.032896503847937e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3266682166937922, + "grad_norm": 0.3731772005558014, + "learning_rate": 8.031575114182857e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3270909513643763, + "grad_norm": 0.26582109928131104, + "learning_rate": 8.030253389613535e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3275136860349601, + "grad_norm": 0.2054569125175476, + "learning_rate": 8.028931330285987e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3279364207055442, + "grad_norm": 0.24078235030174255, + "learning_rate": 8.027608936346261e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3283591553761283, + "grad_norm": 0.251591295003891, + "learning_rate": 8.026286207940442e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.328781890046712, + "grad_norm": 0.2822265625, + "learning_rate": 8.024963145214656e-05, + "loss": 0.3678, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3292046247172962, + "grad_norm": 0.1929522603750229, + "learning_rate": 8.023639748315068e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3296273593878802, + "grad_norm": 0.16822074353694916, + "learning_rate": 8.022316017387873e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.330050094058464, + "grad_norm": 0.2093045860528946, + "learning_rate": 8.020991952579306e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3304728287290482, + "grad_norm": 0.2137058824300766, + "learning_rate": 8.019667554035642e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3308955633996322, + "grad_norm": 0.18998129665851593, + "learning_rate": 8.018342821903186e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3313182980702163, + "grad_norm": 0.18151231110095978, + "learning_rate": 8.017017756328287e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3317410327408004, + "grad_norm": 0.21586020290851593, + "learning_rate": 8.015692357457326e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3321637674113842, + "grad_norm": 0.18192631006240845, + "learning_rate": 8.014366625436724e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3325865020819683, + "grad_norm": 0.272786945104599, + "learning_rate": 8.013040560412934e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3330092367525523, + "grad_norm": 0.17233769595623016, + "learning_rate": 8.011714162532454e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3334319714231362, + "grad_norm": 0.19363436102867126, + "learning_rate": 8.010387431941811e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3338547060937203, + "grad_norm": 0.23661205172538757, + "learning_rate": 8.00906036878757e-05, + "loss": 0.3709, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3342774407643043, + "grad_norm": 0.17244744300842285, + "learning_rate": 8.007732973216338e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3347001754348882, + "grad_norm": 0.20320646464824677, + "learning_rate": 8.006405245374753e-05, + "loss": 0.3545, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3351229101054722, + "grad_norm": 0.3307655155658722, + "learning_rate": 8.005077185409493e-05, + "loss": 0.3723, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3355456447760563, + "grad_norm": 0.22762808203697205, + "learning_rate": 8.00374879346727e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3359683794466404, + "grad_norm": 0.21121402084827423, + "learning_rate": 8.002420069694832e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 27990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3363911141172244, + "grad_norm": 0.2603732943534851, + "learning_rate": 8.001091014238972e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3368138487878083, + "grad_norm": 0.15067167580127716, + "learning_rate": 7.99976162724651e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3372365834583924, + "grad_norm": 0.19679304957389832, + "learning_rate": 7.998431908864304e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3376593181289764, + "grad_norm": 0.18602626025676727, + "learning_rate": 7.997101859239253e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3380820527995603, + "grad_norm": 0.20290619134902954, + "learning_rate": 7.995771478518291e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3385047874701443, + "grad_norm": 0.19172218441963196, + "learning_rate": 7.994440766848388e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3389275221407284, + "grad_norm": 0.20704688131809235, + "learning_rate": 7.993109724376548e-05, + "loss": 0.3699, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3393502568113123, + "grad_norm": 0.20266832411289215, + "learning_rate": 7.991778351249814e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3397729914818963, + "grad_norm": 0.23728057742118835, + "learning_rate": 7.990446647615268e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3401957261524804, + "grad_norm": 0.24392709136009216, + "learning_rate": 7.989114613620024e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3406184608230645, + "grad_norm": 0.163332000374794, + "learning_rate": 7.987782249411238e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3410411954936485, + "grad_norm": 0.25969967246055603, + "learning_rate": 7.986449555136093e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3414639301642324, + "grad_norm": 0.22453095018863678, + "learning_rate": 7.985116530941819e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3418866648348164, + "grad_norm": 0.20513728260993958, + "learning_rate": 7.983783176975676e-05, + "loss": 0.3557, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3423093995054005, + "grad_norm": 0.19746603071689606, + "learning_rate": 7.982449493384964e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3427321341759844, + "grad_norm": 0.22161422669887543, + "learning_rate": 7.981115480317015e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3431548688465684, + "grad_norm": 0.20868511497974396, + "learning_rate": 7.979781137919202e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3435776035171525, + "grad_norm": 0.27339380979537964, + "learning_rate": 7.978446466338933e-05, + "loss": 0.3692, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3440003381877363, + "grad_norm": 0.24508897960186005, + "learning_rate": 7.97711146572365e-05, + "loss": 0.3539, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3444230728583204, + "grad_norm": 0.1997271329164505, + "learning_rate": 7.975776136220836e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3448458075289045, + "grad_norm": 0.1973196417093277, + "learning_rate": 7.974440477978005e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3452685421994885, + "grad_norm": 0.18367092311382294, + "learning_rate": 7.97310449114271e-05, + "loss": 0.3732, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3456912768700726, + "grad_norm": 0.2898399531841278, + "learning_rate": 7.971768175862542e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3461140115406565, + "grad_norm": 0.19060367345809937, + "learning_rate": 7.970431532285124e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3465367462112405, + "grad_norm": 0.25416967272758484, + "learning_rate": 7.96909456055812e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3469594808818246, + "grad_norm": 0.19673097133636475, + "learning_rate": 7.967757260829227e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3473822155524084, + "grad_norm": 0.26175758242607117, + "learning_rate": 7.966419633246178e-05, + "loss": 0.3692, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3478049502229925, + "grad_norm": 0.2219184935092926, + "learning_rate": 7.965081677956747e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3482276848935766, + "grad_norm": 0.23312118649482727, + "learning_rate": 7.963743395108737e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3486504195641604, + "grad_norm": 0.1679096221923828, + "learning_rate": 7.962404784849992e-05, + "loss": 0.3558, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3490731542347445, + "grad_norm": 0.19534240663051605, + "learning_rate": 7.96106584732839e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3494958889053286, + "grad_norm": 0.15966998040676117, + "learning_rate": 7.959726582691849e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3499186235759126, + "grad_norm": 0.17952045798301697, + "learning_rate": 7.95838699108832e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3503413582464967, + "grad_norm": 0.19895581901073456, + "learning_rate": 7.957047072665786e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3507640929170805, + "grad_norm": 0.1704859435558319, + "learning_rate": 7.955706827572275e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3511868275876646, + "grad_norm": 0.24025267362594604, + "learning_rate": 7.954366255955843e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3516095622582487, + "grad_norm": 0.18557773530483246, + "learning_rate": 7.95302535796459e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3520322969288325, + "grad_norm": 0.19996264576911926, + "learning_rate": 7.951684133746644e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3524550315994166, + "grad_norm": 0.18211138248443604, + "learning_rate": 7.950342583450175e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3528777662700007, + "grad_norm": 0.24291609227657318, + "learning_rate": 7.949000707223386e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3533005009405845, + "grad_norm": 0.22695855796337128, + "learning_rate": 7.947658505214515e-05, + "loss": 0.3559, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3537232356111686, + "grad_norm": 0.1957608461380005, + "learning_rate": 7.946315977571842e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3541459702817527, + "grad_norm": 0.2181302011013031, + "learning_rate": 7.944973124443675e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3545687049523367, + "grad_norm": 0.18276619911193848, + "learning_rate": 7.943629945978362e-05, + "loss": 0.3685, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3549914396229208, + "grad_norm": 0.26430463790893555, + "learning_rate": 7.942286442324289e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3554141742935046, + "grad_norm": 0.23338690400123596, + "learning_rate": 7.940942613629873e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3558369089640887, + "grad_norm": 0.2753579616546631, + "learning_rate": 7.939598460043572e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3562596436346728, + "grad_norm": 0.18431232869625092, + "learning_rate": 7.938253981713876e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3566823783052566, + "grad_norm": 0.19355931878089905, + "learning_rate": 7.936909178789311e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3571051129758407, + "grad_norm": 0.1831127554178238, + "learning_rate": 7.935564051418442e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3575278476464248, + "grad_norm": 0.28343653678894043, + "learning_rate": 7.934218599749868e-05, + "loss": 0.3702, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3579505823170086, + "grad_norm": 0.2074442207813263, + "learning_rate": 7.932872823932221e-05, + "loss": 0.3691, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3583733169875927, + "grad_norm": 0.20205427706241608, + "learning_rate": 7.931526724114175e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3587960516581767, + "grad_norm": 0.21337012946605682, + "learning_rate": 7.930180300444434e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3592187863287608, + "grad_norm": 0.22516457736492157, + "learning_rate": 7.928833553071743e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3596415209993449, + "grad_norm": 0.21587832272052765, + "learning_rate": 7.927486482144877e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3600642556699287, + "grad_norm": 0.18749304115772247, + "learning_rate": 7.926139087812652e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3604869903405128, + "grad_norm": 0.20850792527198792, + "learning_rate": 7.924791370223914e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3609097250110969, + "grad_norm": 0.20551766455173492, + "learning_rate": 7.923443329527551e-05, + "loss": 0.3537, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3613324596816807, + "grad_norm": 0.25432565808296204, + "learning_rate": 7.922094965872484e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3617551943522648, + "grad_norm": 0.23533572256565094, + "learning_rate": 7.920746279407666e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3621779290228488, + "grad_norm": 0.18458257615566254, + "learning_rate": 7.919397270282094e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3626006636934327, + "grad_norm": 0.18388888239860535, + "learning_rate": 7.918047938644792e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3630233983640168, + "grad_norm": 0.1853262186050415, + "learning_rate": 7.916698284644825e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3634461330346008, + "grad_norm": 0.23127034306526184, + "learning_rate": 7.915348308431293e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.363868867705185, + "grad_norm": 0.24390360713005066, + "learning_rate": 7.913998010153329e-05, + "loss": 0.3706, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.364291602375769, + "grad_norm": 0.23197577893733978, + "learning_rate": 7.912647389960101e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3647143370463528, + "grad_norm": 0.20416608452796936, + "learning_rate": 7.91129644800082e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3651370717169369, + "grad_norm": 0.2101297676563263, + "learning_rate": 7.909945184424724e-05, + "loss": 0.3739, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.365559806387521, + "grad_norm": 0.16164645552635193, + "learning_rate": 7.90859359938109e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3659825410581048, + "grad_norm": 0.20080533623695374, + "learning_rate": 7.90724169301923e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3664052757286889, + "grad_norm": 0.2205256223678589, + "learning_rate": 7.905889465488494e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.366828010399273, + "grad_norm": 0.24398967623710632, + "learning_rate": 7.904536916938263e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3672507450698568, + "grad_norm": 0.1690058708190918, + "learning_rate": 7.903184047517958e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3676734797404408, + "grad_norm": 0.1593443602323532, + "learning_rate": 7.90183085737703e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.368096214411025, + "grad_norm": 0.24659129977226257, + "learning_rate": 7.900477346664971e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.368518949081609, + "grad_norm": 0.22106043994426727, + "learning_rate": 7.899123515531307e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.368941683752193, + "grad_norm": 0.256473183631897, + "learning_rate": 7.897769364125595e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.369364418422777, + "grad_norm": 0.3066842257976532, + "learning_rate": 7.896414892597436e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.369787153093361, + "grad_norm": 0.19633391499519348, + "learning_rate": 7.895060101096456e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.370209887763945, + "grad_norm": 0.21222540736198425, + "learning_rate": 7.893704989772323e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3706326224345289, + "grad_norm": 0.22787593305110931, + "learning_rate": 7.892349558774741e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.371055357105113, + "grad_norm": 0.27162063121795654, + "learning_rate": 7.890993808253446e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.371478091775697, + "grad_norm": 0.23464682698249817, + "learning_rate": 7.889637738358209e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3719008264462809, + "grad_norm": 0.24556376039981842, + "learning_rate": 7.88828134923884e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.372323561116865, + "grad_norm": 0.20327149331569672, + "learning_rate": 7.88692464104518e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.372746295787449, + "grad_norm": 0.3021935522556305, + "learning_rate": 7.88556761392711e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.373169030458033, + "grad_norm": 0.1591435968875885, + "learning_rate": 7.884210268034542e-05, + "loss": 0.375, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3735917651286171, + "grad_norm": 0.17254115641117096, + "learning_rate": 7.882852603517424e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.374014499799201, + "grad_norm": 0.2138783484697342, + "learning_rate": 7.88149462052574e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.374437234469785, + "grad_norm": 0.20689761638641357, + "learning_rate": 7.880136319209511e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3748599691403691, + "grad_norm": 0.26122698187828064, + "learning_rate": 7.87877769971879e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.375282703810953, + "grad_norm": 0.23061875998973846, + "learning_rate": 7.877418762203666e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.375705438481537, + "grad_norm": 0.23787528276443481, + "learning_rate": 7.876059506814264e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.376128173152121, + "grad_norm": 0.17802546918392181, + "learning_rate": 7.874699933700744e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.376550907822705, + "grad_norm": 0.24508297443389893, + "learning_rate": 7.873340043013301e-05, + "loss": 0.3678, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.376973642493289, + "grad_norm": 0.17560893297195435, + "learning_rate": 7.871979834902166e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.377396377163873, + "grad_norm": 0.17323295772075653, + "learning_rate": 7.8706193095176e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3778191118344572, + "grad_norm": 0.20687459409236908, + "learning_rate": 7.869258467009906e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3782418465050412, + "grad_norm": 0.20332783460617065, + "learning_rate": 7.867897307529419e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 28990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.378664581175625, + "grad_norm": 0.23213732242584229, + "learning_rate": 7.866535831226508e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3790873158462091, + "grad_norm": 0.16363544762134552, + "learning_rate": 7.86517403825158e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3795100505167932, + "grad_norm": 0.1660105437040329, + "learning_rate": 7.863811928755072e-05, + "loss": 0.3562, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.379932785187377, + "grad_norm": 0.17196868360042572, + "learning_rate": 7.86244950288746e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3803555198579611, + "grad_norm": 0.19646190106868744, + "learning_rate": 7.861086760799256e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3807782545285452, + "grad_norm": 0.14825217425823212, + "learning_rate": 7.859723702641003e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.381200989199129, + "grad_norm": 0.20806407928466797, + "learning_rate": 7.858360328563281e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.381623723869713, + "grad_norm": 0.17969100177288055, + "learning_rate": 7.856996638716705e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3820464585402972, + "grad_norm": 0.237700417637825, + "learning_rate": 7.855632633251925e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3824691932108812, + "grad_norm": 0.25129783153533936, + "learning_rate": 7.854268312319624e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3828919278814653, + "grad_norm": 0.21711593866348267, + "learning_rate": 7.852903676070522e-05, + "loss": 0.3693, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3833146625520492, + "grad_norm": 0.18635688722133636, + "learning_rate": 7.851538724655374e-05, + "loss": 0.3547, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3837373972226332, + "grad_norm": 0.2313377410173416, + "learning_rate": 7.85017345822497e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3841601318932173, + "grad_norm": 0.19742201268672943, + "learning_rate": 7.84880787693013e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3845828665638011, + "grad_norm": 0.18515843152999878, + "learning_rate": 7.847441980921714e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3850056012343852, + "grad_norm": 0.2236216962337494, + "learning_rate": 7.846075770350617e-05, + "loss": 0.3691, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3854283359049693, + "grad_norm": 0.2254372090101242, + "learning_rate": 7.844709245367766e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3858510705755531, + "grad_norm": 0.2728784680366516, + "learning_rate": 7.843342406124124e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3862738052461372, + "grad_norm": 0.20646469295024872, + "learning_rate": 7.841975252770688e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3866965399167213, + "grad_norm": 0.23784244060516357, + "learning_rate": 7.840607785458489e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3871192745873053, + "grad_norm": 0.23578086495399475, + "learning_rate": 7.839240004338597e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3875420092578894, + "grad_norm": 0.171865776181221, + "learning_rate": 7.837871909562112e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3879647439284732, + "grad_norm": 0.282066285610199, + "learning_rate": 7.836503501280169e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3883874785990573, + "grad_norm": 0.2043439894914627, + "learning_rate": 7.83513477964394e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3888102132696414, + "grad_norm": 0.18756072223186493, + "learning_rate": 7.83376574480463e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3892329479402252, + "grad_norm": 0.22593015432357788, + "learning_rate": 7.83239639691348e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3896556826108093, + "grad_norm": 0.20132707059383392, + "learning_rate": 7.831026736121764e-05, + "loss": 0.357, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3900784172813934, + "grad_norm": 0.18027891218662262, + "learning_rate": 7.82965676258079e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3905011519519772, + "grad_norm": 0.1513669341802597, + "learning_rate": 7.828286476441904e-05, + "loss": 0.3569, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3909238866225613, + "grad_norm": 0.1906861960887909, + "learning_rate": 7.826915877856485e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3913466212931453, + "grad_norm": 0.2585827708244324, + "learning_rate": 7.825544966975941e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3917693559637294, + "grad_norm": 0.1694851666688919, + "learning_rate": 7.824173743951723e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3921920906343135, + "grad_norm": 0.21186590194702148, + "learning_rate": 7.822802208935313e-05, + "loss": 0.3678, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3926148253048973, + "grad_norm": 0.19789519906044006, + "learning_rate": 7.821430362078226e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3930375599754814, + "grad_norm": 0.23896317183971405, + "learning_rate": 7.820058203532014e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3934602946460655, + "grad_norm": 0.2190677970647812, + "learning_rate": 7.818685733448261e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3938830293166493, + "grad_norm": 0.16046682000160217, + "learning_rate": 7.817312951978586e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3943057639872334, + "grad_norm": 0.19653698801994324, + "learning_rate": 7.815939859274644e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3947284986578175, + "grad_norm": 0.2681182324886322, + "learning_rate": 7.814566455488122e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3951512333284013, + "grad_norm": 0.19579924643039703, + "learning_rate": 7.813192740770745e-05, + "loss": 0.3747, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3955739679989854, + "grad_norm": 0.19434428215026855, + "learning_rate": 7.81181871527427e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3959967026695694, + "grad_norm": 0.19353392720222473, + "learning_rate": 7.810444379150486e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3964194373401535, + "grad_norm": 0.21136663854122162, + "learning_rate": 7.809069732551219e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3968421720107376, + "grad_norm": 0.23584406077861786, + "learning_rate": 7.80769477562833e-05, + "loss": 0.3716, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3972649066813214, + "grad_norm": 0.22343848645687103, + "learning_rate": 7.806319508533715e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3976876413519055, + "grad_norm": 0.24635356664657593, + "learning_rate": 7.804943931419299e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3981103760224896, + "grad_norm": 0.19727718830108643, + "learning_rate": 7.803568044437047e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3985331106930734, + "grad_norm": 0.16262061893939972, + "learning_rate": 7.802191847738954e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3989558453636575, + "grad_norm": 0.17884349822998047, + "learning_rate": 7.800815341477054e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3993785800342415, + "grad_norm": 0.1674683839082718, + "learning_rate": 7.79943852580341e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3998013147048254, + "grad_norm": 0.17501431703567505, + "learning_rate": 7.798061400870125e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4002240493754095, + "grad_norm": 0.22994464635849, + "learning_rate": 7.79668396682933e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4006467840459935, + "grad_norm": 0.18630284070968628, + "learning_rate": 7.795306223833192e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4010695187165776, + "grad_norm": 0.19601839780807495, + "learning_rate": 7.793928172033917e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4014922533871617, + "grad_norm": 0.22088420391082764, + "learning_rate": 7.792549811583737e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4019149880577455, + "grad_norm": 0.14869378507137299, + "learning_rate": 7.791171142634923e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4023377227283296, + "grad_norm": 0.17342473566532135, + "learning_rate": 7.789792165339782e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4027604573989136, + "grad_norm": 0.19453400373458862, + "learning_rate": 7.78841287985065e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4031831920694975, + "grad_norm": 0.14992018043994904, + "learning_rate": 7.787033286319901e-05, + "loss": 0.3685, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4036059267400816, + "grad_norm": 0.279554545879364, + "learning_rate": 7.78565338489994e-05, + "loss": 0.3554, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4040286614106656, + "grad_norm": 0.21534433960914612, + "learning_rate": 7.784273175743209e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4044513960812495, + "grad_norm": 0.21248118579387665, + "learning_rate": 7.78289265900218e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4048741307518335, + "grad_norm": 0.16997362673282623, + "learning_rate": 7.781511834829365e-05, + "loss": 0.3693, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4052968654224176, + "grad_norm": 0.2086527943611145, + "learning_rate": 7.780130703377304e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4057196000930017, + "grad_norm": 0.17588962614536285, + "learning_rate": 7.778749264798574e-05, + "loss": 0.3692, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4061423347635857, + "grad_norm": 0.2115054875612259, + "learning_rate": 7.777367519245785e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4065650694341696, + "grad_norm": 0.22448213398456573, + "learning_rate": 7.775985466871583e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4069878041047537, + "grad_norm": 0.21201643347740173, + "learning_rate": 7.774603107828644e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4074105387753377, + "grad_norm": 0.1791757494211197, + "learning_rate": 7.77322044226968e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4078332734459216, + "grad_norm": 0.25340786576271057, + "learning_rate": 7.771837470347437e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4082560081165056, + "grad_norm": 0.2203386425971985, + "learning_rate": 7.770454192214695e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4086787427870897, + "grad_norm": 0.22320182621479034, + "learning_rate": 7.76907060802427e-05, + "loss": 0.368, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4091014774576736, + "grad_norm": 0.19679224491119385, + "learning_rate": 7.767686717929005e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4095242121282576, + "grad_norm": 0.2111138105392456, + "learning_rate": 7.766302522081786e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4099469467988417, + "grad_norm": 0.21536587178707123, + "learning_rate": 7.764918020635524e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4103696814694258, + "grad_norm": 0.23337692022323608, + "learning_rate": 7.763533213743168e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4107924161400098, + "grad_norm": 0.1745183914899826, + "learning_rate": 7.762148101557703e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4112151508105937, + "grad_norm": 0.22885435819625854, + "learning_rate": 7.760762684232141e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4116378854811777, + "grad_norm": 0.2730715572834015, + "learning_rate": 7.759376961919536e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4120606201517618, + "grad_norm": 0.2413124293088913, + "learning_rate": 7.757990934772968e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4124833548223457, + "grad_norm": 0.2157873660326004, + "learning_rate": 7.756604602945558e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4129060894929297, + "grad_norm": 0.2037578672170639, + "learning_rate": 7.755217966590456e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4133288241635138, + "grad_norm": 0.1988641768693924, + "learning_rate": 7.753831025860843e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4137515588340976, + "grad_norm": 0.1964307278394699, + "learning_rate": 7.75244378090994e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4141742935046817, + "grad_norm": 0.2390003502368927, + "learning_rate": 7.751056231891e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4145970281752658, + "grad_norm": 0.21033495664596558, + "learning_rate": 7.749668378957306e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4150197628458498, + "grad_norm": 0.17937099933624268, + "learning_rate": 7.748280222262176e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.415442497516434, + "grad_norm": 0.17518097162246704, + "learning_rate": 7.746891761958966e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4158652321870178, + "grad_norm": 0.23284995555877686, + "learning_rate": 7.74550299820106e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4162879668576018, + "grad_norm": 0.2563411295413971, + "learning_rate": 7.744113931141878e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.416710701528186, + "grad_norm": 0.2522108256816864, + "learning_rate": 7.742724560934873e-05, + "loss": 0.3546, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4171334361987697, + "grad_norm": 0.23667341470718384, + "learning_rate": 7.741334887733532e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4175561708693538, + "grad_norm": 0.22606025636196136, + "learning_rate": 7.739944911691371e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4179789055399379, + "grad_norm": 0.1330532729625702, + "learning_rate": 7.73855463296195e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4184016402105217, + "grad_norm": 0.22050514817237854, + "learning_rate": 7.737164051698852e-05, + "loss": 0.3678, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4188243748811058, + "grad_norm": 0.2523859441280365, + "learning_rate": 7.735773168055696e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4192471095516899, + "grad_norm": 0.18477709591388702, + "learning_rate": 7.73438198218614e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.419669844222274, + "grad_norm": 0.3423294723033905, + "learning_rate": 7.732990494243868e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.420092578892858, + "grad_norm": 0.26918306946754456, + "learning_rate": 7.731598704382603e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4205153135634419, + "grad_norm": 0.22941067814826965, + "learning_rate": 7.730206612756097e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 29990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.420938048234026, + "grad_norm": 0.20577207207679749, + "learning_rate": 7.728814219518134e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.42136078290461, + "grad_norm": 0.32937633991241455, + "learning_rate": 7.727421524822542e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4217835175751938, + "grad_norm": 0.14825187623500824, + "learning_rate": 7.726028528823168e-05, + "loss": 0.3538, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.422206252245778, + "grad_norm": 0.22049453854560852, + "learning_rate": 7.724635231673904e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.422628986916362, + "grad_norm": 0.1629963368177414, + "learning_rate": 7.723241633528666e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4230517215869458, + "grad_norm": 0.15978781878948212, + "learning_rate": 7.721847734541411e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4234744562575299, + "grad_norm": 0.27739301323890686, + "learning_rate": 7.720453534866125e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.423897190928114, + "grad_norm": 0.20434710383415222, + "learning_rate": 7.719059034656827e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.424319925598698, + "grad_norm": 0.2330351322889328, + "learning_rate": 7.71766423406757e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.424742660269282, + "grad_norm": 0.22841927409172058, + "learning_rate": 7.716269133252443e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.425165394939866, + "grad_norm": 0.2221193015575409, + "learning_rate": 7.714873732365564e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.42558812961045, + "grad_norm": 0.22350122034549713, + "learning_rate": 7.713478031561086e-05, + "loss": 0.3534, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.426010864281034, + "grad_norm": 0.2181900292634964, + "learning_rate": 7.712082030993193e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.426433598951618, + "grad_norm": 0.22581075131893158, + "learning_rate": 7.710685730816106e-05, + "loss": 0.357, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.426856333622202, + "grad_norm": 0.1843654215335846, + "learning_rate": 7.709289131184078e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.427279068292786, + "grad_norm": 0.1843205988407135, + "learning_rate": 7.707892232251392e-05, + "loss": 0.3723, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.42770180296337, + "grad_norm": 0.19299361109733582, + "learning_rate": 7.706495034172367e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.428124537633954, + "grad_norm": 0.16545860469341278, + "learning_rate": 7.705097537101356e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.428547272304538, + "grad_norm": 0.19927778840065002, + "learning_rate": 7.703699741192741e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.428970006975122, + "grad_norm": 0.15674899518489838, + "learning_rate": 7.70230164660094e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4293927416457062, + "grad_norm": 0.19455501437187195, + "learning_rate": 7.700903253480403e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.42981547631629, + "grad_norm": 0.18482661247253418, + "learning_rate": 7.699504561985615e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.430238210986874, + "grad_norm": 0.2206011265516281, + "learning_rate": 7.698105572271091e-05, + "loss": 0.3553, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4306609456574582, + "grad_norm": 0.20824815332889557, + "learning_rate": 7.69670628449138e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.431083680328042, + "grad_norm": 0.1553601622581482, + "learning_rate": 7.695306698801063e-05, + "loss": 0.3514, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.431506414998626, + "grad_norm": 0.2166815549135208, + "learning_rate": 7.693906815354759e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4319291496692101, + "grad_norm": 0.1904243528842926, + "learning_rate": 7.692506634307113e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.432351884339794, + "grad_norm": 0.17571015655994415, + "learning_rate": 7.691106155812804e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.432774619010378, + "grad_norm": 0.20419831573963165, + "learning_rate": 7.68970538002655e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4331973536809621, + "grad_norm": 0.1775064319372177, + "learning_rate": 7.688304307103097e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4336200883515462, + "grad_norm": 0.20968618988990784, + "learning_rate": 7.686902937197222e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4340428230221303, + "grad_norm": 0.207704558968544, + "learning_rate": 7.685501270463737e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.434465557692714, + "grad_norm": 0.2264275848865509, + "learning_rate": 7.684099307057489e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4348882923632982, + "grad_norm": 0.18419675529003143, + "learning_rate": 7.682697047133356e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4353110270338822, + "grad_norm": 0.2494375854730606, + "learning_rate": 7.681294490846246e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.435733761704466, + "grad_norm": 0.18049532175064087, + "learning_rate": 7.679891638351103e-05, + "loss": 0.3697, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4361564963750502, + "grad_norm": 0.17345155775547028, + "learning_rate": 7.678488489802904e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4365792310456342, + "grad_norm": 0.19342203438282013, + "learning_rate": 7.677085045356658e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.437001965716218, + "grad_norm": 0.16173860430717468, + "learning_rate": 7.675681305167406e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4374247003868021, + "grad_norm": 0.21475622057914734, + "learning_rate": 7.67427726939022e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4378474350573862, + "grad_norm": 0.20559050142765045, + "learning_rate": 7.67287293818021e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4382701697279703, + "grad_norm": 0.1816290318965912, + "learning_rate": 7.671468311692511e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4386929043985544, + "grad_norm": 0.253269225358963, + "learning_rate": 7.670063390082298e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4391156390691382, + "grad_norm": 0.17080864310264587, + "learning_rate": 7.668658173504776e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4395383737397223, + "grad_norm": 0.1676914244890213, + "learning_rate": 7.667252662115182e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4399611084103063, + "grad_norm": 0.228523850440979, + "learning_rate": 7.665846856068783e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4403838430808902, + "grad_norm": 0.19752050936222076, + "learning_rate": 7.664440755520883e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4408065777514742, + "grad_norm": 0.17288115620613098, + "learning_rate": 7.66303436062682e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4412293124220583, + "grad_norm": 0.16266858577728271, + "learning_rate": 7.661627671541955e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4416520470926424, + "grad_norm": 0.191221222281456, + "learning_rate": 7.660220688421692e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4420747817632262, + "grad_norm": 0.2605246305465698, + "learning_rate": 7.658813411421461e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4424975164338103, + "grad_norm": 0.16843388974666595, + "learning_rate": 7.65740584069673e-05, + "loss": 0.3555, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4429202511043944, + "grad_norm": 0.152084618806839, + "learning_rate": 7.655997976402993e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4433429857749784, + "grad_norm": 0.16797667741775513, + "learning_rate": 7.654589818695781e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4437657204455623, + "grad_norm": 0.18050473928451538, + "learning_rate": 7.653181367730655e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4441884551161464, + "grad_norm": 0.23744720220565796, + "learning_rate": 7.651772623663211e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4446111897867304, + "grad_norm": 0.18671488761901855, + "learning_rate": 7.650363586649076e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4450339244573143, + "grad_norm": 0.20862692594528198, + "learning_rate": 7.648954256843908e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4454566591278983, + "grad_norm": 0.30062222480773926, + "learning_rate": 7.647544634403397e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4458793937984824, + "grad_norm": 0.21540136635303497, + "learning_rate": 7.646134719483268e-05, + "loss": 0.3713, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4463021284690665, + "grad_norm": 0.16046619415283203, + "learning_rate": 7.644724512239281e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4467248631396503, + "grad_norm": 0.21560850739479065, + "learning_rate": 7.643314012827219e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4471475978102344, + "grad_norm": 0.16657082736492157, + "learning_rate": 7.641903221402907e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4475703324808185, + "grad_norm": 0.16407112777233124, + "learning_rate": 7.640492138122192e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4479930671514025, + "grad_norm": 0.20602001249790192, + "learning_rate": 7.639080763140964e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4484158018219864, + "grad_norm": 0.19832570850849152, + "learning_rate": 7.637669096615142e-05, + "loss": 0.3678, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4488385364925704, + "grad_norm": 0.20780956745147705, + "learning_rate": 7.636257138700673e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4492612711631545, + "grad_norm": 0.23636479675769806, + "learning_rate": 7.634844889553538e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4496840058337384, + "grad_norm": 0.21195141971111298, + "learning_rate": 7.633432349329752e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4501067405043224, + "grad_norm": 0.16157890856266022, + "learning_rate": 7.63201951818536e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4505294751749065, + "grad_norm": 0.1468796283006668, + "learning_rate": 7.630606396276446e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4509522098454906, + "grad_norm": 0.2016214281320572, + "learning_rate": 7.629192983759111e-05, + "loss": 0.372, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4513749445160744, + "grad_norm": 0.17574995756149292, + "learning_rate": 7.627779280789505e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4517976791866585, + "grad_norm": 0.1619468629360199, + "learning_rate": 7.6263652875238e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4522204138572425, + "grad_norm": 0.2500952184200287, + "learning_rate": 7.624951004118204e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4526431485278266, + "grad_norm": 0.185195192694664, + "learning_rate": 7.623536430728953e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4530658831984105, + "grad_norm": 0.20819789171218872, + "learning_rate": 7.622121567512319e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4534886178689945, + "grad_norm": 0.1774941235780716, + "learning_rate": 7.620706414624606e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4539113525395786, + "grad_norm": 0.21852357685565948, + "learning_rate": 7.61929097222215e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4543340872101624, + "grad_norm": 0.21410465240478516, + "learning_rate": 7.617875240461313e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4547568218807465, + "grad_norm": 0.1835201233625412, + "learning_rate": 7.616459219498497e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4551795565513306, + "grad_norm": 0.24628236889839172, + "learning_rate": 7.615042909490133e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4556022912219146, + "grad_norm": 0.26366400718688965, + "learning_rate": 7.613626310592683e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4560250258924985, + "grad_norm": 0.2681550085544586, + "learning_rate": 7.612209422962642e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4564477605630826, + "grad_norm": 0.21404293179512024, + "learning_rate": 7.610792246756536e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4568704952336666, + "grad_norm": 0.22863951325416565, + "learning_rate": 7.609374782130922e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4572932299042507, + "grad_norm": 0.1806846261024475, + "learning_rate": 7.607957029242392e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4577159645748345, + "grad_norm": 0.16601665318012238, + "learning_rate": 7.606538988247567e-05, + "loss": 0.3701, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4581386992454186, + "grad_norm": 0.2870630621910095, + "learning_rate": 7.605120659303102e-05, + "loss": 0.3684, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4585614339160027, + "grad_norm": 0.25941646099090576, + "learning_rate": 7.603702042565683e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4589841685865865, + "grad_norm": 0.16925330460071564, + "learning_rate": 7.602283138192024e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4594069032571706, + "grad_norm": 0.1679328829050064, + "learning_rate": 7.600863946338878e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4598296379277547, + "grad_norm": 0.203200101852417, + "learning_rate": 7.599444467163026e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4602523725983387, + "grad_norm": 0.1942160278558731, + "learning_rate": 7.598024700821278e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4606751072689226, + "grad_norm": 0.2164372354745865, + "learning_rate": 7.59660464747048e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4610978419395066, + "grad_norm": 0.20410583913326263, + "learning_rate": 7.595184307267509e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4615205766100907, + "grad_norm": 0.24415376782417297, + "learning_rate": 7.59376368036927e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4619433112806748, + "grad_norm": 0.15761063992977142, + "learning_rate": 7.592342766932706e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4623660459512586, + "grad_norm": 0.18317128717899323, + "learning_rate": 7.590921567114787e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4627887806218427, + "grad_norm": 0.21491584181785583, + "learning_rate": 7.589500081072514e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 30990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4632115152924268, + "grad_norm": 0.18819867074489594, + "learning_rate": 7.588078308962923e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4636342499630106, + "grad_norm": 0.19679038226604462, + "learning_rate": 7.586656250943082e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4640569846335947, + "grad_norm": 0.30268099904060364, + "learning_rate": 7.585233907170086e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4644797193041788, + "grad_norm": 0.19358479976654053, + "learning_rate": 7.583811277801063e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4649024539747628, + "grad_norm": 0.21536274254322052, + "learning_rate": 7.582388362993175e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4653251886453467, + "grad_norm": 0.16241328418254852, + "learning_rate": 7.580965162903618e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4657479233159307, + "grad_norm": 0.20277421176433563, + "learning_rate": 7.579541677689612e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4661706579865148, + "grad_norm": 0.15304812788963318, + "learning_rate": 7.57811790750841e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4665933926570989, + "grad_norm": 0.1915677934885025, + "learning_rate": 7.576693852517304e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4670161273276827, + "grad_norm": 0.17668455839157104, + "learning_rate": 7.575269512873611e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4674388619982668, + "grad_norm": 0.23692652583122253, + "learning_rate": 7.573844888734678e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4678615966688509, + "grad_norm": 0.18863920867443085, + "learning_rate": 7.572419980257888e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4682843313394347, + "grad_norm": 0.1737750917673111, + "learning_rate": 7.570994787600653e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4687070660100188, + "grad_norm": 0.23706889152526855, + "learning_rate": 7.569569310920417e-05, + "loss": 0.3541, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4691298006806028, + "grad_norm": 0.18970713019371033, + "learning_rate": 7.568143550374657e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.469552535351187, + "grad_norm": 0.21988266706466675, + "learning_rate": 7.566860123308626e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4699752700217708, + "grad_norm": 0.2097463309764862, + "learning_rate": 7.565433823852324e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4703980046923548, + "grad_norm": 0.18575264513492584, + "learning_rate": 7.56400724098735e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.470820739362939, + "grad_norm": 0.2539443373680115, + "learning_rate": 7.562580374871305e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.471243474033523, + "grad_norm": 0.2529982328414917, + "learning_rate": 7.561153225661815e-05, + "loss": 0.3699, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4716662087041068, + "grad_norm": 0.17621812224388123, + "learning_rate": 7.559725793516543e-05, + "loss": 0.3696, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4720889433746909, + "grad_norm": 0.23832736909389496, + "learning_rate": 7.558298078593178e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.472511678045275, + "grad_norm": 0.15401698648929596, + "learning_rate": 7.556870081049443e-05, + "loss": 0.3558, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4729344127158588, + "grad_norm": 0.14070028066635132, + "learning_rate": 7.555441801043095e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4733571473864429, + "grad_norm": 0.21409358084201813, + "learning_rate": 7.554013238731912e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.473779882057027, + "grad_norm": 0.19398342072963715, + "learning_rate": 7.552584394273717e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.474202616727611, + "grad_norm": 0.1493297815322876, + "learning_rate": 7.551155267826354e-05, + "loss": 0.3718, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.474625351398195, + "grad_norm": 0.2146725058555603, + "learning_rate": 7.549725859547701e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.475048086068779, + "grad_norm": 0.19899950921535492, + "learning_rate": 7.54829616959567e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.475470820739363, + "grad_norm": 0.16121728718280792, + "learning_rate": 7.5468661981282e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.475893555409947, + "grad_norm": 0.23363876342773438, + "learning_rate": 7.545435945303263e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.476316290080531, + "grad_norm": 0.1663818210363388, + "learning_rate": 7.544005411278863e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.476739024751115, + "grad_norm": 0.24716056883335114, + "learning_rate": 7.542574596213033e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.477161759421699, + "grad_norm": 0.1474209427833557, + "learning_rate": 7.541143500263838e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4775844940922829, + "grad_norm": 0.19117887318134308, + "learning_rate": 7.539712123589374e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.478007228762867, + "grad_norm": 0.13608041405677795, + "learning_rate": 7.538280466347769e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.478429963433451, + "grad_norm": 0.1880825012922287, + "learning_rate": 7.53684852869718e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.478852698104035, + "grad_norm": 0.20057804882526398, + "learning_rate": 7.535416310795796e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4792754327746191, + "grad_norm": 0.19263394176959991, + "learning_rate": 7.53398381280184e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.479698167445203, + "grad_norm": 0.24183042347431183, + "learning_rate": 7.532551034873559e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.480120902115787, + "grad_norm": 0.18192963302135468, + "learning_rate": 7.531117977169235e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4805436367863711, + "grad_norm": 0.2029470056295395, + "learning_rate": 7.529684639847184e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.480966371456955, + "grad_norm": 0.24068504571914673, + "learning_rate": 7.528251023065748e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.481389106127539, + "grad_norm": 0.21440866589546204, + "learning_rate": 7.5268171269833e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4818118407981231, + "grad_norm": 0.25701916217803955, + "learning_rate": 7.525382951758246e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.482234575468707, + "grad_norm": 0.19601795077323914, + "learning_rate": 7.523948497549024e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.482657310139291, + "grad_norm": 0.1784592568874359, + "learning_rate": 7.522513764514103e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.483080044809875, + "grad_norm": 0.1893230527639389, + "learning_rate": 7.521078752811974e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4835027794804592, + "grad_norm": 0.2571808397769928, + "learning_rate": 7.519643462601172e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4839255141510432, + "grad_norm": 0.16800974309444427, + "learning_rate": 7.518207894040254e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.484348248821627, + "grad_norm": 0.19111192226409912, + "learning_rate": 7.516772047287807e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4847709834922111, + "grad_norm": 0.18536439538002014, + "learning_rate": 7.51533592250246e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4851937181627952, + "grad_norm": 0.22771020233631134, + "learning_rate": 7.513899519842857e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.485616452833379, + "grad_norm": 0.16630251705646515, + "learning_rate": 7.512462839467684e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4860391875039631, + "grad_norm": 0.21320411562919617, + "learning_rate": 7.511025881535652e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4864619221745472, + "grad_norm": 0.22648151218891144, + "learning_rate": 7.509588646205506e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.486884656845131, + "grad_norm": 0.15437094867229462, + "learning_rate": 7.50815113363602e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4873073915157151, + "grad_norm": 0.21894961595535278, + "learning_rate": 7.506713343985998e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4877301261862992, + "grad_norm": 0.15433047711849213, + "learning_rate": 7.505275277414277e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4881528608568833, + "grad_norm": 0.3630816340446472, + "learning_rate": 7.503836934079723e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4885755955274673, + "grad_norm": 0.2179049849510193, + "learning_rate": 7.502398314141232e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4889983301980512, + "grad_norm": 0.20470957458019257, + "learning_rate": 7.500959417757731e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4894210648686352, + "grad_norm": 0.18025265634059906, + "learning_rate": 7.499520245088179e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4898437995392193, + "grad_norm": 0.18347813189029694, + "learning_rate": 7.498080796291564e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4902665342098032, + "grad_norm": 0.18360403180122375, + "learning_rate": 7.496641071526905e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4906892688803872, + "grad_norm": 0.20299312472343445, + "learning_rate": 7.495201070953249e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4911120035509713, + "grad_norm": 0.19589443504810333, + "learning_rate": 7.493760794729678e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4915347382215551, + "grad_norm": 0.1794820874929428, + "learning_rate": 7.492320243015303e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4919574728921392, + "grad_norm": 0.19373729825019836, + "learning_rate": 7.49087941596926e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4923802075627233, + "grad_norm": 0.2078065574169159, + "learning_rate": 7.489438313750727e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4928029422333073, + "grad_norm": 0.3012414872646332, + "learning_rate": 7.487996936518902e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4932256769038914, + "grad_norm": 0.17960713803768158, + "learning_rate": 7.486555284433015e-05, + "loss": 0.3691, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4936484115744753, + "grad_norm": 0.16878628730773926, + "learning_rate": 7.485113357652332e-05, + "loss": 0.3721, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4940711462450593, + "grad_norm": 0.22218649089336395, + "learning_rate": 7.483671156336141e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4944938809156434, + "grad_norm": 0.17198976874351501, + "learning_rate": 7.48222868064377e-05, + "loss": 0.3694, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4949166155862272, + "grad_norm": 0.1891041398048401, + "learning_rate": 7.480785930734569e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4953393502568113, + "grad_norm": 0.16241738200187683, + "learning_rate": 7.479342906767923e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4957620849273954, + "grad_norm": 0.1926957368850708, + "learning_rate": 7.477899608903243e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4961848195979792, + "grad_norm": 0.15654367208480835, + "learning_rate": 7.476456037299977e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4966075542685633, + "grad_norm": 0.3324658274650574, + "learning_rate": 7.475012192117597e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4970302889391474, + "grad_norm": 0.16921575367450714, + "learning_rate": 7.473568073515607e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4974530236097314, + "grad_norm": 0.1664334237575531, + "learning_rate": 7.472123681653544e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4978757582803155, + "grad_norm": 0.19822469353675842, + "learning_rate": 7.47067901669097e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4982984929508993, + "grad_norm": 0.2187281847000122, + "learning_rate": 7.469234078787482e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4987212276214834, + "grad_norm": 0.16143709421157837, + "learning_rate": 7.467788868102705e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4991439622920675, + "grad_norm": 0.21538633108139038, + "learning_rate": 7.466343384796294e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4995666969626513, + "grad_norm": 0.24456867575645447, + "learning_rate": 7.464897629027934e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4999894316332354, + "grad_norm": 0.1985274851322174, + "learning_rate": 7.463451600957343e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5004121663038195, + "grad_norm": 0.23867695033550262, + "learning_rate": 7.462005300744263e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5008349009744033, + "grad_norm": 0.20372022688388824, + "learning_rate": 7.460558728548472e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5012576356449874, + "grad_norm": 0.17719681560993195, + "learning_rate": 7.459111884529774e-05, + "loss": 0.3696, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5016803703155714, + "grad_norm": 0.17960041761398315, + "learning_rate": 7.457664768848008e-05, + "loss": 0.3562, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5021031049861553, + "grad_norm": 0.1962599754333496, + "learning_rate": 7.456217381663038e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5025258396567396, + "grad_norm": 0.24226485192775726, + "learning_rate": 7.454769723134758e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5029485743273234, + "grad_norm": 0.14996597170829773, + "learning_rate": 7.453321793423096e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5033713089979075, + "grad_norm": 0.19218507409095764, + "learning_rate": 7.451873592688008e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5037940436684916, + "grad_norm": 0.18644843995571136, + "learning_rate": 7.450425121089478e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5042167783390754, + "grad_norm": 0.26627394556999207, + "learning_rate": 7.448976378787522e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5046395130096595, + "grad_norm": 0.13485369086265564, + "learning_rate": 7.447527365942186e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5050622476802435, + "grad_norm": 0.19062304496765137, + "learning_rate": 7.446078082713547e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 31990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5054849823508274, + "grad_norm": 0.14586971700191498, + "learning_rate": 7.444628529261708e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5059077170214115, + "grad_norm": 0.19786973297595978, + "learning_rate": 7.443178705746803e-05, + "loss": 0.3698, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5063304516919955, + "grad_norm": 0.18429391086101532, + "learning_rate": 7.441728612329e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5067531863625794, + "grad_norm": 0.2242199033498764, + "learning_rate": 7.44027824916849e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5071759210331637, + "grad_norm": 0.17537198960781097, + "learning_rate": 7.438827616425503e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5075986557037475, + "grad_norm": 0.2166065275669098, + "learning_rate": 7.437376714260289e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5080213903743316, + "grad_norm": 0.22207431495189667, + "learning_rate": 7.435925542833134e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5084441250449157, + "grad_norm": 0.22723853588104248, + "learning_rate": 7.43447410230435e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5088668597154995, + "grad_norm": 0.2210739552974701, + "learning_rate": 7.433022392834282e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5092895943860836, + "grad_norm": 0.2014153152704239, + "learning_rate": 7.431570414583303e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5097123290566676, + "grad_norm": 0.19834783673286438, + "learning_rate": 7.430118167711817e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5101350637272515, + "grad_norm": 0.2611103951931, + "learning_rate": 7.428665652380254e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5105577983978355, + "grad_norm": 0.19772465527057648, + "learning_rate": 7.427212868749078e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5109805330684196, + "grad_norm": 0.20450164377689362, + "learning_rate": 7.425759816978784e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5114032677390035, + "grad_norm": 0.349706768989563, + "learning_rate": 7.424306497229888e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5118260024095878, + "grad_norm": 0.20054075121879578, + "learning_rate": 7.422852909662943e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5122487370801716, + "grad_norm": 0.2312685251235962, + "learning_rate": 7.421399054438531e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5126714717507557, + "grad_norm": 0.22752921283245087, + "learning_rate": 7.419944931717263e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5130942064213397, + "grad_norm": 0.19452627003192902, + "learning_rate": 7.418490541659777e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5135169410919236, + "grad_norm": 0.22596073150634766, + "learning_rate": 7.417035884426743e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5139396757625077, + "grad_norm": 0.1941961944103241, + "learning_rate": 7.415580960178859e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5143624104330917, + "grad_norm": 0.18328188359737396, + "learning_rate": 7.414125769076857e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5147851451036756, + "grad_norm": 0.18266382813453674, + "learning_rate": 7.412670311281489e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5152078797742596, + "grad_norm": 0.15746387839317322, + "learning_rate": 7.411214586953547e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5156306144448437, + "grad_norm": 0.20061811804771423, + "learning_rate": 7.409758596253848e-05, + "loss": 0.3694, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5160533491154276, + "grad_norm": 0.2183850258588791, + "learning_rate": 7.408302339343235e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5164760837860118, + "grad_norm": 0.2303389608860016, + "learning_rate": 7.406845816382586e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5168988184565957, + "grad_norm": 0.20662254095077515, + "learning_rate": 7.405389027532806e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5173215531271798, + "grad_norm": 0.22640742361545563, + "learning_rate": 7.403931972954828e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5177442877977638, + "grad_norm": 0.12803633511066437, + "learning_rate": 7.402474652809617e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5181670224683477, + "grad_norm": 0.22320155799388885, + "learning_rate": 7.401017067258165e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5185897571389317, + "grad_norm": 0.2388225495815277, + "learning_rate": 7.399559216461496e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5190124918095158, + "grad_norm": 0.19158059358596802, + "learning_rate": 7.398101100580661e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5194352264800997, + "grad_norm": 0.18737000226974487, + "learning_rate": 7.396642719776741e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5198579611506837, + "grad_norm": 0.18045923113822937, + "learning_rate": 7.395184074210844e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5202806958212678, + "grad_norm": 0.15797477960586548, + "learning_rate": 7.393725164044114e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5207034304918516, + "grad_norm": 0.20981431007385254, + "learning_rate": 7.392265989437718e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.521126165162436, + "grad_norm": 0.17948924005031586, + "learning_rate": 7.390806550552852e-05, + "loss": 0.3554, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5215488998330198, + "grad_norm": 0.23598085343837738, + "learning_rate": 7.389346847550744e-05, + "loss": 0.3723, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5219716345036038, + "grad_norm": 0.1756444275379181, + "learning_rate": 7.387886880592653e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.522394369174188, + "grad_norm": 0.18476559221744537, + "learning_rate": 7.386426649839862e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5228171038447718, + "grad_norm": 0.17826512455940247, + "learning_rate": 7.384966155453685e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5232398385153558, + "grad_norm": 0.2527117431163788, + "learning_rate": 7.383505397595467e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.52366257318594, + "grad_norm": 0.1783933937549591, + "learning_rate": 7.382044376426582e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5240853078565237, + "grad_norm": 0.15764668583869934, + "learning_rate": 7.38058309210843e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5245080425271078, + "grad_norm": 0.1849871575832367, + "learning_rate": 7.379121544802444e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5249307771976919, + "grad_norm": 0.20413857698440552, + "learning_rate": 7.377659734670081e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5253535118682757, + "grad_norm": 0.18773502111434937, + "learning_rate": 7.376197661872833e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.52577624653886, + "grad_norm": 0.219281867146492, + "learning_rate": 7.374735326572216e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5261989812094439, + "grad_norm": 0.23381124436855316, + "learning_rate": 7.37327272892978e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.526621715880028, + "grad_norm": 0.22131942212581635, + "learning_rate": 7.371809869107098e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.527044450550612, + "grad_norm": 0.24901697039604187, + "learning_rate": 7.370346747265777e-05, + "loss": 0.3699, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5274671852211958, + "grad_norm": 0.1808401197195053, + "learning_rate": 7.36888336356745e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.52788991989178, + "grad_norm": 0.19119302928447723, + "learning_rate": 7.367419718173783e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.528312654562364, + "grad_norm": 0.2054755985736847, + "learning_rate": 7.365955811246463e-05, + "loss": 0.3723, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5287353892329478, + "grad_norm": 0.17232078313827515, + "learning_rate": 7.364491642947213e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.529158123903532, + "grad_norm": 0.21439070999622345, + "learning_rate": 7.363027213437783e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.529580858574116, + "grad_norm": 0.15245220065116882, + "learning_rate": 7.361562522879953e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5300035932446998, + "grad_norm": 0.20732304453849792, + "learning_rate": 7.360097571435527e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.530426327915284, + "grad_norm": 0.12429032474756241, + "learning_rate": 7.358632359266342e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.530849062585868, + "grad_norm": 0.2039223611354828, + "learning_rate": 7.357166886534263e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.531271797256452, + "grad_norm": 0.17898042500019073, + "learning_rate": 7.355701153401186e-05, + "loss": 0.3723, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.531694531927036, + "grad_norm": 0.1966220587491989, + "learning_rate": 7.354235160029033e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.53211726659762, + "grad_norm": 0.1742803007364273, + "learning_rate": 7.352768906579753e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.532540001268204, + "grad_norm": 0.21695494651794434, + "learning_rate": 7.351302393215328e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.532962735938788, + "grad_norm": 0.2208176851272583, + "learning_rate": 7.349835620097764e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.533385470609372, + "grad_norm": 0.16871176660060883, + "learning_rate": 7.348368587389102e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.533808205279956, + "grad_norm": 0.18242816627025604, + "learning_rate": 7.346901295251406e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.53423093995054, + "grad_norm": 0.16987977921962738, + "learning_rate": 7.345433743846772e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.534653674621124, + "grad_norm": 0.18100418150424957, + "learning_rate": 7.34396593333732e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5350764092917082, + "grad_norm": 0.18298430740833282, + "learning_rate": 7.342497863885207e-05, + "loss": 0.3729, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.535499143962292, + "grad_norm": 0.1713106781244278, + "learning_rate": 7.341029535652609e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.535921878632876, + "grad_norm": 0.1670207679271698, + "learning_rate": 7.339560948801739e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5363446133034602, + "grad_norm": 0.1700994372367859, + "learning_rate": 7.338092103494832e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.536767347974044, + "grad_norm": 0.17960430681705475, + "learning_rate": 7.336622999894155e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.537190082644628, + "grad_norm": 0.2754652500152588, + "learning_rate": 7.335153638162005e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5376128173152122, + "grad_norm": 0.1765468269586563, + "learning_rate": 7.333684018460702e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.538035551985796, + "grad_norm": 0.26907286047935486, + "learning_rate": 7.332214140952599e-05, + "loss": 0.3698, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.53845828665638, + "grad_norm": 0.19197218120098114, + "learning_rate": 7.330744005800076e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5388810213269641, + "grad_norm": 0.163532555103302, + "learning_rate": 7.329273613165546e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.539303755997548, + "grad_norm": 0.16497185826301575, + "learning_rate": 7.32780296321144e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5397264906681323, + "grad_norm": 0.1945858746767044, + "learning_rate": 7.326332056100228e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5401492253387161, + "grad_norm": 0.2660249173641205, + "learning_rate": 7.324860891994402e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5405719600093002, + "grad_norm": 0.2080010026693344, + "learning_rate": 7.323389471056485e-05, + "loss": 0.3685, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5409946946798843, + "grad_norm": 0.17795951664447784, + "learning_rate": 7.321917793449028e-05, + "loss": 0.3712, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.541417429350468, + "grad_norm": 0.25293946266174316, + "learning_rate": 7.32044585933461e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5418401640210522, + "grad_norm": 0.18814989924430847, + "learning_rate": 7.31897366887584e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5422628986916362, + "grad_norm": 0.18503190577030182, + "learning_rate": 7.31750122223535e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.54268563336222, + "grad_norm": 0.24299843609333038, + "learning_rate": 7.316028519575808e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5431083680328042, + "grad_norm": 0.1740874946117401, + "learning_rate": 7.314555561059907e-05, + "loss": 0.3717, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5435311027033882, + "grad_norm": 0.18764325976371765, + "learning_rate": 7.313082346850363e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.543953837373972, + "grad_norm": 0.22377650439739227, + "learning_rate": 7.311608877109929e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5443765720445564, + "grad_norm": 0.1598145216703415, + "learning_rate": 7.310135152001381e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5447993067151402, + "grad_norm": 0.18938124179840088, + "learning_rate": 7.308661171687523e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5452220413857243, + "grad_norm": 0.16498412191867828, + "learning_rate": 7.307186936331192e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5456447760563083, + "grad_norm": 0.26461002230644226, + "learning_rate": 7.305712446095248e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5460675107268922, + "grad_norm": 0.21443846821784973, + "learning_rate": 7.304237701142578e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5464902453974763, + "grad_norm": 0.1718457192182541, + "learning_rate": 7.302762701636105e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5469129800680603, + "grad_norm": 0.21873816847801208, + "learning_rate": 7.301287447738772e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5473357147386442, + "grad_norm": 0.215025395154953, + "learning_rate": 7.299811939613555e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 32990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5477584494092282, + "grad_norm": 0.1648964136838913, + "learning_rate": 7.298336177423455e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5481811840798123, + "grad_norm": 0.2099006175994873, + "learning_rate": 7.296860161331503e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5486039187503962, + "grad_norm": 0.21918955445289612, + "learning_rate": 7.295383891500756e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5490266534209804, + "grad_norm": 0.14247925579547882, + "learning_rate": 7.293907368094305e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5494493880915643, + "grad_norm": 0.2119143009185791, + "learning_rate": 7.292430591275262e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5498721227621484, + "grad_norm": 0.19585275650024414, + "learning_rate": 7.290953561206765e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5502948574327324, + "grad_norm": 0.1876424103975296, + "learning_rate": 7.289476278051991e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5507175921033163, + "grad_norm": 0.1799250692129135, + "learning_rate": 7.287998741974135e-05, + "loss": 0.3692, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5511403267739003, + "grad_norm": 0.19527234137058258, + "learning_rate": 7.286520953136427e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5515630614444844, + "grad_norm": 0.21870630979537964, + "learning_rate": 7.285042911702115e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5519857961150683, + "grad_norm": 0.19501234591007233, + "learning_rate": 7.283564617834487e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5524085307856526, + "grad_norm": 0.18305452167987823, + "learning_rate": 7.282086071696852e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5528312654562364, + "grad_norm": 0.16867713630199432, + "learning_rate": 7.280607273452547e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5532540001268202, + "grad_norm": 0.21759441494941711, + "learning_rate": 7.279128223264938e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5536767347974045, + "grad_norm": 0.19502225518226624, + "learning_rate": 7.277648921297415e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5540994694679884, + "grad_norm": 0.18322263658046722, + "learning_rate": 7.276169367713407e-05, + "loss": 0.3563, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5545222041385724, + "grad_norm": 0.18763470649719238, + "learning_rate": 7.274689562676357e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5549449388091565, + "grad_norm": 0.2064439207315445, + "learning_rate": 7.273209506349747e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5553676734797404, + "grad_norm": 0.18815167248249054, + "learning_rate": 7.271729198897076e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5557904081503244, + "grad_norm": 0.17899002134799957, + "learning_rate": 7.270248640481884e-05, + "loss": 0.3546, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5562131428209085, + "grad_norm": 0.1779845952987671, + "learning_rate": 7.268767831267724e-05, + "loss": 0.373, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5566358774914923, + "grad_norm": 0.23820893466472626, + "learning_rate": 7.267286771418188e-05, + "loss": 0.3531, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5570586121620766, + "grad_norm": 0.15443189442157745, + "learning_rate": 7.265805461096891e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5574813468326605, + "grad_norm": 0.2216169685125351, + "learning_rate": 7.264323900467475e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5579040815032443, + "grad_norm": 0.18652620911598206, + "learning_rate": 7.262842089693613e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5583268161738286, + "grad_norm": 0.17452168464660645, + "learning_rate": 7.261360028939003e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5587495508444125, + "grad_norm": 0.19348464906215668, + "learning_rate": 7.259877718367371e-05, + "loss": 0.3728, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5591722855149965, + "grad_norm": 0.16705317795276642, + "learning_rate": 7.258395158142471e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5595950201855806, + "grad_norm": 0.30435892939567566, + "learning_rate": 7.256912348428083e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5600177548561645, + "grad_norm": 0.1916864514350891, + "learning_rate": 7.255429289388018e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5604404895267485, + "grad_norm": 0.2302228808403015, + "learning_rate": 7.253945981186113e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5608632241973326, + "grad_norm": 0.25609326362609863, + "learning_rate": 7.252462423986229e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5612859588679164, + "grad_norm": 0.19403406977653503, + "learning_rate": 7.25097861795226e-05, + "loss": 0.3732, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5617086935385007, + "grad_norm": 0.2158900499343872, + "learning_rate": 7.249494563248124e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5621314282090846, + "grad_norm": 0.22036144137382507, + "learning_rate": 7.248010260037771e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5625541628796684, + "grad_norm": 0.18157225847244263, + "learning_rate": 7.246525708485169e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5629768975502527, + "grad_norm": 0.23653404414653778, + "learning_rate": 7.245040908754323e-05, + "loss": 0.3702, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5633996322208366, + "grad_norm": 0.2336994707584381, + "learning_rate": 7.243555861009261e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5638223668914206, + "grad_norm": 0.15973235666751862, + "learning_rate": 7.242070565414041e-05, + "loss": 0.3698, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5642451015620047, + "grad_norm": 0.20790451765060425, + "learning_rate": 7.240585022132745e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5646678362325885, + "grad_norm": 0.16782882809638977, + "learning_rate": 7.239099231329482e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5650905709031726, + "grad_norm": 0.20285890996456146, + "learning_rate": 7.237613193168393e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5655133055737567, + "grad_norm": 0.1927211731672287, + "learning_rate": 7.236126907813643e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5659360402443405, + "grad_norm": 0.20702853798866272, + "learning_rate": 7.234640375429427e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5663587749149248, + "grad_norm": 0.22035498917102814, + "learning_rate": 7.233153596179962e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5667815095855087, + "grad_norm": 0.1554679423570633, + "learning_rate": 7.231666570229497e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5672042442560925, + "grad_norm": 0.19757075607776642, + "learning_rate": 7.230179297742305e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5676269789266768, + "grad_norm": 0.17796772718429565, + "learning_rate": 7.228691778882693e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5680497135972606, + "grad_norm": 0.2134508341550827, + "learning_rate": 7.227204013814985e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5684724482678447, + "grad_norm": 0.2640018165111542, + "learning_rate": 7.225716002703537e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5688951829384288, + "grad_norm": 0.1512029469013214, + "learning_rate": 7.224227745712736e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5693179176090126, + "grad_norm": 0.21859902143478394, + "learning_rate": 7.222739243006992e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5697406522795967, + "grad_norm": 0.16683223843574524, + "learning_rate": 7.221250494750744e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5701633869501808, + "grad_norm": 0.16731446981430054, + "learning_rate": 7.219761501108453e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5705861216207646, + "grad_norm": 0.2568252980709076, + "learning_rate": 7.218272262244614e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.571008856291349, + "grad_norm": 0.2118065059185028, + "learning_rate": 7.216782778323748e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5714315909619327, + "grad_norm": 0.17505408823490143, + "learning_rate": 7.215293049510396e-05, + "loss": 0.3704, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5718543256325166, + "grad_norm": 0.16573794186115265, + "learning_rate": 7.213803075969136e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5722770603031009, + "grad_norm": 0.1906414031982422, + "learning_rate": 7.212312857864567e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5726997949736847, + "grad_norm": 0.16610924899578094, + "learning_rate": 7.210822395361318e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5731225296442688, + "grad_norm": 0.17031294107437134, + "learning_rate": 7.20933168862404e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5735452643148529, + "grad_norm": 0.19361288845539093, + "learning_rate": 7.207840737817416e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5739679989854367, + "grad_norm": 0.18420878052711487, + "learning_rate": 7.206349543106155e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5743907336560208, + "grad_norm": 0.17228145897388458, + "learning_rate": 7.204858104654992e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5748134683266048, + "grad_norm": 0.14849552512168884, + "learning_rate": 7.203366422628688e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5752362029971887, + "grad_norm": 0.206895112991333, + "learning_rate": 7.201874497192033e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.575658937667773, + "grad_norm": 0.17153550684452057, + "learning_rate": 7.200382328509844e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5760816723383568, + "grad_norm": 0.23540662229061127, + "learning_rate": 7.198889916746964e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5765044070089407, + "grad_norm": 0.24772396683692932, + "learning_rate": 7.19739726206826e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.576927141679525, + "grad_norm": 0.17315314710140228, + "learning_rate": 7.195904364638632e-05, + "loss": 0.3546, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5773498763501088, + "grad_norm": 0.19077616930007935, + "learning_rate": 7.194411224623001e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5777726110206929, + "grad_norm": 0.208912193775177, + "learning_rate": 7.192917842186318e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.578195345691277, + "grad_norm": 0.21014149487018585, + "learning_rate": 7.191424217493559e-05, + "loss": 0.3551, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5786180803618608, + "grad_norm": 0.18509837985038757, + "learning_rate": 7.18993035070973e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5790408150324449, + "grad_norm": 0.22562891244888306, + "learning_rate": 7.18843624199986e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.579463549703029, + "grad_norm": 0.2183673232793808, + "learning_rate": 7.186941891529007e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5798862843736128, + "grad_norm": 0.18160386383533478, + "learning_rate": 7.185447299462252e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.580309019044197, + "grad_norm": 0.21409958600997925, + "learning_rate": 7.183952465964711e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.580731753714781, + "grad_norm": 0.15673388540744781, + "learning_rate": 7.182457391201516e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5811544883853648, + "grad_norm": 0.1815895140171051, + "learning_rate": 7.180962075337835e-05, + "loss": 0.3565, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.581577223055949, + "grad_norm": 0.2209566831588745, + "learning_rate": 7.179466518538857e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.581999957726533, + "grad_norm": 0.17307136952877045, + "learning_rate": 7.177970720969797e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.582422692397117, + "grad_norm": 0.17843548953533173, + "learning_rate": 7.176474682795901e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.582845427067701, + "grad_norm": 0.29512134194374084, + "learning_rate": 7.174978404182439e-05, + "loss": 0.3678, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5832681617382849, + "grad_norm": 0.21695491671562195, + "learning_rate": 7.17348188529471e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.583690896408869, + "grad_norm": 0.18223144114017487, + "learning_rate": 7.171985126298035e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.584113631079453, + "grad_norm": 0.177684023976326, + "learning_rate": 7.170488127357764e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5845363657500369, + "grad_norm": 0.1449350118637085, + "learning_rate": 7.168990888639273e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5849591004206212, + "grad_norm": 0.16460181772708893, + "learning_rate": 7.167493410307967e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.585381835091205, + "grad_norm": 0.1752050817012787, + "learning_rate": 7.165995692529273e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5858045697617889, + "grad_norm": 0.2151636928319931, + "learning_rate": 7.16449773546865e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5862273044323731, + "grad_norm": 0.1430511772632599, + "learning_rate": 7.16299953929158e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.586650039102957, + "grad_norm": 0.26903221011161804, + "learning_rate": 7.161501104163568e-05, + "loss": 0.3685, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.587072773773541, + "grad_norm": 0.20074094831943512, + "learning_rate": 7.160002430250152e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5874955084441251, + "grad_norm": 0.21460150182247162, + "learning_rate": 7.158503517716893e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.587918243114709, + "grad_norm": 0.22785025835037231, + "learning_rate": 7.15700436672938e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.588340977785293, + "grad_norm": 0.19441264867782593, + "learning_rate": 7.155504977453226e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.588763712455877, + "grad_norm": 0.18623626232147217, + "learning_rate": 7.154005350054073e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.589186447126461, + "grad_norm": 0.17011842131614685, + "learning_rate": 7.152505484697587e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5896091817970452, + "grad_norm": 0.15168695151805878, + "learning_rate": 7.15100538154946e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 33990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.590031916467629, + "grad_norm": 0.2688586711883545, + "learning_rate": 7.149505040775411e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.590454651138213, + "grad_norm": 0.21967321634292603, + "learning_rate": 7.148004462541187e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5908773858087972, + "grad_norm": 0.19828078150749207, + "learning_rate": 7.146503647012563e-05, + "loss": 0.3713, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.591300120479381, + "grad_norm": 0.202500119805336, + "learning_rate": 7.145002594355332e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5917228551499651, + "grad_norm": 0.20198634266853333, + "learning_rate": 7.143501304735322e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5921455898205492, + "grad_norm": 0.15568551421165466, + "learning_rate": 7.141999778318381e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.592568324491133, + "grad_norm": 0.2201562523841858, + "learning_rate": 7.140498015270387e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5929910591617171, + "grad_norm": 0.1571197658777237, + "learning_rate": 7.138996015757242e-05, + "loss": 0.3685, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5934137938323012, + "grad_norm": 0.17698118090629578, + "learning_rate": 7.137493779944873e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.593836528502885, + "grad_norm": 0.1739145815372467, + "learning_rate": 7.135991307999241e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5942592631734693, + "grad_norm": 0.2672419250011444, + "learning_rate": 7.134488600086323e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5946819978440532, + "grad_norm": 0.15684941411018372, + "learning_rate": 7.132985656372126e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.595104732514637, + "grad_norm": 0.17250977456569672, + "learning_rate": 7.131482477022683e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5955274671852213, + "grad_norm": 0.29114165902137756, + "learning_rate": 7.129979062204056e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5959502018558052, + "grad_norm": 0.16262973845005035, + "learning_rate": 7.128475412082326e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5963729365263892, + "grad_norm": 0.15703348815441132, + "learning_rate": 7.126971526823609e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5967956711969733, + "grad_norm": 0.15414559841156006, + "learning_rate": 7.125467406594039e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5972184058675571, + "grad_norm": 0.16699405014514923, + "learning_rate": 7.123963051559781e-05, + "loss": 0.3721, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5976411405381412, + "grad_norm": 0.20646478235721588, + "learning_rate": 7.122458461887022e-05, + "loss": 0.3698, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5980638752087253, + "grad_norm": 0.25901326537132263, + "learning_rate": 7.120953637741978e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5984866098793091, + "grad_norm": 0.22677722573280334, + "learning_rate": 7.119448579290893e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5989093445498934, + "grad_norm": 0.17954885959625244, + "learning_rate": 7.11794328670003e-05, + "loss": 0.3537, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5993320792204773, + "grad_norm": 0.21866394579410553, + "learning_rate": 7.116437760135682e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5997548138910611, + "grad_norm": 0.2358858287334442, + "learning_rate": 7.11493199976417e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6001775485616454, + "grad_norm": 0.20439879596233368, + "learning_rate": 7.113426005751838e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6006002832322292, + "grad_norm": 0.16550639271736145, + "learning_rate": 7.111919778265052e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6010230179028133, + "grad_norm": 0.199580579996109, + "learning_rate": 7.110413317470213e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6014457525733974, + "grad_norm": 0.2111940085887909, + "learning_rate": 7.108906623533742e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6018684872439812, + "grad_norm": 0.18701626360416412, + "learning_rate": 7.107399696622083e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6022912219145653, + "grad_norm": 0.2581392526626587, + "learning_rate": 7.105892536901713e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6027139565851494, + "grad_norm": 0.14059896767139435, + "learning_rate": 7.104385144539129e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6031366912557332, + "grad_norm": 0.15355058014392853, + "learning_rate": 7.102877519700857e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6035594259263175, + "grad_norm": 0.20448969304561615, + "learning_rate": 7.101369662553446e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6039821605969014, + "grad_norm": 0.1877664476633072, + "learning_rate": 7.099861573263473e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6044048952674852, + "grad_norm": 0.16742299497127533, + "learning_rate": 7.09835325199754e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6048276299380695, + "grad_norm": 0.17716653645038605, + "learning_rate": 7.096844698922274e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6052503646086533, + "grad_norm": 0.245112344622612, + "learning_rate": 7.095335914204326e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6056730992792374, + "grad_norm": 0.2702179253101349, + "learning_rate": 7.093826898010378e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6060958339498215, + "grad_norm": 0.19954687356948853, + "learning_rate": 7.092317650507133e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6065185686204053, + "grad_norm": 0.1767444610595703, + "learning_rate": 7.090808171861318e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6069413032909894, + "grad_norm": 0.15158729255199432, + "learning_rate": 7.08929846223969e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6073640379615735, + "grad_norm": 0.17309272289276123, + "learning_rate": 7.08778852180903e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6077867726321573, + "grad_norm": 0.19586840271949768, + "learning_rate": 7.086278350736146e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6082095073027416, + "grad_norm": 0.21113067865371704, + "learning_rate": 7.084767949187865e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6086322419733254, + "grad_norm": 0.13379709422588348, + "learning_rate": 7.083257317331048e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6090549766439093, + "grad_norm": 0.1572800576686859, + "learning_rate": 7.081746455332576e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6094777113144936, + "grad_norm": 0.1635204553604126, + "learning_rate": 7.080235363359358e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6099004459850774, + "grad_norm": 0.20199626684188843, + "learning_rate": 7.078724041578325e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6103231806556615, + "grad_norm": 0.19281406700611115, + "learning_rate": 7.077212490156437e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6107459153262456, + "grad_norm": 0.33494919538497925, + "learning_rate": 7.07570070926068e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6111686499968294, + "grad_norm": 0.14989347755908966, + "learning_rate": 7.074188699058061e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6115913846674135, + "grad_norm": 0.18792930245399475, + "learning_rate": 7.072676459715618e-05, + "loss": 0.3494, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6120141193379975, + "grad_norm": 0.14552879333496094, + "learning_rate": 7.071163991400406e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6124368540085814, + "grad_norm": 0.1923365294933319, + "learning_rate": 7.069651294279516e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6128595886791657, + "grad_norm": 0.1818966567516327, + "learning_rate": 7.068138368520055e-05, + "loss": 0.3565, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6132823233497495, + "grad_norm": 0.15485994517803192, + "learning_rate": 7.066625214289161e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6137050580203334, + "grad_norm": 0.19659654796123505, + "learning_rate": 7.065111831753993e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6141277926909177, + "grad_norm": 0.14869160950183868, + "learning_rate": 7.06359822108174e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6145505273615015, + "grad_norm": 0.17590606212615967, + "learning_rate": 7.062084382439612e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6149732620320856, + "grad_norm": 0.26929211616516113, + "learning_rate": 7.060570315994846e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6153959967026696, + "grad_norm": 0.15012745559215546, + "learning_rate": 7.059056021914705e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6158187313732535, + "grad_norm": 0.19610725343227386, + "learning_rate": 7.057541500366474e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6162414660438376, + "grad_norm": 0.20056487619876862, + "learning_rate": 7.056026751517469e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6166642007144216, + "grad_norm": 0.17183470726013184, + "learning_rate": 7.054511775535023e-05, + "loss": 0.3684, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6170869353850055, + "grad_norm": 0.23111332952976227, + "learning_rate": 7.052996572586501e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6175096700555898, + "grad_norm": 0.14908741414546967, + "learning_rate": 7.051481142839288e-05, + "loss": 0.3537, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6179324047261736, + "grad_norm": 0.15998528897762299, + "learning_rate": 7.0499654864608e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6183551393967577, + "grad_norm": 0.13802585005760193, + "learning_rate": 7.048449603618475e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6187778740673417, + "grad_norm": 0.24361640214920044, + "learning_rate": 7.046933494479773e-05, + "loss": 0.3703, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6192006087379256, + "grad_norm": 0.1493324488401413, + "learning_rate": 7.045417159212182e-05, + "loss": 0.3566, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6196233434085097, + "grad_norm": 0.17390286922454834, + "learning_rate": 7.043900597983216e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6200460780790937, + "grad_norm": 0.18711979687213898, + "learning_rate": 7.042383810960411e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6204688127496776, + "grad_norm": 0.18945221602916718, + "learning_rate": 7.04086679831133e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6208915474202616, + "grad_norm": 0.2038637101650238, + "learning_rate": 7.039349560203561e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6213142820908457, + "grad_norm": 0.20714536309242249, + "learning_rate": 7.037832096804715e-05, + "loss": 0.3569, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6217370167614296, + "grad_norm": 0.19713479280471802, + "learning_rate": 7.036314408282433e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6221597514320139, + "grad_norm": 0.23077072203159332, + "learning_rate": 7.034796494804372e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6225824861025977, + "grad_norm": 0.17424644529819489, + "learning_rate": 7.033278356538222e-05, + "loss": 0.3557, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6230052207731818, + "grad_norm": 0.19177284836769104, + "learning_rate": 7.031759993651697e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6234279554437658, + "grad_norm": 0.16229797899723053, + "learning_rate": 7.030241406312528e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6238506901143497, + "grad_norm": 0.17884095013141632, + "learning_rate": 7.028722594688478e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6242734247849338, + "grad_norm": 0.19630764424800873, + "learning_rate": 7.027203558947338e-05, + "loss": 0.3684, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6246961594555178, + "grad_norm": 0.22099129855632782, + "learning_rate": 7.025684299256914e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6251188941261017, + "grad_norm": 0.18404164910316467, + "learning_rate": 7.024164815785041e-05, + "loss": 0.3726, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6255416287966857, + "grad_norm": 0.2115260362625122, + "learning_rate": 7.022645108699584e-05, + "loss": 0.357, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6259643634672698, + "grad_norm": 0.253582239151001, + "learning_rate": 7.021125178168426e-05, + "loss": 0.3696, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6263870981378536, + "grad_norm": 0.2912209928035736, + "learning_rate": 7.019605024359474e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.626809832808438, + "grad_norm": 0.1846146136522293, + "learning_rate": 7.018084647440668e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6272325674790218, + "grad_norm": 0.22348575294017792, + "learning_rate": 7.016564047579962e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6276553021496059, + "grad_norm": 0.2020118534564972, + "learning_rate": 7.015043224945343e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.62807803682019, + "grad_norm": 0.1772250235080719, + "learning_rate": 7.013522179704818e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6285007714907738, + "grad_norm": 0.13697190582752228, + "learning_rate": 7.01200091202642e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6289235061613578, + "grad_norm": 0.141384556889534, + "learning_rate": 7.010479422078207e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.629346240831942, + "grad_norm": 0.16684655845165253, + "learning_rate": 7.00895771002826e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6297689755025258, + "grad_norm": 0.1872880607843399, + "learning_rate": 7.007435776044686e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6301917101731098, + "grad_norm": 0.216222882270813, + "learning_rate": 7.005913620295617e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6306144448436939, + "grad_norm": 0.2153814285993576, + "learning_rate": 7.004391242949209e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6310371795142777, + "grad_norm": 0.2304246425628662, + "learning_rate": 7.002868644173641e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.631459914184862, + "grad_norm": 0.17588962614536285, + "learning_rate": 7.001345824137115e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6318826488554459, + "grad_norm": 0.22089818120002747, + "learning_rate": 6.999822783007866e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 34990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.63230538352603, + "grad_norm": 0.18127740919589996, + "learning_rate": 6.998299520954144e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.632728118196614, + "grad_norm": 0.21715925633907318, + "learning_rate": 6.996776038144226e-05, + "loss": 0.3719, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6331508528671979, + "grad_norm": 0.24138382077217102, + "learning_rate": 6.995252334746414e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.633573587537782, + "grad_norm": 0.2120877057313919, + "learning_rate": 6.993728410929038e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.633996322208366, + "grad_norm": 0.16983211040496826, + "learning_rate": 6.992204266860446e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6344190568789498, + "grad_norm": 0.17773616313934326, + "learning_rate": 6.990679902709014e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.634841791549534, + "grad_norm": 0.20561492443084717, + "learning_rate": 6.989155318643142e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.635264526220118, + "grad_norm": 0.1440826654434204, + "learning_rate": 6.987630514831255e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6356872608907018, + "grad_norm": 0.14900822937488556, + "learning_rate": 6.986105491441798e-05, + "loss": 0.3569, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6361099955612861, + "grad_norm": 0.1580997109413147, + "learning_rate": 6.984580248643245e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.63653273023187, + "grad_norm": 0.10881076753139496, + "learning_rate": 6.983054786604095e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.636955464902454, + "grad_norm": 0.20559054613113403, + "learning_rate": 6.981529105492865e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.637378199573038, + "grad_norm": 0.22470320761203766, + "learning_rate": 6.9800032054781e-05, + "loss": 0.371, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.637800934243622, + "grad_norm": 0.2105870544910431, + "learning_rate": 6.978477086728374e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.638223668914206, + "grad_norm": 0.24581307172775269, + "learning_rate": 6.976950749412276e-05, + "loss": 0.3705, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.63864640358479, + "grad_norm": 0.16930967569351196, + "learning_rate": 6.975576859092905e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.639069138255374, + "grad_norm": 0.2008742094039917, + "learning_rate": 6.974050106965265e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.639491872925958, + "grad_norm": 0.23944807052612305, + "learning_rate": 6.972523136760312e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.639914607596542, + "grad_norm": 0.14322350919246674, + "learning_rate": 6.970995948646733e-05, + "loss": 0.3555, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.640337342267126, + "grad_norm": 0.16971606016159058, + "learning_rate": 6.969468542793242e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6407600769377102, + "grad_norm": 0.22615587711334229, + "learning_rate": 6.967940919368571e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.641182811608294, + "grad_norm": 0.1847233772277832, + "learning_rate": 6.966413078541482e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6416055462788781, + "grad_norm": 0.1844257414340973, + "learning_rate": 6.964885020480755e-05, + "loss": 0.3738, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6420282809494622, + "grad_norm": 0.15699739754199982, + "learning_rate": 6.963356745355205e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.642451015620046, + "grad_norm": 0.30373430252075195, + "learning_rate": 6.961828253333657e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.64287375029063, + "grad_norm": 0.19765183329582214, + "learning_rate": 6.96029954458497e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6432964849612142, + "grad_norm": 0.19445540010929108, + "learning_rate": 6.95877061927802e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.643719219631798, + "grad_norm": 0.1612277626991272, + "learning_rate": 6.957241477581714e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.644141954302382, + "grad_norm": 0.1470363289117813, + "learning_rate": 6.95571211966498e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6445646889729661, + "grad_norm": 0.19828054308891296, + "learning_rate": 6.954182545696766e-05, + "loss": 0.3707, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.64498742364355, + "grad_norm": 0.19972658157348633, + "learning_rate": 6.952652755846047e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6454101583141343, + "grad_norm": 0.2434036284685135, + "learning_rate": 6.951122750281827e-05, + "loss": 0.3541, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6458328929847181, + "grad_norm": 0.21196088194847107, + "learning_rate": 6.949592529173124e-05, + "loss": 0.3708, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6462556276553022, + "grad_norm": 0.18150001764297485, + "learning_rate": 6.948062092688987e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6466783623258863, + "grad_norm": 0.13460491597652435, + "learning_rate": 6.946531440998482e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6471010969964701, + "grad_norm": 0.19236841797828674, + "learning_rate": 6.94500057427071e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6475238316670542, + "grad_norm": 0.1901264190673828, + "learning_rate": 6.943469492674786e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6479465663376383, + "grad_norm": 0.1562507450580597, + "learning_rate": 6.94193819637985e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.648369301008222, + "grad_norm": 0.14968423545360565, + "learning_rate": 6.940406685555069e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6487920356788062, + "grad_norm": 0.2713511884212494, + "learning_rate": 6.938874960369633e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6492147703493902, + "grad_norm": 0.2155286818742752, + "learning_rate": 6.93734302099275e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.649637505019974, + "grad_norm": 0.17856267094612122, + "learning_rate": 6.935810867593664e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6500602396905584, + "grad_norm": 0.1829906553030014, + "learning_rate": 6.934278500341629e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6504829743611422, + "grad_norm": 0.19610261917114258, + "learning_rate": 6.932745919405932e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6509057090317263, + "grad_norm": 0.1832851618528366, + "learning_rate": 6.931213124955878e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6513284437023104, + "grad_norm": 0.19078291952610016, + "learning_rate": 6.9296801171608e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6517511783728942, + "grad_norm": 0.24290737509727478, + "learning_rate": 6.928146896190051e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6521739130434783, + "grad_norm": 0.1562001258134842, + "learning_rate": 6.92661346221301e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6525966477140623, + "grad_norm": 0.18696127831935883, + "learning_rate": 6.925079815399078e-05, + "loss": 0.351, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6530193823846462, + "grad_norm": 0.22000116109848022, + "learning_rate": 6.92354595591768e-05, + "loss": 0.3696, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6534421170552303, + "grad_norm": 0.19075913727283478, + "learning_rate": 6.922011883938266e-05, + "loss": 0.3698, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6538648517258143, + "grad_norm": 0.25478988885879517, + "learning_rate": 6.920477599630306e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6542875863963982, + "grad_norm": 0.18967315554618835, + "learning_rate": 6.918943103163296e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6547103210669825, + "grad_norm": 0.3085315227508545, + "learning_rate": 6.917408394706756e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6551330557375663, + "grad_norm": 0.16641974449157715, + "learning_rate": 6.915873474430227e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6555557904081504, + "grad_norm": 0.1730516105890274, + "learning_rate": 6.914338342503274e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6559785250787344, + "grad_norm": 0.1987943947315216, + "learning_rate": 6.91280299909549e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6564012597493183, + "grad_norm": 0.18779359757900238, + "learning_rate": 6.911267444376485e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6568239944199024, + "grad_norm": 0.1504994034767151, + "learning_rate": 6.909731678515893e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6572467290904864, + "grad_norm": 0.14483723044395447, + "learning_rate": 6.908195701683375e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6576694637610703, + "grad_norm": 0.15008944272994995, + "learning_rate": 6.906659514048615e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6580921984316543, + "grad_norm": 0.17310209572315216, + "learning_rate": 6.905123115781316e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6585149331022384, + "grad_norm": 0.14268265664577484, + "learning_rate": 6.903586507051208e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6589376677728223, + "grad_norm": 0.17380565404891968, + "learning_rate": 6.902049688028044e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6593604024434065, + "grad_norm": 0.218623548746109, + "learning_rate": 6.900512658881599e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6597831371139904, + "grad_norm": 0.22233688831329346, + "learning_rate": 6.898975419781672e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6602058717845745, + "grad_norm": 0.14386527240276337, + "learning_rate": 6.897437970898086e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6606286064551585, + "grad_norm": 0.23703978955745697, + "learning_rate": 6.895900312400683e-05, + "loss": 0.3553, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6610513411257424, + "grad_norm": 0.23846063017845154, + "learning_rate": 6.894362444459334e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6614740757963264, + "grad_norm": 0.2432442307472229, + "learning_rate": 6.892824367243928e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6618968104669105, + "grad_norm": 0.24323293566703796, + "learning_rate": 6.891286080924381e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6623195451374944, + "grad_norm": 0.18379826843738556, + "learning_rate": 6.889747585670632e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6627422798080784, + "grad_norm": 0.21797659993171692, + "learning_rate": 6.88820888165264e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6631650144786625, + "grad_norm": 0.1483817994594574, + "learning_rate": 6.886669969040388e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6635877491492463, + "grad_norm": 0.22039467096328735, + "learning_rate": 6.885130848003883e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6640104838198306, + "grad_norm": 0.22051571309566498, + "learning_rate": 6.883591518713158e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6644332184904145, + "grad_norm": 0.2563035786151886, + "learning_rate": 6.882051981338261e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6648559531609985, + "grad_norm": 0.21708545088768005, + "learning_rate": 6.880512236049271e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6652786878315826, + "grad_norm": 0.17413167655467987, + "learning_rate": 6.878972283016287e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6657014225021665, + "grad_norm": 0.22587072849273682, + "learning_rate": 6.877432122409428e-05, + "loss": 0.3705, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6661241571727505, + "grad_norm": 0.26026588678359985, + "learning_rate": 6.875891754398841e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6665468918433346, + "grad_norm": 0.1812833845615387, + "learning_rate": 6.874351179154693e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6669696265139184, + "grad_norm": 0.15175962448120117, + "learning_rate": 6.872810396847174e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6673923611845025, + "grad_norm": 0.2032918930053711, + "learning_rate": 6.8712694076465e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6678150958550866, + "grad_norm": 0.19980725646018982, + "learning_rate": 6.869728211722903e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6682378305256704, + "grad_norm": 0.1748647540807724, + "learning_rate": 6.868186809246643e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6686605651962547, + "grad_norm": 0.21616411209106445, + "learning_rate": 6.866645200388005e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6690832998668386, + "grad_norm": 0.13403186202049255, + "learning_rate": 6.865103385317291e-05, + "loss": 0.3555, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6695060345374226, + "grad_norm": 0.15922145545482635, + "learning_rate": 6.863561364204826e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6699287692080067, + "grad_norm": 0.19633503258228302, + "learning_rate": 6.862019137220967e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6703515038785905, + "grad_norm": 0.1539631336927414, + "learning_rate": 6.860476704536082e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6707742385491746, + "grad_norm": 0.1464311182498932, + "learning_rate": 6.858934066320567e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6711969732197587, + "grad_norm": 0.19442588090896606, + "learning_rate": 6.857391222744841e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6716197078903425, + "grad_norm": 0.19832977652549744, + "learning_rate": 6.855848173979347e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6720424425609266, + "grad_norm": 0.1621909737586975, + "learning_rate": 6.854304920194544e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6724651772315107, + "grad_norm": 0.1432267725467682, + "learning_rate": 6.852761461560924e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6728879119020945, + "grad_norm": 0.23419524729251862, + "learning_rate": 6.85121779824899e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6733106465726788, + "grad_norm": 0.1542084962129593, + "learning_rate": 6.84967393042928e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6737333812432627, + "grad_norm": 0.16036352515220642, + "learning_rate": 6.848129858272343e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6741561159138467, + "grad_norm": 0.16526754200458527, + "learning_rate": 6.846585581948757e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 35990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6745788505844308, + "grad_norm": 0.19615836441516876, + "learning_rate": 6.845041101629124e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6750015852550146, + "grad_norm": 0.14333203434944153, + "learning_rate": 6.843496417484065e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6754243199255987, + "grad_norm": 0.18114034831523895, + "learning_rate": 6.841951529684222e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6758470545961828, + "grad_norm": 0.1855798363685608, + "learning_rate": 6.840406438400262e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6762697892667666, + "grad_norm": 0.17513610422611237, + "learning_rate": 6.838861143802877e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6766925239373507, + "grad_norm": 0.18761739134788513, + "learning_rate": 6.837315646062778e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6771152586079348, + "grad_norm": 0.18026868999004364, + "learning_rate": 6.835769945350699e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6775379932785186, + "grad_norm": 0.20536063611507416, + "learning_rate": 6.834224041837395e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.677960727949103, + "grad_norm": 0.1753503978252411, + "learning_rate": 6.832677935693647e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6783834626196867, + "grad_norm": 0.1577736884355545, + "learning_rate": 6.831286267056411e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6788061972902708, + "grad_norm": 0.18863032758235931, + "learning_rate": 6.829739776385396e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6792289319608549, + "grad_norm": 0.17745840549468994, + "learning_rate": 6.828193083579322e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6796516666314387, + "grad_norm": 0.18640346825122833, + "learning_rate": 6.826646188809053e-05, + "loss": 0.3566, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6800744013020228, + "grad_norm": 0.209023118019104, + "learning_rate": 6.825099092245484e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6804971359726069, + "grad_norm": 0.21104739606380463, + "learning_rate": 6.823551794059521e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6809198706431907, + "grad_norm": 0.17761848866939545, + "learning_rate": 6.822004294422098e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6813426053137748, + "grad_norm": 0.1661517322063446, + "learning_rate": 6.820456593504171e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6817653399843588, + "grad_norm": 0.1502256542444229, + "learning_rate": 6.818908691476717e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6821880746549427, + "grad_norm": 0.2988208532333374, + "learning_rate": 6.817360588510737e-05, + "loss": 0.3704, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.682610809325527, + "grad_norm": 0.21776129305362701, + "learning_rate": 6.815812284777252e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6830335439961108, + "grad_norm": 0.17996807396411896, + "learning_rate": 6.814263780447307e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.683456278666695, + "grad_norm": 0.11702002584934235, + "learning_rate": 6.812715075691966e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.683879013337279, + "grad_norm": 0.17739595472812653, + "learning_rate": 6.811166170682323e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6843017480078628, + "grad_norm": 0.23537661135196686, + "learning_rate": 6.809617065589483e-05, + "loss": 0.3698, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6847244826784469, + "grad_norm": 0.1853407323360443, + "learning_rate": 6.808067760584581e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.685147217349031, + "grad_norm": 0.2014223337173462, + "learning_rate": 6.806518255838772e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6855699520196148, + "grad_norm": 0.14162133634090424, + "learning_rate": 6.804968551523235e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6859926866901989, + "grad_norm": 0.19648310542106628, + "learning_rate": 6.803418647809164e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.686415421360783, + "grad_norm": 0.1794089823961258, + "learning_rate": 6.801868544867784e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6868381560313668, + "grad_norm": 0.16454128921031952, + "learning_rate": 6.800318242870336e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.687260890701951, + "grad_norm": 0.19817809760570526, + "learning_rate": 6.798767741988086e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.687683625372535, + "grad_norm": 0.17142784595489502, + "learning_rate": 6.79721704239232e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.688106360043119, + "grad_norm": 0.20509853959083557, + "learning_rate": 6.79566614425435e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.688529094713703, + "grad_norm": 0.2255437672138214, + "learning_rate": 6.794115047745505e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.688951829384287, + "grad_norm": 0.18511295318603516, + "learning_rate": 6.792563753037135e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.689374564054871, + "grad_norm": 0.1477203518152237, + "learning_rate": 6.791012260300616e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.689797298725455, + "grad_norm": 0.22647660970687866, + "learning_rate": 6.789460569707348e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6902200333960389, + "grad_norm": 0.273762971162796, + "learning_rate": 6.787908681428747e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.690642768066623, + "grad_norm": 0.23341061174869537, + "learning_rate": 6.786356595636251e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.691065502737207, + "grad_norm": 0.20004406571388245, + "learning_rate": 6.784804312501325e-05, + "loss": 0.3536, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6914882374077909, + "grad_norm": 0.14718450605869293, + "learning_rate": 6.783251832195454e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6919109720783752, + "grad_norm": 0.1913825124502182, + "learning_rate": 6.78169915489014e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.692333706748959, + "grad_norm": 0.1426674872636795, + "learning_rate": 6.780146280756912e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.692756441419543, + "grad_norm": 0.20060068368911743, + "learning_rate": 6.77859320996732e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6931791760901271, + "grad_norm": 0.20608270168304443, + "learning_rate": 6.777039942692935e-05, + "loss": 0.3713, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.693601910760711, + "grad_norm": 0.21160133183002472, + "learning_rate": 6.775486479105348e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.694024645431295, + "grad_norm": 0.2068934142589569, + "learning_rate": 6.773932819376174e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6944473801018791, + "grad_norm": 0.1768866330385208, + "learning_rate": 6.772378963677048e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.694870114772463, + "grad_norm": 0.2169232815504074, + "learning_rate": 6.77082491217963e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.695292849443047, + "grad_norm": 0.2273823320865631, + "learning_rate": 6.769270665055596e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.695715584113631, + "grad_norm": 0.2003522664308548, + "learning_rate": 6.767716222476651e-05, + "loss": 0.3719, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.696138318784215, + "grad_norm": 0.14898011088371277, + "learning_rate": 6.766161584614515e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6965610534547992, + "grad_norm": 0.20518463850021362, + "learning_rate": 6.764606751640929e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.696983788125383, + "grad_norm": 0.19073030352592468, + "learning_rate": 6.763051723727662e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6974065227959672, + "grad_norm": 0.19202381372451782, + "learning_rate": 6.761496501046503e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6978292574665512, + "grad_norm": 0.1961967647075653, + "learning_rate": 6.759941083769258e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.698251992137135, + "grad_norm": 0.2335834801197052, + "learning_rate": 6.758385472067757e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6986747268077191, + "grad_norm": 0.18204016983509064, + "learning_rate": 6.756829666113851e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6990974614783032, + "grad_norm": 0.2291363775730133, + "learning_rate": 6.755273666079414e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.699520196148887, + "grad_norm": 0.18478089570999146, + "learning_rate": 6.753717472136342e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6999429308194711, + "grad_norm": 0.22412031888961792, + "learning_rate": 6.752161084456547e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7003656654900552, + "grad_norm": 0.18830354511737823, + "learning_rate": 6.750604503211969e-05, + "loss": 0.3565, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.700788400160639, + "grad_norm": 0.15337826311588287, + "learning_rate": 6.749047728574568e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7012111348312233, + "grad_norm": 0.1356258988380432, + "learning_rate": 6.747490760716322e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7016338695018072, + "grad_norm": 0.17138993740081787, + "learning_rate": 6.745933599809231e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7020566041723912, + "grad_norm": 0.17264166474342346, + "learning_rate": 6.744376246025322e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7024793388429753, + "grad_norm": 0.1630132794380188, + "learning_rate": 6.742818699536634e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7029020735135592, + "grad_norm": 0.1842886507511139, + "learning_rate": 6.741260960515235e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7033248081841432, + "grad_norm": 0.20779699087142944, + "learning_rate": 6.739703029133212e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7037475428547273, + "grad_norm": 0.27226805686950684, + "learning_rate": 6.738144905562673e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7041702775253111, + "grad_norm": 0.19294339418411255, + "learning_rate": 6.736586589975746e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7045930121958952, + "grad_norm": 0.16516010463237762, + "learning_rate": 6.735028082544581e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7050157468664793, + "grad_norm": 0.16309703886508942, + "learning_rate": 6.733469383441351e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7054384815370631, + "grad_norm": 0.22039717435836792, + "learning_rate": 6.731910492838247e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7058612162076474, + "grad_norm": 0.186434805393219, + "learning_rate": 6.730351410907483e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7062839508782313, + "grad_norm": 0.14621557295322418, + "learning_rate": 6.728792137821295e-05, + "loss": 0.3558, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7067066855488153, + "grad_norm": 0.18196000158786774, + "learning_rate": 6.727232673751938e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7071294202193994, + "grad_norm": 0.14147181808948517, + "learning_rate": 6.725673018871691e-05, + "loss": 0.357, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7075521548899832, + "grad_norm": 0.17399680614471436, + "learning_rate": 6.724113173352849e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7079748895605673, + "grad_norm": 0.15663361549377441, + "learning_rate": 6.722553137367734e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7083976242311514, + "grad_norm": 0.18698999285697937, + "learning_rate": 6.720992911088686e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7088203589017352, + "grad_norm": 0.19103595614433289, + "learning_rate": 6.719432494688066e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7092430935723193, + "grad_norm": 0.17894509434700012, + "learning_rate": 6.717871888338255e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7096658282429034, + "grad_norm": 0.20108380913734436, + "learning_rate": 6.716311092211658e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7100885629134872, + "grad_norm": 0.1561557650566101, + "learning_rate": 6.714750106480698e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7105112975840715, + "grad_norm": 0.2390889823436737, + "learning_rate": 6.713188931317822e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7109340322546553, + "grad_norm": 0.22337807714939117, + "learning_rate": 6.711627566895496e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7113567669252394, + "grad_norm": 0.28807175159454346, + "learning_rate": 6.710066013386207e-05, + "loss": 0.3696, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7117795015958235, + "grad_norm": 0.15185444056987762, + "learning_rate": 6.70850427096246e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7122022362664073, + "grad_norm": 0.1772097498178482, + "learning_rate": 6.706942339796787e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7126249709369914, + "grad_norm": 0.1845446079969406, + "learning_rate": 6.705380220061737e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7130477056075755, + "grad_norm": 0.14076881110668182, + "learning_rate": 6.703817911929881e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7134704402781593, + "grad_norm": 0.28918319940567017, + "learning_rate": 6.70225541557381e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7138931749487434, + "grad_norm": 0.20929643511772156, + "learning_rate": 6.700692731166135e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7143159096193274, + "grad_norm": 0.20714245736598969, + "learning_rate": 6.699129858879491e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7147386442899113, + "grad_norm": 0.14480142295360565, + "learning_rate": 6.69756679888653e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7151613789604956, + "grad_norm": 0.22409111261367798, + "learning_rate": 6.696003551359926e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7155841136310794, + "grad_norm": 0.15984752774238586, + "learning_rate": 6.694440116472376e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7160068483016635, + "grad_norm": 0.2491873949766159, + "learning_rate": 6.692876494396594e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7164295829722476, + "grad_norm": 0.1843026727437973, + "learning_rate": 6.691312685305318e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 36990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7168523176428314, + "grad_norm": 0.19272646307945251, + "learning_rate": 6.689748689371304e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7172750523134155, + "grad_norm": 0.19254058599472046, + "learning_rate": 6.688184506767332e-05, + "loss": 0.368, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7176977869839996, + "grad_norm": 0.17235739529132843, + "learning_rate": 6.686620137666196e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7181205216545834, + "grad_norm": 0.16684047877788544, + "learning_rate": 6.68505558224072e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7185432563251675, + "grad_norm": 0.29488053917884827, + "learning_rate": 6.683490840663739e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7189659909957515, + "grad_norm": 0.2191985547542572, + "learning_rate": 6.681925913108117e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7193887256663354, + "grad_norm": 0.19085267186164856, + "learning_rate": 6.680360799746734e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7198114603369197, + "grad_norm": 0.17478777468204498, + "learning_rate": 6.67879550075249e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7202341950075035, + "grad_norm": 0.13864746689796448, + "learning_rate": 6.677230016298307e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7206569296780876, + "grad_norm": 0.1568397432565689, + "learning_rate": 6.675664346557128e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7210796643486717, + "grad_norm": 0.17266812920570374, + "learning_rate": 6.674098491701913e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7215023990192555, + "grad_norm": 0.23069548606872559, + "learning_rate": 6.672532451905649e-05, + "loss": 0.3569, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7219251336898396, + "grad_norm": 0.2659701704978943, + "learning_rate": 6.670966227341337e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7223478683604236, + "grad_norm": 0.2793314456939697, + "learning_rate": 6.669399818182004e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7227706030310075, + "grad_norm": 0.15097340941429138, + "learning_rate": 6.66783322460069e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7231933377015916, + "grad_norm": 0.18835417926311493, + "learning_rate": 6.666266446770463e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7236160723721756, + "grad_norm": 0.24271175265312195, + "learning_rate": 6.664699484864407e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7240388070427595, + "grad_norm": 0.22950614988803864, + "learning_rate": 6.663132339055628e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7244615417133438, + "grad_norm": 0.16942009329795837, + "learning_rate": 6.661565009517252e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7248842763839276, + "grad_norm": 0.17648084461688995, + "learning_rate": 6.659997496422423e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7253070110545117, + "grad_norm": 0.15488950908184052, + "learning_rate": 6.65842979994431e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7257297457250957, + "grad_norm": 0.1490887552499771, + "learning_rate": 6.656861920256099e-05, + "loss": 0.3562, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7261524803956796, + "grad_norm": 0.17148010432720184, + "learning_rate": 6.655293857530994e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7265752150662637, + "grad_norm": 0.2785150408744812, + "learning_rate": 6.653725611942226e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7269979497368477, + "grad_norm": 0.24367228150367737, + "learning_rate": 6.65215718366304e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7274206844074316, + "grad_norm": 0.23001717031002045, + "learning_rate": 6.650588572866703e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7278434190780156, + "grad_norm": 0.18711207807064056, + "learning_rate": 6.649019779726507e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7282661537485997, + "grad_norm": 0.2530584931373596, + "learning_rate": 6.647450804415755e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7286888884191836, + "grad_norm": 0.1867014467716217, + "learning_rate": 6.645881647107775e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7291116230897678, + "grad_norm": 0.1621614694595337, + "learning_rate": 6.644312307975917e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7295343577603517, + "grad_norm": 0.1722922921180725, + "learning_rate": 6.642742787193548e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7299570924309358, + "grad_norm": 0.17483901977539062, + "learning_rate": 6.641173084934059e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7303798271015198, + "grad_norm": 0.1811724752187729, + "learning_rate": 6.639603201370852e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7308025617721037, + "grad_norm": 0.2045820653438568, + "learning_rate": 6.638033136677359e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7312252964426877, + "grad_norm": 0.1612471640110016, + "learning_rate": 6.636462891027031e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7316480311132718, + "grad_norm": 0.11973670870065689, + "learning_rate": 6.634892464593332e-05, + "loss": 0.3544, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7320707657838557, + "grad_norm": 0.1681145876646042, + "learning_rate": 6.633321857549751e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7324935004544397, + "grad_norm": 0.15230949223041534, + "learning_rate": 6.631751070069795e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7329162351250238, + "grad_norm": 0.1660209447145462, + "learning_rate": 6.630180102326999e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7333389697956076, + "grad_norm": 0.200142964720726, + "learning_rate": 6.628608954494902e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.733761704466192, + "grad_norm": 0.16509981453418732, + "learning_rate": 6.627037626747075e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7341844391367758, + "grad_norm": 0.144532710313797, + "learning_rate": 6.625466119257109e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7346071738073598, + "grad_norm": 0.156905397772789, + "learning_rate": 6.623894432198607e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.735029908477944, + "grad_norm": 0.1750328540802002, + "learning_rate": 6.622322565745199e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7354526431485278, + "grad_norm": 0.16878357529640198, + "learning_rate": 6.620750520070532e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7358753778191118, + "grad_norm": 0.17048275470733643, + "learning_rate": 6.619178295348273e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.736298112489696, + "grad_norm": 0.1704857051372528, + "learning_rate": 6.617605891752107e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7367208471602797, + "grad_norm": 0.18537601828575134, + "learning_rate": 6.616033309455743e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7371435818308638, + "grad_norm": 0.14329543709754944, + "learning_rate": 6.614460548632908e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7375663165014479, + "grad_norm": 0.17068688571453094, + "learning_rate": 6.612887609457346e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7379890511720317, + "grad_norm": 0.16982607543468475, + "learning_rate": 6.611314492102823e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.738411785842616, + "grad_norm": 0.19095902144908905, + "learning_rate": 6.609741196743124e-05, + "loss": 0.3756, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7388345205131999, + "grad_norm": 0.1876472383737564, + "learning_rate": 6.608167723552057e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.739257255183784, + "grad_norm": 0.1368756741285324, + "learning_rate": 6.606594072703445e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.739679989854368, + "grad_norm": 0.15315327048301697, + "learning_rate": 6.605020244371131e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7401027245249518, + "grad_norm": 0.2434554547071457, + "learning_rate": 6.603446238728979e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.740525459195536, + "grad_norm": 0.16884472966194153, + "learning_rate": 6.601872055950875e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.74094819386612, + "grad_norm": 0.20787809789180756, + "learning_rate": 6.600297696210722e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7413709285367038, + "grad_norm": 0.1986960470676422, + "learning_rate": 6.59872315968244e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.741793663207288, + "grad_norm": 0.1531389355659485, + "learning_rate": 6.597148446539975e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.742216397877872, + "grad_norm": 0.14409691095352173, + "learning_rate": 6.595573556957284e-05, + "loss": 0.3678, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7426391325484558, + "grad_norm": 0.18326975405216217, + "learning_rate": 6.593998491108352e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.74306186721904, + "grad_norm": 0.1501602828502655, + "learning_rate": 6.592423249167179e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.743484601889624, + "grad_norm": 0.16950829327106476, + "learning_rate": 6.590847831307785e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.743907336560208, + "grad_norm": 0.1939764767885208, + "learning_rate": 6.58927223770421e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.744330071230792, + "grad_norm": 0.1882573515176773, + "learning_rate": 6.58769646853051e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.744752805901376, + "grad_norm": 0.214157372713089, + "learning_rate": 6.586120523960767e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.74517554057196, + "grad_norm": 0.20121026039123535, + "learning_rate": 6.584544404169079e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.745598275242544, + "grad_norm": 0.1795603632926941, + "learning_rate": 6.58296810932956e-05, + "loss": 0.3732, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.746021009913128, + "grad_norm": 0.19873255491256714, + "learning_rate": 6.581391639616348e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.746443744583712, + "grad_norm": 0.1598486751317978, + "learning_rate": 6.579814995203599e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.746866479254296, + "grad_norm": 0.1714780181646347, + "learning_rate": 6.57823817626549e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.74728921392488, + "grad_norm": 0.18214631080627441, + "learning_rate": 6.576661182976211e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7477119485954642, + "grad_norm": 0.1623738706111908, + "learning_rate": 6.57508401550998e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.748134683266048, + "grad_norm": 0.1905973255634308, + "learning_rate": 6.573506674041028e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.748557417936632, + "grad_norm": 0.18241767585277557, + "learning_rate": 6.571929158743607e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7489801526072162, + "grad_norm": 0.1457391232252121, + "learning_rate": 6.570351469791987e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7494028872778, + "grad_norm": 0.1930670440196991, + "learning_rate": 6.568773607360461e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.749825621948384, + "grad_norm": 0.1462724506855011, + "learning_rate": 6.567195571623338e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7502483566189682, + "grad_norm": 0.20329029858112335, + "learning_rate": 6.565617362754945e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.750671091289552, + "grad_norm": 0.17446193099021912, + "learning_rate": 6.564038980929633e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.751093825960136, + "grad_norm": 0.1302718073129654, + "learning_rate": 6.562460426321768e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7515165606307201, + "grad_norm": 0.20719552040100098, + "learning_rate": 6.560881699105733e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.751939295301304, + "grad_norm": 0.233018159866333, + "learning_rate": 6.559302799455937e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7523620299718883, + "grad_norm": 0.1496685892343521, + "learning_rate": 6.557723727546803e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7527847646424721, + "grad_norm": 0.15700602531433105, + "learning_rate": 6.556144483552774e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7532074993130562, + "grad_norm": 0.16411122679710388, + "learning_rate": 6.554565067648312e-05, + "loss": 0.3691, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7536302339836403, + "grad_norm": 0.1626468002796173, + "learning_rate": 6.552985480007899e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.754052968654224, + "grad_norm": 0.17274163663387299, + "learning_rate": 6.551405720806035e-05, + "loss": 0.3693, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7544757033248082, + "grad_norm": 0.1700245440006256, + "learning_rate": 6.54982579021724e-05, + "loss": 0.3692, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7548984379953922, + "grad_norm": 0.14136220514774323, + "learning_rate": 6.54824568841605e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.755321172665976, + "grad_norm": 0.18148313462734222, + "learning_rate": 6.546665415577023e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7557439073365602, + "grad_norm": 0.147983118891716, + "learning_rate": 6.545084971874738e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7561666420071442, + "grad_norm": 0.18198291957378387, + "learning_rate": 6.543504357483786e-05, + "loss": 0.3537, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.756589376677728, + "grad_norm": 0.16504597663879395, + "learning_rate": 6.541923572578781e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7570121113483124, + "grad_norm": 0.16402776539325714, + "learning_rate": 6.540342617334356e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7574348460188962, + "grad_norm": 0.13580235838890076, + "learning_rate": 6.538761491925164e-05, + "loss": 0.3552, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7578575806894803, + "grad_norm": 0.18252314627170563, + "learning_rate": 6.537180196525872e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7582803153600643, + "grad_norm": 0.15295182168483734, + "learning_rate": 6.535598731311172e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7587030500306482, + "grad_norm": 0.15868835151195526, + "learning_rate": 6.534017096455772e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 37990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7591257847012323, + "grad_norm": 0.18334901332855225, + "learning_rate": 6.532435292134394e-05, + "loss": 0.368, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7595485193718163, + "grad_norm": 0.18286040425300598, + "learning_rate": 6.530853318521785e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7599712540424002, + "grad_norm": 0.162068173289299, + "learning_rate": 6.529271175792713e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7603939887129842, + "grad_norm": 0.18969690799713135, + "learning_rate": 6.527688864121955e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7608167233835683, + "grad_norm": 0.16712094843387604, + "learning_rate": 6.526106383684314e-05, + "loss": 0.3516, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7612394580541522, + "grad_norm": 0.17659242451190948, + "learning_rate": 6.52452373465461e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7616621927247365, + "grad_norm": 0.11842940747737885, + "learning_rate": 6.522940917207684e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7620849273953203, + "grad_norm": 0.1775246560573578, + "learning_rate": 6.52135793151839e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7625076620659044, + "grad_norm": 0.20265020430088043, + "learning_rate": 6.519774777761604e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7629303967364884, + "grad_norm": 0.16523832082748413, + "learning_rate": 6.51819145611222e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7633531314070723, + "grad_norm": 0.13868705928325653, + "learning_rate": 6.516607966745152e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7637758660776564, + "grad_norm": 0.20530551671981812, + "learning_rate": 6.515024309835331e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7641986007482404, + "grad_norm": 0.1819809377193451, + "learning_rate": 6.513440485557705e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7646213354188243, + "grad_norm": 0.18884927034378052, + "learning_rate": 6.511856494087243e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7650440700894083, + "grad_norm": 0.1548035889863968, + "learning_rate": 6.510272335598935e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7654668047599924, + "grad_norm": 0.19570575654506683, + "learning_rate": 6.508688010267782e-05, + "loss": 0.3717, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7658895394305762, + "grad_norm": 0.1816026121377945, + "learning_rate": 6.507103518268809e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7663122741011605, + "grad_norm": 0.15821652114391327, + "learning_rate": 6.505518859777057e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7667350087717444, + "grad_norm": 0.2840416729450226, + "learning_rate": 6.50393403496759e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7671577434423285, + "grad_norm": 0.21392948925495148, + "learning_rate": 6.502349044015483e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7675804781129125, + "grad_norm": 0.15750926733016968, + "learning_rate": 6.500763887095837e-05, + "loss": 0.3551, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7680032127834964, + "grad_norm": 0.18229663372039795, + "learning_rate": 6.499178564383763e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7684259474540804, + "grad_norm": 0.13263888657093048, + "learning_rate": 6.497593076054398e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7688486821246645, + "grad_norm": 0.2187338024377823, + "learning_rate": 6.496007422282892e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7692714167952484, + "grad_norm": 0.2501921057701111, + "learning_rate": 6.494421603244417e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7696941514658324, + "grad_norm": 0.17115738987922668, + "learning_rate": 6.492835619114162e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7701168861364165, + "grad_norm": 0.1724601536989212, + "learning_rate": 6.49124947006733e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7705396208070003, + "grad_norm": 0.19077010452747345, + "learning_rate": 6.489663156279151e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7709623554775846, + "grad_norm": 0.23312801122665405, + "learning_rate": 6.488076677924866e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7713850901481685, + "grad_norm": 0.17736288905143738, + "learning_rate": 6.486490035179737e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7718078248187525, + "grad_norm": 0.18519726395606995, + "learning_rate": 6.484903228219043e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7722305594893366, + "grad_norm": 0.2749939560890198, + "learning_rate": 6.483316257218082e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7726532941599205, + "grad_norm": 0.19686384499073029, + "learning_rate": 6.481729122352171e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7730760288305045, + "grad_norm": 0.16844283044338226, + "learning_rate": 6.480141823796645e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7734987635010886, + "grad_norm": 0.19233882427215576, + "learning_rate": 6.478554361726852e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7739214981716724, + "grad_norm": 0.14661183953285217, + "learning_rate": 6.476966736318163e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7743442328422565, + "grad_norm": 0.20104947686195374, + "learning_rate": 6.475378947745969e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7747669675128406, + "grad_norm": 0.18447697162628174, + "learning_rate": 6.473790996185676e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7751897021834244, + "grad_norm": 0.15502822399139404, + "learning_rate": 6.472202881812705e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7756124368540087, + "grad_norm": 0.12682487070560455, + "learning_rate": 6.470614604802502e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7760351715245926, + "grad_norm": 0.13837847113609314, + "learning_rate": 6.469026165330524e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7764579061951766, + "grad_norm": 0.17162492871284485, + "learning_rate": 6.46743756357225e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7768806408657607, + "grad_norm": 0.19584694504737854, + "learning_rate": 6.465848799703178e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7773033755363445, + "grad_norm": 0.15326863527297974, + "learning_rate": 6.464259873898821e-05, + "loss": 0.3698, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7777261102069286, + "grad_norm": 0.21390238404273987, + "learning_rate": 6.46267078633471e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7781488448775127, + "grad_norm": 0.1807081550359726, + "learning_rate": 6.461081537186393e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7785715795480965, + "grad_norm": 0.1442054957151413, + "learning_rate": 6.459492126629442e-05, + "loss": 0.3546, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7789943142186806, + "grad_norm": 0.15978984534740448, + "learning_rate": 6.457902554839441e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7794170488892647, + "grad_norm": 0.15592138469219208, + "learning_rate": 6.45631282199199e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7798397835598485, + "grad_norm": 0.16355344653129578, + "learning_rate": 6.454722928262712e-05, + "loss": 0.3544, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7802625182304328, + "grad_norm": 0.16497090458869934, + "learning_rate": 6.453132873827248e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7806852529010166, + "grad_norm": 0.19914880394935608, + "learning_rate": 6.451542658861251e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7811079875716007, + "grad_norm": 0.23200738430023193, + "learning_rate": 6.449952283540397e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7815307222421848, + "grad_norm": 0.13793577253818512, + "learning_rate": 6.448361748040379e-05, + "loss": 0.3562, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7819534569127686, + "grad_norm": 0.18305882811546326, + "learning_rate": 6.446771052536906e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7823761915833527, + "grad_norm": 0.25854700803756714, + "learning_rate": 6.445180197205702e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7827989262539368, + "grad_norm": 0.20676258206367493, + "learning_rate": 6.443589182222517e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7832216609245206, + "grad_norm": 0.22760426998138428, + "learning_rate": 6.441998007763112e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7836443955951047, + "grad_norm": 0.1423061043024063, + "learning_rate": 6.440406674003266e-05, + "loss": 0.3701, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7840671302656887, + "grad_norm": 0.19855111837387085, + "learning_rate": 6.438815181118777e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7844898649362726, + "grad_norm": 0.15570172667503357, + "learning_rate": 6.437223529285463e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7849125996068569, + "grad_norm": 0.20553697645664215, + "learning_rate": 6.435631718679155e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7853353342774407, + "grad_norm": 0.1500580906867981, + "learning_rate": 6.434039749475702e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7857580689480248, + "grad_norm": 0.22479170560836792, + "learning_rate": 6.432447621850974e-05, + "loss": 0.3706, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7861808036186089, + "grad_norm": 0.19106486439704895, + "learning_rate": 6.430855335980857e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7866035382891927, + "grad_norm": 0.14930403232574463, + "learning_rate": 6.429262892041255e-05, + "loss": 0.3538, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7870262729597768, + "grad_norm": 0.23676422238349915, + "learning_rate": 6.427670290208084e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7874490076303609, + "grad_norm": 0.16152967512607574, + "learning_rate": 6.426077530657285e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7878717423009447, + "grad_norm": 0.18609030544757843, + "learning_rate": 6.424484613564814e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7882944769715288, + "grad_norm": 0.19341814517974854, + "learning_rate": 6.422891539106644e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7887172116421128, + "grad_norm": 0.17102110385894775, + "learning_rate": 6.421298307458762e-05, + "loss": 0.3535, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7891399463126967, + "grad_norm": 0.1772083193063736, + "learning_rate": 6.419704918797178e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.789562680983281, + "grad_norm": 0.19408364593982697, + "learning_rate": 6.418111373297919e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7899854156538648, + "grad_norm": 0.1712692528963089, + "learning_rate": 6.416517671137021e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7904081503244489, + "grad_norm": 0.20292158424854279, + "learning_rate": 6.41492381249055e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.790830884995033, + "grad_norm": 0.18373392522335052, + "learning_rate": 6.413329797534579e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7912536196656168, + "grad_norm": 0.153968945145607, + "learning_rate": 6.411735626445203e-05, + "loss": 0.3519, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7916763543362009, + "grad_norm": 0.16517266631126404, + "learning_rate": 6.410141299398534e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.792099089006785, + "grad_norm": 0.17635604739189148, + "learning_rate": 6.4085468165707e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7925218236773688, + "grad_norm": 0.16149593889713287, + "learning_rate": 6.406952178137847e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7929445583479529, + "grad_norm": 0.15233787894248962, + "learning_rate": 6.405357384276135e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.793367293018537, + "grad_norm": 0.18520110845565796, + "learning_rate": 6.403762435161748e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7937900276891208, + "grad_norm": 0.1899884194135666, + "learning_rate": 6.402167330970883e-05, + "loss": 0.3696, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.794212762359705, + "grad_norm": 0.16039694845676422, + "learning_rate": 6.400572071879753e-05, + "loss": 0.3535, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.794635497030289, + "grad_norm": 0.17868323624134064, + "learning_rate": 6.39897665806459e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.795058231700873, + "grad_norm": 0.2018188238143921, + "learning_rate": 6.397381089701641e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.795480966371457, + "grad_norm": 0.16680531203746796, + "learning_rate": 6.395785366967175e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.795903701042041, + "grad_norm": 0.16196377575397491, + "learning_rate": 6.394189490037473e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.796326435712625, + "grad_norm": 0.19558420777320862, + "learning_rate": 6.392593459088832e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.796749170383209, + "grad_norm": 0.22907355427742004, + "learning_rate": 6.390997274297572e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7971719050537929, + "grad_norm": 0.16136617958545685, + "learning_rate": 6.389400935840028e-05, + "loss": 0.3678, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.797594639724377, + "grad_norm": 0.24986334145069122, + "learning_rate": 6.38780444389255e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.798017374394961, + "grad_norm": 0.19760987162590027, + "learning_rate": 6.3862077986315e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7984401090655449, + "grad_norm": 0.18269464373588562, + "learning_rate": 6.38461100023327e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7988628437361291, + "grad_norm": 0.20467868447303772, + "learning_rate": 6.383014048874259e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.799285578406713, + "grad_norm": 0.17462033033370972, + "learning_rate": 6.381416944730884e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.799708313077297, + "grad_norm": 0.18797288835048676, + "learning_rate": 6.379819687979582e-05, + "loss": 0.3519, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8001310477478811, + "grad_norm": 0.1421322524547577, + "learning_rate": 6.378222278796807e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.800553782418465, + "grad_norm": 0.1810416579246521, + "learning_rate": 6.376624717359022e-05, + "loss": 0.3736, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.800976517089049, + "grad_norm": 0.17555570602416992, + "learning_rate": 6.375027003842717e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 38990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8013992517596331, + "grad_norm": 0.17229560017585754, + "learning_rate": 6.373429138424397e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.801821986430217, + "grad_norm": 0.1517285257577896, + "learning_rate": 6.371831121280579e-05, + "loss": 0.3696, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.802244721100801, + "grad_norm": 0.16496050357818604, + "learning_rate": 6.370232952587796e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.802667455771385, + "grad_norm": 0.2810753881931305, + "learning_rate": 6.368634632522604e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.803090190441969, + "grad_norm": 0.2154088020324707, + "learning_rate": 6.367036161261574e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8035129251125532, + "grad_norm": 0.16710442304611206, + "learning_rate": 6.36543753898129e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.803935659783137, + "grad_norm": 0.1712462306022644, + "learning_rate": 6.363838765858357e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8043583944537211, + "grad_norm": 0.19022956490516663, + "learning_rate": 6.36223984206939e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8047811291243052, + "grad_norm": 0.18199172616004944, + "learning_rate": 6.360640767791032e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.805203863794889, + "grad_norm": 0.1672675609588623, + "learning_rate": 6.359041543199934e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8056265984654731, + "grad_norm": 0.18860207498073578, + "learning_rate": 6.357442168472762e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8060493331360572, + "grad_norm": 0.22541072964668274, + "learning_rate": 6.355842643786205e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.806472067806641, + "grad_norm": 0.1847050040960312, + "learning_rate": 6.354242969316967e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8068948024772251, + "grad_norm": 0.1861688643693924, + "learning_rate": 6.352643145241763e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8073175371478092, + "grad_norm": 0.14799055457115173, + "learning_rate": 6.351043171737334e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.807740271818393, + "grad_norm": 0.17289164662361145, + "learning_rate": 6.34944304898043e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8081630064889773, + "grad_norm": 0.2034475952386856, + "learning_rate": 6.347842777147818e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8085857411595612, + "grad_norm": 0.1890154480934143, + "learning_rate": 6.346242356416283e-05, + "loss": 0.368, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8090084758301452, + "grad_norm": 0.17547082901000977, + "learning_rate": 6.344641786962631e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8094312105007293, + "grad_norm": 0.16332820057868958, + "learning_rate": 6.343041068963679e-05, + "loss": 0.3691, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8098539451713131, + "grad_norm": 0.19198717176914215, + "learning_rate": 6.341440202596258e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8102766798418972, + "grad_norm": 0.18603920936584473, + "learning_rate": 6.33983918803722e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8106994145124813, + "grad_norm": 0.17436282336711884, + "learning_rate": 6.338238025463436e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8111221491830651, + "grad_norm": 0.1738974004983902, + "learning_rate": 6.336636715051788e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8115448838536492, + "grad_norm": 0.2166961431503296, + "learning_rate": 6.335035256979174e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8119676185242333, + "grad_norm": 0.13101987540721893, + "learning_rate": 6.33343365142251e-05, + "loss": 0.3515, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8123903531948171, + "grad_norm": 0.13407185673713684, + "learning_rate": 6.331831898558733e-05, + "loss": 0.3703, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8128130878654014, + "grad_norm": 0.19696149230003357, + "learning_rate": 6.330229998564788e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8132358225359853, + "grad_norm": 0.27154794335365295, + "learning_rate": 6.328627951617639e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8136585572065693, + "grad_norm": 0.17136971652507782, + "learning_rate": 6.327025757894271e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8140812918771534, + "grad_norm": 0.14372660219669342, + "learning_rate": 6.32542341757168e-05, + "loss": 0.3723, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8145040265477372, + "grad_norm": 0.14024965465068817, + "learning_rate": 6.323820930826879e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8149267612183213, + "grad_norm": 0.17325414717197418, + "learning_rate": 6.3222182978369e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8153494958889054, + "grad_norm": 0.1543845385313034, + "learning_rate": 6.320615518778788e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8157722305594892, + "grad_norm": 0.13625961542129517, + "learning_rate": 6.319012593829606e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8161949652300735, + "grad_norm": 0.1943732500076294, + "learning_rate": 6.31740952316643e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8166176999006574, + "grad_norm": 0.1465575098991394, + "learning_rate": 6.315806306966357e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8170404345712412, + "grad_norm": 0.2598472833633423, + "learning_rate": 6.314202945406496e-05, + "loss": 0.3708, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8174631692418255, + "grad_norm": 0.20524486899375916, + "learning_rate": 6.312599438663974e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8178859039124093, + "grad_norm": 0.12671341001987457, + "learning_rate": 6.310995786915934e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8183086385829934, + "grad_norm": 0.20054635405540466, + "learning_rate": 6.309391990339535e-05, + "loss": 0.3544, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8187313732535775, + "grad_norm": 0.21306519210338593, + "learning_rate": 6.307788049111951e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8191541079241613, + "grad_norm": 0.16622327268123627, + "learning_rate": 6.306183963410372e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8195768425947454, + "grad_norm": 0.27467554807662964, + "learning_rate": 6.304579733412005e-05, + "loss": 0.3713, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8199995772653295, + "grad_norm": 0.2254326492547989, + "learning_rate": 6.302975359294074e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8204223119359133, + "grad_norm": 0.19752560555934906, + "learning_rate": 6.301370841233816e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8208450466064976, + "grad_norm": 0.22138427197933197, + "learning_rate": 6.299766179408486e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8212677812770814, + "grad_norm": 0.172446146607399, + "learning_rate": 6.298161373995352e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8216905159476653, + "grad_norm": 0.16067396104335785, + "learning_rate": 6.296556425171706e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8221132506182496, + "grad_norm": 0.12553620338439941, + "learning_rate": 6.294951333114842e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8225359852888334, + "grad_norm": 0.19148162007331848, + "learning_rate": 6.293346098002084e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8229587199594175, + "grad_norm": 0.13076727092266083, + "learning_rate": 6.291740720010762e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8233814546300016, + "grad_norm": 0.12910644710063934, + "learning_rate": 6.29013519931823e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8238041893005854, + "grad_norm": 0.14327123761177063, + "learning_rate": 6.288529536101846e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8242269239711695, + "grad_norm": 0.14918817579746246, + "learning_rate": 6.286923730538996e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8246496586417535, + "grad_norm": 0.17618145048618317, + "learning_rate": 6.285317782807077e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8250723933123374, + "grad_norm": 0.16918833553791046, + "learning_rate": 6.283711693083496e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8254951279829217, + "grad_norm": 0.17009621858596802, + "learning_rate": 6.282105461545687e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8259178626535055, + "grad_norm": 0.1525796353816986, + "learning_rate": 6.28049908837109e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8263405973240894, + "grad_norm": 0.2200230211019516, + "learning_rate": 6.278892573737167e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8267633319946737, + "grad_norm": 0.19448129832744598, + "learning_rate": 6.27728591782139e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8271860666652575, + "grad_norm": 0.17921967804431915, + "learning_rate": 6.275679120801251e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8276088013358416, + "grad_norm": 0.2068474441766739, + "learning_rate": 6.274072182854258e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8280315360064257, + "grad_norm": 0.18759620189666748, + "learning_rate": 6.272465104157928e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8284542706770095, + "grad_norm": 0.149568110704422, + "learning_rate": 6.270857884889802e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8288770053475936, + "grad_norm": 0.26323509216308594, + "learning_rate": 6.269250525227432e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8292997400181776, + "grad_norm": 0.1626085340976715, + "learning_rate": 6.267643025348386e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8297224746887615, + "grad_norm": 0.140762597322464, + "learning_rate": 6.26603538543025e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8301452093593458, + "grad_norm": 0.1591922789812088, + "learning_rate": 6.264427605650618e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8305679440299296, + "grad_norm": 0.2570759356021881, + "learning_rate": 6.26281968618711e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8309906787005135, + "grad_norm": 0.23096956312656403, + "learning_rate": 6.261211627217352e-05, + "loss": 0.3563, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8314134133710978, + "grad_norm": 0.21789328753948212, + "learning_rate": 6.259603428918992e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8318361480416816, + "grad_norm": 0.21448510885238647, + "learning_rate": 6.25799509146969e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8322588827122657, + "grad_norm": 0.20171649754047394, + "learning_rate": 6.256386615047124e-05, + "loss": 0.3547, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8326816173828497, + "grad_norm": 0.1940046101808548, + "learning_rate": 6.254777999828983e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8331043520534336, + "grad_norm": 0.1836828738451004, + "learning_rate": 6.253169245992974e-05, + "loss": 0.3708, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8335270867240177, + "grad_norm": 0.19211143255233765, + "learning_rate": 6.251560353716823e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8339498213946017, + "grad_norm": 0.17540064454078674, + "learning_rate": 6.249951323178265e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8343725560651856, + "grad_norm": 0.14370636641979218, + "learning_rate": 6.248342154555052e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8347952907357699, + "grad_norm": 0.19260689616203308, + "learning_rate": 6.246732848024953e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8352180254063537, + "grad_norm": 0.19714608788490295, + "learning_rate": 6.245123403765753e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8356407600769375, + "grad_norm": 0.15460366010665894, + "learning_rate": 6.243513821955247e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8360634947475218, + "grad_norm": 0.17579443752765656, + "learning_rate": 6.241904102771252e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8364862294181057, + "grad_norm": 0.13576410710811615, + "learning_rate": 6.240294246391595e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8369089640886898, + "grad_norm": 0.17374685406684875, + "learning_rate": 6.238684252994121e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8373316987592738, + "grad_norm": 0.18195085227489471, + "learning_rate": 6.23707412275669e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8377544334298577, + "grad_norm": 0.15794679522514343, + "learning_rate": 6.235463855857175e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8381771681004417, + "grad_norm": 0.18713243305683136, + "learning_rate": 6.233853452473464e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8385999027710258, + "grad_norm": 0.16371667385101318, + "learning_rate": 6.232242912783466e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8390226374416097, + "grad_norm": 0.17217126488685608, + "learning_rate": 6.230632236965096e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.839445372112194, + "grad_norm": 0.16742025315761566, + "learning_rate": 6.229021425196292e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8398681067827778, + "grad_norm": 0.21748852729797363, + "learning_rate": 6.227410477655e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8402908414533616, + "grad_norm": 0.14175668358802795, + "learning_rate": 6.22579939451919e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.840713576123946, + "grad_norm": 0.16382405161857605, + "learning_rate": 6.224188175966836e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8411363107945298, + "grad_norm": 0.1616884469985962, + "learning_rate": 6.222576822175937e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8415590454651138, + "grad_norm": 0.1558239758014679, + "learning_rate": 6.2209653333245e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.841981780135698, + "grad_norm": 0.1710091531276703, + "learning_rate": 6.219353709590549e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8424045148062818, + "grad_norm": 0.14569959044456482, + "learning_rate": 6.217741951152124e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8428272494768658, + "grad_norm": 0.19808807969093323, + "learning_rate": 6.21613005818728e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.84324998414745, + "grad_norm": 0.18652762472629547, + "learning_rate": 6.214518030874087e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 39990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8436727188180337, + "grad_norm": 0.14265617728233337, + "learning_rate": 6.212905869390626e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6022210121154785, + "memory_reserved_GB": 54.376953125, + "step": 40000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.000422734670584, + "grad_norm": 0.10476814955472946, + "learning_rate": 6.211293573914997e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.000845469341168, + "grad_norm": 0.08178041130304337, + "learning_rate": 6.209681144625315e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.001268204011752, + "grad_norm": 0.0777583196759224, + "learning_rate": 6.208068581699708e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.001690938682336, + "grad_norm": 0.08239153772592545, + "learning_rate": 6.206455885316316e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0021136733529201, + "grad_norm": 0.08469659090042114, + "learning_rate": 6.2048430556533e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.002536408023504, + "grad_norm": 0.09902235120534897, + "learning_rate": 6.203230092888833e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.002959142694088, + "grad_norm": 0.08606366813182831, + "learning_rate": 6.201616997201099e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.003381877364672, + "grad_norm": 0.0787261351943016, + "learning_rate": 6.200003768768303e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0038046120352562, + "grad_norm": 0.08118993788957596, + "learning_rate": 6.19839040776866e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.00422734670584, + "grad_norm": 0.0939461886882782, + "learning_rate": 6.196776914380402e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.004650081376424, + "grad_norm": 0.08771821856498718, + "learning_rate": 6.195163288781773e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0050728160470082, + "grad_norm": 0.07917117327451706, + "learning_rate": 6.193549531151038e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.005495550717592, + "grad_norm": 0.09293017536401749, + "learning_rate": 6.191935641666468e-05, + "loss": 0.3557, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.005918285388176, + "grad_norm": 0.08747422695159912, + "learning_rate": 6.190321620506353e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0063410200587601, + "grad_norm": 0.10116787999868393, + "learning_rate": 6.188707467848998e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0067637547293442, + "grad_norm": 0.06932225823402405, + "learning_rate": 6.187093183872724e-05, + "loss": 0.3559, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.007186489399928, + "grad_norm": 0.08058372139930725, + "learning_rate": 6.18547876875586e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0076092240705121, + "grad_norm": 0.07826986908912659, + "learning_rate": 6.183864222676755e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0080319587410962, + "grad_norm": 0.10039258003234863, + "learning_rate": 6.182249545813773e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0084546934116803, + "grad_norm": 0.11778634041547775, + "learning_rate": 6.180634738345289e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.008877428082264, + "grad_norm": 0.07674644887447357, + "learning_rate": 6.179019800449694e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0093001627528482, + "grad_norm": 0.08452805131673813, + "learning_rate": 6.177404732305396e-05, + "loss": 0.3678, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0097228974234322, + "grad_norm": 0.07645272463560104, + "learning_rate": 6.17578953409081e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.010145632094016, + "grad_norm": 0.10661257803440094, + "learning_rate": 6.174174205984375e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0105683667646002, + "grad_norm": 0.07347101718187332, + "learning_rate": 6.172558748164536e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0109911014351842, + "grad_norm": 0.08173619955778122, + "learning_rate": 6.170943160809758e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0114138361057683, + "grad_norm": 0.07893264293670654, + "learning_rate": 6.169327444098519e-05, + "loss": 0.3552, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0118365707763521, + "grad_norm": 0.07064063847064972, + "learning_rate": 6.16771159820931e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0122593054469362, + "grad_norm": 0.09284202009439468, + "learning_rate": 6.166095623320632e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0126820401175203, + "grad_norm": 0.08996313065290451, + "learning_rate": 6.164479519611013e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0131047747881043, + "grad_norm": 0.07748985290527344, + "learning_rate": 6.162863287258982e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0135275094586882, + "grad_norm": 0.09307822585105896, + "learning_rate": 6.161246926443087e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0139502441292723, + "grad_norm": 0.08155128359794617, + "learning_rate": 6.159630437341894e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0143729787998563, + "grad_norm": 0.08215989172458649, + "learning_rate": 6.158013820133977e-05, + "loss": 0.3559, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0147957134704402, + "grad_norm": 0.07698041945695877, + "learning_rate": 6.156397074997931e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0152184481410242, + "grad_norm": 0.07797358185052872, + "learning_rate": 6.154780202112354e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0156411828116083, + "grad_norm": 0.08985483646392822, + "learning_rate": 6.153163201655872e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0160639174821924, + "grad_norm": 0.07897371053695679, + "learning_rate": 6.151546073807115e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0164866521527762, + "grad_norm": 0.08709454536437988, + "learning_rate": 6.149928818744732e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0169093868233603, + "grad_norm": 0.08587776869535446, + "learning_rate": 6.148311436647383e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0173321214939444, + "grad_norm": 0.08072572201490402, + "learning_rate": 6.146693927693743e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0177548561645284, + "grad_norm": 0.07959054410457611, + "learning_rate": 6.145076292062505e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0181775908351123, + "grad_norm": 0.11088083684444427, + "learning_rate": 6.143458529932369e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0186003255056963, + "grad_norm": 0.08362340927124023, + "learning_rate": 6.141840641482054e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0190230601762804, + "grad_norm": 0.08628549426794052, + "learning_rate": 6.14022262689029e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0194457948468643, + "grad_norm": 0.07693153619766235, + "learning_rate": 6.138604486335824e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0198685295174483, + "grad_norm": 0.08830767869949341, + "learning_rate": 6.136986219997414e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0202912641880324, + "grad_norm": 0.0907522439956665, + "learning_rate": 6.135367828053834e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0207139988586165, + "grad_norm": 0.08282622694969177, + "learning_rate": 6.13374931068387e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0211367335292003, + "grad_norm": 0.08400332182645798, + "learning_rate": 6.132130668066325e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0215594681997844, + "grad_norm": 0.07932775467634201, + "learning_rate": 6.130511900380011e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0219822028703685, + "grad_norm": 0.08345729857683182, + "learning_rate": 6.128893007803758e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0224049375409525, + "grad_norm": 0.09531852602958679, + "learning_rate": 6.12727399051641e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0228276722115364, + "grad_norm": 0.08659953624010086, + "learning_rate": 6.12565484869682e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0232504068821204, + "grad_norm": 0.08060099929571152, + "learning_rate": 6.12403558252386e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0236731415527045, + "grad_norm": 0.09522851556539536, + "learning_rate": 6.122416192176412e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0240958762232883, + "grad_norm": 0.105952188372612, + "learning_rate": 6.120796677833375e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0245186108938724, + "grad_norm": 0.0840628519654274, + "learning_rate": 6.119177039673658e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0249413455644565, + "grad_norm": 0.07436853647232056, + "learning_rate": 6.117557277876188e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0253640802350406, + "grad_norm": 0.07509947568178177, + "learning_rate": 6.115937392619902e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0257868149056244, + "grad_norm": 0.09118208289146423, + "learning_rate": 6.114317384083753e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0262095495762085, + "grad_norm": 0.08782035112380981, + "learning_rate": 6.112697252446704e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0266322842467925, + "grad_norm": 0.08185473084449768, + "learning_rate": 6.111076997887737e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0270550189173766, + "grad_norm": 0.0920252576470375, + "learning_rate": 6.109456620585845e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0274777535879605, + "grad_norm": 0.0761370062828064, + "learning_rate": 6.107836120720031e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0279004882585445, + "grad_norm": 0.07233726978302002, + "learning_rate": 6.10621549846932e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0283232229291286, + "grad_norm": 0.08083537220954895, + "learning_rate": 6.10459475401274e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0287459575997124, + "grad_norm": 0.0711037665605545, + "learning_rate": 6.102973887529343e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0291686922702965, + "grad_norm": 0.07986274361610413, + "learning_rate": 6.101352899198185e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0295914269408806, + "grad_norm": 0.09342242032289505, + "learning_rate": 6.099731789198344e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0300141616114646, + "grad_norm": 0.09491825103759766, + "learning_rate": 6.098110557708905e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0304368962820485, + "grad_norm": 0.09813341498374939, + "learning_rate": 6.096489204908966e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0308596309526326, + "grad_norm": 0.09314680099487305, + "learning_rate": 6.094867730977646e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0312823656232166, + "grad_norm": 0.09988696873188019, + "learning_rate": 6.0932461360940695e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0317051002938007, + "grad_norm": 0.07997357100248337, + "learning_rate": 6.091624420437381e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0321278349643845, + "grad_norm": 0.08603973686695099, + "learning_rate": 6.090002584186729e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0325505696349686, + "grad_norm": 0.07562430948019028, + "learning_rate": 6.0883806275212854e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0329733043055527, + "grad_norm": 0.08198703825473785, + "learning_rate": 6.08675855062023e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0333960389761365, + "grad_norm": 0.07598953694105148, + "learning_rate": 6.0851363536627556e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0338187736467206, + "grad_norm": 0.10106083750724792, + "learning_rate": 6.0835140368280716e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0342415083173047, + "grad_norm": 0.08411753177642822, + "learning_rate": 6.081891600295396e-05, + "loss": 0.3545, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0346642429878887, + "grad_norm": 0.08385971188545227, + "learning_rate": 6.080269044243967e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0350869776584726, + "grad_norm": 0.07872991263866425, + "learning_rate": 6.078646368853027e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0355097123290566, + "grad_norm": 0.09406008571386337, + "learning_rate": 6.077023574301839e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0359324469996407, + "grad_norm": 0.08736207336187363, + "learning_rate": 6.075400660769676e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0363551816702248, + "grad_norm": 0.08520353585481644, + "learning_rate": 6.073777628435824e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0367779163408086, + "grad_norm": 0.0828063040971756, + "learning_rate": 6.0721544774795814e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0372006510113927, + "grad_norm": 0.09549610316753387, + "learning_rate": 6.070531208080264e-05, + "loss": 0.3704, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0376233856819768, + "grad_norm": 0.0940566286444664, + "learning_rate": 6.0689078204171953e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0380461203525606, + "grad_norm": 0.09131242334842682, + "learning_rate": 6.067284314669716e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0384688550231447, + "grad_norm": 0.08050619065761566, + "learning_rate": 6.065660691017175e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0388915896937287, + "grad_norm": 0.08735200017690659, + "learning_rate": 6.0640369496389406e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0393143243643128, + "grad_norm": 0.08399651944637299, + "learning_rate": 6.062413090714392e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0397370590348967, + "grad_norm": 0.09953972697257996, + "learning_rate": 6.060789114422913e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0401597937054807, + "grad_norm": 0.09259029477834702, + "learning_rate": 6.059165020943916e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0405825283760648, + "grad_norm": 0.07287124544382095, + "learning_rate": 6.057540810456812e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0410052630466489, + "grad_norm": 0.08315343409776688, + "learning_rate": 6.055916483141034e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0414279977172327, + "grad_norm": 0.08975405246019363, + "learning_rate": 6.054292039176024e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0418507323878168, + "grad_norm": 0.09195095300674438, + "learning_rate": 6.052667478741235e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 40990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0422734670584008, + "grad_norm": 0.09086114913225174, + "learning_rate": 6.05104280201614e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.042696201728985, + "grad_norm": 0.08545536547899246, + "learning_rate": 6.0494180091802176e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0431189363995688, + "grad_norm": 0.09223129600286484, + "learning_rate": 6.047793100412964e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0435416710701528, + "grad_norm": 0.09541522711515427, + "learning_rate": 6.046168075893882e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.043964405740737, + "grad_norm": 0.08608562499284744, + "learning_rate": 6.0445429358024965e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0443871404113207, + "grad_norm": 0.07962255924940109, + "learning_rate": 6.0429176803183354e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0448098750819048, + "grad_norm": 0.09127944707870483, + "learning_rate": 6.0412923096209473e-05, + "loss": 0.3503, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0452326097524889, + "grad_norm": 0.10218477994203568, + "learning_rate": 6.039666823889889e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.045655344423073, + "grad_norm": 0.08431293815374374, + "learning_rate": 6.0380412233047314e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0460780790936568, + "grad_norm": 0.08360455930233002, + "learning_rate": 6.036415508045057e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0465008137642409, + "grad_norm": 0.08407074958086014, + "learning_rate": 6.034789678290461e-05, + "loss": 0.3531, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.046923548434825, + "grad_norm": 0.09539264440536499, + "learning_rate": 6.033163734220557e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.047346283105409, + "grad_norm": 0.11859599500894547, + "learning_rate": 6.031537676014961e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0477690177759929, + "grad_norm": 0.08759909123182297, + "learning_rate": 6.0299115038533095e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.048191752446577, + "grad_norm": 0.09407926350831985, + "learning_rate": 6.028285217915248e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.048614487117161, + "grad_norm": 0.11199334263801575, + "learning_rate": 6.026658818380437e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0490372217877448, + "grad_norm": 0.08602666109800339, + "learning_rate": 6.0250323054285465e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.049459956458329, + "grad_norm": 0.12432131916284561, + "learning_rate": 6.02340567923926e-05, + "loss": 0.3691, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.049882691128913, + "grad_norm": 0.07516790181398392, + "learning_rate": 6.021778939992277e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.050305425799497, + "grad_norm": 0.09726890921592712, + "learning_rate": 6.020152087867305e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0507281604700809, + "grad_norm": 0.09622564166784286, + "learning_rate": 6.018525123044067e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.051150895140665, + "grad_norm": 0.1173269972205162, + "learning_rate": 6.016898045702294e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.051573629811249, + "grad_norm": 0.0973401591181755, + "learning_rate": 6.0152708560217365e-05, + "loss": 0.3701, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.051996364481833, + "grad_norm": 0.09150896966457367, + "learning_rate": 6.013643554182149e-05, + "loss": 0.3701, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.052419099152417, + "grad_norm": 0.08779557049274445, + "learning_rate": 6.012016140363308e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.052841833823001, + "grad_norm": 0.09864025563001633, + "learning_rate": 6.010388614744993e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.053264568493585, + "grad_norm": 0.091642826795578, + "learning_rate": 6.008760977507002e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.053687303164169, + "grad_norm": 0.0969335064291954, + "learning_rate": 6.007133228829143e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.054110037834753, + "grad_norm": 0.08621793985366821, + "learning_rate": 6.005505368891235e-05, + "loss": 0.3553, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.054532772505337, + "grad_norm": 0.09021608531475067, + "learning_rate": 6.003877397873115e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0549555071759211, + "grad_norm": 0.08650938421487808, + "learning_rate": 6.002249315954624e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.055378241846505, + "grad_norm": 0.09626170992851257, + "learning_rate": 6.000621123315622e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.055800976517089, + "grad_norm": 0.08983534574508667, + "learning_rate": 5.998992820135978e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.056223711187673, + "grad_norm": 0.1043412834405899, + "learning_rate": 5.997364406595576e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0566464458582572, + "grad_norm": 0.08873516321182251, + "learning_rate": 5.995735882874306e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.057069180528841, + "grad_norm": 0.1136532574892044, + "learning_rate": 5.994107249152077e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.057491915199425, + "grad_norm": 0.10337638854980469, + "learning_rate": 5.9924785056088074e-05, + "loss": 0.3561, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0579146498700092, + "grad_norm": 0.10853567719459534, + "learning_rate": 5.990849652424426e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.058337384540593, + "grad_norm": 0.07910740375518799, + "learning_rate": 5.989220689778878e-05, + "loss": 0.3532, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.058760119211177, + "grad_norm": 0.11634808033704758, + "learning_rate": 5.9875916178521176e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0591828538817611, + "grad_norm": 0.08834295719861984, + "learning_rate": 5.985962436824111e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0596055885523452, + "grad_norm": 0.09460067003965378, + "learning_rate": 5.984333146874835e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.060028323222929, + "grad_norm": 0.13082462549209595, + "learning_rate": 5.982703748184286e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0604510578935131, + "grad_norm": 0.09349897503852844, + "learning_rate": 5.9810742409324614e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0608737925640972, + "grad_norm": 0.0990620106458664, + "learning_rate": 5.97944462529938e-05, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0612965272346813, + "grad_norm": 0.09582630544900894, + "learning_rate": 5.9778149014650665e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.061719261905265, + "grad_norm": 0.08677355200052261, + "learning_rate": 5.976185069609561e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0621419965758492, + "grad_norm": 0.07105392217636108, + "learning_rate": 5.974555129912914e-05, + "loss": 0.3535, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0625647312464332, + "grad_norm": 0.09148695319890976, + "learning_rate": 5.972925082555189e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.062987465917017, + "grad_norm": 0.08264856040477753, + "learning_rate": 5.9712949277164586e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0634102005876012, + "grad_norm": 0.08057369291782379, + "learning_rate": 5.969664665576811e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0638329352581852, + "grad_norm": 0.0915549024939537, + "learning_rate": 5.968034296316345e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0642556699287693, + "grad_norm": 0.08151998370885849, + "learning_rate": 5.9664038201151684e-05, + "loss": 0.3546, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0646784045993531, + "grad_norm": 0.0920330360531807, + "learning_rate": 5.964773237153404e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0651011392699372, + "grad_norm": 0.1065903753042221, + "learning_rate": 5.963142547611188e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0655238739405213, + "grad_norm": 0.0902526006102562, + "learning_rate": 5.9615117516686646e-05, + "loss": 0.3561, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0659466086111054, + "grad_norm": 0.08871868252754211, + "learning_rate": 5.959880849505989e-05, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0663693432816892, + "grad_norm": 0.0900803953409195, + "learning_rate": 5.9582498413033325e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0667920779522733, + "grad_norm": 0.08739163726568222, + "learning_rate": 5.956618727240877e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0672148126228573, + "grad_norm": 0.10680422931909561, + "learning_rate": 5.9549875074988114e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0676375472934412, + "grad_norm": 0.10155176371335983, + "learning_rate": 5.9533561822573436e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0680602819640252, + "grad_norm": 0.08502600342035294, + "learning_rate": 5.9517247516966856e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0684830166346093, + "grad_norm": 0.09731502830982208, + "learning_rate": 5.950093215997069e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0689057513051934, + "grad_norm": 0.09344518929719925, + "learning_rate": 5.9484615753387286e-05, + "loss": 0.3702, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0693284859757772, + "grad_norm": 0.09779806435108185, + "learning_rate": 5.946829829901919e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0697512206463613, + "grad_norm": 0.11047535389661789, + "learning_rate": 5.9451979798669e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0701739553169454, + "grad_norm": 0.09288278222084045, + "learning_rate": 5.943566025413947e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0705966899875294, + "grad_norm": 0.08501014113426208, + "learning_rate": 5.941933966723342e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0710194246581133, + "grad_norm": 0.09402347356081009, + "learning_rate": 5.9403018039753854e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0714421593286974, + "grad_norm": 0.09844399243593216, + "learning_rate": 5.938669537350385e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0718648939992814, + "grad_norm": 0.07503347098827362, + "learning_rate": 5.93703716702866e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0722876286698653, + "grad_norm": 0.08970875293016434, + "learning_rate": 5.935404693190539e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0727103633404493, + "grad_norm": 0.09426885843276978, + "learning_rate": 5.9337721160163695e-05, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0731330980110334, + "grad_norm": 0.08733231574296951, + "learning_rate": 5.932139435686503e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0735558326816175, + "grad_norm": 0.08551807701587677, + "learning_rate": 5.930506652381306e-05, + "loss": 0.368, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0739785673522013, + "grad_norm": 0.08163776248693466, + "learning_rate": 5.928873766281152e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0744013020227854, + "grad_norm": 0.09189893305301666, + "learning_rate": 5.9272407775664354e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0748240366933695, + "grad_norm": 0.11384383589029312, + "learning_rate": 5.925607686417549e-05, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0752467713639535, + "grad_norm": 0.10938870161771774, + "learning_rate": 5.92397449301491e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0756695060345374, + "grad_norm": 0.09407839924097061, + "learning_rate": 5.9223411975389355e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0760922407051214, + "grad_norm": 0.07880797237157822, + "learning_rate": 5.920707800170062e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0765149753757055, + "grad_norm": 0.08349015563726425, + "learning_rate": 5.919074301088733e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0769377100462894, + "grad_norm": 0.08876492083072662, + "learning_rate": 5.917440700475405e-05, + "loss": 0.373, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0773604447168734, + "grad_norm": 0.09067980945110321, + "learning_rate": 5.915806998510544e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0777831793874575, + "grad_norm": 0.11044806241989136, + "learning_rate": 5.9141731953746306e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0782059140580416, + "grad_norm": 0.09427513927221298, + "learning_rate": 5.9125392912481516e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0786286487286254, + "grad_norm": 0.08387494087219238, + "learning_rate": 5.910905286311608e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0790513833992095, + "grad_norm": 0.09889518469572067, + "learning_rate": 5.909271180745516e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0794741180697935, + "grad_norm": 0.12332729250192642, + "learning_rate": 5.907636974730393e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0798968527403776, + "grad_norm": 0.09234867990016937, + "learning_rate": 5.906002668446775e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0803195874109615, + "grad_norm": 0.09530169516801834, + "learning_rate": 5.904368262075208e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0807423220815455, + "grad_norm": 0.08110204339027405, + "learning_rate": 5.90273375579625e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0811650567521296, + "grad_norm": 0.09738267958164215, + "learning_rate": 5.901099149790463e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0815877914227134, + "grad_norm": 0.07645143568515778, + "learning_rate": 5.899464444238428e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0820105260932975, + "grad_norm": 0.08882393687963486, + "learning_rate": 5.897829639320736e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0824332607638816, + "grad_norm": 0.08917392790317535, + "learning_rate": 5.896194735217984e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0828559954344656, + "grad_norm": 0.09216690063476562, + "learning_rate": 5.894559732110786e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0832787301050495, + "grad_norm": 0.09369371086359024, + "learning_rate": 5.892924630179761e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0837014647756336, + "grad_norm": 0.08938176184892654, + "learning_rate": 5.891289429605546e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0841241994462176, + "grad_norm": 0.09979899227619171, + "learning_rate": 5.88965413056878e-05, + "loss": 0.368, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 41990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0845469341168017, + "grad_norm": 0.08132889866828918, + "learning_rate": 5.888018733250122e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0849696687873855, + "grad_norm": 0.09677410125732422, + "learning_rate": 5.886383237830236e-05, + "loss": 0.3692, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0853924034579696, + "grad_norm": 0.09996481239795685, + "learning_rate": 5.8847476444898e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0858151381285537, + "grad_norm": 0.07656709849834442, + "learning_rate": 5.8831119534094984e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0862378727991375, + "grad_norm": 0.08868414163589478, + "learning_rate": 5.881476164770031e-05, + "loss": 0.3552, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0866606074697216, + "grad_norm": 0.08129125833511353, + "learning_rate": 5.8798402787521064e-05, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0870833421403057, + "grad_norm": 0.08745982497930527, + "learning_rate": 5.878204295536446e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0875060768108897, + "grad_norm": 0.10812318325042725, + "learning_rate": 5.876568215303777e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0879288114814736, + "grad_norm": 0.09231845289468765, + "learning_rate": 5.8749320382348414e-05, + "loss": 0.3696, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0883515461520576, + "grad_norm": 0.08286385238170624, + "learning_rate": 5.873295764510395e-05, + "loss": 0.352, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0887742808226417, + "grad_norm": 0.13470399379730225, + "learning_rate": 5.871659394311194e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0891970154932258, + "grad_norm": 0.11670459806919098, + "learning_rate": 5.870022927818015e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0896197501638096, + "grad_norm": 0.08477222919464111, + "learning_rate": 5.8683863652116424e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0900424848343937, + "grad_norm": 0.08517004549503326, + "learning_rate": 5.866749706672867e-05, + "loss": 0.3514, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0904652195049778, + "grad_norm": 0.07730690389871597, + "learning_rate": 5.8651129523824986e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0908879541755616, + "grad_norm": 0.08122383803129196, + "learning_rate": 5.863476102521349e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0913106888461457, + "grad_norm": 0.08890222758054733, + "learning_rate": 5.861839157270247e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0917334235167298, + "grad_norm": 0.10494726896286011, + "learning_rate": 5.8602021168100265e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0921561581873138, + "grad_norm": 0.09268008917570114, + "learning_rate": 5.858564981321537e-05, + "loss": 0.373, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0925788928578977, + "grad_norm": 0.08740192651748657, + "learning_rate": 5.856927750985634e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0930016275284817, + "grad_norm": 0.10004998743534088, + "learning_rate": 5.855290425983189e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0934243621990658, + "grad_norm": 0.08497469872236252, + "learning_rate": 5.853653006495077e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0938470968696499, + "grad_norm": 0.10791978240013123, + "learning_rate": 5.8520154927021884e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0942698315402337, + "grad_norm": 0.0863315686583519, + "learning_rate": 5.850377884785424e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0946925662108178, + "grad_norm": 0.09651383012533188, + "learning_rate": 5.8487401829256925e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0951153008814019, + "grad_norm": 0.11064164340496063, + "learning_rate": 5.8471023873039135e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0955380355519857, + "grad_norm": 0.09269340336322784, + "learning_rate": 5.8454644981010176e-05, + "loss": 0.3563, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0959607702225698, + "grad_norm": 0.08166880160570145, + "learning_rate": 5.84382651549795e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0963835048931538, + "grad_norm": 0.08989539742469788, + "learning_rate": 5.842188439675654e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.096806239563738, + "grad_norm": 0.07753394544124603, + "learning_rate": 5.840550270815097e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0972289742343218, + "grad_norm": 0.08984547853469849, + "learning_rate": 5.8389120090972505e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0976517089049058, + "grad_norm": 0.09415189176797867, + "learning_rate": 5.8372736547030936e-05, + "loss": 0.354, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.09807444357549, + "grad_norm": 0.11404819041490555, + "learning_rate": 5.8356352078136226e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.098497178246074, + "grad_norm": 0.1007344126701355, + "learning_rate": 5.8339966686098355e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0989199129166578, + "grad_norm": 0.121720090508461, + "learning_rate": 5.8323580372727494e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.0993426475872419, + "grad_norm": 0.08154258877038956, + "learning_rate": 5.830719313983384e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.099765382257826, + "grad_norm": 0.08809786289930344, + "learning_rate": 5.829080498922774e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1001881169284098, + "grad_norm": 0.09259792417287827, + "learning_rate": 5.827441592271962e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1006108515989939, + "grad_norm": 0.08810403198003769, + "learning_rate": 5.825802594212002e-05, + "loss": 0.371, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.101033586269578, + "grad_norm": 0.08113130927085876, + "learning_rate": 5.8241635049239574e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.101456320940162, + "grad_norm": 0.08548571914434433, + "learning_rate": 5.822524324588901e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1018790556107458, + "grad_norm": 0.09750810265541077, + "learning_rate": 5.820885053387917e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.10230179028133, + "grad_norm": 0.09221397340297699, + "learning_rate": 5.819245691502099e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.102724524951914, + "grad_norm": 0.07666308432817459, + "learning_rate": 5.817606239112548e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.103147259622498, + "grad_norm": 0.08969339728355408, + "learning_rate": 5.8159666964003825e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.103569994293082, + "grad_norm": 0.07648283988237381, + "learning_rate": 5.814327063546724e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.103992728963666, + "grad_norm": 0.10156691819429398, + "learning_rate": 5.8126873407327045e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.10441546363425, + "grad_norm": 0.09829109907150269, + "learning_rate": 5.811047528139468e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1048381983048339, + "grad_norm": 0.07361382991075516, + "learning_rate": 5.809407625948169e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.105260932975418, + "grad_norm": 0.10071655362844467, + "learning_rate": 5.807767634339972e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.105683667646002, + "grad_norm": 0.09671303629875183, + "learning_rate": 5.806127553496047e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.106106402316586, + "grad_norm": 0.1048320010304451, + "learning_rate": 5.8044873835975776e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.10652913698717, + "grad_norm": 0.08205673098564148, + "learning_rate": 5.80284712482576e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.106951871657754, + "grad_norm": 0.09890247881412506, + "learning_rate": 5.801206777361793e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.107374606328338, + "grad_norm": 0.09042614698410034, + "learning_rate": 5.799566341386893e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1077973409989221, + "grad_norm": 0.08697666972875595, + "learning_rate": 5.797925817082277e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.108220075669506, + "grad_norm": 0.09246934950351715, + "learning_rate": 5.796285204629182e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.10864281034009, + "grad_norm": 0.11421091854572296, + "learning_rate": 5.794644504208847e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1090655450106741, + "grad_norm": 0.08384612947702408, + "learning_rate": 5.793003716002525e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.109488279681258, + "grad_norm": 0.08602730184793472, + "learning_rate": 5.7913628401914766e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.109911014351842, + "grad_norm": 0.10790293663740158, + "learning_rate": 5.7897218769569725e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.110333749022426, + "grad_norm": 0.11337929964065552, + "learning_rate": 5.788080826480292e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1107564836930102, + "grad_norm": 0.10659360140562057, + "learning_rate": 5.7864396889427275e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.111179218363594, + "grad_norm": 0.09413287043571472, + "learning_rate": 5.784798464525579e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.111601953034178, + "grad_norm": 0.07798313349485397, + "learning_rate": 5.783157153410155e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1120246877047621, + "grad_norm": 0.09905228018760681, + "learning_rate": 5.781515755777772e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1124474223753462, + "grad_norm": 0.09580715000629425, + "learning_rate": 5.7798742718097607e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.11287015704593, + "grad_norm": 0.08952053636312485, + "learning_rate": 5.778232701687463e-05, + "loss": 0.3544, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1132928917165141, + "grad_norm": 0.11047738045454025, + "learning_rate": 5.776591045592219e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1137156263870982, + "grad_norm": 0.1066151112318039, + "learning_rate": 5.7749493037053904e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.114138361057682, + "grad_norm": 0.09374792128801346, + "learning_rate": 5.773307476208344e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1145610957282661, + "grad_norm": 0.0863880068063736, + "learning_rate": 5.7716655632824535e-05, + "loss": 0.3538, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1149838303988502, + "grad_norm": 0.09020383656024933, + "learning_rate": 5.770023565109106e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1154065650694343, + "grad_norm": 0.10169560462236404, + "learning_rate": 5.768381481869695e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.115829299740018, + "grad_norm": 0.08483520895242691, + "learning_rate": 5.766739313745627e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1162520344106022, + "grad_norm": 0.10484199970960617, + "learning_rate": 5.765097060918313e-05, + "loss": 0.3538, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1166747690811862, + "grad_norm": 0.09057670831680298, + "learning_rate": 5.763454723569178e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1170975037517703, + "grad_norm": 0.09867528080940247, + "learning_rate": 5.761812301879652e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1175202384223542, + "grad_norm": 0.08529260754585266, + "learning_rate": 5.7601697960311806e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1179429730929382, + "grad_norm": 0.10013540834188461, + "learning_rate": 5.758527206205211e-05, + "loss": 0.3685, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1183657077635223, + "grad_norm": 0.0843067616224289, + "learning_rate": 5.7568845325832047e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1187884424341061, + "grad_norm": 0.10242714732885361, + "learning_rate": 5.7552417753466315e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1192111771046902, + "grad_norm": 0.11806081980466843, + "learning_rate": 5.7535989346769714e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1196339117752743, + "grad_norm": 0.0792735368013382, + "learning_rate": 5.751956010755709e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1200566464458583, + "grad_norm": 0.0868421196937561, + "learning_rate": 5.750313003764343e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1204793811164422, + "grad_norm": 0.0788443461060524, + "learning_rate": 5.748669913884382e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1209021157870263, + "grad_norm": 0.10037576407194138, + "learning_rate": 5.747026741297338e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1213248504576103, + "grad_norm": 0.09770877659320831, + "learning_rate": 5.7453834861847366e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1217475851281944, + "grad_norm": 0.09275007247924805, + "learning_rate": 5.743740148728114e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1221703197987782, + "grad_norm": 0.12286175042390823, + "learning_rate": 5.742096729109009e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1225930544693623, + "grad_norm": 0.08838073164224625, + "learning_rate": 5.740453227508976e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1230157891399464, + "grad_norm": 0.0885235145688057, + "learning_rate": 5.738809644109575e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1234385238105302, + "grad_norm": 0.10003279894590378, + "learning_rate": 5.7371659790923783e-05, + "loss": 0.352, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1238612584811143, + "grad_norm": 0.07690471410751343, + "learning_rate": 5.73552223263896e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1242839931516984, + "grad_norm": 0.0930933877825737, + "learning_rate": 5.733878404930913e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1247067278222824, + "grad_norm": 0.09846707433462143, + "learning_rate": 5.732234496149832e-05, + "loss": 0.3706, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1251294624928663, + "grad_norm": 0.08327905088663101, + "learning_rate": 5.730590506477325e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1255521971634503, + "grad_norm": 0.1103440374135971, + "learning_rate": 5.728946436095003e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1259749318340344, + "grad_norm": 0.11522388458251953, + "learning_rate": 5.727302285184491e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1263976665046185, + "grad_norm": 0.10530074685811996, + "learning_rate": 5.7256580539274255e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 42990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1268204011752023, + "grad_norm": 0.0842413604259491, + "learning_rate": 5.724013742505445e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1272431358457864, + "grad_norm": 0.11099981516599655, + "learning_rate": 5.722369351100199e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1276658705163705, + "grad_norm": 0.09051145613193512, + "learning_rate": 5.7207248798933464e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1280886051869543, + "grad_norm": 0.08281892538070679, + "learning_rate": 5.7190803290665607e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1285113398575384, + "grad_norm": 0.10113485157489777, + "learning_rate": 5.717435698801512e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1289340745281224, + "grad_norm": 0.08475875109434128, + "learning_rate": 5.715790989279889e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1293568091987065, + "grad_norm": 0.09174630790948868, + "learning_rate": 5.714146200683386e-05, + "loss": 0.3513, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1297795438692904, + "grad_norm": 0.11312538385391235, + "learning_rate": 5.712501333193706e-05, + "loss": 0.3526, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1302022785398744, + "grad_norm": 0.0939832478761673, + "learning_rate": 5.7108563869925615e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1306250132104585, + "grad_norm": 0.07871294766664505, + "learning_rate": 5.709211362261671e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1310477478810426, + "grad_norm": 0.1092599406838417, + "learning_rate": 5.707566259182766e-05, + "loss": 0.3704, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1314704825516264, + "grad_norm": 0.0892147645354271, + "learning_rate": 5.705921077937583e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1318932172222105, + "grad_norm": 0.1103362962603569, + "learning_rate": 5.704275818707869e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1323159518927945, + "grad_norm": 0.08319982141256332, + "learning_rate": 5.702630481675379e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1327386865633784, + "grad_norm": 0.12361940741539001, + "learning_rate": 5.700985067021878e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1331614212339625, + "grad_norm": 0.09348434954881668, + "learning_rate": 5.6993395749291344e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1335841559045465, + "grad_norm": 0.09410406649112701, + "learning_rate": 5.6976940055789326e-05, + "loss": 0.3678, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1340068905751306, + "grad_norm": 0.08082722872495651, + "learning_rate": 5.696048359153062e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1344296252457144, + "grad_norm": 0.09029616415500641, + "learning_rate": 5.69440263583332e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1348523599162985, + "grad_norm": 0.10757911950349808, + "learning_rate": 5.692756835801512e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1352750945868826, + "grad_norm": 0.08651499450206757, + "learning_rate": 5.691110959239452e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1356978292574667, + "grad_norm": 0.1047215536236763, + "learning_rate": 5.6894650063289664e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1361205639280505, + "grad_norm": 0.09154172986745834, + "learning_rate": 5.6878189772518866e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1365432985986346, + "grad_norm": 0.10046570003032684, + "learning_rate": 5.686172872190049e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1369660332692186, + "grad_norm": 0.09524280577898026, + "learning_rate": 5.684526691325304e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1373887679398025, + "grad_norm": 0.09783512353897095, + "learning_rate": 5.682880434839511e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1378115026103865, + "grad_norm": 0.0923166275024414, + "learning_rate": 5.681234102914533e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1382342372809706, + "grad_norm": 0.10986103117465973, + "learning_rate": 5.6795876957322435e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1386569719515547, + "grad_norm": 0.09028996527194977, + "learning_rate": 5.677941213474527e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1390797066221385, + "grad_norm": 0.09778852760791779, + "learning_rate": 5.6762946563232685e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1395024412927226, + "grad_norm": 0.08611955493688583, + "learning_rate": 5.6746480244603716e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1399251759633067, + "grad_norm": 0.0904955342411995, + "learning_rate": 5.673001318067741e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1403479106338907, + "grad_norm": 0.09032813459634781, + "learning_rate": 5.671354537327293e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1407706453044746, + "grad_norm": 0.10195748507976532, + "learning_rate": 5.669707682420947e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1411933799750587, + "grad_norm": 0.09257902204990387, + "learning_rate": 5.66806075353064e-05, + "loss": 0.3551, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1416161146456427, + "grad_norm": 0.0808587297797203, + "learning_rate": 5.666413750838306e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1420388493162266, + "grad_norm": 0.08692534267902374, + "learning_rate": 5.664766674525897e-05, + "loss": 0.3523, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1424615839868106, + "grad_norm": 0.0949949100613594, + "learning_rate": 5.663119524775367e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1428843186573947, + "grad_norm": 0.08268823474645615, + "learning_rate": 5.661472301768679e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1433070533279788, + "grad_norm": 0.08389247208833694, + "learning_rate": 5.659825005687808e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1437297879985626, + "grad_norm": 0.0854337140917778, + "learning_rate": 5.658177636714731e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1441525226691467, + "grad_norm": 0.08652301877737045, + "learning_rate": 5.656530195031437e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1445752573397308, + "grad_norm": 0.07721663266420364, + "learning_rate": 5.654882680819924e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1449979920103148, + "grad_norm": 0.13384346663951874, + "learning_rate": 5.653235094262197e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1454207266808987, + "grad_norm": 0.10121861100196838, + "learning_rate": 5.651587435540263e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1458434613514827, + "grad_norm": 0.08196169883012772, + "learning_rate": 5.649939704836147e-05, + "loss": 0.3563, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1462661960220668, + "grad_norm": 0.10233590006828308, + "learning_rate": 5.648291902331875e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1466889306926507, + "grad_norm": 0.0942339077591896, + "learning_rate": 5.646644028209484e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1471116653632347, + "grad_norm": 0.09965990483760834, + "learning_rate": 5.644996082651017e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1475344000338188, + "grad_norm": 0.09043484926223755, + "learning_rate": 5.643348065838527e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1479571347044029, + "grad_norm": 0.08775683492422104, + "learning_rate": 5.6416999779540735e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1483798693749867, + "grad_norm": 0.10358236730098724, + "learning_rate": 5.6400518191797234e-05, + "loss": 0.3556, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1488026040455708, + "grad_norm": 0.09599031507968903, + "learning_rate": 5.638403589697553e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1492253387161548, + "grad_norm": 0.10094565153121948, + "learning_rate": 5.636755289689645e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.149648073386739, + "grad_norm": 0.09448839724063873, + "learning_rate": 5.6351069193380914e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1500708080573228, + "grad_norm": 0.09352319687604904, + "learning_rate": 5.633458478824989e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1504935427279068, + "grad_norm": 0.09051396697759628, + "learning_rate": 5.631809968332445e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.150916277398491, + "grad_norm": 0.08546216785907745, + "learning_rate": 5.630161388042576e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1513390120690747, + "grad_norm": 0.07439272850751877, + "learning_rate": 5.628512738137503e-05, + "loss": 0.3522, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1517617467396588, + "grad_norm": 0.09892278164625168, + "learning_rate": 5.626864018799353e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1521844814102429, + "grad_norm": 0.10633665323257446, + "learning_rate": 5.6252152302102654e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.152607216080827, + "grad_norm": 0.08853457868099213, + "learning_rate": 5.623566372552388e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1530299507514108, + "grad_norm": 0.09226633608341217, + "learning_rate": 5.621917446007867e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1534526854219949, + "grad_norm": 0.11990613490343094, + "learning_rate": 5.6202684507588674e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.153875420092579, + "grad_norm": 0.10949277132749557, + "learning_rate": 5.618619386987556e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.154298154763163, + "grad_norm": 0.10153020173311234, + "learning_rate": 5.616970254876108e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1547208894337468, + "grad_norm": 0.11935162544250488, + "learning_rate": 5.615321054606708e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.155143624104331, + "grad_norm": 0.08455776423215866, + "learning_rate": 5.613671786361544e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.155566358774915, + "grad_norm": 0.08830656111240387, + "learning_rate": 5.6120224503228146e-05, + "loss": 0.3537, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1559890934454988, + "grad_norm": 0.09401902556419373, + "learning_rate": 5.6103730466727255e-05, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.156411828116083, + "grad_norm": 0.10673562437295914, + "learning_rate": 5.608723575593491e-05, + "loss": 0.3538, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.156834562786667, + "grad_norm": 0.10768885165452957, + "learning_rate": 5.6070740372673295e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.157257297457251, + "grad_norm": 0.1002105250954628, + "learning_rate": 5.605424431876469e-05, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1576800321278349, + "grad_norm": 0.11399950832128525, + "learning_rate": 5.6037747596031466e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.158102766798419, + "grad_norm": 0.09223894774913788, + "learning_rate": 5.602125020629603e-05, + "loss": 0.3548, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.158525501469003, + "grad_norm": 0.1082492396235466, + "learning_rate": 5.600475215138089e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.158948236139587, + "grad_norm": 0.0847020223736763, + "learning_rate": 5.5988253433108626e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.159370970810171, + "grad_norm": 0.09865730255842209, + "learning_rate": 5.597175405330187e-05, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.159793705480755, + "grad_norm": 0.08704496175050735, + "learning_rate": 5.595525401378335e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.160216440151339, + "grad_norm": 0.09517794847488403, + "learning_rate": 5.593875331637588e-05, + "loss": 0.3553, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.160639174821923, + "grad_norm": 0.09635066986083984, + "learning_rate": 5.5922251962902295e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.161061909492507, + "grad_norm": 0.0946757048368454, + "learning_rate": 5.5905749955185526e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.161484644163091, + "grad_norm": 0.09604890644550323, + "learning_rate": 5.5889247295048595e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1619073788336751, + "grad_norm": 0.10445383191108704, + "learning_rate": 5.5872743984314615e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.162330113504259, + "grad_norm": 0.10738769918680191, + "learning_rate": 5.585624002480671e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.162752848174843, + "grad_norm": 0.09070858359336853, + "learning_rate": 5.583973541834809e-05, + "loss": 0.374, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.163175582845427, + "grad_norm": 0.10406176745891571, + "learning_rate": 5.5823230166762085e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1635983175160112, + "grad_norm": 0.0957663431763649, + "learning_rate": 5.5806724271872044e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.164021052186595, + "grad_norm": 0.10863222926855087, + "learning_rate": 5.579021773550143e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.164443786857179, + "grad_norm": 0.10173186659812927, + "learning_rate": 5.577371055947371e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1648665215277632, + "grad_norm": 0.10629399120807648, + "learning_rate": 5.575720274561252e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.165289256198347, + "grad_norm": 0.10071703791618347, + "learning_rate": 5.574069429574147e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.165711990868931, + "grad_norm": 0.07310228794813156, + "learning_rate": 5.572418521168429e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1661347255395151, + "grad_norm": 0.12707041203975677, + "learning_rate": 5.570767549526478e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1665574602100992, + "grad_norm": 0.11612638086080551, + "learning_rate": 5.569116514830681e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.166980194880683, + "grad_norm": 0.08557753264904022, + "learning_rate": 5.567465417263429e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1674029295512671, + "grad_norm": 0.11861085146665573, + "learning_rate": 5.565814257007123e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1678256642218512, + "grad_norm": 0.08240491151809692, + "learning_rate": 5.564163034244171e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1682483988924353, + "grad_norm": 0.08242667466402054, + "learning_rate": 5.5625117491569855e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.168671133563019, + "grad_norm": 0.08942780643701553, + "learning_rate": 5.560860401927988e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 43990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1690938682336032, + "grad_norm": 0.09014695882797241, + "learning_rate": 5.5592089927396054e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1695166029041872, + "grad_norm": 0.09813050180673599, + "learning_rate": 5.557557521774275e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.169939337574771, + "grad_norm": 0.08341061323881149, + "learning_rate": 5.555905989214435e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1703620722453552, + "grad_norm": 0.08964329212903976, + "learning_rate": 5.554254395242533e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1707848069159392, + "grad_norm": 0.09800016134977341, + "learning_rate": 5.5526027400410266e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1712075415865233, + "grad_norm": 0.12027203291654587, + "learning_rate": 5.550951023792377e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1716302762571071, + "grad_norm": 0.09777501225471497, + "learning_rate": 5.5492992466790516e-05, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1720530109276912, + "grad_norm": 0.10338729619979858, + "learning_rate": 5.547647408883526e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1724757455982753, + "grad_norm": 0.09743473678827286, + "learning_rate": 5.545995510588282e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1728984802688593, + "grad_norm": 0.10196640342473984, + "learning_rate": 5.544343551975808e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1733212149394432, + "grad_norm": 0.08740736544132233, + "learning_rate": 5.542691533228599e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1737439496100273, + "grad_norm": 0.09488692879676819, + "learning_rate": 5.5410394545291586e-05, + "loss": 0.3701, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1741666842806113, + "grad_norm": 0.07999753206968307, + "learning_rate": 5.539387316059994e-05, + "loss": 0.353, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1745894189511952, + "grad_norm": 0.10509753227233887, + "learning_rate": 5.53773511800362e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1750121536217792, + "grad_norm": 0.09074835479259491, + "learning_rate": 5.536082860542557e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1754348882923633, + "grad_norm": 0.11486048251390457, + "learning_rate": 5.534430543859337e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1758576229629474, + "grad_norm": 0.09741701185703278, + "learning_rate": 5.532778168136492e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1762803576335312, + "grad_norm": 0.08313345909118652, + "learning_rate": 5.531125733556562e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1767030923041153, + "grad_norm": 0.1215381994843483, + "learning_rate": 5.529473240302098e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1771258269746994, + "grad_norm": 0.15379953384399414, + "learning_rate": 5.5278206885556526e-05, + "loss": 0.3559, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1775485616452834, + "grad_norm": 0.09904167056083679, + "learning_rate": 5.5261680784997886e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1779712963158673, + "grad_norm": 0.0801088809967041, + "learning_rate": 5.5245154103170685e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1783940309864513, + "grad_norm": 0.09969054162502289, + "learning_rate": 5.52286268419007e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1788167656570354, + "grad_norm": 0.09909475594758987, + "learning_rate": 5.521209900301372e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1792395003276193, + "grad_norm": 0.11171580106019974, + "learning_rate": 5.519557058833561e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1796622349982033, + "grad_norm": 0.10191657394170761, + "learning_rate": 5.517904159969229e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1800849696687874, + "grad_norm": 0.11669538170099258, + "learning_rate": 5.5162512038909765e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1805077043393715, + "grad_norm": 0.12791317701339722, + "learning_rate": 5.514598190781407e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1809304390099553, + "grad_norm": 0.0892268493771553, + "learning_rate": 5.512945120823134e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1813531736805394, + "grad_norm": 0.09096461534500122, + "learning_rate": 5.511291994198774e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1817759083511234, + "grad_norm": 0.10067658126354218, + "learning_rate": 5.509638811090952e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1821986430217075, + "grad_norm": 0.0783545970916748, + "learning_rate": 5.5079855716822995e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1826213776922914, + "grad_norm": 0.1043284609913826, + "learning_rate": 5.50633227615545e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1830441123628754, + "grad_norm": 0.08795084804296494, + "learning_rate": 5.504678924693051e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1834668470334595, + "grad_norm": 0.09091020375490189, + "learning_rate": 5.503025517477749e-05, + "loss": 0.3531, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1838895817040433, + "grad_norm": 0.12152696400880814, + "learning_rate": 5.501372054692198e-05, + "loss": 0.3723, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1843123163746274, + "grad_norm": 0.09947801381349564, + "learning_rate": 5.4997185365190606e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1847350510452115, + "grad_norm": 0.08620429039001465, + "learning_rate": 5.498064963141005e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1851577857157956, + "grad_norm": 0.10063761472702026, + "learning_rate": 5.496411334740705e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1855805203863794, + "grad_norm": 0.0847126916050911, + "learning_rate": 5.4947576515008395e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1860032550569635, + "grad_norm": 0.09166482090950012, + "learning_rate": 5.493103913604093e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1864259897275475, + "grad_norm": 0.10883255302906036, + "learning_rate": 5.491450121233159e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1868487243981316, + "grad_norm": 0.08604561537504196, + "learning_rate": 5.489796274570737e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1872714590687155, + "grad_norm": 0.11310838162899017, + "learning_rate": 5.488142373799525e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1876941937392995, + "grad_norm": 0.12493573874235153, + "learning_rate": 5.486488419102239e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1881169284098836, + "grad_norm": 0.09482189267873764, + "learning_rate": 5.484834410661591e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1885396630804674, + "grad_norm": 0.11083784699440002, + "learning_rate": 5.483180348660304e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1889623977510515, + "grad_norm": 0.08381123840808868, + "learning_rate": 5.4815262332811056e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1893851324216356, + "grad_norm": 0.08198591321706772, + "learning_rate": 5.4798720647067295e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1898078670922196, + "grad_norm": 0.0874655619263649, + "learning_rate": 5.478217843119913e-05, + "loss": 0.368, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1902306017628035, + "grad_norm": 0.11344427615404129, + "learning_rate": 5.476563568703403e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1906533364333876, + "grad_norm": 0.08820348232984543, + "learning_rate": 5.4749092416399525e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1910760711039716, + "grad_norm": 0.09886840730905533, + "learning_rate": 5.473254862112316e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1914988057745557, + "grad_norm": 0.09055047482252121, + "learning_rate": 5.4716004303032556e-05, + "loss": 0.3542, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1919215404451395, + "grad_norm": 0.09779883176088333, + "learning_rate": 5.4699459463955393e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1923442751157236, + "grad_norm": 0.09080412238836288, + "learning_rate": 5.468291410571944e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1927670097863077, + "grad_norm": 0.10180157423019409, + "learning_rate": 5.46663682301525e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1931897444568915, + "grad_norm": 0.08997660875320435, + "learning_rate": 5.464982183908238e-05, + "loss": 0.3757, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1936124791274756, + "grad_norm": 0.09144040942192078, + "learning_rate": 5.463327493433703e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1940352137980597, + "grad_norm": 0.10182417929172516, + "learning_rate": 5.461672751774444e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1944579484686437, + "grad_norm": 0.09746097028255463, + "learning_rate": 5.460017959113259e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1948806831392276, + "grad_norm": 0.08633415400981903, + "learning_rate": 5.458363115632958e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1953034178098116, + "grad_norm": 0.08104413002729416, + "learning_rate": 5.4567082215163566e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1957261524803957, + "grad_norm": 0.10049859434366226, + "learning_rate": 5.455053276946273e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1961488871509798, + "grad_norm": 0.1187153235077858, + "learning_rate": 5.453398282105533e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1965716218215636, + "grad_norm": 0.09608186781406403, + "learning_rate": 5.451743237176965e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1969943564921477, + "grad_norm": 0.1014750748872757, + "learning_rate": 5.450088142343408e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1974170911627318, + "grad_norm": 0.09627977013587952, + "learning_rate": 5.4484329977877015e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1978398258333156, + "grad_norm": 0.08894477039575577, + "learning_rate": 5.446777803692693e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1982625605038997, + "grad_norm": 0.11362889409065247, + "learning_rate": 5.445122560241237e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1986852951744837, + "grad_norm": 0.09186697006225586, + "learning_rate": 5.4434672676161905e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1991080298450678, + "grad_norm": 0.1007007583975792, + "learning_rate": 5.441811926000416e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1995307645156517, + "grad_norm": 0.08779989928007126, + "learning_rate": 5.440156535576783e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.1999534991862357, + "grad_norm": 0.12672953307628632, + "learning_rate": 5.438501096528168e-05, + "loss": 0.3538, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2003762338568198, + "grad_norm": 0.08267486840486526, + "learning_rate": 5.436845609037448e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2007989685274039, + "grad_norm": 0.088584303855896, + "learning_rate": 5.4351900732875075e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2012217031979877, + "grad_norm": 0.08565649390220642, + "learning_rate": 5.433534489461238e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2016444378685718, + "grad_norm": 0.09635582566261292, + "learning_rate": 5.431878857741538e-05, + "loss": 0.3543, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2020671725391558, + "grad_norm": 0.09639997780323029, + "learning_rate": 5.430223178311306e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2024899072097397, + "grad_norm": 0.09700984507799149, + "learning_rate": 5.4285674513534456e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2029126418803238, + "grad_norm": 0.11319504678249359, + "learning_rate": 5.426911677050872e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2033353765509078, + "grad_norm": 0.09400757402181625, + "learning_rate": 5.425255855586502e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.203758111221492, + "grad_norm": 0.10742656141519547, + "learning_rate": 5.4235999871432556e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2041808458920757, + "grad_norm": 0.10084401071071625, + "learning_rate": 5.4219440719040605e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2046035805626598, + "grad_norm": 0.15057431161403656, + "learning_rate": 5.42028811005185e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2050263152332439, + "grad_norm": 0.1047198697924614, + "learning_rate": 5.418632101769559e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.205449049903828, + "grad_norm": 0.08676180988550186, + "learning_rate": 5.4169760472401335e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2058717845744118, + "grad_norm": 0.08417205512523651, + "learning_rate": 5.41531994664652e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2062945192449959, + "grad_norm": 0.0828075110912323, + "learning_rate": 5.413663800171671e-05, + "loss": 0.3533, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.20671725391558, + "grad_norm": 0.09724697470664978, + "learning_rate": 5.412007607998543e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2071399885861638, + "grad_norm": 0.09442050755023956, + "learning_rate": 5.4103513703101006e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2075627232567478, + "grad_norm": 0.09727151691913605, + "learning_rate": 5.4086950872893116e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.207985457927332, + "grad_norm": 0.09622685611248016, + "learning_rate": 5.40703875911915e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.208408192597916, + "grad_norm": 0.10007715970277786, + "learning_rate": 5.405382385982589e-05, + "loss": 0.3546, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2088309272684998, + "grad_norm": 0.09229162335395813, + "learning_rate": 5.403725968062616e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.209253661939084, + "grad_norm": 0.11281964182853699, + "learning_rate": 5.40206950554222e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.209676396609668, + "grad_norm": 0.08686292171478271, + "learning_rate": 5.400412998604391e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.210099131280252, + "grad_norm": 0.1085188016295433, + "learning_rate": 5.398756447432125e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2105218659508359, + "grad_norm": 0.09697142988443375, + "learning_rate": 5.397099852208427e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.21094460062142, + "grad_norm": 0.10461508482694626, + "learning_rate": 5.395443213116306e-05, + "loss": 0.3701, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 44990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.211367335292004, + "grad_norm": 0.1255318820476532, + "learning_rate": 5.3937865303387715e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2117900699625879, + "grad_norm": 0.09055915474891663, + "learning_rate": 5.3921298040588405e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.212212804633172, + "grad_norm": 0.11885541677474976, + "learning_rate": 5.390473034459538e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.212635539303756, + "grad_norm": 0.10027501732110977, + "learning_rate": 5.388816221723886e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.21305827397434, + "grad_norm": 0.08537711203098297, + "learning_rate": 5.38715936603492e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.213481008644924, + "grad_norm": 0.08508100360631943, + "learning_rate": 5.385502467575675e-05, + "loss": 0.3692, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.213903743315508, + "grad_norm": 0.0886409729719162, + "learning_rate": 5.383845526529192e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.214326477986092, + "grad_norm": 0.09024330228567123, + "learning_rate": 5.382188543078515e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2147492126566761, + "grad_norm": 0.1025211438536644, + "learning_rate": 5.3805315174066964e-05, + "loss": 0.3711, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.21517194732726, + "grad_norm": 0.10792037099599838, + "learning_rate": 5.37887444969679e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.215594681997844, + "grad_norm": 0.099496029317379, + "learning_rate": 5.377217340131857e-05, + "loss": 0.3557, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.216017416668428, + "grad_norm": 0.08639927953481674, + "learning_rate": 5.37556018889496e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.216440151339012, + "grad_norm": 0.09979204833507538, + "learning_rate": 5.373902996169168e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.216862886009596, + "grad_norm": 0.10604370385408401, + "learning_rate": 5.372245762137555e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.21728562068018, + "grad_norm": 0.07948228716850281, + "learning_rate": 5.370588486983199e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2177083553507642, + "grad_norm": 0.08792918175458908, + "learning_rate": 5.368931170889182e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.218131090021348, + "grad_norm": 0.10339033603668213, + "learning_rate": 5.3672738140385915e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.218553824691932, + "grad_norm": 0.1151989996433258, + "learning_rate": 5.365616416614519e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2189765593625161, + "grad_norm": 0.09035161137580872, + "learning_rate": 5.363958978800061e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2193992940331002, + "grad_norm": 0.13772504031658173, + "learning_rate": 5.3623015007783175e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.219822028703684, + "grad_norm": 0.09961637854576111, + "learning_rate": 5.3606439827323916e-05, + "loss": 0.3707, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2202447633742681, + "grad_norm": 0.10429703444242477, + "learning_rate": 5.358986424845397e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2206674980448522, + "grad_norm": 0.1084342896938324, + "learning_rate": 5.357328827300445e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.221090232715436, + "grad_norm": 0.10177828371524811, + "learning_rate": 5.355671190280652e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.22151296738602, + "grad_norm": 0.09512303024530411, + "learning_rate": 5.3540135139691435e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2219357020566042, + "grad_norm": 0.08305928111076355, + "learning_rate": 5.352355798549045e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2223584367271882, + "grad_norm": 0.08270833641290665, + "learning_rate": 5.350698044203487e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.222781171397772, + "grad_norm": 0.0949757844209671, + "learning_rate": 5.349040251115608e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2232039060683562, + "grad_norm": 0.10474893450737, + "learning_rate": 5.347382419468545e-05, + "loss": 0.3511, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2236266407389402, + "grad_norm": 0.09386909008026123, + "learning_rate": 5.345724549445441e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2240493754095243, + "grad_norm": 0.11634019017219543, + "learning_rate": 5.344066641229446e-05, + "loss": 0.371, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2244721100801081, + "grad_norm": 0.08916794508695602, + "learning_rate": 5.342408695003713e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2248948447506922, + "grad_norm": 0.08544261008501053, + "learning_rate": 5.3407507109514e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2253175794212763, + "grad_norm": 0.08829126507043839, + "learning_rate": 5.3390926892556626e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2257403140918601, + "grad_norm": 0.09880044311285019, + "learning_rate": 5.33743463009967e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2261630487624442, + "grad_norm": 0.10821458697319031, + "learning_rate": 5.335776533666592e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2265857834330283, + "grad_norm": 0.10508088022470474, + "learning_rate": 5.3341184001396005e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2270085181036123, + "grad_norm": 0.0923800840973854, + "learning_rate": 5.33246022970187e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2274312527741962, + "grad_norm": 0.09062051773071289, + "learning_rate": 5.330802022536586e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2278539874447802, + "grad_norm": 0.09691043943166733, + "learning_rate": 5.3291437788269336e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2282767221153643, + "grad_norm": 0.10289674997329712, + "learning_rate": 5.327485498756101e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2286994567859484, + "grad_norm": 0.10042490810155869, + "learning_rate": 5.325827182507282e-05, + "loss": 0.3548, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2291221914565322, + "grad_norm": 0.11014240980148315, + "learning_rate": 5.3241688302636736e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2295449261271163, + "grad_norm": 0.09883951395750046, + "learning_rate": 5.322510442208478e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2299676607977004, + "grad_norm": 0.09039386361837387, + "learning_rate": 5.320852018524901e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2303903954682842, + "grad_norm": 0.08904414623975754, + "learning_rate": 5.319193559396153e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2308131301388683, + "grad_norm": 0.10291086882352829, + "learning_rate": 5.3175350650054455e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2312358648094524, + "grad_norm": 0.10142164677381516, + "learning_rate": 5.3158765355359955e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2316585994800364, + "grad_norm": 0.0826110988855362, + "learning_rate": 5.3142179711710235e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2320813341506203, + "grad_norm": 0.0928187370300293, + "learning_rate": 5.3125593720937584e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2325040688212043, + "grad_norm": 0.08681370317935944, + "learning_rate": 5.310900738487426e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2329268034917884, + "grad_norm": 0.11952708661556244, + "learning_rate": 5.3092420705352594e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2333495381623725, + "grad_norm": 0.09829475730657578, + "learning_rate": 5.3075833684204945e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2337722728329563, + "grad_norm": 0.09156789630651474, + "learning_rate": 5.305924632326372e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2341950075035404, + "grad_norm": 0.08890827000141144, + "learning_rate": 5.304265862436137e-05, + "loss": 0.3558, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2346177421741245, + "grad_norm": 0.09244557470083237, + "learning_rate": 5.3026070589330344e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2350404768447083, + "grad_norm": 0.13738976418972015, + "learning_rate": 5.300948222000317e-05, + "loss": 0.3554, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2354632115152924, + "grad_norm": 0.09628726541996002, + "learning_rate": 5.299289351821242e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2358859461858764, + "grad_norm": 0.10712302476167679, + "learning_rate": 5.297630448579065e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2363086808564605, + "grad_norm": 0.09741566330194473, + "learning_rate": 5.2959715124570494e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2367314155270446, + "grad_norm": 0.08223295956850052, + "learning_rate": 5.2943125436384625e-05, + "loss": 0.3557, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2371541501976284, + "grad_norm": 0.08684010803699493, + "learning_rate": 5.2926535423065714e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2375768848682125, + "grad_norm": 0.10489754378795624, + "learning_rate": 5.290994508644651e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2379996195387966, + "grad_norm": 0.1042776107788086, + "learning_rate": 5.2893354428359786e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2384223542093804, + "grad_norm": 0.1054479330778122, + "learning_rate": 5.287676345063835e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2388450888799645, + "grad_norm": 0.10393734276294708, + "learning_rate": 5.2860172155115005e-05, + "loss": 0.3544, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2392678235505485, + "grad_norm": 0.10526377707719803, + "learning_rate": 5.2843580543622664e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2396905582211324, + "grad_norm": 0.08361738175153732, + "learning_rate": 5.282698861799422e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2401132928917165, + "grad_norm": 0.10201571136713028, + "learning_rate": 5.281039638006262e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2405360275623005, + "grad_norm": 0.12510398030281067, + "learning_rate": 5.2793803831660835e-05, + "loss": 0.3699, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2409587622328846, + "grad_norm": 0.0983736664056778, + "learning_rate": 5.277721097462188e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2413814969034687, + "grad_norm": 0.0968593880534172, + "learning_rate": 5.276061781077882e-05, + "loss": 0.3698, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2418042315740525, + "grad_norm": 0.08969403058290482, + "learning_rate": 5.274402434196472e-05, + "loss": 0.3549, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2422269662446366, + "grad_norm": 0.09686990082263947, + "learning_rate": 5.272743057001267e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2426497009152206, + "grad_norm": 0.09388262778520584, + "learning_rate": 5.271083649675586e-05, + "loss": 0.3558, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2430724355858045, + "grad_norm": 0.12199035286903381, + "learning_rate": 5.2694242124027446e-05, + "loss": 0.3698, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2434951702563886, + "grad_norm": 0.08694256097078323, + "learning_rate": 5.267764745366066e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2439179049269726, + "grad_norm": 0.09194976836442947, + "learning_rate": 5.266105248748872e-05, + "loss": 0.3701, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2443406395975565, + "grad_norm": 0.10068871825933456, + "learning_rate": 5.264445722734492e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2447633742681405, + "grad_norm": 0.07985133677721024, + "learning_rate": 5.26278616750626e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2451861089387246, + "grad_norm": 0.08150294423103333, + "learning_rate": 5.261126583247505e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2456088436093087, + "grad_norm": 0.1038246899843216, + "learning_rate": 5.25946697014157e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2460315782798927, + "grad_norm": 0.12003882229328156, + "learning_rate": 5.2578073283717924e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2464543129504766, + "grad_norm": 0.10058058798313141, + "learning_rate": 5.2561476581215166e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2468770476210607, + "grad_norm": 0.0877610519528389, + "learning_rate": 5.254487959574089e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2472997822916447, + "grad_norm": 0.11085722595453262, + "learning_rate": 5.2528282329128645e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2477225169622286, + "grad_norm": 0.08803414553403854, + "learning_rate": 5.251168478321191e-05, + "loss": 0.3709, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2481452516328126, + "grad_norm": 0.0900462195277214, + "learning_rate": 5.249508695982427e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2485679863033967, + "grad_norm": 0.08447648584842682, + "learning_rate": 5.247848886079932e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2489907209739806, + "grad_norm": 0.08696545660495758, + "learning_rate": 5.24618904879707e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2494134556445646, + "grad_norm": 0.1118616908788681, + "learning_rate": 5.244529184317205e-05, + "loss": 0.3539, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2498361903151487, + "grad_norm": 0.1044958308339119, + "learning_rate": 5.242869292823705e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2502589249857328, + "grad_norm": 0.1237001046538353, + "learning_rate": 5.241209374499941e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2506816596563168, + "grad_norm": 0.09636931121349335, + "learning_rate": 5.239549429529291e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2511043943269007, + "grad_norm": 0.08125850558280945, + "learning_rate": 5.237889458095131e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2515271289974848, + "grad_norm": 0.07709396630525589, + "learning_rate": 5.236229460380838e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2519498636680688, + "grad_norm": 0.09548135101795197, + "learning_rate": 5.2345694365698e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2523725983386527, + "grad_norm": 0.0898728147149086, + "learning_rate": 5.232909386845402e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2527953330092367, + "grad_norm": 0.08101865649223328, + "learning_rate": 5.23124931139103e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2532180676798208, + "grad_norm": 0.08815939724445343, + "learning_rate": 5.22958921039008e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 45990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2536408023504046, + "grad_norm": 0.09713520854711533, + "learning_rate": 5.2279290840259454e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2540635370209887, + "grad_norm": 0.08811241388320923, + "learning_rate": 5.226268932482022e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2544862716915728, + "grad_norm": 0.08241473883390427, + "learning_rate": 5.224608755941711e-05, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2549090063621569, + "grad_norm": 0.09255080670118332, + "learning_rate": 5.2229485545884184e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.255331741032741, + "grad_norm": 0.09591182321310043, + "learning_rate": 5.221288328605546e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2557544757033248, + "grad_norm": 0.1031048372387886, + "learning_rate": 5.2196280781765026e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2561772103739088, + "grad_norm": 0.10870203375816345, + "learning_rate": 5.2179678034847014e-05, + "loss": 0.3559, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.256599945044493, + "grad_norm": 0.10580818355083466, + "learning_rate": 5.216307504713557e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2570226797150768, + "grad_norm": 0.08422563225030899, + "learning_rate": 5.214647182046484e-05, + "loss": 0.3551, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2574454143856608, + "grad_norm": 0.10514155775308609, + "learning_rate": 5.2129868356668995e-05, + "loss": 0.3519, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2578681490562449, + "grad_norm": 0.10322435945272446, + "learning_rate": 5.2113264657582295e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2582908837268287, + "grad_norm": 0.10067180544137955, + "learning_rate": 5.209666072503898e-05, + "loss": 0.3535, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2587136183974128, + "grad_norm": 0.0958351194858551, + "learning_rate": 5.2080056560873304e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2591363530679969, + "grad_norm": 0.11068792641162872, + "learning_rate": 5.2063452166919554e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.259559087738581, + "grad_norm": 0.11247739940881729, + "learning_rate": 5.204684754501208e-05, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.259981822409165, + "grad_norm": 0.09564632922410965, + "learning_rate": 5.203024269698521e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2604045570797489, + "grad_norm": 0.11972518265247345, + "learning_rate": 5.2013637624673315e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.260827291750333, + "grad_norm": 0.11331027746200562, + "learning_rate": 5.1997032329910786e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.261250026420917, + "grad_norm": 0.09109491109848022, + "learning_rate": 5.198042681453207e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2616727610915008, + "grad_norm": 0.09513763338327408, + "learning_rate": 5.196382108037158e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.262095495762085, + "grad_norm": 0.10986578464508057, + "learning_rate": 5.194721512926379e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.262518230432669, + "grad_norm": 0.1229046881198883, + "learning_rate": 5.193060896304321e-05, + "loss": 0.3559, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2629409651032528, + "grad_norm": 0.10641653835773468, + "learning_rate": 5.1914002583544365e-05, + "loss": 0.3511, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.263363699773837, + "grad_norm": 0.11315356940031052, + "learning_rate": 5.189739599260175e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.263786434444421, + "grad_norm": 0.13330018520355225, + "learning_rate": 5.188078919204997e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.264209169115005, + "grad_norm": 0.0941455066204071, + "learning_rate": 5.1864182183723596e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.264631903785589, + "grad_norm": 0.13479086756706238, + "learning_rate": 5.184757496945726e-05, + "loss": 0.3551, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.265054638456173, + "grad_norm": 0.10441029816865921, + "learning_rate": 5.183096755108555e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.265477373126757, + "grad_norm": 0.10069210082292557, + "learning_rate": 5.1814359930443146e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.265900107797341, + "grad_norm": 0.1467449963092804, + "learning_rate": 5.179775210936475e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.266322842467925, + "grad_norm": 0.08493734151124954, + "learning_rate": 5.178114408968503e-05, + "loss": 0.3528, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.266745577138509, + "grad_norm": 0.11187466233968735, + "learning_rate": 5.1764535873238694e-05, + "loss": 0.3703, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.267168311809093, + "grad_norm": 0.11128666996955872, + "learning_rate": 5.1747927461860524e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.267591046479677, + "grad_norm": 0.08399184793233871, + "learning_rate": 5.173131885738527e-05, + "loss": 0.3536, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.268013781150261, + "grad_norm": 0.09243622422218323, + "learning_rate": 5.171471006164772e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.268436515820845, + "grad_norm": 0.1143975779414177, + "learning_rate": 5.169810107648265e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2688592504914291, + "grad_norm": 0.10724137723445892, + "learning_rate": 5.1681491903724935e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2692819851620132, + "grad_norm": 0.10958132892847061, + "learning_rate": 5.1664882545209404e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.269704719832597, + "grad_norm": 0.09391099214553833, + "learning_rate": 5.164827300277092e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.270127454503181, + "grad_norm": 0.10616032034158707, + "learning_rate": 5.163166327824439e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2705501891737652, + "grad_norm": 0.08992412686347961, + "learning_rate": 5.161505337346472e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.270972923844349, + "grad_norm": 0.09075083583593369, + "learning_rate": 5.159844329026681e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.271395658514933, + "grad_norm": 0.0932781845331192, + "learning_rate": 5.1581833030485636e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2718183931855171, + "grad_norm": 0.10692227631807327, + "learning_rate": 5.156522259595617e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.272241127856101, + "grad_norm": 0.09724421799182892, + "learning_rate": 5.154861198851341e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.272663862526685, + "grad_norm": 0.09966065734624863, + "learning_rate": 5.153200120999233e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2730865971972691, + "grad_norm": 0.08175136148929596, + "learning_rate": 5.151539026222797e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2735093318678532, + "grad_norm": 0.12012498080730438, + "learning_rate": 5.149877914705539e-05, + "loss": 0.3745, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2739320665384373, + "grad_norm": 0.08896081894636154, + "learning_rate": 5.148216786630964e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2743548012090211, + "grad_norm": 0.08049015700817108, + "learning_rate": 5.14655564218258e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2747775358796052, + "grad_norm": 0.09399588406085968, + "learning_rate": 5.144894481543897e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2752002705501893, + "grad_norm": 0.10340666025876999, + "learning_rate": 5.1432333048984284e-05, + "loss": 0.3528, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.275623005220773, + "grad_norm": 0.09396959841251373, + "learning_rate": 5.1415721124296854e-05, + "loss": 0.3506, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2760457398913572, + "grad_norm": 0.13340790569782257, + "learning_rate": 5.1399109043211845e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2764684745619412, + "grad_norm": 0.0955764576792717, + "learning_rate": 5.138249680756443e-05, + "loss": 0.3528, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.276891209232525, + "grad_norm": 0.08906297385692596, + "learning_rate": 5.13658844191898e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2773139439031092, + "grad_norm": 0.07655584067106247, + "learning_rate": 5.134927187992314e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2777366785736932, + "grad_norm": 0.08644583076238632, + "learning_rate": 5.13326591915997e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2781594132442773, + "grad_norm": 0.08850102126598358, + "learning_rate": 5.131604635605469e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2785821479148614, + "grad_norm": 0.08641798794269562, + "learning_rate": 5.129943337512336e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2790048825854452, + "grad_norm": 0.09792396426200867, + "learning_rate": 5.1282820250641004e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2794276172560293, + "grad_norm": 0.08858868479728699, + "learning_rate": 5.126620698444291e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2798503519266133, + "grad_norm": 0.08806836605072021, + "learning_rate": 5.124959357836436e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2802730865971972, + "grad_norm": 0.08652223646640778, + "learning_rate": 5.1232980034240664e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2806958212677813, + "grad_norm": 0.1344958394765854, + "learning_rate": 5.121636635390718e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2811185559383653, + "grad_norm": 0.1079467162489891, + "learning_rate": 5.119975253919923e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2815412906089492, + "grad_norm": 0.0833987295627594, + "learning_rate": 5.1183138591952206e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2819640252795332, + "grad_norm": 0.1222674697637558, + "learning_rate": 5.1166524514001444e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2823867599501173, + "grad_norm": 0.09811662137508392, + "learning_rate": 5.114991030718237e-05, + "loss": 0.3684, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2828094946207014, + "grad_norm": 0.11202782392501831, + "learning_rate": 5.113329597333038e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2832322292912854, + "grad_norm": 0.09044857323169708, + "learning_rate": 5.111668151428087e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2836549639618693, + "grad_norm": 0.10140926390886307, + "learning_rate": 5.11000669318693e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2840776986324534, + "grad_norm": 0.0823800265789032, + "learning_rate": 5.1083452227931106e-05, + "loss": 0.3553, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2845004333030374, + "grad_norm": 0.09452810883522034, + "learning_rate": 5.106683740430176e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2849231679736213, + "grad_norm": 0.133199542760849, + "learning_rate": 5.1050222462816724e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2853459026442053, + "grad_norm": 0.0925094336271286, + "learning_rate": 5.103360740531148e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2857686373147894, + "grad_norm": 0.11720823496580124, + "learning_rate": 5.101699223362153e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2861913719853733, + "grad_norm": 0.08357150107622147, + "learning_rate": 5.1000376949582394e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2866141066559573, + "grad_norm": 0.11410360783338547, + "learning_rate": 5.0983761555029585e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2870368413265414, + "grad_norm": 0.08929812908172607, + "learning_rate": 5.096714605179866e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2874595759971255, + "grad_norm": 0.10622162371873856, + "learning_rate": 5.095053044172514e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2878823106677095, + "grad_norm": 0.10180142521858215, + "learning_rate": 5.0933914726644604e-05, + "loss": 0.3716, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2883050453382934, + "grad_norm": 0.08543667197227478, + "learning_rate": 5.09172989083926e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2887277800088774, + "grad_norm": 0.08837399631738663, + "learning_rate": 5.090068298880475e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2891505146794615, + "grad_norm": 0.0953977108001709, + "learning_rate": 5.088406696971661e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2895732493500454, + "grad_norm": 0.12747669219970703, + "learning_rate": 5.086745085296381e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2899959840206294, + "grad_norm": 0.11083022505044937, + "learning_rate": 5.085083464038195e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2904187186912135, + "grad_norm": 0.09556537121534348, + "learning_rate": 5.083421833380667e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2908414533617973, + "grad_norm": 0.09769898653030396, + "learning_rate": 5.081760193507361e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2912641880323814, + "grad_norm": 0.10684385150671005, + "learning_rate": 5.080098544601839e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2916869227029655, + "grad_norm": 0.08770085871219635, + "learning_rate": 5.0784368868476684e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2921096573735495, + "grad_norm": 0.07883848994970322, + "learning_rate": 5.076775220428418e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2925323920441336, + "grad_norm": 0.09641653299331665, + "learning_rate": 5.0751135455276535e-05, + "loss": 0.3542, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2929551267147175, + "grad_norm": 0.11538711935281754, + "learning_rate": 5.073451862328942e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2933778613853015, + "grad_norm": 0.09625036269426346, + "learning_rate": 5.071790171015854e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2938005960558856, + "grad_norm": 0.12042287737131119, + "learning_rate": 5.0701284717719624e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2942233307264694, + "grad_norm": 0.095904640853405, + "learning_rate": 5.068466764780835e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2946460653970535, + "grad_norm": 0.088405080139637, + "learning_rate": 5.0668050502260465e-05, + "loss": 0.3557, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2950688000676376, + "grad_norm": 0.10310807824134827, + "learning_rate": 5.06514332829117e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2954915347382214, + "grad_norm": 0.09385984390974045, + "learning_rate": 5.063481599159775e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 46990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2959142694088055, + "grad_norm": 0.09593669325113297, + "learning_rate": 5.06181986301544e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2963370040793896, + "grad_norm": 0.07989755272865295, + "learning_rate": 5.0601581200417416e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2967597387499736, + "grad_norm": 0.10244401544332504, + "learning_rate": 5.058496370422252e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2971824734205577, + "grad_norm": 0.1049409806728363, + "learning_rate": 5.05683461434055e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2976052080911415, + "grad_norm": 0.08247484266757965, + "learning_rate": 5.055172851980213e-05, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2980279427617256, + "grad_norm": 0.10791067034006119, + "learning_rate": 5.05351108352482e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2984506774323097, + "grad_norm": 0.13324151933193207, + "learning_rate": 5.0518493091579496e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2988734121028935, + "grad_norm": 0.09721367806196213, + "learning_rate": 5.050187529063181e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2992961467734776, + "grad_norm": 0.10145121067762375, + "learning_rate": 5.048525743424093e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.2997188814440617, + "grad_norm": 0.08849140256643295, + "learning_rate": 5.046863952424269e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3001416161146455, + "grad_norm": 0.08777057379484177, + "learning_rate": 5.0452021562472894e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3005643507852296, + "grad_norm": 0.11157343536615372, + "learning_rate": 5.043540355076735e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3009870854558137, + "grad_norm": 0.10769639164209366, + "learning_rate": 5.04187854909619e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3014098201263977, + "grad_norm": 0.13108545541763306, + "learning_rate": 5.040216738489236e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3018325547969818, + "grad_norm": 0.09282837808132172, + "learning_rate": 5.038554923439458e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3022552894675656, + "grad_norm": 0.09369857609272003, + "learning_rate": 5.03689310413044e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3026780241381497, + "grad_norm": 0.08974523097276688, + "learning_rate": 5.0352312807457666e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3031007588087338, + "grad_norm": 0.09122706949710846, + "learning_rate": 5.0335694534690216e-05, + "loss": 0.3691, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3035234934793176, + "grad_norm": 0.11171845346689224, + "learning_rate": 5.03190762248379e-05, + "loss": 0.3559, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3039462281499017, + "grad_norm": 0.126339390873909, + "learning_rate": 5.030245787973661e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3043689628204858, + "grad_norm": 0.09118304401636124, + "learning_rate": 5.028583950122218e-05, + "loss": 0.3563, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3047916974910696, + "grad_norm": 0.10711286962032318, + "learning_rate": 5.026922109113047e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3052144321616537, + "grad_norm": 0.10217303037643433, + "learning_rate": 5.0252602651297354e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3056371668322377, + "grad_norm": 0.10403811931610107, + "learning_rate": 5.023598418355873e-05, + "loss": 0.3556, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3060599015028218, + "grad_norm": 0.08794286847114563, + "learning_rate": 5.0219365689750455e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3064826361734059, + "grad_norm": 0.08720427751541138, + "learning_rate": 5.02027471717084e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3069053708439897, + "grad_norm": 0.09649229794740677, + "learning_rate": 5.018612863126845e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3073281055145738, + "grad_norm": 0.1194763109087944, + "learning_rate": 5.01695100702665e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3077508401851579, + "grad_norm": 0.09207306802272797, + "learning_rate": 5.015289149053843e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3081735748557417, + "grad_norm": 0.09684416651725769, + "learning_rate": 5.0136272893920124e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3085963095263258, + "grad_norm": 0.08126208931207657, + "learning_rate": 5.011965428224747e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3090190441969098, + "grad_norm": 0.10719352960586548, + "learning_rate": 5.010303565735638e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3094417788674937, + "grad_norm": 0.11515513062477112, + "learning_rate": 5.008641702108272e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3098645135380778, + "grad_norm": 0.09660335630178452, + "learning_rate": 5.006979837526241e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3102872482086618, + "grad_norm": 0.0983145460486412, + "learning_rate": 5.0053179721731316e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.310709982879246, + "grad_norm": 0.08924131095409393, + "learning_rate": 5.003656106232536e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.31113271754983, + "grad_norm": 0.10306818783283234, + "learning_rate": 5.0019942398880426e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3115554522204138, + "grad_norm": 0.07797092944383621, + "learning_rate": 5.000332373323242e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3119781868909979, + "grad_norm": 0.09411925822496414, + "learning_rate": 4.9986705067217235e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.312400921561582, + "grad_norm": 0.08576841652393341, + "learning_rate": 4.9970086402670755e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3128236562321658, + "grad_norm": 0.11154215782880783, + "learning_rate": 4.9953467741428896e-05, + "loss": 0.3563, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3132463909027499, + "grad_norm": 0.1365000307559967, + "learning_rate": 4.993684908532756e-05, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.313669125573334, + "grad_norm": 0.08872441202402115, + "learning_rate": 4.992023043620262e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3140918602439178, + "grad_norm": 0.0881567895412445, + "learning_rate": 4.990361179588999e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3145145949145018, + "grad_norm": 0.1015072837471962, + "learning_rate": 4.9886993166225574e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.314937329585086, + "grad_norm": 0.09930194169282913, + "learning_rate": 4.987037454904524e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.31536006425567, + "grad_norm": 0.116024449467659, + "learning_rate": 4.985375594618489e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.315782798926254, + "grad_norm": 0.0915931910276413, + "learning_rate": 4.9837137359480416e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.316205533596838, + "grad_norm": 0.08566391468048096, + "learning_rate": 4.9820518790767704e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.316628268267422, + "grad_norm": 0.11133268475532532, + "learning_rate": 4.980390024188266e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.317051002938006, + "grad_norm": 0.09573909640312195, + "learning_rate": 4.978728171466114e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3174737376085899, + "grad_norm": 0.10254465788602829, + "learning_rate": 4.9770663210939034e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.317896472279174, + "grad_norm": 0.092756487429142, + "learning_rate": 4.975404473255225e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.318319206949758, + "grad_norm": 0.09008413553237915, + "learning_rate": 4.973742628133664e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3187419416203419, + "grad_norm": 0.10237392783164978, + "learning_rate": 4.972080785912807e-05, + "loss": 0.3554, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.319164676290926, + "grad_norm": 0.08173462003469467, + "learning_rate": 4.970418946776241e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.31958741096151, + "grad_norm": 0.1100350171327591, + "learning_rate": 4.968757110907556e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.320010145632094, + "grad_norm": 0.09972905367612839, + "learning_rate": 4.9670952784903346e-05, + "loss": 0.3536, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3204328803026781, + "grad_norm": 0.0984075665473938, + "learning_rate": 4.965433449708165e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.320855614973262, + "grad_norm": 0.10896451771259308, + "learning_rate": 4.963771624744633e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.321278349643846, + "grad_norm": 0.09587808698415756, + "learning_rate": 4.9621098037833214e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3217010843144301, + "grad_norm": 0.0949619933962822, + "learning_rate": 4.9604479870078164e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.322123818985014, + "grad_norm": 0.0818856731057167, + "learning_rate": 4.958786174601705e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.322546553655598, + "grad_norm": 0.08887366205453873, + "learning_rate": 4.957124366748566e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.322969288326182, + "grad_norm": 0.0903364047408104, + "learning_rate": 4.955462563631987e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.323392022996766, + "grad_norm": 0.1016039177775383, + "learning_rate": 4.953800765435547e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.32381475766735, + "grad_norm": 0.0878167524933815, + "learning_rate": 4.9521389723428295e-05, + "loss": 0.3544, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.324237492337934, + "grad_norm": Infinity, + "learning_rate": 4.9506433630748e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3246602270085182, + "grad_norm": 0.09359484165906906, + "learning_rate": 4.948981580184924e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3250829616791022, + "grad_norm": 0.09488935023546219, + "learning_rate": 4.9473198029311555e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.325505696349686, + "grad_norm": 0.1273331642150879, + "learning_rate": 4.945658031497076e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3259284310202701, + "grad_norm": 0.09430437535047531, + "learning_rate": 4.9439962660662636e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3263511656908542, + "grad_norm": 0.08411432057619095, + "learning_rate": 4.942334506822296e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.326773900361438, + "grad_norm": 0.11326649785041809, + "learning_rate": 4.9406727539487545e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3271966350320221, + "grad_norm": 0.09284378588199615, + "learning_rate": 4.939011007629213e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3276193697026062, + "grad_norm": 0.09026549011468887, + "learning_rate": 4.9373492680472486e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.32804210437319, + "grad_norm": 0.09578590095043182, + "learning_rate": 4.935687535386439e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.328464839043774, + "grad_norm": 0.11896573752164841, + "learning_rate": 4.934025809830356e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3288875737143582, + "grad_norm": 0.09927390515804291, + "learning_rate": 4.932364091562576e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3293103083849422, + "grad_norm": 0.0872841626405716, + "learning_rate": 4.9307023807666715e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3297330430555263, + "grad_norm": 0.12469273805618286, + "learning_rate": 4.9290406776262146e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3301557777261102, + "grad_norm": 0.10811583697795868, + "learning_rate": 4.9273789823247794e-05, + "loss": 0.3535, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3305785123966942, + "grad_norm": 0.10913654416799545, + "learning_rate": 4.925717295045933e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3310012470672783, + "grad_norm": 0.10449724644422531, + "learning_rate": 4.924055615973249e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3314239817378621, + "grad_norm": 0.08912888169288635, + "learning_rate": 4.922393945290295e-05, + "loss": 0.3538, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3318467164084462, + "grad_norm": 0.08243471384048462, + "learning_rate": 4.9207322831806404e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3322694510790303, + "grad_norm": 0.1112334206700325, + "learning_rate": 4.9190706298278485e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3326921857496141, + "grad_norm": 0.11130591481924057, + "learning_rate": 4.917408985415488e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3331149204201982, + "grad_norm": 0.10132153332233429, + "learning_rate": 4.9157473501271257e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3335376550907823, + "grad_norm": 0.11309989541769028, + "learning_rate": 4.9140857241463226e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3339603897613663, + "grad_norm": 0.0878327488899231, + "learning_rate": 4.9124241076566444e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3343831244319504, + "grad_norm": 0.09507478773593903, + "learning_rate": 4.9107625008416525e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3348058591025342, + "grad_norm": 0.12862169742584229, + "learning_rate": 4.909100903884907e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3352285937731183, + "grad_norm": 0.08322888612747192, + "learning_rate": 4.907439316969969e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3356513284437024, + "grad_norm": 0.10388191789388657, + "learning_rate": 4.9057777402804e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3360740631142862, + "grad_norm": 0.08106246590614319, + "learning_rate": 4.904116173999751e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3364967977848703, + "grad_norm": 0.10833148658275604, + "learning_rate": 4.902454618311584e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3369195324554544, + "grad_norm": 0.10685468465089798, + "learning_rate": 4.900793073399453e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3373422671260382, + "grad_norm": 0.09906148910522461, + "learning_rate": 4.899131539446911e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3377650017966223, + "grad_norm": 0.13196055591106415, + "learning_rate": 4.897470016637514e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 47990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3381877364672063, + "grad_norm": 0.08936287462711334, + "learning_rate": 4.8958085051548094e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3386104711377904, + "grad_norm": 0.10136767476797104, + "learning_rate": 4.894147005182351e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3390332058083745, + "grad_norm": 0.1003401055932045, + "learning_rate": 4.892485516903689e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3394559404789583, + "grad_norm": 0.09993898868560791, + "learning_rate": 4.89082404050237e-05, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3398786751495424, + "grad_norm": 0.0939754843711853, + "learning_rate": 4.8891625761619385e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3403014098201265, + "grad_norm": 0.10431106388568878, + "learning_rate": 4.887501124065942e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3407241444907103, + "grad_norm": 0.10324683040380478, + "learning_rate": 4.8858396843979246e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3411468791612944, + "grad_norm": 0.08001330494880676, + "learning_rate": 4.8841782573414276e-05, + "loss": 0.3512, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3415696138318784, + "grad_norm": 0.08555348217487335, + "learning_rate": 4.8825168430799934e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3419923485024623, + "grad_norm": 0.08552663028240204, + "learning_rate": 4.8808554417971606e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3424150831730464, + "grad_norm": 0.08218184858560562, + "learning_rate": 4.87919405367647e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3428378178436304, + "grad_norm": 0.10363283008337021, + "learning_rate": 4.877532678901456e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3432605525142145, + "grad_norm": 0.10558462888002396, + "learning_rate": 4.875871317655658e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3436832871847986, + "grad_norm": 0.10152757912874222, + "learning_rate": 4.874209970122603e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3441060218553824, + "grad_norm": 0.10162777453660965, + "learning_rate": 4.87254863648583e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3445287565259665, + "grad_norm": 0.13902923464775085, + "learning_rate": 4.870887316928865e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3449514911965506, + "grad_norm": 0.08982224762439728, + "learning_rate": 4.86922601163524e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3453742258671344, + "grad_norm": 0.1241435781121254, + "learning_rate": 4.867564720788483e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3457969605377185, + "grad_norm": 0.09961753338575363, + "learning_rate": 4.8659034445721194e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3462196952083025, + "grad_norm": 0.11077834665775299, + "learning_rate": 4.864242183169673e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3466424298788864, + "grad_norm": 0.1053781807422638, + "learning_rate": 4.8625809367646684e-05, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3470651645494705, + "grad_norm": 0.0898115336894989, + "learning_rate": 4.860919705540628e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3474878992200545, + "grad_norm": 0.08491522818803787, + "learning_rate": 4.8592584896810664e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3479106338906386, + "grad_norm": 0.08853030949831009, + "learning_rate": 4.857597289369505e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3483333685612227, + "grad_norm": 0.10519371926784515, + "learning_rate": 4.855936104789459e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3487561032318065, + "grad_norm": 0.08513705432415009, + "learning_rate": 4.854274936124445e-05, + "loss": 0.3536, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3491788379023906, + "grad_norm": 0.09278418123722076, + "learning_rate": 4.8526137835579724e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3496015725729746, + "grad_norm": 0.08241936564445496, + "learning_rate": 4.850952647273554e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3500243072435585, + "grad_norm": 0.10103567689657211, + "learning_rate": 4.849291527454699e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3504470419141426, + "grad_norm": 0.1022987887263298, + "learning_rate": 4.8476304242849136e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3508697765847266, + "grad_norm": 0.09180518239736557, + "learning_rate": 4.8459693379477056e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3512925112553105, + "grad_norm": 0.07915348559617996, + "learning_rate": 4.844308268626574e-05, + "loss": 0.353, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3517152459258945, + "grad_norm": 0.090513214468956, + "learning_rate": 4.8426472165050256e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3521379805964786, + "grad_norm": 0.10225801914930344, + "learning_rate": 4.840986181766556e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3525607152670627, + "grad_norm": 0.10426893830299377, + "learning_rate": 4.839325164594665e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3529834499376467, + "grad_norm": 0.09761743992567062, + "learning_rate": 4.83766416517285e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3534061846082306, + "grad_norm": 0.09588484466075897, + "learning_rate": 4.8360031836846023e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3538289192788147, + "grad_norm": 0.1034303680062294, + "learning_rate": 4.834342220313415e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3542516539493987, + "grad_norm": 0.08964015543460846, + "learning_rate": 4.832681275242779e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3546743886199826, + "grad_norm": 0.08526735007762909, + "learning_rate": 4.831020348656181e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3550971232905666, + "grad_norm": 0.09513309597969055, + "learning_rate": 4.8293594407371075e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3555198579611507, + "grad_norm": 0.12110524624586105, + "learning_rate": 4.8276985516690414e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3559425926317346, + "grad_norm": 0.09050546586513519, + "learning_rate": 4.8260376816354644e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3563653273023186, + "grad_norm": 0.09732869267463684, + "learning_rate": 4.824376830819859e-05, + "loss": 0.371, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3567880619729027, + "grad_norm": 0.12057312577962875, + "learning_rate": 4.8227159994056995e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3572107966434868, + "grad_norm": 0.10942800343036652, + "learning_rate": 4.8210551875764625e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3576335313140708, + "grad_norm": 0.09369415789842606, + "learning_rate": 4.8193943955156226e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3580562659846547, + "grad_norm": 0.0925382673740387, + "learning_rate": 4.817733623406648e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3584790006552387, + "grad_norm": 0.10881581157445908, + "learning_rate": 4.8160728714330116e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3589017353258228, + "grad_norm": 0.08722969889640808, + "learning_rate": 4.814412139778175e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3593244699964067, + "grad_norm": 0.11878567934036255, + "learning_rate": 4.812751428625607e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3597472046669907, + "grad_norm": 0.08950339257717133, + "learning_rate": 4.8110907381587665e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3601699393375748, + "grad_norm": 0.07976116240024567, + "learning_rate": 4.8094300685611146e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3605926740081586, + "grad_norm": 0.09754155576229095, + "learning_rate": 4.80776942001611e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3610154086787427, + "grad_norm": 0.1431632936000824, + "learning_rate": 4.8061087927072056e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3614381433493268, + "grad_norm": 0.11593350023031235, + "learning_rate": 4.804448186817856e-05, + "loss": 0.3563, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3618608780199108, + "grad_norm": 0.09603799134492874, + "learning_rate": 4.802787602531512e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.362283612690495, + "grad_norm": 0.10000796616077423, + "learning_rate": 4.801127040031621e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3627063473610788, + "grad_norm": 0.11559220403432846, + "learning_rate": 4.799466499501629e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3631290820316628, + "grad_norm": 0.09147010743618011, + "learning_rate": 4.797805981124978e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.363551816702247, + "grad_norm": 0.10434659570455551, + "learning_rate": 4.7961454850851086e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3639745513728307, + "grad_norm": 0.08805891126394272, + "learning_rate": 4.794485011565462e-05, + "loss": 0.3542, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3643972860434148, + "grad_norm": 0.10055190324783325, + "learning_rate": 4.7928245607494716e-05, + "loss": 0.3717, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3648200207139989, + "grad_norm": 0.09045260399580002, + "learning_rate": 4.791164132820571e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3652427553845827, + "grad_norm": 0.08472739905118942, + "learning_rate": 4.789503727962193e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3656654900551668, + "grad_norm": 0.1404300183057785, + "learning_rate": 4.787843346357763e-05, + "loss": 0.3552, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3660882247257509, + "grad_norm": 0.11979419738054276, + "learning_rate": 4.7861829881907105e-05, + "loss": 0.3557, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.366510959396335, + "grad_norm": 0.1009168028831482, + "learning_rate": 4.7845226536444544e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.366933694066919, + "grad_norm": 0.09814280271530151, + "learning_rate": 4.782862342902418e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3673564287375028, + "grad_norm": 0.1068277433514595, + "learning_rate": 4.7812020561480174e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.367779163408087, + "grad_norm": 0.08729896694421768, + "learning_rate": 4.779541793564669e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.368201898078671, + "grad_norm": 0.1060883179306984, + "learning_rate": 4.7778815553357854e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3686246327492548, + "grad_norm": 0.0972028374671936, + "learning_rate": 4.776221341644776e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.369047367419839, + "grad_norm": 0.0938730537891388, + "learning_rate": 4.774561152675047e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.369470102090423, + "grad_norm": 0.09521536529064178, + "learning_rate": 4.772900988610006e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3698928367610068, + "grad_norm": 0.0969172939658165, + "learning_rate": 4.771240849633053e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3703155714315909, + "grad_norm": 0.08096811175346375, + "learning_rate": 4.769580735927586e-05, + "loss": 0.3685, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.370738306102175, + "grad_norm": 0.07844288647174835, + "learning_rate": 4.767920647677e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.371161040772759, + "grad_norm": 0.1018235981464386, + "learning_rate": 4.766260585064691e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.371583775443343, + "grad_norm": 0.10285267978906631, + "learning_rate": 4.76460054827405e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.372006510113927, + "grad_norm": 0.08874792605638504, + "learning_rate": 4.7629405374884614e-05, + "loss": 0.3551, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.372429244784511, + "grad_norm": 0.08461496978998184, + "learning_rate": 4.7612805528913115e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.372851979455095, + "grad_norm": 0.0870908796787262, + "learning_rate": 4.759620594665984e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.373274714125679, + "grad_norm": 0.0755457952618599, + "learning_rate": 4.757960662995855e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.373697448796263, + "grad_norm": 0.11614327132701874, + "learning_rate": 4.7563007580643034e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.374120183466847, + "grad_norm": 0.09860429167747498, + "learning_rate": 4.754640880054699e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3745429181374311, + "grad_norm": 0.1189316064119339, + "learning_rate": 4.752981029150415e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.374965652808015, + "grad_norm": 0.11532343178987503, + "learning_rate": 4.751321205534815e-05, + "loss": 0.3527, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.375388387478599, + "grad_norm": 0.12899087369441986, + "learning_rate": 4.749661409391265e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.375811122149183, + "grad_norm": 0.0987379178404808, + "learning_rate": 4.748001640903127e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3762338568197672, + "grad_norm": 0.0813845843076706, + "learning_rate": 4.746341900253758e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.376656591490351, + "grad_norm": 0.09131285548210144, + "learning_rate": 4.7446821876265123e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.377079326160935, + "grad_norm": 0.09140612930059433, + "learning_rate": 4.7430225032047424e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3775020608315192, + "grad_norm": 0.10394272953271866, + "learning_rate": 4.7413628471717994e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.377924795502103, + "grad_norm": 0.08517194539308548, + "learning_rate": 4.739703219711025e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.378347530172687, + "grad_norm": 0.12941570580005646, + "learning_rate": 4.738043621005763e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3787702648432711, + "grad_norm": 0.10527820885181427, + "learning_rate": 4.736384051239352e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3791929995138552, + "grad_norm": 0.09521045535802841, + "learning_rate": 4.734724510595129e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.379615734184439, + "grad_norm": 0.09542769938707352, + "learning_rate": 4.7330649992564264e-05, + "loss": 0.3563, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3800384688550231, + "grad_norm": 0.0920417308807373, + "learning_rate": 4.731405517406574e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 48990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3804612035256072, + "grad_norm": 0.1031099259853363, + "learning_rate": 4.729746065228898e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3808839381961913, + "grad_norm": 0.09335385262966156, + "learning_rate": 4.728086642906721e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.381306672866775, + "grad_norm": 0.10060110688209534, + "learning_rate": 4.7264272506233657e-05, + "loss": 0.3684, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3817294075373592, + "grad_norm": 0.11782325059175491, + "learning_rate": 4.7247678885621435e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3821521422079432, + "grad_norm": 0.12253709137439728, + "learning_rate": 4.723108556906372e-05, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.382574876878527, + "grad_norm": 0.10349424183368683, + "learning_rate": 4.7214492558393567e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3829976115491112, + "grad_norm": 0.09386591613292694, + "learning_rate": 4.7197899855444074e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3834203462196952, + "grad_norm": 0.09726658463478088, + "learning_rate": 4.718130746204825e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3838430808902793, + "grad_norm": 0.12357879430055618, + "learning_rate": 4.7164715380039106e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3842658155608631, + "grad_norm": 0.09160351008176804, + "learning_rate": 4.7148123611249596e-05, + "loss": 0.355, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3846885502314472, + "grad_norm": 0.10497936606407166, + "learning_rate": 4.7131532157512636e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3851112849020313, + "grad_norm": 0.10922761261463165, + "learning_rate": 4.711494102066116e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3855340195726153, + "grad_norm": 0.07886990904808044, + "learning_rate": 4.7098350202527976e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3859567542431992, + "grad_norm": 0.1126222237944603, + "learning_rate": 4.70817597049459e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3863794889137833, + "grad_norm": 0.0954027771949768, + "learning_rate": 4.7065169529747754e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3868022235843673, + "grad_norm": 0.09073817729949951, + "learning_rate": 4.704857967876628e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3872249582549512, + "grad_norm": 0.10229144245386124, + "learning_rate": 4.703199015383418e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3876476929255352, + "grad_norm": 0.10898447036743164, + "learning_rate": 4.701540095678413e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3880704275961193, + "grad_norm": 0.07715528458356857, + "learning_rate": 4.6998812089448794e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3884931622667034, + "grad_norm": 0.10956315696239471, + "learning_rate": 4.698222355366076e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3889158969372872, + "grad_norm": 0.09294068813323975, + "learning_rate": 4.6965635351252615e-05, + "loss": 0.3524, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3893386316078713, + "grad_norm": 0.08669517189264297, + "learning_rate": 4.6949047484056855e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3897613662784554, + "grad_norm": 0.08573263138532639, + "learning_rate": 4.693245995390601e-05, + "loss": 0.3545, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3901841009490394, + "grad_norm": 0.093959741294384, + "learning_rate": 4.691587276263252e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3906068356196233, + "grad_norm": 0.08570476621389389, + "learning_rate": 4.689928591206881e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3910295702902074, + "grad_norm": 0.08342406898736954, + "learning_rate": 4.688269940404727e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3914523049607914, + "grad_norm": 0.10534121841192245, + "learning_rate": 4.686611324040024e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3918750396313753, + "grad_norm": 0.09409473836421967, + "learning_rate": 4.684952742296002e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3922977743019593, + "grad_norm": 0.10342258214950562, + "learning_rate": 4.683294195355891e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3927205089725434, + "grad_norm": 0.10669530183076859, + "learning_rate": 4.681635683402909e-05, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3931432436431275, + "grad_norm": 0.1314956694841385, + "learning_rate": 4.679977206620279e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3935659783137113, + "grad_norm": 0.08082199841737747, + "learning_rate": 4.678318765191214e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3939887129842954, + "grad_norm": 0.08718214184045792, + "learning_rate": 4.676660359298927e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3944114476548795, + "grad_norm": 0.12634222209453583, + "learning_rate": 4.675001989126626e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3948341823254635, + "grad_norm": 0.10280666500329971, + "learning_rate": 4.6733436548575116e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3952569169960474, + "grad_norm": 0.08856112509965897, + "learning_rate": 4.671685356674785e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3956796516666314, + "grad_norm": 0.1043374314904213, + "learning_rate": 4.670027094761644e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3961023863372155, + "grad_norm": 0.08876169472932816, + "learning_rate": 4.6683688693012774e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3965251210077994, + "grad_norm": 0.09484806656837463, + "learning_rate": 4.6667106804768736e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3969478556783834, + "grad_norm": 0.09869488328695297, + "learning_rate": 4.665052528471615e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3973705903489675, + "grad_norm": 0.10840066522359848, + "learning_rate": 4.663394413468681e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3977933250195516, + "grad_norm": 0.1284104883670807, + "learning_rate": 4.66173633565125e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3982160596901354, + "grad_norm": 0.10047691315412521, + "learning_rate": 4.66007829520249e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3986387943607195, + "grad_norm": 0.09621471911668777, + "learning_rate": 4.6584202923055685e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3990615290313035, + "grad_norm": 0.11559142917394638, + "learning_rate": 4.6567623271436506e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3994842637018876, + "grad_norm": 0.11958057433366776, + "learning_rate": 4.6551043998998925e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.3999069983724715, + "grad_norm": 0.10504991561174393, + "learning_rate": 4.653446510757451e-05, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4003297330430555, + "grad_norm": 0.09627680480480194, + "learning_rate": 4.651788659899474e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4007524677136396, + "grad_norm": 0.10160253942012787, + "learning_rate": 4.65013084750911e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4011752023842234, + "grad_norm": 0.10503148287534714, + "learning_rate": 4.648473073769498e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4015979370548075, + "grad_norm": 0.10279932618141174, + "learning_rate": 4.646815338863778e-05, + "loss": 0.355, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4020206717253916, + "grad_norm": 0.08123169839382172, + "learning_rate": 4.645157642975084e-05, + "loss": 0.3525, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4024434063959756, + "grad_norm": 0.08749069273471832, + "learning_rate": 4.6434999862865425e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4028661410665595, + "grad_norm": 0.09751523286104202, + "learning_rate": 4.6418423689812796e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4032888757371436, + "grad_norm": 0.11492685973644257, + "learning_rate": 4.6401847912424164e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4037116104077276, + "grad_norm": 0.08804748207330704, + "learning_rate": 4.638527253253068e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4041343450783117, + "grad_norm": 0.09989903122186661, + "learning_rate": 4.636869755196346e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4045570797488955, + "grad_norm": 0.08088167756795883, + "learning_rate": 4.635212297255357e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4049798144194796, + "grad_norm": 0.10143547505140305, + "learning_rate": 4.6335548796132036e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4054025490900637, + "grad_norm": 0.08626094460487366, + "learning_rate": 4.631897502452986e-05, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4058252837606475, + "grad_norm": 0.10707937926054001, + "learning_rate": 4.630240165957795e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4062480184312316, + "grad_norm": 0.10371320694684982, + "learning_rate": 4.628582870310722e-05, + "loss": 0.355, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4066707531018157, + "grad_norm": 0.08502169698476791, + "learning_rate": 4.626925615694854e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4070934877723997, + "grad_norm": 0.09356865286827087, + "learning_rate": 4.6252684022932666e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4075162224429838, + "grad_norm": 0.13017134368419647, + "learning_rate": 4.62361123028904e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4079389571135676, + "grad_norm": 0.09349583089351654, + "learning_rate": 4.6219540998652403e-05, + "loss": 0.3535, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4083616917841517, + "grad_norm": 0.09365864843130112, + "learning_rate": 4.620297011204939e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4087844264547358, + "grad_norm": 0.10431011021137238, + "learning_rate": 4.618639964491194e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4092071611253196, + "grad_norm": 0.11458063870668411, + "learning_rate": 4.6169829599070644e-05, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4096298957959037, + "grad_norm": 0.10474558174610138, + "learning_rate": 4.615325997635604e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4100526304664878, + "grad_norm": 0.09523475915193558, + "learning_rate": 4.613669077859858e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4104753651370716, + "grad_norm": 0.09446924179792404, + "learning_rate": 4.6120122007628724e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4108980998076557, + "grad_norm": 0.08799906820058823, + "learning_rate": 4.6103553665276856e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4113208344782397, + "grad_norm": 0.10617750883102417, + "learning_rate": 4.60869857533733e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4117435691488238, + "grad_norm": 0.09829738736152649, + "learning_rate": 4.607041827374836e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4121663038194079, + "grad_norm": 0.08984767645597458, + "learning_rate": 4.605385122823225e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4125890384899917, + "grad_norm": 0.10952026396989822, + "learning_rate": 4.6037284618655196e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4130117731605758, + "grad_norm": 0.08269986510276794, + "learning_rate": 4.6020718446847346e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4134345078311599, + "grad_norm": 0.11468230187892914, + "learning_rate": 4.600415271463877e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4138572425017437, + "grad_norm": 0.09506204724311829, + "learning_rate": 4.598758742385954e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4142799771723278, + "grad_norm": 0.10548045486211777, + "learning_rate": 4.597102257633966e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4147027118429119, + "grad_norm": 0.11027508974075317, + "learning_rate": 4.595445817390907e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4151254465134957, + "grad_norm": 0.10242374241352081, + "learning_rate": 4.59378942183977e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4155481811840798, + "grad_norm": 0.07883413136005402, + "learning_rate": 4.592133071163536e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4159709158546638, + "grad_norm": 0.09272819012403488, + "learning_rate": 4.590476765545188e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.416393650525248, + "grad_norm": 0.09196022152900696, + "learning_rate": 4.5888205051677005e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.416816385195832, + "grad_norm": 0.12112501263618469, + "learning_rate": 4.587164290214044e-05, + "loss": 0.3538, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4172391198664158, + "grad_norm": 0.0929594412446022, + "learning_rate": 4.585508120867186e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4176618545369999, + "grad_norm": 0.08891580253839493, + "learning_rate": 4.583851997310085e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.418084589207584, + "grad_norm": 0.10611018538475037, + "learning_rate": 4.5821959197256955e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4185073238781678, + "grad_norm": 0.13724584877490997, + "learning_rate": 4.5805398882969704e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4189300585487519, + "grad_norm": 0.10824238508939743, + "learning_rate": 4.578883903206853e-05, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.419352793219336, + "grad_norm": 0.1361611932516098, + "learning_rate": 4.5772279646382834e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4197755278899198, + "grad_norm": 0.09529910981655121, + "learning_rate": 4.5755720727741964e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4201982625605039, + "grad_norm": 0.10744849592447281, + "learning_rate": 4.5739162277975214e-05, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.420620997231088, + "grad_norm": 0.11571817845106125, + "learning_rate": 4.5722604298911855e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.421043731901672, + "grad_norm": 0.08319410681724548, + "learning_rate": 4.570604679238104e-05, + "loss": 0.353, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.421466466572256, + "grad_norm": 0.11537440121173859, + "learning_rate": 4.568948976021194e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.42188920124284, + "grad_norm": 0.10470148921012878, + "learning_rate": 4.567293320423364e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.422311935913424, + "grad_norm": 0.10538194328546524, + "learning_rate": 4.5656377126275166e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 49990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.422734670584008, + "grad_norm": 0.10151849687099457, + "learning_rate": 4.5639821528165524e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.423157405254592, + "grad_norm": 0.1018582433462143, + "learning_rate": 4.562326641173361e-05, + "loss": 0.3534, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.423580139925176, + "grad_norm": 0.09154338389635086, + "learning_rate": 4.560671177880833e-05, + "loss": 0.3557, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.42400287459576, + "grad_norm": 0.09612631797790527, + "learning_rate": 4.559015763121849e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4244256092663439, + "grad_norm": 0.11830408871173859, + "learning_rate": 4.557360397079286e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.424848343936928, + "grad_norm": 0.10012178122997284, + "learning_rate": 4.555705079936018e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.425271078607512, + "grad_norm": 0.09941676259040833, + "learning_rate": 4.554049811874908e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.425693813278096, + "grad_norm": 0.08923804759979248, + "learning_rate": 4.5523945930788184e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4261165479486801, + "grad_norm": 0.1375589519739151, + "learning_rate": 4.550739423730605e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.426539282619264, + "grad_norm": 0.10218234360218048, + "learning_rate": 4.5490843040131194e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.426962017289848, + "grad_norm": 0.08706291019916534, + "learning_rate": 4.547429234109202e-05, + "loss": 0.3556, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4273847519604321, + "grad_norm": 0.11563076823949814, + "learning_rate": 4.545774214201694e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.427807486631016, + "grad_norm": 0.08852064609527588, + "learning_rate": 4.544119244473427e-05, + "loss": 0.3517, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4282302213016, + "grad_norm": 0.10148099809885025, + "learning_rate": 4.542464325107232e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4286529559721841, + "grad_norm": 0.1014893651008606, + "learning_rate": 4.540809456285928e-05, + "loss": 0.3514, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.429075690642768, + "grad_norm": 0.12340392172336578, + "learning_rate": 4.539154638192335e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.429498425313352, + "grad_norm": 0.09701044112443924, + "learning_rate": 4.5374998710092615e-05, + "loss": 0.3552, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.429921159983936, + "grad_norm": 0.10880783200263977, + "learning_rate": 4.535845154919514e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4303438946545202, + "grad_norm": 0.1187034547328949, + "learning_rate": 4.5341904901058936e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4307666293251042, + "grad_norm": 0.12019286304712296, + "learning_rate": 4.532535876751191e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.431189363995688, + "grad_norm": 0.09664688259363174, + "learning_rate": 4.5308813150381977e-05, + "loss": 0.3545, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4316120986662721, + "grad_norm": 0.09982411563396454, + "learning_rate": 4.529226805149695e-05, + "loss": 0.3533, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4320348333368562, + "grad_norm": 0.08566634356975555, + "learning_rate": 4.527572347268461e-05, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.43245756800744, + "grad_norm": 0.09152856469154358, + "learning_rate": 4.525917941577266e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4328803026780241, + "grad_norm": 0.10766222327947617, + "learning_rate": 4.524263588258877e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4333030373486082, + "grad_norm": 0.07785844057798386, + "learning_rate": 4.522609287496052e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.433725772019192, + "grad_norm": 0.08465311676263809, + "learning_rate": 4.5209550394715454e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4341485066897761, + "grad_norm": 0.0968073159456253, + "learning_rate": 4.519300844368108e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4345712413603602, + "grad_norm": 0.09856528788805008, + "learning_rate": 4.517646702368479e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4349939760309443, + "grad_norm": 0.09303019940853119, + "learning_rate": 4.515992613655394e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4354167107015283, + "grad_norm": 0.09804157167673111, + "learning_rate": 4.514338578411586e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4358394453721122, + "grad_norm": 0.08779001235961914, + "learning_rate": 4.51268459681978e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4362621800426962, + "grad_norm": 0.10213009268045425, + "learning_rate": 4.5110306690626915e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4366849147132803, + "grad_norm": 0.0898546427488327, + "learning_rate": 4.5093767953230355e-05, + "loss": 0.3563, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4371076493838641, + "grad_norm": 0.09929900616407394, + "learning_rate": 4.5077229757835196e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4375303840544482, + "grad_norm": 0.09942048788070679, + "learning_rate": 4.506069210626843e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4379531187250323, + "grad_norm": 0.1090492531657219, + "learning_rate": 4.5044155000357016e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4383758533956161, + "grad_norm": 0.09468581527471542, + "learning_rate": 4.5027618441927824e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4387985880662002, + "grad_norm": 0.09589140862226486, + "learning_rate": 4.50110824328077e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4392213227367843, + "grad_norm": 0.08282195776700974, + "learning_rate": 4.49945469748234e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4396440574073683, + "grad_norm": 0.10557112842798233, + "learning_rate": 4.4978012069801635e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4400667920779524, + "grad_norm": 0.10042043030261993, + "learning_rate": 4.4961477719569034e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4404895267485363, + "grad_norm": 0.11584748327732086, + "learning_rate": 4.494494392595221e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4409122614191203, + "grad_norm": 0.09796369075775146, + "learning_rate": 4.492841069077766e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4413349960897044, + "grad_norm": 0.10023734718561172, + "learning_rate": 4.4911878015871845e-05, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4417577307602882, + "grad_norm": 0.0989031046628952, + "learning_rate": 4.4895345903061195e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4421804654308723, + "grad_norm": 0.12220827490091324, + "learning_rate": 4.4878814354172014e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4426032001014564, + "grad_norm": 0.10619290918111801, + "learning_rate": 4.4862283371030564e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4430259347720402, + "grad_norm": 0.090878427028656, + "learning_rate": 4.484575295546308e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4434486694426243, + "grad_norm": 0.1146378144621849, + "learning_rate": 4.4829223109295714e-05, + "loss": 0.3514, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4438714041132084, + "grad_norm": 0.11378207802772522, + "learning_rate": 4.481269383435453e-05, + "loss": 0.3678, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4442941387837924, + "grad_norm": 0.09615776687860489, + "learning_rate": 4.479616513246556e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4447168734543765, + "grad_norm": 0.11579839885234833, + "learning_rate": 4.4779637005454786e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4451396081249603, + "grad_norm": 0.08645466715097427, + "learning_rate": 4.4763109455148075e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4455623427955444, + "grad_norm": 0.10489698499441147, + "learning_rate": 4.474658248337128e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4459850774661285, + "grad_norm": 0.09668981283903122, + "learning_rate": 4.473005609195014e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4464078121367123, + "grad_norm": 0.11827098578214645, + "learning_rate": 4.471353028271037e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4468305468072964, + "grad_norm": 0.1170039176940918, + "learning_rate": 4.469700505747764e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4472532814778805, + "grad_norm": 0.09630496054887772, + "learning_rate": 4.468048041807748e-05, + "loss": 0.3537, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4476760161484643, + "grad_norm": 0.08009231090545654, + "learning_rate": 4.466395636633542e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4480987508190484, + "grad_norm": 0.10687550157308578, + "learning_rate": 4.464743290407692e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4485214854896324, + "grad_norm": 0.09874526411294937, + "learning_rate": 4.463091003312734e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4489442201602165, + "grad_norm": 0.10906022042036057, + "learning_rate": 4.4614387755311993e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4493669548308006, + "grad_norm": 0.08740022778511047, + "learning_rate": 4.459786607245616e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4497896895013844, + "grad_norm": 0.1308751404285431, + "learning_rate": 4.4581344986385e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4502124241719685, + "grad_norm": 0.12514953315258026, + "learning_rate": 4.456482449892362e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4506351588425526, + "grad_norm": 0.08579155802726746, + "learning_rate": 4.454830461189708e-05, + "loss": 0.355, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4510578935131364, + "grad_norm": 0.09539785981178284, + "learning_rate": 4.4531785327130384e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4514806281837205, + "grad_norm": 0.08729325234889984, + "learning_rate": 4.451526664644842e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4519033628543045, + "grad_norm": 0.08520582318305969, + "learning_rate": 4.449874857167606e-05, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4523260975248884, + "grad_norm": 0.08990948647260666, + "learning_rate": 4.44822311046381e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4527488321954725, + "grad_norm": 0.11165622621774673, + "learning_rate": 4.446571424715923e-05, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4531715668660565, + "grad_norm": 0.09478408098220825, + "learning_rate": 4.444919800106414e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4535943015366406, + "grad_norm": 0.10235973447561264, + "learning_rate": 4.443268236817736e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4540170362072247, + "grad_norm": 0.0901927500963211, + "learning_rate": 4.441616735032345e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4544397708778085, + "grad_norm": 0.11142667382955551, + "learning_rate": 4.439965294932683e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4548625055483926, + "grad_norm": 0.09190578758716583, + "learning_rate": 4.43831391670119e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4552852402189767, + "grad_norm": 0.11305300891399384, + "learning_rate": 4.4366626005202945e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4557079748895605, + "grad_norm": 0.09768659621477127, + "learning_rate": 4.435011346572424e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4561307095601446, + "grad_norm": 0.09972648322582245, + "learning_rate": 4.433360155039993e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4565534442307286, + "grad_norm": 0.10105162858963013, + "learning_rate": 4.4317090261054134e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4569761789013125, + "grad_norm": 0.0916714295744896, + "learning_rate": 4.43005795995109e-05, + "loss": 0.3507, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4573989135718965, + "grad_norm": 0.09343503415584564, + "learning_rate": 4.428406956759418e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4578216482424806, + "grad_norm": 0.09773162752389908, + "learning_rate": 4.426756016712786e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4582443829130647, + "grad_norm": 0.09392120689153671, + "learning_rate": 4.425105139993577e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4586671175836488, + "grad_norm": 0.10340922325849533, + "learning_rate": 4.4234543267841687e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4590898522542326, + "grad_norm": 0.09535709768533707, + "learning_rate": 4.4218035772669273e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4595125869248167, + "grad_norm": 0.08341796696186066, + "learning_rate": 4.420152891624216e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4599353215954007, + "grad_norm": 0.10591297596693039, + "learning_rate": 4.41850227003839e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4603580562659846, + "grad_norm": 0.1055067777633667, + "learning_rate": 4.416851712691795e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4607807909365687, + "grad_norm": 0.10184316337108612, + "learning_rate": 4.415201219766774e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4612035256071527, + "grad_norm": 0.1034640222787857, + "learning_rate": 4.4135507914456564e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4616262602777366, + "grad_norm": 0.12798655033111572, + "learning_rate": 4.41190042791077e-05, + "loss": 0.3548, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4620489949483206, + "grad_norm": 0.10217739641666412, + "learning_rate": 4.410250129344437e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4624717296189047, + "grad_norm": 0.09360674768686295, + "learning_rate": 4.408599895928964e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4628944642894888, + "grad_norm": 0.102680504322052, + "learning_rate": 4.4069497278466595e-05, + "loss": 0.3553, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4633171989600728, + "grad_norm": 0.10089869052171707, + "learning_rate": 4.40529962527982e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4637399336306567, + "grad_norm": 0.08681371062994003, + "learning_rate": 4.403649588410734e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4641626683012408, + "grad_norm": 0.09702497720718384, + "learning_rate": 4.401999617421685e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4645854029718248, + "grad_norm": 0.1653824895620346, + "learning_rate": 4.400349712494952e-05, + "loss": 0.3547, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 50990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4650081376424087, + "grad_norm": 0.09506744891405106, + "learning_rate": 4.3986998738128e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4654308723129927, + "grad_norm": 0.12676075100898743, + "learning_rate": 4.3970501015574894e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4658536069835768, + "grad_norm": 0.09899434447288513, + "learning_rate": 4.395400395911275e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4662763416541607, + "grad_norm": 0.09221740812063217, + "learning_rate": 4.3937507570564046e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4666990763247447, + "grad_norm": 0.0948951467871666, + "learning_rate": 4.392101185175114e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4671218109953288, + "grad_norm": 0.11269141733646393, + "learning_rate": 4.390451680449638e-05, + "loss": 0.3542, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4675445456659129, + "grad_norm": 0.11593817174434662, + "learning_rate": 4.3888022430621986e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.467967280336497, + "grad_norm": 0.10398992896080017, + "learning_rate": 4.387152873195014e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4683900150070808, + "grad_norm": 0.10716083645820618, + "learning_rate": 4.385503571030293e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4688127496776648, + "grad_norm": 0.09646961092948914, + "learning_rate": 4.3838543367502356e-05, + "loss": 0.3512, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.469235484348249, + "grad_norm": 0.1074054166674614, + "learning_rate": 4.382205170537037e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4696582190188328, + "grad_norm": 0.10389100015163422, + "learning_rate": 4.380556072572886e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4700809536894168, + "grad_norm": 0.0866631418466568, + "learning_rate": 4.378907043039959e-05, + "loss": 0.3526, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.470503688360001, + "grad_norm": 0.08829646557569504, + "learning_rate": 4.3772580821204284e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4709264230305847, + "grad_norm": 0.08796197921037674, + "learning_rate": 4.375609189996459e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4713491577011688, + "grad_norm": 0.09795226901769638, + "learning_rate": 4.373960366850207e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4717718923717529, + "grad_norm": 0.10932096838951111, + "learning_rate": 4.372311612863823e-05, + "loss": 0.3685, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.472194627042337, + "grad_norm": 0.13502493500709534, + "learning_rate": 4.3706629282194446e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.472617361712921, + "grad_norm": 0.07720505446195602, + "learning_rate": 4.3690143130992075e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4730400963835049, + "grad_norm": 0.10743998736143112, + "learning_rate": 4.367365767685236e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.473462831054089, + "grad_norm": 0.12121573090553284, + "learning_rate": 4.365717292159649e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.473885565724673, + "grad_norm": 0.11440940946340561, + "learning_rate": 4.3640688867045575e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4743083003952568, + "grad_norm": 0.09742258489131927, + "learning_rate": 4.362420551502065e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.474731035065841, + "grad_norm": 0.08831225335597992, + "learning_rate": 4.360772286734265e-05, + "loss": 0.3543, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.475153769736425, + "grad_norm": 0.09831495583057404, + "learning_rate": 4.359124092583244e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4755765044070088, + "grad_norm": 0.11306928098201752, + "learning_rate": 4.357475969231085e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.475999239077593, + "grad_norm": 0.09431207925081253, + "learning_rate": 4.3558279168598556e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.476421973748177, + "grad_norm": 0.10655613243579865, + "learning_rate": 4.35417993565162e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.476844708418761, + "grad_norm": 0.1060393676161766, + "learning_rate": 4.352532025788435e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.477267443089345, + "grad_norm": 0.0937681496143341, + "learning_rate": 4.350884187452349e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.477690177759929, + "grad_norm": 0.081802137196064, + "learning_rate": 4.3492364208254e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.478112912430513, + "grad_norm": 0.11198323965072632, + "learning_rate": 4.347588726089622e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.478535647101097, + "grad_norm": 0.0977250412106514, + "learning_rate": 4.3459411034270393e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.478958381771681, + "grad_norm": 0.12247279286384583, + "learning_rate": 4.344293553019667e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.479381116442265, + "grad_norm": 0.11085812747478485, + "learning_rate": 4.342646075049515e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.479803851112849, + "grad_norm": 0.09892115741968155, + "learning_rate": 4.340998669698581e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.480226585783433, + "grad_norm": 0.12752234935760498, + "learning_rate": 4.339351337148858e-05, + "loss": 0.3527, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.480649320454017, + "grad_norm": 0.08641770482063293, + "learning_rate": 4.337704077582332e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.481072055124601, + "grad_norm": 0.09590722620487213, + "learning_rate": 4.336056891180977e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4814947897951851, + "grad_norm": 0.10024551302194595, + "learning_rate": 4.334409778126761e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4819175244657692, + "grad_norm": 0.1009586900472641, + "learning_rate": 4.332762738601647e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.482340259136353, + "grad_norm": 0.0859321653842926, + "learning_rate": 4.331115772787583e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.482762993806937, + "grad_norm": 0.08236454427242279, + "learning_rate": 4.329468880866514e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4831857284775212, + "grad_norm": 0.10415168106555939, + "learning_rate": 4.327822063020378e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.483608463148105, + "grad_norm": 0.0924796387553215, + "learning_rate": 4.326175319431098e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.484031197818689, + "grad_norm": 0.08670560270547867, + "learning_rate": 4.3245286502805946e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4844539324892732, + "grad_norm": 0.09457952529191971, + "learning_rate": 4.32288205575078e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.484876667159857, + "grad_norm": 0.09379423409700394, + "learning_rate": 4.3212355360235554e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.485299401830441, + "grad_norm": 0.09295344352722168, + "learning_rate": 4.3195890912808156e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4857221365010251, + "grad_norm": 0.09547335654497147, + "learning_rate": 4.317942721704447e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4861448711716092, + "grad_norm": 0.10457353293895721, + "learning_rate": 4.316296427476328e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4865676058421933, + "grad_norm": 0.1022263765335083, + "learning_rate": 4.3146502087783266e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4869903405127771, + "grad_norm": 0.09490494430065155, + "learning_rate": 4.313004065792306e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4874130751833612, + "grad_norm": 0.09180442243814468, + "learning_rate": 4.311357998700116e-05, + "loss": 0.3516, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4878358098539453, + "grad_norm": 0.1081976592540741, + "learning_rate": 4.309712007683602e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.488258544524529, + "grad_norm": 0.09166820347309113, + "learning_rate": 4.308066092924601e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4886812791951132, + "grad_norm": 0.09687681496143341, + "learning_rate": 4.3064202546049404e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4891040138656972, + "grad_norm": 0.086575448513031, + "learning_rate": 4.304774492906438e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.489526748536281, + "grad_norm": 0.08679473400115967, + "learning_rate": 4.303128808010906e-05, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4899494832068652, + "grad_norm": 0.09936534613370895, + "learning_rate": 4.301483200100145e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4903722178774492, + "grad_norm": 0.08555677533149719, + "learning_rate": 4.29983766935595e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4907949525480333, + "grad_norm": 0.08813010156154633, + "learning_rate": 4.2981922159601075e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4912176872186174, + "grad_norm": 0.10131537914276123, + "learning_rate": 4.2965468400943906e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4916404218892012, + "grad_norm": 0.11560308188199997, + "learning_rate": 4.294901541940569e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4920631565597853, + "grad_norm": 0.10108035802841187, + "learning_rate": 4.2932563216804014e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4924858912303693, + "grad_norm": 0.10316962003707886, + "learning_rate": 4.29161117949564e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4929086259009532, + "grad_norm": 0.10077977180480957, + "learning_rate": 4.289966115568025e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4933313605715373, + "grad_norm": 0.08693758398294449, + "learning_rate": 4.288321130079291e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4937540952421213, + "grad_norm": 0.10085028409957886, + "learning_rate": 4.286840710354871e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4941768299127052, + "grad_norm": 0.10314809530973434, + "learning_rate": 4.2851958744006576e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4945995645832892, + "grad_norm": 0.07907504588365555, + "learning_rate": 4.283551117412302e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4950222992538733, + "grad_norm": 0.10821948200464249, + "learning_rate": 4.281906439571506e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4954450339244574, + "grad_norm": 0.10047976672649384, + "learning_rate": 4.280261841059961e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4958677685950414, + "grad_norm": 0.09827427566051483, + "learning_rate": 4.278617322059346e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4962905032656253, + "grad_norm": 0.1158173605799675, + "learning_rate": 4.276972882751337e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4967132379362094, + "grad_norm": 0.10515749454498291, + "learning_rate": 4.2753285233175995e-05, + "loss": 0.3538, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4971359726067934, + "grad_norm": 0.08473005145788193, + "learning_rate": 4.2736842439397876e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4975587072773773, + "grad_norm": 0.08245902508497238, + "learning_rate": 4.272040044799547e-05, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4979814419479613, + "grad_norm": 0.09501204639673233, + "learning_rate": 4.270395926078516e-05, + "loss": 0.3707, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4984041766185454, + "grad_norm": 0.08435887098312378, + "learning_rate": 4.268751887958326e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4988269112891293, + "grad_norm": 0.09604177623987198, + "learning_rate": 4.267107930620595e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4992496459597133, + "grad_norm": 0.09252262115478516, + "learning_rate": 4.265464054246935e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.4996723806302974, + "grad_norm": 0.11844949424266815, + "learning_rate": 4.263820259018949e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5000951153008812, + "grad_norm": 0.11835962533950806, + "learning_rate": 4.2621765451182294e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5005178499714655, + "grad_norm": 0.13059371709823608, + "learning_rate": 4.2605329127263606e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5009405846420494, + "grad_norm": 0.0888703241944313, + "learning_rate": 4.258889362024921e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5013633193126334, + "grad_norm": 0.09475035965442657, + "learning_rate": 4.257245893195472e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5017860539832175, + "grad_norm": 0.0935138687491417, + "learning_rate": 4.2557668413995796e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5022087886538014, + "grad_norm": 0.09649574756622314, + "learning_rate": 4.2541235286271e-05, + "loss": 0.3546, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5026315233243854, + "grad_norm": 0.10731510072946548, + "learning_rate": 4.252480298253106e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5030542579949695, + "grad_norm": 0.09758230298757553, + "learning_rate": 4.250837150459129e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5034769926655533, + "grad_norm": 0.11947084218263626, + "learning_rate": 4.249194085426687e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5038997273361376, + "grad_norm": 0.0912633016705513, + "learning_rate": 4.247551103337294e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5043224620067215, + "grad_norm": 0.08498969674110413, + "learning_rate": 4.2459082043724565e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5047451966773053, + "grad_norm": 0.09724396467208862, + "learning_rate": 4.2442653887136655e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5051679313478896, + "grad_norm": 0.09257620573043823, + "learning_rate": 4.242622656542407e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5055906660184735, + "grad_norm": 0.10217487812042236, + "learning_rate": 4.240980008040158e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5060134006890575, + "grad_norm": 0.10976829379796982, + "learning_rate": 4.239337443388385e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5064361353596416, + "grad_norm": 0.08556168526411057, + "learning_rate": 4.237694962768544e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5068588700302255, + "grad_norm": 0.09515631943941116, + "learning_rate": 4.236052566362087e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 51990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5072816047008095, + "grad_norm": 0.08898670226335526, + "learning_rate": 4.234410254350448e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5077043393713936, + "grad_norm": 0.09864699095487595, + "learning_rate": 4.232768026915059e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5081270740419774, + "grad_norm": 0.08584629744291306, + "learning_rate": 4.23112588423734e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5085498087125617, + "grad_norm": 0.10046399384737015, + "learning_rate": 4.2294838264987005e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5089725433831456, + "grad_norm": 0.08633457869291306, + "learning_rate": 4.2278418538805445e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5093952780537294, + "grad_norm": 0.0787804126739502, + "learning_rate": 4.226199966564262e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5098180127243137, + "grad_norm": 0.09134696424007416, + "learning_rate": 4.224558164731235e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5102407473948976, + "grad_norm": 0.09727161377668381, + "learning_rate": 4.2229164485628404e-05, + "loss": 0.351, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5106634820654816, + "grad_norm": 0.09714750945568085, + "learning_rate": 4.22127481824044e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5110862167360657, + "grad_norm": 0.11085519939661026, + "learning_rate": 4.219633273945385e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5115089514066495, + "grad_norm": 0.10344839841127396, + "learning_rate": 4.2179918158590224e-05, + "loss": 0.3701, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5119316860772336, + "grad_norm": 0.09314566850662231, + "learning_rate": 4.216350444162689e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5123544207478177, + "grad_norm": 0.08516397327184677, + "learning_rate": 4.214709159037709e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5127771554184015, + "grad_norm": 0.08850429952144623, + "learning_rate": 4.213067960665397e-05, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5131998900889858, + "grad_norm": 0.10775408148765564, + "learning_rate": 4.211426849227063e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5136226247595697, + "grad_norm": 0.10474662482738495, + "learning_rate": 4.2097858249040006e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5140453594301535, + "grad_norm": 0.08887671679258347, + "learning_rate": 4.208144887877498e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5144680941007378, + "grad_norm": 0.10175906121730804, + "learning_rate": 4.206504038328836e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5148908287713216, + "grad_norm": 0.0918508768081665, + "learning_rate": 4.204863276439278e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5153135634419057, + "grad_norm": 0.10831017047166824, + "learning_rate": 4.203222602390084e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5157362981124898, + "grad_norm": 0.08302347362041473, + "learning_rate": 4.201582016362503e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5161590327830736, + "grad_norm": 0.10356847196817398, + "learning_rate": 4.1999415185377725e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5165817674536577, + "grad_norm": 0.09896865487098694, + "learning_rate": 4.198301109097124e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5170045021242418, + "grad_norm": 0.09281165897846222, + "learning_rate": 4.1966607882217745e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5174272367948256, + "grad_norm": 0.0918421745300293, + "learning_rate": 4.195020556092935e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.51784997146541, + "grad_norm": 0.12278810143470764, + "learning_rate": 4.193380412891806e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5182727061359937, + "grad_norm": 0.09154857695102692, + "learning_rate": 4.1917403587995765e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5186954408065776, + "grad_norm": 0.08849607408046722, + "learning_rate": 4.1901003939974246e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5191181754771619, + "grad_norm": 0.1112401932477951, + "learning_rate": 4.1884605186665234e-05, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5195409101477457, + "grad_norm": 0.09679940342903137, + "learning_rate": 4.186820732988032e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5199636448183298, + "grad_norm": 0.10548747330904007, + "learning_rate": 4.185181037143101e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5203863794889139, + "grad_norm": 0.1089174821972847, + "learning_rate": 4.1835414313128695e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5208091141594977, + "grad_norm": 0.09345147758722305, + "learning_rate": 4.1819019156784714e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5212318488300818, + "grad_norm": 0.09878484159708023, + "learning_rate": 4.1802624904210244e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5216545835006658, + "grad_norm": 0.10288581252098083, + "learning_rate": 4.1786231557216404e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5220773181712497, + "grad_norm": 0.1153779849410057, + "learning_rate": 4.176983911761422e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.522500052841834, + "grad_norm": 0.1196327954530716, + "learning_rate": 4.175344758721455e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5229227875124178, + "grad_norm": 0.09982497245073318, + "learning_rate": 4.173705696782824e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5233455221830017, + "grad_norm": 0.14133630692958832, + "learning_rate": 4.172066726126597e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.523768256853586, + "grad_norm": 0.09433500468730927, + "learning_rate": 4.170427846933835e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5241909915241698, + "grad_norm": 0.09975042939186096, + "learning_rate": 4.16878905938559e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5246137261947539, + "grad_norm": 0.0933978483080864, + "learning_rate": 4.1671503636629e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.525036460865338, + "grad_norm": 0.112689308822155, + "learning_rate": 4.165511759946796e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5254591955359218, + "grad_norm": 0.09819865971803665, + "learning_rate": 4.1638732484182985e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5258819302065059, + "grad_norm": 0.09082372486591339, + "learning_rate": 4.162234829258418e-05, + "loss": 0.3551, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.52630466487709, + "grad_norm": 0.09239795804023743, + "learning_rate": 4.16059650264815e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5267273995476738, + "grad_norm": 0.08529244363307953, + "learning_rate": 4.158958268768487e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.527150134218258, + "grad_norm": 0.09985142946243286, + "learning_rate": 4.1573201278004073e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.527572868888842, + "grad_norm": 0.10204614698886871, + "learning_rate": 4.15568207992488e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5279956035594258, + "grad_norm": 0.0955485850572586, + "learning_rate": 4.1540441253228616e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.52841833823001, + "grad_norm": 0.0915314108133316, + "learning_rate": 4.152406264175304e-05, + "loss": 0.3685, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.528841072900594, + "grad_norm": 0.08747690171003342, + "learning_rate": 4.1507684966631416e-05, + "loss": 0.3541, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.529263807571178, + "grad_norm": 0.10168622434139252, + "learning_rate": 4.149130822967303e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.529686542241762, + "grad_norm": 0.11923198401927948, + "learning_rate": 4.147493243268708e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5301092769123459, + "grad_norm": 0.0788947269320488, + "learning_rate": 4.145855757748258e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.53053201158293, + "grad_norm": 0.09964432567358017, + "learning_rate": 4.144218366586854e-05, + "loss": 0.352, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.530954746253514, + "grad_norm": 0.09542679041624069, + "learning_rate": 4.142581069965379e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5313774809240979, + "grad_norm": 0.11045187711715698, + "learning_rate": 4.1409438680647095e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5318002155946822, + "grad_norm": 0.11429636925458908, + "learning_rate": 4.139306761065712e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.532222950265266, + "grad_norm": 0.10134705901145935, + "learning_rate": 4.137669749149238e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5326456849358499, + "grad_norm": 0.10736095905303955, + "learning_rate": 4.136032832496133e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5330684196064341, + "grad_norm": 0.0991000384092331, + "learning_rate": 4.1343960112872324e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.533491154277018, + "grad_norm": 0.09500618278980255, + "learning_rate": 4.132759285703358e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.533913888947602, + "grad_norm": 0.09210672974586487, + "learning_rate": 4.1311226559253195e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5343366236181861, + "grad_norm": 0.10117146372795105, + "learning_rate": 4.129486122133921e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.53475935828877, + "grad_norm": 0.10391350835561752, + "learning_rate": 4.127849684509955e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.535182092959354, + "grad_norm": 0.08865182846784592, + "learning_rate": 4.126213343234199e-05, + "loss": 0.3691, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.535604827629938, + "grad_norm": 0.09804297983646393, + "learning_rate": 4.124577098487424e-05, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.536027562300522, + "grad_norm": 0.09741973876953125, + "learning_rate": 4.12294095045039e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5364502969711062, + "grad_norm": 0.10775598883628845, + "learning_rate": 4.1213048993038474e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.53687303164169, + "grad_norm": 0.11599568277597427, + "learning_rate": 4.11966894522853e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.537295766312274, + "grad_norm": 0.09729789942502975, + "learning_rate": 4.11803308840517e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5377185009828582, + "grad_norm": 0.10525138676166534, + "learning_rate": 4.1163973290144774e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.538141235653442, + "grad_norm": 0.09494759887456894, + "learning_rate": 4.114761667237164e-05, + "loss": 0.3515, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5385639703240261, + "grad_norm": 0.10712543874979019, + "learning_rate": 4.1131261032539195e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5389867049946102, + "grad_norm": 0.1099325567483902, + "learning_rate": 4.111490637245431e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.539409439665194, + "grad_norm": 0.09517675638198853, + "learning_rate": 4.109855269392373e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5398321743357781, + "grad_norm": 0.10535828024148941, + "learning_rate": 4.108219999875403e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5402549090063622, + "grad_norm": 0.10106395930051804, + "learning_rate": 4.106584828875178e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.540677643676946, + "grad_norm": 0.09040851145982742, + "learning_rate": 4.104949756572336e-05, + "loss": 0.3508, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5411003783475303, + "grad_norm": 0.09801467508077621, + "learning_rate": 4.103314783147508e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5415231130181142, + "grad_norm": 0.0935758650302887, + "learning_rate": 4.10167990878131e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.541945847688698, + "grad_norm": 0.09407702833414078, + "learning_rate": 4.100045133654351e-05, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5423685823592823, + "grad_norm": 0.1079779788851738, + "learning_rate": 4.098410457947229e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5427913170298662, + "grad_norm": 0.09034605324268341, + "learning_rate": 4.096775881840532e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5432140517004502, + "grad_norm": 0.1049281433224678, + "learning_rate": 4.0951414055148296e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5436367863710343, + "grad_norm": 0.11610250920057297, + "learning_rate": 4.0935070291506895e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5440595210416181, + "grad_norm": 0.1190100759267807, + "learning_rate": 4.091872752928664e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5444822557122022, + "grad_norm": 0.08657199144363403, + "learning_rate": 4.0902385770292964e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5449049903827863, + "grad_norm": 0.08808436244726181, + "learning_rate": 4.088604501633114e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5453277250533701, + "grad_norm": 0.0965336486697197, + "learning_rate": 4.086970526920637e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5457504597239544, + "grad_norm": 0.09376021474599838, + "learning_rate": 4.085336653072376e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5461731943945383, + "grad_norm": 0.10271687805652618, + "learning_rate": 4.0837028802688264e-05, + "loss": 0.3558, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.546595929065122, + "grad_norm": 0.1117398664355278, + "learning_rate": 4.082069208690475e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5470186637357064, + "grad_norm": 0.10061702132225037, + "learning_rate": 4.080435638517799e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5474413984062902, + "grad_norm": 0.1059851348400116, + "learning_rate": 4.0788021699312584e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5478641330768743, + "grad_norm": 0.11253190040588379, + "learning_rate": 4.077168803111308e-05, + "loss": 0.3549, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5482868677474584, + "grad_norm": 0.11762633919715881, + "learning_rate": 4.0755355382383906e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5487096024180422, + "grad_norm": 0.11270525306463242, + "learning_rate": 4.073902375492933e-05, + "loss": 0.3548, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5491323370886263, + "grad_norm": 0.0891830250620842, + "learning_rate": 4.072269315055356e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 52990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5495550717592104, + "grad_norm": 0.10175006836652756, + "learning_rate": 4.0706363571060654e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5499778064297942, + "grad_norm": 0.12037036567926407, + "learning_rate": 4.0690035018254584e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5504005411003785, + "grad_norm": 0.10781057178974152, + "learning_rate": 4.0673707493939215e-05, + "loss": 0.3706, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5508232757709624, + "grad_norm": 0.09325242787599564, + "learning_rate": 4.0657380999918254e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5512460104415462, + "grad_norm": 0.131817027926445, + "learning_rate": 4.064105553799533e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5516687451121305, + "grad_norm": 0.10704177618026733, + "learning_rate": 4.0624731109973976e-05, + "loss": 0.3528, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5520914797827143, + "grad_norm": 0.09465155750513077, + "learning_rate": 4.0608407717657557e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5525142144532984, + "grad_norm": 0.1027459129691124, + "learning_rate": 4.059208536284935e-05, + "loss": 0.3535, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5529369491238825, + "grad_norm": 0.09980654716491699, + "learning_rate": 4.0575764047352517e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5533596837944663, + "grad_norm": 0.1234932616353035, + "learning_rate": 4.055944377297013e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5537824184650504, + "grad_norm": 0.10116894543170929, + "learning_rate": 4.0543124541505094e-05, + "loss": 0.355, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5542051531356345, + "grad_norm": 0.08927939087152481, + "learning_rate": 4.0526806354760244e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5546278878062183, + "grad_norm": 0.10200810432434082, + "learning_rate": 4.0510489214538295e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5550506224768026, + "grad_norm": 0.08984579890966415, + "learning_rate": 4.04941731226418e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5554733571473864, + "grad_norm": 0.10630907863378525, + "learning_rate": 4.0477858080873255e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5558960918179705, + "grad_norm": 0.10069738328456879, + "learning_rate": 4.0461544091035035e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5563188264885546, + "grad_norm": 0.08205108344554901, + "learning_rate": 4.0445231154929334e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5567415611591384, + "grad_norm": 0.10073509067296982, + "learning_rate": 4.042891927435831e-05, + "loss": 0.3719, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5571642958297225, + "grad_norm": 0.11131121963262558, + "learning_rate": 4.041260845112394e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5575870305003066, + "grad_norm": 0.1333172768354416, + "learning_rate": 4.039629868702813e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5580097651708904, + "grad_norm": 0.1109989657998085, + "learning_rate": 4.037998998387267e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5584324998414745, + "grad_norm": 0.09574081003665924, + "learning_rate": 4.036368234345919e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5588552345120585, + "grad_norm": 0.13708549737930298, + "learning_rate": 4.034737576758922e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5592779691826424, + "grad_norm": 0.09089354425668716, + "learning_rate": 4.033107025806423e-05, + "loss": 0.3713, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5597007038532267, + "grad_norm": 0.0951312780380249, + "learning_rate": 4.0314765816685485e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5601234385238105, + "grad_norm": 0.11754649877548218, + "learning_rate": 4.029846244525416e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5605461731943946, + "grad_norm": 0.10605931282043457, + "learning_rate": 4.028216014557133e-05, + "loss": 0.3711, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5609689078649787, + "grad_norm": 0.09505799412727356, + "learning_rate": 4.026585891943796e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5613916425355625, + "grad_norm": 0.08888068050146103, + "learning_rate": 4.024955876865486e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5618143772061466, + "grad_norm": 0.1214713454246521, + "learning_rate": 4.023325969502275e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5622371118767306, + "grad_norm": 0.08424443006515503, + "learning_rate": 4.0216961700342234e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5626598465473145, + "grad_norm": 0.09188681840896606, + "learning_rate": 4.020066478641376e-05, + "loss": 0.3545, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5630825812178986, + "grad_norm": 0.10197053849697113, + "learning_rate": 4.0184368955037685e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5635053158884826, + "grad_norm": 0.08969781547784805, + "learning_rate": 4.016807420801427e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5639280505590665, + "grad_norm": 0.08270411938428879, + "learning_rate": 4.015178054714359e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5643507852296508, + "grad_norm": 0.0953884869813919, + "learning_rate": 4.013548797422567e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5647735199002346, + "grad_norm": 0.10026618838310242, + "learning_rate": 4.011919649106036e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5651962545708187, + "grad_norm": 0.08316558599472046, + "learning_rate": 4.010290609944742e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5656189892414027, + "grad_norm": 0.09640863537788391, + "learning_rate": 4.0086616801186503e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5660417239119866, + "grad_norm": 0.09536662697792053, + "learning_rate": 4.007032859807709e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5664644585825707, + "grad_norm": 0.10001170635223389, + "learning_rate": 4.0054041491918584e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5668871932531547, + "grad_norm": 0.10718002915382385, + "learning_rate": 4.0037755484510274e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5673099279237386, + "grad_norm": 0.11850469559431076, + "learning_rate": 4.00214705776513e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5677326625943226, + "grad_norm": 0.09491994976997375, + "learning_rate": 4.000518677314065e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5681553972649067, + "grad_norm": 0.11559350788593292, + "learning_rate": 3.9988904072777264e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5685781319354906, + "grad_norm": 0.10409149527549744, + "learning_rate": 3.997262247835993e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5690008666060749, + "grad_norm": 0.09848589450120926, + "learning_rate": 3.9956341991687284e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5694236012766587, + "grad_norm": 0.11303503066301346, + "learning_rate": 3.994006261455788e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5698463359472428, + "grad_norm": 0.08892778307199478, + "learning_rate": 3.992378434877014e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5702690706178268, + "grad_norm": 0.09656780958175659, + "learning_rate": 3.990750719612234e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5706918052884107, + "grad_norm": 0.09097453206777573, + "learning_rate": 3.989123115841266e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5711145399589947, + "grad_norm": 0.0892903208732605, + "learning_rate": 3.9874956237439164e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5715372746295788, + "grad_norm": 0.09624665975570679, + "learning_rate": 3.9858682434999736e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5719600093001627, + "grad_norm": 0.09097728133201599, + "learning_rate": 3.984240975289221e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5723827439707467, + "grad_norm": 0.09832444041967392, + "learning_rate": 3.982613819291424e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5728054786413308, + "grad_norm": 0.11264850944280624, + "learning_rate": 3.9809867756863386e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5732282133119146, + "grad_norm": 0.11672240495681763, + "learning_rate": 3.979359844653709e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.573650947982499, + "grad_norm": 0.09929953515529633, + "learning_rate": 3.9777330263732624e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5740736826530828, + "grad_norm": 0.11467316001653671, + "learning_rate": 3.97610632102472e-05, + "loss": 0.3556, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5744964173236669, + "grad_norm": 0.10766081511974335, + "learning_rate": 3.974479728787785e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.574919151994251, + "grad_norm": 0.08251751959323883, + "learning_rate": 3.972853249842155e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5753418866648348, + "grad_norm": 0.08924482762813568, + "learning_rate": 3.971226884367502e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5757646213354188, + "grad_norm": 0.10682671517133713, + "learning_rate": 3.9696006325434995e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.576187356006003, + "grad_norm": 0.09723201394081116, + "learning_rate": 3.9679744945498026e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5766100906765868, + "grad_norm": 0.10840103775262833, + "learning_rate": 3.9663484705660526e-05, + "loss": 0.355, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5770328253471708, + "grad_norm": 0.10666587948799133, + "learning_rate": 3.9647225607718795e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5774555600177549, + "grad_norm": 0.09989669173955917, + "learning_rate": 3.963096765346904e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5778782946883387, + "grad_norm": 0.10355124622583389, + "learning_rate": 3.961471084470727e-05, + "loss": 0.3527, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.578301029358923, + "grad_norm": 0.09293796867132187, + "learning_rate": 3.959845518322943e-05, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5787237640295069, + "grad_norm": 0.0896683856844902, + "learning_rate": 3.9582200670831326e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.579146498700091, + "grad_norm": 0.08983779698610306, + "learning_rate": 3.956594730930859e-05, + "loss": 0.3547, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.579569233370675, + "grad_norm": 0.09295137971639633, + "learning_rate": 3.95496951004568e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5799919680412589, + "grad_norm": 0.10322882980108261, + "learning_rate": 3.953344404607134e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.580414702711843, + "grad_norm": 0.12625755369663239, + "learning_rate": 3.951719414794751e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.580837437382427, + "grad_norm": 0.1078839972615242, + "learning_rate": 3.950094540788049e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5812601720530108, + "grad_norm": 0.10424195230007172, + "learning_rate": 3.948469782766528e-05, + "loss": 0.3563, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.581682906723595, + "grad_norm": 0.09249597787857056, + "learning_rate": 3.94684514090968e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.582105641394179, + "grad_norm": 0.09354595839977264, + "learning_rate": 3.9452206153969825e-05, + "loss": 0.3559, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5825283760647628, + "grad_norm": 0.10045778751373291, + "learning_rate": 3.943596206407901e-05, + "loss": 0.3525, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5829511107353471, + "grad_norm": 0.09879782795906067, + "learning_rate": 3.9419719141218834e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.583373845405931, + "grad_norm": 0.12695449590682983, + "learning_rate": 3.940347738718372e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.583796580076515, + "grad_norm": 0.1022525206208229, + "learning_rate": 3.938723680376793e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.584219314747099, + "grad_norm": 0.11382701247930527, + "learning_rate": 3.937099739276556e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.584642049417683, + "grad_norm": 0.09955748915672302, + "learning_rate": 3.935475915597064e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.585064784088267, + "grad_norm": 0.11779879778623581, + "learning_rate": 3.933852209517703e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.585487518758851, + "grad_norm": 0.09811430424451828, + "learning_rate": 3.93222862121785e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.585910253429435, + "grad_norm": 0.11517129093408585, + "learning_rate": 3.9306051508768604e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.586332988100019, + "grad_norm": 0.0992111787199974, + "learning_rate": 3.928981798674089e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.586755722770603, + "grad_norm": 0.09909151494503021, + "learning_rate": 3.927358564788865e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.587178457441187, + "grad_norm": 0.09675218909978867, + "learning_rate": 3.9257354494005135e-05, + "loss": 0.354, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5876011921117712, + "grad_norm": 0.1082572415471077, + "learning_rate": 3.924112452688341e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.588023926782355, + "grad_norm": 0.1381528675556183, + "learning_rate": 3.922489574831645e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5884466614529391, + "grad_norm": 0.09080474823713303, + "learning_rate": 3.9208668160097096e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5888693961235232, + "grad_norm": 0.09755030274391174, + "learning_rate": 3.9192441764018006e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.589292130794107, + "grad_norm": 0.1124471127986908, + "learning_rate": 3.917621656187176e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.589714865464691, + "grad_norm": 0.11723663657903671, + "learning_rate": 3.91599925554508e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5901376001352752, + "grad_norm": 0.10309942066669464, + "learning_rate": 3.9143769746547435e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.590560334805859, + "grad_norm": 0.1335654854774475, + "learning_rate": 3.912754813695378e-05, + "loss": 0.3515, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.590983069476443, + "grad_norm": 0.10279027372598648, + "learning_rate": 3.911132772846191e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5914058041470271, + "grad_norm": 0.11112121492624283, + "learning_rate": 3.909510852286371e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 53990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.591828538817611, + "grad_norm": 0.09422079473733902, + "learning_rate": 3.907889052195097e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5922512734881953, + "grad_norm": 0.09382358938455582, + "learning_rate": 3.90626737275153e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5926740081587791, + "grad_norm": 0.08751518279314041, + "learning_rate": 3.904645814134821e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5930967428293632, + "grad_norm": 0.10851125419139862, + "learning_rate": 3.903024376524108e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5935194774999473, + "grad_norm": 0.09218515455722809, + "learning_rate": 3.901403060098513e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5939422121705311, + "grad_norm": 0.11210844665765762, + "learning_rate": 3.8997818650371484e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5943649468411152, + "grad_norm": 0.0923786610364914, + "learning_rate": 3.8981607915191085e-05, + "loss": 0.3545, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5947876815116993, + "grad_norm": 0.11331034451723099, + "learning_rate": 3.896539839723477e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.595210416182283, + "grad_norm": 0.13623347878456116, + "learning_rate": 3.8949190098293236e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5956331508528672, + "grad_norm": 0.11317523568868637, + "learning_rate": 3.8932983020157055e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5960558855234512, + "grad_norm": 0.10561933368444443, + "learning_rate": 3.8916777164616656e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.596478620194035, + "grad_norm": 0.11460139602422714, + "learning_rate": 3.890057253346232e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5969013548646194, + "grad_norm": 0.13033731281757355, + "learning_rate": 3.888436912848422e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5973240895352032, + "grad_norm": 0.08732825517654419, + "learning_rate": 3.8868166951472376e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5977468242057873, + "grad_norm": 0.11021659523248672, + "learning_rate": 3.885196600421667e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5981695588763714, + "grad_norm": 0.10650993883609772, + "learning_rate": 3.883576628850686e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5985922935469552, + "grad_norm": 0.09378135949373245, + "learning_rate": 3.881956780613255e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5990150282175393, + "grad_norm": 0.12006010860204697, + "learning_rate": 3.880337055888322e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5994377628881233, + "grad_norm": 0.15215618908405304, + "learning_rate": 3.8787174548548236e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.5998604975587072, + "grad_norm": 0.08230208605527878, + "learning_rate": 3.8770979776916774e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6002832322292913, + "grad_norm": 0.14847132563591003, + "learning_rate": 3.875478624577792e-05, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6007059668998753, + "grad_norm": 0.13009722530841827, + "learning_rate": 3.873859395692061e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6011287015704592, + "grad_norm": 0.08723198622465134, + "learning_rate": 3.872240291213363e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6015514362410435, + "grad_norm": 0.09215543419122696, + "learning_rate": 3.870621311320565e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6019741709116273, + "grad_norm": 0.0959765762090683, + "learning_rate": 3.869002456192516e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6023969055822114, + "grad_norm": 0.082159124314785, + "learning_rate": 3.867383726008056e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6028196402527954, + "grad_norm": 0.13126897811889648, + "learning_rate": 3.86576512094601e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6032423749233793, + "grad_norm": 0.08471386134624481, + "learning_rate": 3.864146641185188e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6036651095939634, + "grad_norm": 0.09589790552854538, + "learning_rate": 3.862528286904387e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6040878442645474, + "grad_norm": 0.09193417429924011, + "learning_rate": 3.860910058282389e-05, + "loss": 0.35, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6045105789351313, + "grad_norm": 0.10882999747991562, + "learning_rate": 3.8592919554979646e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6049333136057153, + "grad_norm": 0.12345169484615326, + "learning_rate": 3.857673978729868e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6053560482762994, + "grad_norm": 0.10809362679719925, + "learning_rate": 3.856056128156841e-05, + "loss": 0.3525, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6057787829468833, + "grad_norm": 0.13127006590366364, + "learning_rate": 3.8544384039576096e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6062015176174675, + "grad_norm": 0.08341936767101288, + "learning_rate": 3.8528208063108864e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6066242522880514, + "grad_norm": 0.0976388156414032, + "learning_rate": 3.851203335395373e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6070469869586355, + "grad_norm": 0.11459273099899292, + "learning_rate": 3.849585991389754e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6074697216292195, + "grad_norm": 0.09659548848867416, + "learning_rate": 3.847968774472699e-05, + "loss": 0.3702, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6078924562998034, + "grad_norm": 0.08919502049684525, + "learning_rate": 3.8463516848228665e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6083151909703874, + "grad_norm": 0.09698038548231125, + "learning_rate": 3.844734722618901e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6087379256409715, + "grad_norm": 0.09571284800767899, + "learning_rate": 3.8431178880394294e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6091606603115554, + "grad_norm": 0.10199996083974838, + "learning_rate": 3.84150118126307e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6095833949821394, + "grad_norm": 0.1008402407169342, + "learning_rate": 3.839884602468418e-05, + "loss": 0.3543, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6100061296527235, + "grad_norm": 0.10156096518039703, + "learning_rate": 3.838268151834065e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6104288643233073, + "grad_norm": 0.1298118680715561, + "learning_rate": 3.836651829538581e-05, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6108515989938916, + "grad_norm": 0.0900474414229393, + "learning_rate": 3.835035635760525e-05, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6112743336644755, + "grad_norm": 0.09224024415016174, + "learning_rate": 3.833419570678443e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6116970683350595, + "grad_norm": 0.0873030424118042, + "learning_rate": 3.831803634470862e-05, + "loss": 0.3527, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6121198030056436, + "grad_norm": 0.1037120372056961, + "learning_rate": 3.8301878273163e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6125425376762275, + "grad_norm": 0.1424626111984253, + "learning_rate": 3.828572149393259e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6129652723468115, + "grad_norm": 0.10262609273195267, + "learning_rate": 3.8269566008802235e-05, + "loss": 0.3701, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6133880070173956, + "grad_norm": 0.11094491183757782, + "learning_rate": 3.8253411819556695e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6138107416879794, + "grad_norm": 0.09127236902713776, + "learning_rate": 3.8237258927980524e-05, + "loss": 0.3534, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6142334763585635, + "grad_norm": 0.11335050314664841, + "learning_rate": 3.822110733585818e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6146562110291476, + "grad_norm": 0.08409397304058075, + "learning_rate": 3.8204957044973967e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6150789456997314, + "grad_norm": 0.1034776121377945, + "learning_rate": 3.818880805711203e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6155016803703157, + "grad_norm": 0.08634500205516815, + "learning_rate": 3.817266037405639e-05, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6159244150408996, + "grad_norm": 0.1065390482544899, + "learning_rate": 3.8156513997590914e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6163471497114836, + "grad_norm": 0.09028283506631851, + "learning_rate": 3.8140368929499313e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6167698843820677, + "grad_norm": 0.11101330071687698, + "learning_rate": 3.81242251715652e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6171926190526515, + "grad_norm": 0.11454228311777115, + "learning_rate": 3.810808272557196e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6176153537232356, + "grad_norm": 0.12412311136722565, + "learning_rate": 3.809194159330291e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6180380883938197, + "grad_norm": 0.09113699197769165, + "learning_rate": 3.807580177654118e-05, + "loss": 0.3542, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6184608230644035, + "grad_norm": 0.07394272834062576, + "learning_rate": 3.805966327706978e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6188835577349876, + "grad_norm": 0.11046329885721207, + "learning_rate": 3.8043526096671566e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6193062924055717, + "grad_norm": 0.08664979785680771, + "learning_rate": 3.8027390237129225e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6197290270761555, + "grad_norm": 0.08882985264062881, + "learning_rate": 3.8011255700225336e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6201517617467398, + "grad_norm": 0.0983869656920433, + "learning_rate": 3.7995122487742325e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6205744964173237, + "grad_norm": 0.11496715247631073, + "learning_rate": 3.7978990601462425e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6209972310879077, + "grad_norm": 0.10472384095191956, + "learning_rate": 3.7962860043167787e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6214199657584918, + "grad_norm": 0.09782272577285767, + "learning_rate": 3.7946730814640363e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6218427004290756, + "grad_norm": 0.10017378628253937, + "learning_rate": 3.7930602917662e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6222654350996597, + "grad_norm": 0.10317075252532959, + "learning_rate": 3.791447635401439e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6226881697702438, + "grad_norm": 0.09861904382705688, + "learning_rate": 3.7898351125479036e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6231109044408276, + "grad_norm": 0.11007080972194672, + "learning_rate": 3.7882227233837345e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6235336391114117, + "grad_norm": 0.11503998190164566, + "learning_rate": 3.786610468087055e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6239563737819958, + "grad_norm": 0.1073482483625412, + "learning_rate": 3.7849983468359765e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6243791084525796, + "grad_norm": 0.1005290225148201, + "learning_rate": 3.7833863598085894e-05, + "loss": 0.3557, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.624801843123164, + "grad_norm": 0.10988392680883408, + "learning_rate": 3.781774507182974e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6252245777937477, + "grad_norm": 0.10978097468614578, + "learning_rate": 3.780162789137198e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6256473124643318, + "grad_norm": 0.10949535667896271, + "learning_rate": 3.778551205849307e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6260700471349159, + "grad_norm": 0.12113095074892044, + "learning_rate": 3.776939757497339e-05, + "loss": 0.3529, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6264927818054997, + "grad_norm": 0.08419416844844818, + "learning_rate": 3.775328444259312e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6269155164760838, + "grad_norm": 0.09491756558418274, + "learning_rate": 3.773717266313233e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6273382511466679, + "grad_norm": 0.10226808488368988, + "learning_rate": 3.77210622383709e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6277609858172517, + "grad_norm": 0.11215116828680038, + "learning_rate": 3.770495317008861e-05, + "loss": 0.3537, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6281837204878358, + "grad_norm": 0.08785828202962875, + "learning_rate": 3.7688845460065024e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6286064551584198, + "grad_norm": 0.09388149529695511, + "learning_rate": 3.7672739110079625e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6290291898290037, + "grad_norm": 0.09373711049556732, + "learning_rate": 3.765663412191169e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.629451924499588, + "grad_norm": 0.09978163242340088, + "learning_rate": 3.764053049734038e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6298746591701718, + "grad_norm": 0.09474509954452515, + "learning_rate": 3.7624428238144704e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.630297393840756, + "grad_norm": 0.10009393841028214, + "learning_rate": 3.760832734610349e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.63072012851134, + "grad_norm": 0.09395215660333633, + "learning_rate": 3.759222782299545e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6311428631819238, + "grad_norm": 0.11328864842653275, + "learning_rate": 3.757612967059915e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6315655978525079, + "grad_norm": 0.10051129013299942, + "learning_rate": 3.756003289069296e-05, + "loss": 0.352, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.631988332523092, + "grad_norm": 0.09670386463403702, + "learning_rate": 3.7543937485055124e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6324110671936758, + "grad_norm": 0.13259126245975494, + "learning_rate": 3.752784345546373e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6328338018642599, + "grad_norm": 0.08681239187717438, + "learning_rate": 3.7511750803696735e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.633256536534844, + "grad_norm": 0.09470473974943161, + "learning_rate": 3.7495659531531926e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6336792712054278, + "grad_norm": 0.1026219055056572, + "learning_rate": 3.747956964074692e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 54990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.634102005876012, + "grad_norm": 0.09744653850793839, + "learning_rate": 3.746348113311921e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.634524740546596, + "grad_norm": 0.10909479856491089, + "learning_rate": 3.744739401042614e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.63494747521718, + "grad_norm": 0.1564907729625702, + "learning_rate": 3.7431308274444865e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.635370209887764, + "grad_norm": 0.10934529453516006, + "learning_rate": 3.7415223926952434e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.635792944558348, + "grad_norm": 0.09381961822509766, + "learning_rate": 3.7399140969725685e-05, + "loss": 0.3536, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.636215679228932, + "grad_norm": 0.09560791403055191, + "learning_rate": 3.738305940454136e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.636638413899516, + "grad_norm": 0.10961221158504486, + "learning_rate": 3.736697923317601e-05, + "loss": 0.3556, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6370611485700999, + "grad_norm": 0.12278375774621964, + "learning_rate": 3.735090045740605e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.637483883240684, + "grad_norm": 0.10296183824539185, + "learning_rate": 3.733482307900773e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.637906617911268, + "grad_norm": 0.09235161542892456, + "learning_rate": 3.731874709975715e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6383293525818519, + "grad_norm": 0.10383596271276474, + "learning_rate": 3.730267252143026e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6387520872524362, + "grad_norm": 0.09976121038198471, + "learning_rate": 3.728659934580286e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.63917482192302, + "grad_norm": 0.1138455718755722, + "learning_rate": 3.727052757465058e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.639597556593604, + "grad_norm": 0.10567724704742432, + "learning_rate": 3.7254457209748895e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6400202912641881, + "grad_norm": 0.09456530958414078, + "learning_rate": 3.723838825287312e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.640443025934772, + "grad_norm": 0.10619884729385376, + "learning_rate": 3.722232070579844e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.640865760605356, + "grad_norm": 0.0951908752322197, + "learning_rate": 3.720625457029988e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6412884952759401, + "grad_norm": 0.10133374482393265, + "learning_rate": 3.7190189848152276e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.641711229946524, + "grad_norm": 0.08633384108543396, + "learning_rate": 3.7174126541130344e-05, + "loss": 0.3539, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.642133964617108, + "grad_norm": 0.09430554509162903, + "learning_rate": 3.715806465100863e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.642556699287692, + "grad_norm": 0.1290643811225891, + "learning_rate": 3.714200417956152e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.642979433958276, + "grad_norm": 0.09571239352226257, + "learning_rate": 3.712594512856326e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6434021686288602, + "grad_norm": 0.11119784414768219, + "learning_rate": 3.710988749978791e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.643824903299444, + "grad_norm": 0.10640596598386765, + "learning_rate": 3.7093831295009384e-05, + "loss": 0.3513, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6442476379700282, + "grad_norm": 0.12992359697818756, + "learning_rate": 3.707777651600145e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6446703726406122, + "grad_norm": 0.0998828187584877, + "learning_rate": 3.706172316453771e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.645093107311196, + "grad_norm": 0.10275661200284958, + "learning_rate": 3.704567124239162e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6455158419817801, + "grad_norm": 0.08797160536050797, + "learning_rate": 3.7029620751336456e-05, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6459385766523642, + "grad_norm": 0.09199215471744537, + "learning_rate": 3.701357169314536e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.646361311322948, + "grad_norm": 0.11409495770931244, + "learning_rate": 3.6997524069591304e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6467840459935321, + "grad_norm": 0.11844733357429504, + "learning_rate": 3.69814778824471e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6472067806641162, + "grad_norm": 0.10026625543832779, + "learning_rate": 3.696543313348539e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6476295153347, + "grad_norm": 0.11221660673618317, + "learning_rate": 3.6949389824478675e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6480522500052843, + "grad_norm": 0.10112892836332321, + "learning_rate": 3.6934952078998995e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6484749846758682, + "grad_norm": 0.09793822467327118, + "learning_rate": 3.6918911510789436e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6488977193464522, + "grad_norm": 0.0936591625213623, + "learning_rate": 3.6902872387674205e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6493204540170363, + "grad_norm": 0.11011312156915665, + "learning_rate": 3.688683471142521e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6497431886876202, + "grad_norm": 0.12360543012619019, + "learning_rate": 3.6870798483814134e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6501659233582042, + "grad_norm": 0.10760898888111115, + "learning_rate": 3.685476370661253e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6505886580287883, + "grad_norm": 0.10207384079694748, + "learning_rate": 3.683873038159179e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6510113926993721, + "grad_norm": 0.10121886432170868, + "learning_rate": 3.682269851052317e-05, + "loss": 0.3534, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6514341273699562, + "grad_norm": 0.09441575407981873, + "learning_rate": 3.6806668095177725e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6518568620405403, + "grad_norm": 0.11245013028383255, + "learning_rate": 3.6790639137326375e-05, + "loss": 0.3692, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6522795967111241, + "grad_norm": 0.10428163409233093, + "learning_rate": 3.6774611638739884e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6527023313817084, + "grad_norm": 0.08869388699531555, + "learning_rate": 3.6758585601188813e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6531250660522923, + "grad_norm": 0.08589374274015427, + "learning_rate": 3.674256102644362e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6535478007228763, + "grad_norm": 0.1371346414089203, + "learning_rate": 3.672653791627455e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6539705353934604, + "grad_norm": 0.11141324043273926, + "learning_rate": 3.671051627245171e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6543932700640442, + "grad_norm": 0.0837167352437973, + "learning_rate": 3.669449609674507e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6548160047346283, + "grad_norm": 0.10123047977685928, + "learning_rate": 3.667847739092437e-05, + "loss": 0.3685, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6552387394052124, + "grad_norm": 0.08883309364318848, + "learning_rate": 3.6662460156759257e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6556614740757962, + "grad_norm": 0.07741746306419373, + "learning_rate": 3.66464443960192e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6560842087463803, + "grad_norm": 0.11328630149364471, + "learning_rate": 3.663043011047345e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6565069434169644, + "grad_norm": 0.09909096360206604, + "learning_rate": 3.661441730189116e-05, + "loss": 0.3515, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6569296780875482, + "grad_norm": 0.10398653894662857, + "learning_rate": 3.659840597204133e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6573524127581325, + "grad_norm": 0.09139741957187653, + "learning_rate": 3.6582396122692715e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6577751474287163, + "grad_norm": 0.11186228692531586, + "learning_rate": 3.656638775561396e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6581978820993004, + "grad_norm": 0.08625820279121399, + "learning_rate": 3.655038087257356e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6586206167698845, + "grad_norm": 0.10235518962144852, + "learning_rate": 3.653437547533983e-05, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6590433514404683, + "grad_norm": 0.10212196409702301, + "learning_rate": 3.6518371565680905e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6594660861110524, + "grad_norm": 0.1138731837272644, + "learning_rate": 3.6502369145364765e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6598888207816365, + "grad_norm": 0.10102026164531708, + "learning_rate": 3.648636821615926e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6603115554522203, + "grad_norm": 0.13533513247966766, + "learning_rate": 3.6470368779832025e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6607342901228044, + "grad_norm": 0.10285215824842453, + "learning_rate": 3.645437083815055e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6611570247933884, + "grad_norm": 0.11214106529951096, + "learning_rate": 3.643837439288215e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6615797594639723, + "grad_norm": 0.08863095939159393, + "learning_rate": 3.642237944579399e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6620024941345566, + "grad_norm": 0.09542632102966309, + "learning_rate": 3.640638599865309e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6624252288051404, + "grad_norm": 0.11534330993890762, + "learning_rate": 3.639039405322624e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6628479634757245, + "grad_norm": 0.11467478424310684, + "learning_rate": 3.6374403611280114e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6632706981463086, + "grad_norm": 0.09833848476409912, + "learning_rate": 3.635841467458124e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6636934328168924, + "grad_norm": 0.09710056334733963, + "learning_rate": 3.634242724489591e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6641161674874765, + "grad_norm": 0.10881131142377853, + "learning_rate": 3.632644132399031e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6645389021580606, + "grad_norm": 0.08297046273946762, + "learning_rate": 3.631045691363041e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6649616368286444, + "grad_norm": 0.09299198538064957, + "learning_rate": 3.629447401558208e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6653843714992285, + "grad_norm": 0.10239946097135544, + "learning_rate": 3.627849263161094e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6658071061698125, + "grad_norm": 0.09151832014322281, + "learning_rate": 3.626251276348251e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6662298408403964, + "grad_norm": 0.08894705027341843, + "learning_rate": 3.6246534412962124e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6666525755109807, + "grad_norm": 0.0996883288025856, + "learning_rate": 3.6230557581814925e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6670753101815645, + "grad_norm": 0.09848849475383759, + "learning_rate": 3.621458227180592e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6674980448521486, + "grad_norm": 0.11516475677490234, + "learning_rate": 3.619860848469994e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6679207795227327, + "grad_norm": 0.11237359046936035, + "learning_rate": 3.618263622226162e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6683435141933165, + "grad_norm": 0.12012819200754166, + "learning_rate": 3.616666548625547e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6687662488639006, + "grad_norm": 0.13062678277492523, + "learning_rate": 3.615069627844578e-05, + "loss": 0.3529, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6691889835344846, + "grad_norm": 0.09803353995084763, + "learning_rate": 3.613472860059672e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6696117182050685, + "grad_norm": 0.08365119993686676, + "learning_rate": 3.611876245447228e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6700344528756526, + "grad_norm": 0.089998260140419, + "learning_rate": 3.610279784183626e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6704571875462366, + "grad_norm": 0.08890099078416824, + "learning_rate": 3.60868347644523e-05, + "loss": 0.3511, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6708799222168205, + "grad_norm": 0.11417756229639053, + "learning_rate": 3.607087322408389e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6713026568874048, + "grad_norm": 0.12301240861415863, + "learning_rate": 3.605491322249432e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6717253915579886, + "grad_norm": 0.12344230711460114, + "learning_rate": 3.603895476144674e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6721481262285727, + "grad_norm": 0.1065618023276329, + "learning_rate": 3.602299784270409e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6725708608991567, + "grad_norm": 0.10086624324321747, + "learning_rate": 3.600704246802917e-05, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6729935955697406, + "grad_norm": 0.09704624116420746, + "learning_rate": 3.59910886391846e-05, + "loss": 0.3544, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6734163302403247, + "grad_norm": 0.0926826149225235, + "learning_rate": 3.5975136357932846e-05, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6738390649109087, + "grad_norm": 0.09288319200277328, + "learning_rate": 3.595918562603618e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6742617995814926, + "grad_norm": 0.10486806929111481, + "learning_rate": 3.5943236445256716e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6746845342520766, + "grad_norm": 0.11599864810705185, + "learning_rate": 3.592728881735639e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6751072689226607, + "grad_norm": 0.0992875024676323, + "learning_rate": 3.591134274409697e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6755300035932446, + "grad_norm": 0.10810456424951553, + "learning_rate": 3.589539822724004e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6759527382638288, + "grad_norm": 0.11027906090021133, + "learning_rate": 3.5879455268547044e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 55990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6763754729344127, + "grad_norm": 0.11542516946792603, + "learning_rate": 3.5863513869779206e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6767982076049968, + "grad_norm": 0.10722671449184418, + "learning_rate": 3.5847574032697614e-05, + "loss": 0.3545, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6772209422755808, + "grad_norm": 0.09960127621889114, + "learning_rate": 3.5831635759063195e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6776436769461647, + "grad_norm": 0.1011098176240921, + "learning_rate": 3.581569905063667e-05, + "loss": 0.3512, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6780664116167487, + "grad_norm": 0.10862002521753311, + "learning_rate": 3.579976390917858e-05, + "loss": 0.3547, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6784891462873328, + "grad_norm": 0.11845294386148453, + "learning_rate": 3.578383033644934e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6789118809579167, + "grad_norm": 0.10712536424398422, + "learning_rate": 3.576789833420914e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6793346156285007, + "grad_norm": 0.08515691012144089, + "learning_rate": 3.575196790421806e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6797573502990848, + "grad_norm": 0.11287985742092133, + "learning_rate": 3.5736039048235916e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6801800849696686, + "grad_norm": 0.09825675189495087, + "learning_rate": 3.572011176802244e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.680602819640253, + "grad_norm": 0.10983681678771973, + "learning_rate": 3.570418606533712e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6810255543108368, + "grad_norm": 0.12152458727359772, + "learning_rate": 3.5688261941939325e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6814482889814208, + "grad_norm": 0.10328928381204605, + "learning_rate": 3.567233939958822e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.681871023652005, + "grad_norm": 0.11302266269922256, + "learning_rate": 3.565641844004278e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6822937583225888, + "grad_norm": 0.10797183215618134, + "learning_rate": 3.564049906506186e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6827164929931728, + "grad_norm": 0.09877846390008926, + "learning_rate": 3.5624581276404075e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.683139227663757, + "grad_norm": 0.0887494757771492, + "learning_rate": 3.560866507582793e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6835619623343407, + "grad_norm": 0.1047745868563652, + "learning_rate": 3.5592750465091696e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6839846970049248, + "grad_norm": 0.107864610850811, + "learning_rate": 3.5576837445953484e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6844074316755089, + "grad_norm": 0.09455578774213791, + "learning_rate": 3.5560926020171234e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6848301663460927, + "grad_norm": 0.0992719978094101, + "learning_rate": 3.5545016189502754e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.685252901016677, + "grad_norm": 0.1132851094007492, + "learning_rate": 3.552910795570559e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6856756356872609, + "grad_norm": 0.13748018443584442, + "learning_rate": 3.551320132053718e-05, + "loss": 0.3558, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.686098370357845, + "grad_norm": 0.08724167943000793, + "learning_rate": 3.549729628575477e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.686521105028429, + "grad_norm": 0.10420708358287811, + "learning_rate": 3.5481392853115396e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6869438396990128, + "grad_norm": 0.09736388176679611, + "learning_rate": 3.546549102437598e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.687366574369597, + "grad_norm": 0.08603719621896744, + "learning_rate": 3.544959080129318e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.687789309040181, + "grad_norm": 0.10102616250514984, + "learning_rate": 3.543369218562357e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6882120437107648, + "grad_norm": 0.09269856661558151, + "learning_rate": 3.541779517912346e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.688634778381349, + "grad_norm": 0.09455166012048721, + "learning_rate": 3.5401899783549056e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.689057513051933, + "grad_norm": 0.10654540359973907, + "learning_rate": 3.538600600065634e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6894802477225168, + "grad_norm": 0.0914996787905693, + "learning_rate": 3.5370113832201154e-05, + "loss": 0.3535, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.689902982393101, + "grad_norm": 0.10749492794275284, + "learning_rate": 3.53542232799391e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.690325717063685, + "grad_norm": 0.102180615067482, + "learning_rate": 3.5338334345625675e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.690748451734269, + "grad_norm": 0.08410289883613586, + "learning_rate": 3.532244703101616e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.691171186404853, + "grad_norm": 0.10337549448013306, + "learning_rate": 3.530656133786563e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.691593921075437, + "grad_norm": 0.09791681915521622, + "learning_rate": 3.5290677267929025e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.692016655746021, + "grad_norm": 0.1045289859175682, + "learning_rate": 3.5274794822961084e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.692439390416605, + "grad_norm": 0.09047198295593262, + "learning_rate": 3.5258914004716395e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.692862125087189, + "grad_norm": 0.0984082967042923, + "learning_rate": 3.524303481494931e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.693284859757773, + "grad_norm": 0.1024203971028328, + "learning_rate": 3.522715725541406e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.693707594428357, + "grad_norm": 0.09277921915054321, + "learning_rate": 3.5211281327864676e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.694130329098941, + "grad_norm": 0.15533378720283508, + "learning_rate": 3.5195407034054984e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6945530637695252, + "grad_norm": 0.09140869975090027, + "learning_rate": 3.5179534375738676e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.694975798440109, + "grad_norm": 0.08972223848104477, + "learning_rate": 3.51636633546692e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.695398533110693, + "grad_norm": 0.09379907697439194, + "learning_rate": 3.514779397259987e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6958212677812772, + "grad_norm": 0.08455498516559601, + "learning_rate": 3.513192623128384e-05, + "loss": 0.3519, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.696244002451861, + "grad_norm": 0.11270088702440262, + "learning_rate": 3.511606013247401e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.696666737122445, + "grad_norm": 0.10220117121934891, + "learning_rate": 3.510019567792318e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6970894717930292, + "grad_norm": 0.09772270172834396, + "learning_rate": 3.50843328693839e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.697512206463613, + "grad_norm": 0.09549157321453094, + "learning_rate": 3.5068471708608565e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.697934941134197, + "grad_norm": 0.09868922084569931, + "learning_rate": 3.5052612197349397e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6983576758047811, + "grad_norm": 0.10965389758348465, + "learning_rate": 3.5036754337358455e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.698780410475365, + "grad_norm": 0.09553077071905136, + "learning_rate": 3.502089813038755e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6992031451459493, + "grad_norm": 0.09201930463314056, + "learning_rate": 3.5005043578188354e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.6996258798165331, + "grad_norm": 0.09662003815174103, + "learning_rate": 3.498919068251237e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7000486144871172, + "grad_norm": 0.10799644142389297, + "learning_rate": 3.4973339445110894e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7004713491577013, + "grad_norm": 0.08457864075899124, + "learning_rate": 3.4957489867735024e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.700894083828285, + "grad_norm": 0.11631647497415543, + "learning_rate": 3.4941641952135726e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7013168184988692, + "grad_norm": 0.11552654951810837, + "learning_rate": 3.4925795700063735e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7017395531694532, + "grad_norm": 0.09434747695922852, + "learning_rate": 3.490995111326961e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.702162287840037, + "grad_norm": 0.0992506816983223, + "learning_rate": 3.489410819350378e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7025850225106212, + "grad_norm": 0.09629543125629425, + "learning_rate": 3.487826694251638e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7030077571812052, + "grad_norm": 0.10176637023687363, + "learning_rate": 3.486242736205745e-05, + "loss": 0.3481, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.703430491851789, + "grad_norm": 0.10082369297742844, + "learning_rate": 3.484658945387684e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7038532265223734, + "grad_norm": 0.09943409264087677, + "learning_rate": 3.4830753219724165e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7042759611929572, + "grad_norm": 0.09576503187417984, + "learning_rate": 3.481491866134891e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7046986958635413, + "grad_norm": 0.09257268160581589, + "learning_rate": 3.479908578050035e-05, + "loss": 0.3523, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7051214305341253, + "grad_norm": 0.10014744102954865, + "learning_rate": 3.478325457892756e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7055441652047092, + "grad_norm": 0.09591039270162582, + "learning_rate": 3.4767425058379446e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7059668998752933, + "grad_norm": 0.09090810269117355, + "learning_rate": 3.475159722060476e-05, + "loss": 0.3547, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7063896345458773, + "grad_norm": 0.12476792186498642, + "learning_rate": 3.4735771067351995e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7068123692164612, + "grad_norm": 0.13087132573127747, + "learning_rate": 3.4719946600369504e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7072351038870452, + "grad_norm": 0.0991661325097084, + "learning_rate": 3.470412382140546e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7076578385576293, + "grad_norm": 0.10320647060871124, + "learning_rate": 3.468830273220784e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7080805732282132, + "grad_norm": 0.10679958015680313, + "learning_rate": 3.467248333452442e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7085033078987975, + "grad_norm": 0.10422040522098541, + "learning_rate": 3.46566656301028e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7089260425693813, + "grad_norm": 0.10282082855701447, + "learning_rate": 3.464084962069042e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7093487772399654, + "grad_norm": 0.10093332082033157, + "learning_rate": 3.462503530803447e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7097715119105494, + "grad_norm": 0.14155402779579163, + "learning_rate": 3.4609222693882025e-05, + "loss": 0.3547, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7101942465811333, + "grad_norm": 0.10768143832683563, + "learning_rate": 3.45934117799799e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7106169812517174, + "grad_norm": 0.119455985724926, + "learning_rate": 3.4577602568074773e-05, + "loss": 0.352, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7110397159223014, + "grad_norm": 0.1473097801208496, + "learning_rate": 3.456179505991314e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7114624505928853, + "grad_norm": 0.0940147340297699, + "learning_rate": 3.454598925724125e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7118851852634693, + "grad_norm": 0.08873625099658966, + "learning_rate": 3.453018516180524e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7123079199340534, + "grad_norm": 0.11564906686544418, + "learning_rate": 3.4514382775350995e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7127306546046372, + "grad_norm": 0.10000364482402802, + "learning_rate": 3.449858209962425e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7131533892752215, + "grad_norm": 0.08779865503311157, + "learning_rate": 3.4482783136370524e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7135761239458054, + "grad_norm": 0.1001291573047638, + "learning_rate": 3.4466985887335194e-05, + "loss": 0.353, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7139988586163895, + "grad_norm": 0.09644114971160889, + "learning_rate": 3.4451190354263384e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7144215932869735, + "grad_norm": 0.1034306064248085, + "learning_rate": 3.443539653890004e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7148443279575574, + "grad_norm": 0.10460955649614334, + "learning_rate": 3.441960444298998e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7152670626281414, + "grad_norm": 0.09561905264854431, + "learning_rate": 3.440381406827777e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7156897972987255, + "grad_norm": 0.10334301739931107, + "learning_rate": 3.438802541650779e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7161125319693094, + "grad_norm": 0.09799428284168243, + "learning_rate": 3.437223848942427e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7165352666398934, + "grad_norm": 0.10683714598417282, + "learning_rate": 3.435645328877121e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7169580013104775, + "grad_norm": 0.10755344480276108, + "learning_rate": 3.434066981629244e-05, + "loss": 0.3548, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7173807359810613, + "grad_norm": 0.11213657259941101, + "learning_rate": 3.432488807373159e-05, + "loss": 0.3545, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7178034706516456, + "grad_norm": 0.09188859164714813, + "learning_rate": 3.430910806283209e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7182262053222295, + "grad_norm": 0.08825931698083878, + "learning_rate": 3.42933297853372e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 56990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7186489399928135, + "grad_norm": 0.09860263764858246, + "learning_rate": 3.427755324298998e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7190716746633976, + "grad_norm": 0.10574677586555481, + "learning_rate": 3.426177843753329e-05, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7194944093339815, + "grad_norm": 0.09760892391204834, + "learning_rate": 3.424600537070981e-05, + "loss": 0.3536, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7199171440045655, + "grad_norm": 0.11504248529672623, + "learning_rate": 3.423023404426202e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7203398786751496, + "grad_norm": 0.12707626819610596, + "learning_rate": 3.421446445993221e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7207626133457334, + "grad_norm": 0.12757045030593872, + "learning_rate": 3.419869661946248e-05, + "loss": 0.3678, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7211853480163175, + "grad_norm": 0.09751589596271515, + "learning_rate": 3.418293052459475e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7216080826869016, + "grad_norm": 0.09047985076904297, + "learning_rate": 3.416716617707071e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7220308173574854, + "grad_norm": 0.10491714626550674, + "learning_rate": 3.415140357863188e-05, + "loss": 0.3531, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7224535520280697, + "grad_norm": 0.09005116671323776, + "learning_rate": 3.413564273101958e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7228762866986536, + "grad_norm": 0.10428541898727417, + "learning_rate": 3.411988363597497e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7232990213692376, + "grad_norm": 0.10191106796264648, + "learning_rate": 3.410412629523897e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7237217560398217, + "grad_norm": 0.1043650284409523, + "learning_rate": 3.4088370710552326e-05, + "loss": 0.3551, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7241444907104055, + "grad_norm": 0.10967552661895752, + "learning_rate": 3.4072616883655596e-05, + "loss": 0.3508, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7245672253809896, + "grad_norm": 0.10000928491353989, + "learning_rate": 3.405686481628914e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7249899600515737, + "grad_norm": 0.1142902597784996, + "learning_rate": 3.404111451019313e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7254126947221575, + "grad_norm": 0.08850684016942978, + "learning_rate": 3.402536596710749e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7258354293927416, + "grad_norm": 0.10601989179849625, + "learning_rate": 3.400961918877203e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7262581640633257, + "grad_norm": 0.09894805401563644, + "learning_rate": 3.399387417692633e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7266808987339095, + "grad_norm": 0.10093715786933899, + "learning_rate": 3.3978130933309746e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7271036334044938, + "grad_norm": 0.11069990694522858, + "learning_rate": 3.39623894596615e-05, + "loss": 0.3539, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7275263680750776, + "grad_norm": 0.10055757313966751, + "learning_rate": 3.394664975772057e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7279491027456617, + "grad_norm": 0.08715584874153137, + "learning_rate": 3.393091182922574e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7283718374162458, + "grad_norm": 0.09773378819227219, + "learning_rate": 3.391517567591563e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7287945720868296, + "grad_norm": 0.0845671221613884, + "learning_rate": 3.389944129952865e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7292173067574137, + "grad_norm": 0.08341611921787262, + "learning_rate": 3.3883708701802986e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7296400414279978, + "grad_norm": 0.10376916825771332, + "learning_rate": 3.3867977884476654e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7300627760985816, + "grad_norm": 0.11298813670873642, + "learning_rate": 3.385224884928747e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7304855107691657, + "grad_norm": 0.11306030303239822, + "learning_rate": 3.383652159797306e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7309082454397497, + "grad_norm": 0.09581732004880905, + "learning_rate": 3.382079613227085e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7313309801103336, + "grad_norm": 0.09019611030817032, + "learning_rate": 3.3805072453918054e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7317537147809179, + "grad_norm": 0.0940159261226654, + "learning_rate": 3.3789350564651694e-05, + "loss": 0.3541, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7321764494515017, + "grad_norm": 0.08424212783575058, + "learning_rate": 3.3773630466208615e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7325991841220858, + "grad_norm": 0.12574371695518494, + "learning_rate": 3.375791216032546e-05, + "loss": 0.3557, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7330219187926699, + "grad_norm": 0.1177377924323082, + "learning_rate": 3.3742195648738614e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7334446534632537, + "grad_norm": 0.1042710542678833, + "learning_rate": 3.372648093318433e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7338673881338378, + "grad_norm": 0.09789528697729111, + "learning_rate": 3.371076801539868e-05, + "loss": 0.3548, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7342901228044219, + "grad_norm": 0.11041824519634247, + "learning_rate": 3.369505689711746e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7347128574750057, + "grad_norm": 0.10877611488103867, + "learning_rate": 3.3679347580076314e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7351355921455898, + "grad_norm": 0.09508384764194489, + "learning_rate": 3.366364006601072e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7355583268161738, + "grad_norm": 0.1031811311841011, + "learning_rate": 3.364793435665587e-05, + "loss": 0.3518, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7359810614867577, + "grad_norm": 0.09763573110103607, + "learning_rate": 3.363223045374685e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.736403796157342, + "grad_norm": 0.09702251106500626, + "learning_rate": 3.3616528359018454e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7368265308279258, + "grad_norm": 0.1034962460398674, + "learning_rate": 3.360082807420536e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7372492654985099, + "grad_norm": 0.1020241305232048, + "learning_rate": 3.3585129601042e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.737672000169094, + "grad_norm": 0.1121930480003357, + "learning_rate": 3.356943294126261e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7380947348396778, + "grad_norm": 0.1072620078921318, + "learning_rate": 3.355373809660123e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7385174695102619, + "grad_norm": 0.09286395460367203, + "learning_rate": 3.3538045068791726e-05, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.738940204180846, + "grad_norm": 0.11299845576286316, + "learning_rate": 3.3522353859567714e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7393629388514298, + "grad_norm": 0.10527591407299042, + "learning_rate": 3.350666447066263e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7397856735220139, + "grad_norm": 0.1005609780550003, + "learning_rate": 3.349097690380975e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.740208408192598, + "grad_norm": 0.10224814713001251, + "learning_rate": 3.347685965292911e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7406311428631818, + "grad_norm": 0.09585567563772202, + "learning_rate": 3.34611755527497e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.741053877533766, + "grad_norm": 0.0984659492969513, + "learning_rate": 3.344549327964773e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.74147661220435, + "grad_norm": 0.09068065136671066, + "learning_rate": 3.342981283535563e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.741899346874934, + "grad_norm": 0.09279324859380722, + "learning_rate": 3.341413422160566e-05, + "loss": 0.3513, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.742322081545518, + "grad_norm": 0.1030937060713768, + "learning_rate": 3.33984574401299e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7427448162161019, + "grad_norm": 0.09353764355182648, + "learning_rate": 3.3382782492660145e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.743167550886686, + "grad_norm": 0.08036317676305771, + "learning_rate": 3.336710938092806e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.74359028555727, + "grad_norm": 0.10299868136644363, + "learning_rate": 3.335143810666509e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7440130202278539, + "grad_norm": 0.10130858421325684, + "learning_rate": 3.3335768671602455e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.744435754898438, + "grad_norm": 0.09256476163864136, + "learning_rate": 3.332010107747122e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.744858489569022, + "grad_norm": 0.09249821305274963, + "learning_rate": 3.3304435326002193e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7452812242396059, + "grad_norm": 0.0888192281126976, + "learning_rate": 3.3288771418926e-05, + "loss": 0.3544, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7457039589101901, + "grad_norm": 0.08867738395929337, + "learning_rate": 3.3273109357973074e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.746126693580774, + "grad_norm": 0.10836607217788696, + "learning_rate": 3.3257449144873654e-05, + "loss": 0.3747, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.746549428251358, + "grad_norm": 0.09672817587852478, + "learning_rate": 3.324179078135771e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7469721629219421, + "grad_norm": 0.12128332257270813, + "learning_rate": 3.3226134269155076e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.747394897592526, + "grad_norm": 0.0983094573020935, + "learning_rate": 3.321047960999537e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.74781763226311, + "grad_norm": 0.10042089223861694, + "learning_rate": 3.319482680560797e-05, + "loss": 0.3556, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7482403669336941, + "grad_norm": 0.1186598390340805, + "learning_rate": 3.3179175857722084e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.748663101604278, + "grad_norm": 0.11653875559568405, + "learning_rate": 3.316352676806672e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7490858362748622, + "grad_norm": 0.10036171227693558, + "learning_rate": 3.3147879538370635e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.749508570945446, + "grad_norm": 0.1030111238360405, + "learning_rate": 3.313223417036243e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.74993130561603, + "grad_norm": 0.12851069867610931, + "learning_rate": 3.31165906657705e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7503540402866142, + "grad_norm": 0.10883824527263641, + "learning_rate": 3.310094902632296e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.750776774957198, + "grad_norm": 0.1024181991815567, + "learning_rate": 3.308530925374782e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7511995096277821, + "grad_norm": 0.1013370007276535, + "learning_rate": 3.306967134977281e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7516222442983662, + "grad_norm": 0.10915695875883102, + "learning_rate": 3.305403531612549e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.75204497896895, + "grad_norm": 0.12331274896860123, + "learning_rate": 3.303840115453322e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7524677136395341, + "grad_norm": 0.08244474977254868, + "learning_rate": 3.302276886672312e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7528904483101182, + "grad_norm": 0.0911758616566658, + "learning_rate": 3.300713845442211e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.753313182980702, + "grad_norm": 0.09660731256008148, + "learning_rate": 3.299150991935695e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7537359176512863, + "grad_norm": 0.11851758509874344, + "learning_rate": 3.2975883263254144e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7541586523218702, + "grad_norm": 0.10946464538574219, + "learning_rate": 3.296025848783997e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.754581386992454, + "grad_norm": 0.12314862757921219, + "learning_rate": 3.294463559484055e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7550041216630383, + "grad_norm": 0.09668030589818954, + "learning_rate": 3.292901458598178e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7554268563336222, + "grad_norm": 0.1226174533367157, + "learning_rate": 3.291339546298933e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7558495910042062, + "grad_norm": 0.1075817197561264, + "learning_rate": 3.289777822758868e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7562723256747903, + "grad_norm": 0.09726765751838684, + "learning_rate": 3.2882162881505126e-05, + "loss": 0.3535, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7566950603453741, + "grad_norm": 0.0924716591835022, + "learning_rate": 3.286654942646369e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7571177950159582, + "grad_norm": 0.10388702899217606, + "learning_rate": 3.2850937864189236e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7575405296865423, + "grad_norm": 0.10910151898860931, + "learning_rate": 3.2835328196406426e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7579632643571261, + "grad_norm": 0.12316495180130005, + "learning_rate": 3.281972042483965e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7583859990277104, + "grad_norm": 0.0984247699379921, + "learning_rate": 3.280411455121316e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7588087336982943, + "grad_norm": 0.08615661412477493, + "learning_rate": 3.2788510577250955e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7592314683688781, + "grad_norm": 0.12338963150978088, + "learning_rate": 3.2772908504676834e-05, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7596542030394624, + "grad_norm": 0.08784861117601395, + "learning_rate": 3.275730833521442e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7600769377100463, + "grad_norm": 0.11225203424692154, + "learning_rate": 3.274171007058705e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7604996723806303, + "grad_norm": 0.0932641476392746, + "learning_rate": 3.272611371251792e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 57990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7609224070512144, + "grad_norm": 0.11895021051168442, + "learning_rate": 3.271051926273001e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7613451417217982, + "grad_norm": 0.10619886964559555, + "learning_rate": 3.2694926722946065e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7617678763923823, + "grad_norm": 0.11513940989971161, + "learning_rate": 3.267933609488858e-05, + "loss": 0.3563, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7621906110629664, + "grad_norm": 0.09405707567930222, + "learning_rate": 3.266374738027992e-05, + "loss": 0.3501, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7626133457335502, + "grad_norm": 0.09416071325540543, + "learning_rate": 3.26481605808422e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7630360804041345, + "grad_norm": 0.10549627989530563, + "learning_rate": 3.263257569829731e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7634588150747184, + "grad_norm": 0.08303295075893402, + "learning_rate": 3.2616992734366955e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7638815497453022, + "grad_norm": 0.09140641987323761, + "learning_rate": 3.260141169077263e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7643042844158865, + "grad_norm": 0.1011972725391388, + "learning_rate": 3.258583256923557e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7647270190864703, + "grad_norm": 0.08700942248106003, + "learning_rate": 3.257025537147686e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7651497537570544, + "grad_norm": 0.12228573113679886, + "learning_rate": 3.255468009921736e-05, + "loss": 0.3563, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7655724884276385, + "grad_norm": 0.08951246738433838, + "learning_rate": 3.253910675417765e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7659952230982223, + "grad_norm": 0.10417259484529495, + "learning_rate": 3.2523535338078195e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7664179577688064, + "grad_norm": 0.10637911409139633, + "learning_rate": 3.250796585263917e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7668406924393905, + "grad_norm": 0.10693103075027466, + "learning_rate": 3.249239829958059e-05, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7672634271099743, + "grad_norm": 0.15040288865566254, + "learning_rate": 3.247683268062222e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7676861617805586, + "grad_norm": 0.10666640102863312, + "learning_rate": 3.246126899748363e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7681088964511424, + "grad_norm": 0.08550865948200226, + "learning_rate": 3.244570725188417e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7685316311217263, + "grad_norm": 0.1073112040758133, + "learning_rate": 3.243014744554299e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7689543657923106, + "grad_norm": 0.09859391301870346, + "learning_rate": 3.2414589580179015e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7693771004628944, + "grad_norm": 0.11794068664312363, + "learning_rate": 3.239903365751093e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7697998351334785, + "grad_norm": 0.1076488271355629, + "learning_rate": 3.238347967925722e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7702225698040626, + "grad_norm": 0.1074996292591095, + "learning_rate": 3.2367927647136214e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7706453044746464, + "grad_norm": 0.11113600432872772, + "learning_rate": 3.235237756286593e-05, + "loss": 0.3547, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7710680391452305, + "grad_norm": 0.10985404998064041, + "learning_rate": 3.2336829428164236e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7714907738158145, + "grad_norm": 0.11124347895383835, + "learning_rate": 3.2321283244748786e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7719135084863984, + "grad_norm": 0.09798181802034378, + "learning_rate": 3.230573901433696e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7723362431569827, + "grad_norm": 0.08832883834838867, + "learning_rate": 3.2290196738645975e-05, + "loss": 0.3563, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7727589778275665, + "grad_norm": 0.1244971826672554, + "learning_rate": 3.2274656419392844e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7731817124981504, + "grad_norm": 0.09596402198076248, + "learning_rate": 3.2259118058294305e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7736044471687347, + "grad_norm": 0.08048424869775772, + "learning_rate": 3.2243581657066915e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7740271818393185, + "grad_norm": 0.13927312195301056, + "learning_rate": 3.222804721742702e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7744499165099026, + "grad_norm": 0.0942796915769577, + "learning_rate": 3.221251474109074e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7748726511804866, + "grad_norm": 0.1425962746143341, + "learning_rate": 3.2196984229773984e-05, + "loss": 0.355, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7752953858510705, + "grad_norm": 0.11654611676931381, + "learning_rate": 3.218145568519242e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7757181205216546, + "grad_norm": 0.10112938284873962, + "learning_rate": 3.2165929109061546e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7761408551922386, + "grad_norm": 0.1011710911989212, + "learning_rate": 3.215040450309661e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7765635898628225, + "grad_norm": 0.10441233962774277, + "learning_rate": 3.213488186901262e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7769863245334068, + "grad_norm": 0.09638907015323639, + "learning_rate": 3.211936120852441e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7774090592039906, + "grad_norm": 0.1133028045296669, + "learning_rate": 3.2103842523346575e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7778317938745745, + "grad_norm": 0.09342315047979355, + "learning_rate": 3.208832581519351e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7782545285451588, + "grad_norm": 0.0989115983247757, + "learning_rate": 3.207281108577935e-05, + "loss": 0.354, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7786772632157426, + "grad_norm": 0.11525425314903259, + "learning_rate": 3.205729833681807e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7790999978863267, + "grad_norm": 0.09521932154893875, + "learning_rate": 3.2041787570023365e-05, + "loss": 0.3538, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7795227325569107, + "grad_norm": 0.09889493882656097, + "learning_rate": 3.202627878710876e-05, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7799454672274946, + "grad_norm": 0.09453462809324265, + "learning_rate": 3.201077198978756e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7803682018980787, + "grad_norm": 0.11328732222318649, + "learning_rate": 3.199526717977279e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7807909365686627, + "grad_norm": 0.09776946157217026, + "learning_rate": 3.197976435877731e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7812136712392466, + "grad_norm": 0.09417478740215302, + "learning_rate": 3.196426352851376e-05, + "loss": 0.3511, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7816364059098309, + "grad_norm": 0.08898521214723587, + "learning_rate": 3.194876469069454e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7820591405804147, + "grad_norm": 0.10189004242420197, + "learning_rate": 3.193326784703185e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7824818752509985, + "grad_norm": 0.10994280129671097, + "learning_rate": 3.191777299923765e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7829046099215828, + "grad_norm": 0.10615615546703339, + "learning_rate": 3.190228014902367e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7833273445921667, + "grad_norm": 0.10164639353752136, + "learning_rate": 3.188678929810146e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7837500792627508, + "grad_norm": 0.10491034388542175, + "learning_rate": 3.187130044818234e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7841728139333348, + "grad_norm": 0.13438910245895386, + "learning_rate": 3.185581360097736e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7845955486039187, + "grad_norm": 0.11764096468687057, + "learning_rate": 3.184032875819738e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7850182832745027, + "grad_norm": 0.10368874669075012, + "learning_rate": 3.182484592155306e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7854410179450868, + "grad_norm": 0.11001785844564438, + "learning_rate": 3.180936509275483e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7858637526156707, + "grad_norm": 0.09694403409957886, + "learning_rate": 3.179388627351288e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.786286487286255, + "grad_norm": 0.11132796853780746, + "learning_rate": 3.177840946553716e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7867092219568388, + "grad_norm": 0.0895589217543602, + "learning_rate": 3.1762934670537465e-05, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7871319566274226, + "grad_norm": 0.09546174854040146, + "learning_rate": 3.174746189022332e-05, + "loss": 0.3519, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.787554691298007, + "grad_norm": 0.09106869250535965, + "learning_rate": 3.1731991126304026e-05, + "loss": 0.3537, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7879774259685908, + "grad_norm": 0.09272082895040512, + "learning_rate": 3.1716522380488644e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7884001606391748, + "grad_norm": 0.085500068962574, + "learning_rate": 3.170105565448607e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.788822895309759, + "grad_norm": 0.09909339994192123, + "learning_rate": 3.1685590950004947e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7892456299803428, + "grad_norm": 0.10519188642501831, + "learning_rate": 3.1670128268753664e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7896683646509268, + "grad_norm": 0.11251526325941086, + "learning_rate": 3.1654667612440434e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.790091099321511, + "grad_norm": 0.10364605486392975, + "learning_rate": 3.163920898277324e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7905138339920947, + "grad_norm": 0.10763643682003021, + "learning_rate": 3.1623752381459806e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.790936568662679, + "grad_norm": 0.10173743963241577, + "learning_rate": 3.1608297810207646e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7913593033332629, + "grad_norm": 0.08533059060573578, + "learning_rate": 3.15928452707241e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7917820380038467, + "grad_norm": 0.11018272489309311, + "learning_rate": 3.15773947647162e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.792204772674431, + "grad_norm": 0.0929766520857811, + "learning_rate": 3.156194629389081e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7926275073450149, + "grad_norm": 0.1044778823852539, + "learning_rate": 3.154649985995455e-05, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.793050242015599, + "grad_norm": 0.09020017087459564, + "learning_rate": 3.1531055464613825e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.793472976686183, + "grad_norm": 0.09434926509857178, + "learning_rate": 3.151561310957481e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7938957113567668, + "grad_norm": 0.09823911637067795, + "learning_rate": 3.150017279654343e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.794318446027351, + "grad_norm": 0.11784691363573074, + "learning_rate": 3.148473452722543e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.794741180697935, + "grad_norm": 0.12198171019554138, + "learning_rate": 3.146929830332632e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7951639153685188, + "grad_norm": 0.10002987831830978, + "learning_rate": 3.145386412655136e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7955866500391031, + "grad_norm": 0.11031734198331833, + "learning_rate": 3.143843199860557e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.796009384709687, + "grad_norm": 0.10034357011318207, + "learning_rate": 3.142300192119378e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.7964321193802708, + "grad_norm": 0.10129083693027496, + "learning_rate": 3.140757389602062e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.796854854050855, + "grad_norm": 0.10596144944429398, + "learning_rate": 3.139214792479039e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.797277588721439, + "grad_norm": 0.10014521330595016, + "learning_rate": 3.137672400920727e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.797700323392023, + "grad_norm": 0.0933852568268776, + "learning_rate": 3.136130215097517e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.798123058062607, + "grad_norm": 0.10345818847417831, + "learning_rate": 3.1345882351797765e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.798545792733191, + "grad_norm": 0.09389909356832504, + "learning_rate": 3.133046461337851e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.798968527403775, + "grad_norm": 0.0994175374507904, + "learning_rate": 3.1315048937420665e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.799391262074359, + "grad_norm": 0.10471896082162857, + "learning_rate": 3.1299635325627177e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.799813996744943, + "grad_norm": 0.1142059713602066, + "learning_rate": 3.128422377970085e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8002367314155272, + "grad_norm": 0.09599743783473969, + "learning_rate": 3.126881430134423e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.800659466086111, + "grad_norm": 0.09220371395349503, + "learning_rate": 3.125340689225961e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.801082200756695, + "grad_norm": 0.11076585203409195, + "learning_rate": 3.123800155414912e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8015049354272792, + "grad_norm": 0.0912444218993187, + "learning_rate": 3.122259828871458e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.801927670097863, + "grad_norm": 0.10961997509002686, + "learning_rate": 3.120719709765763e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.802350404768447, + "grad_norm": 0.10668549686670303, + "learning_rate": 3.1191797982679684e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8027731394390312, + "grad_norm": 0.09426377713680267, + "learning_rate": 3.1176400945481915e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 58990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.803195874109615, + "grad_norm": 0.1279633641242981, + "learning_rate": 3.116100598776524e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.803618608780199, + "grad_norm": 0.11151964962482452, + "learning_rate": 3.114561311123038e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8040413434507832, + "grad_norm": 0.095645010471344, + "learning_rate": 3.113022231757783e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.804464078121367, + "grad_norm": 0.0992555022239685, + "learning_rate": 3.111483360850783e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8048868127919513, + "grad_norm": 0.08871160447597504, + "learning_rate": 3.1099446985720404e-05, + "loss": 0.3495, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8053095474625351, + "grad_norm": 0.11333070695400238, + "learning_rate": 3.108406245091535e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.805732282133119, + "grad_norm": 0.11497344821691513, + "learning_rate": 3.106868000579223e-05, + "loss": 0.3563, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8061550168037033, + "grad_norm": 0.150468647480011, + "learning_rate": 3.105329965205036e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8065777514742871, + "grad_norm": 0.10780470073223114, + "learning_rate": 3.103792139138886e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8070004861448712, + "grad_norm": 0.08663416653871536, + "learning_rate": 3.102254522550657e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8074232208154553, + "grad_norm": 0.10419370234012604, + "learning_rate": 3.100717115610215e-05, + "loss": 0.3712, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.807845955486039, + "grad_norm": 0.10663759708404541, + "learning_rate": 3.0991799184874e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8082686901566232, + "grad_norm": 0.09616532176733017, + "learning_rate": 3.097642931352027e-05, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8086914248272072, + "grad_norm": 0.09562486410140991, + "learning_rate": 3.096106154373895e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.809114159497791, + "grad_norm": 0.11079885065555573, + "learning_rate": 3.094569587722769e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8095368941683754, + "grad_norm": 0.11735156923532486, + "learning_rate": 3.0930332315683994e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8099596288389592, + "grad_norm": 0.10605236142873764, + "learning_rate": 3.091497086080512e-05, + "loss": 0.3554, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.810382363509543, + "grad_norm": 0.08587904274463654, + "learning_rate": 3.0899611514288077e-05, + "loss": 0.3548, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8108050981801274, + "grad_norm": 0.10247275978326797, + "learning_rate": 3.088425427782961e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8112278328507112, + "grad_norm": 0.0919850617647171, + "learning_rate": 3.086889915312629e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8116505675212953, + "grad_norm": 0.1739359349012375, + "learning_rate": 3.0853546141874425e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8120733021918793, + "grad_norm": 0.11491694301366806, + "learning_rate": 3.0838195245770084e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8124960368624632, + "grad_norm": 0.11712837219238281, + "learning_rate": 3.082284646650913e-05, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8129187715330473, + "grad_norm": 0.11973364651203156, + "learning_rate": 3.0807499805787156e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8133415062036313, + "grad_norm": 0.09234608709812164, + "learning_rate": 3.079215526529955e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8137642408742152, + "grad_norm": 0.10724243521690369, + "learning_rate": 3.0776812846741445e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8141869755447995, + "grad_norm": 0.11882653087377548, + "learning_rate": 3.0761472551807776e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8146097102153833, + "grad_norm": 0.09241177141666412, + "learning_rate": 3.074613438219317e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8150324448859674, + "grad_norm": 0.10010688751935959, + "learning_rate": 3.0730798339592105e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8154551795565514, + "grad_norm": 0.1246260553598404, + "learning_rate": 3.071546442569876e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8158779142271353, + "grad_norm": 0.12698011100292206, + "learning_rate": 3.0700132642207115e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8163006488977194, + "grad_norm": 0.10899122059345245, + "learning_rate": 3.0684802990810915e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8167233835683034, + "grad_norm": 0.09794049710035324, + "learning_rate": 3.066947547320363e-05, + "loss": 0.3548, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8171461182388873, + "grad_norm": 0.09891273081302643, + "learning_rate": 3.065415009107854e-05, + "loss": 0.3705, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8175688529094713, + "grad_norm": 0.07788708060979843, + "learning_rate": 3.0638826846128675e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8179915875800554, + "grad_norm": 0.10500270873308182, + "learning_rate": 3.062350574004683e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8184143222506393, + "grad_norm": 0.09966377168893814, + "learning_rate": 3.060818677452552e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8188370569212235, + "grad_norm": 0.10221244394779205, + "learning_rate": 3.05928699512571e-05, + "loss": 0.354, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8192597915918074, + "grad_norm": 0.10997888445854187, + "learning_rate": 3.057755527193363e-05, + "loss": 0.3553, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8196825262623915, + "grad_norm": 0.10571350157260895, + "learning_rate": 3.056224273824697e-05, + "loss": 0.3722, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8201052609329755, + "grad_norm": 0.09258496761322021, + "learning_rate": 3.0546932351888716e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8205279956035594, + "grad_norm": 0.09846281260251999, + "learning_rate": 3.0531624114550245e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8209507302741434, + "grad_norm": 0.09419005364179611, + "learning_rate": 3.0516318027922675e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8213734649447275, + "grad_norm": 0.11110673099756241, + "learning_rate": 3.050101409369691e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8217961996153114, + "grad_norm": 0.09840739518404007, + "learning_rate": 3.048571231356363e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8222189342858954, + "grad_norm": 0.11178798973560333, + "learning_rate": 3.0470412689213208e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8226416689564795, + "grad_norm": 0.08302414417266846, + "learning_rate": 3.0455115222335856e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8230644036270633, + "grad_norm": 0.10449288785457611, + "learning_rate": 3.04398199146215e-05, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8234871382976476, + "grad_norm": 0.10087790340185165, + "learning_rate": 3.0424526767759843e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8239098729682315, + "grad_norm": 0.10302358120679855, + "learning_rate": 3.040923578344037e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8243326076388156, + "grad_norm": 0.10821270197629929, + "learning_rate": 3.0393946963352292e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8247553423093996, + "grad_norm": 0.11042241752147675, + "learning_rate": 3.037866030918459e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8251780769799835, + "grad_norm": 0.09427094459533691, + "learning_rate": 3.0363375822626028e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8256008116505675, + "grad_norm": 0.11010299623012543, + "learning_rate": 3.0348093505365128e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8260235463211516, + "grad_norm": 0.11420219391584396, + "learning_rate": 3.0332813359090105e-05, + "loss": 0.3546, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8264462809917354, + "grad_norm": 0.10075879842042923, + "learning_rate": 3.0317535385489022e-05, + "loss": 0.3563, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8268690156623195, + "grad_norm": 0.10118397325277328, + "learning_rate": 3.0302259586249677e-05, + "loss": 0.3525, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8272917503329036, + "grad_norm": 0.08509175479412079, + "learning_rate": 3.028698596305959e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8277144850034874, + "grad_norm": 0.09982781857252121, + "learning_rate": 3.027171451760609e-05, + "loss": 0.3546, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8281372196740717, + "grad_norm": 0.10838465392589569, + "learning_rate": 3.0257972080057096e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8285599543446556, + "grad_norm": 0.09739003330469131, + "learning_rate": 3.0242704776950785e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8289826890152396, + "grad_norm": 0.11012320965528488, + "learning_rate": 3.0227439656472877e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8294054236858237, + "grad_norm": 0.09261346608400345, + "learning_rate": 3.021217672030976e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8298281583564076, + "grad_norm": 0.11601386964321136, + "learning_rate": 3.0196915970147553e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8302508930269916, + "grad_norm": 0.09732620418071747, + "learning_rate": 3.0181657407672138e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8306736276975757, + "grad_norm": 0.10209287703037262, + "learning_rate": 3.0166401034569165e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8310963623681595, + "grad_norm": 0.13958176970481873, + "learning_rate": 3.0151146852524048e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8315190970387436, + "grad_norm": 0.1252824366092682, + "learning_rate": 3.0135894863221936e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8319418317093277, + "grad_norm": 0.10164088010787964, + "learning_rate": 3.0120645068347736e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8323645663799115, + "grad_norm": 0.11266381293535233, + "learning_rate": 3.0105397469586128e-05, + "loss": 0.3556, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8327873010504958, + "grad_norm": 0.11306724697351456, + "learning_rate": 3.009015206862157e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8332100357210797, + "grad_norm": 0.09622479230165482, + "learning_rate": 3.0074908867138207e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8336327703916637, + "grad_norm": 0.11217351257801056, + "learning_rate": 3.005966786682003e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8340555050622478, + "grad_norm": 0.08909120410680771, + "learning_rate": 3.004442906935072e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8344782397328316, + "grad_norm": 0.10239365696907043, + "learning_rate": 3.0029192476413747e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8349009744034157, + "grad_norm": 0.09755908697843552, + "learning_rate": 3.0013958089692318e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8353237090739998, + "grad_norm": 0.10727082192897797, + "learning_rate": 2.9998725910869436e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8357464437445836, + "grad_norm": 0.10448334366083145, + "learning_rate": 2.9983495941627786e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8361691784151677, + "grad_norm": 0.08695653080940247, + "learning_rate": 2.996826818364989e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8365919130857518, + "grad_norm": 0.10427482426166534, + "learning_rate": 2.995304263861796e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8370146477563356, + "grad_norm": 0.11427325755357742, + "learning_rate": 2.993781930821401e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.83743738242692, + "grad_norm": 0.11506403237581253, + "learning_rate": 2.99225981941198e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8378601170975037, + "grad_norm": 0.09194330871105194, + "learning_rate": 2.990737929801682e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8382828517680878, + "grad_norm": 0.10113702714443207, + "learning_rate": 2.9892162621586334e-05, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8387055864386719, + "grad_norm": 0.10795366764068604, + "learning_rate": 2.9876948166509377e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8391283211092557, + "grad_norm": 0.110526442527771, + "learning_rate": 2.986173593446672e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8395510557798398, + "grad_norm": 0.10858305543661118, + "learning_rate": 2.9846525927138857e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8399737904504239, + "grad_norm": 0.08798452466726303, + "learning_rate": 2.983131814620609e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8403965251210077, + "grad_norm": 0.09148677438497543, + "learning_rate": 2.9816112593348457e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8408192597915918, + "grad_norm": 0.10208820551633835, + "learning_rate": 2.980090927024573e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8412419944621758, + "grad_norm": 0.10844340175390244, + "learning_rate": 2.9785708178577466e-05, + "loss": 0.3685, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8416647291327597, + "grad_norm": 0.11700333654880524, + "learning_rate": 2.9770509320022956e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.842087463803344, + "grad_norm": 0.09316378831863403, + "learning_rate": 2.975531269626126e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8425101984739278, + "grad_norm": 0.08319056034088135, + "learning_rate": 2.9740118308971154e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.842932933144512, + "grad_norm": 0.08860398083925247, + "learning_rate": 2.9724926159831236e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.843355667815096, + "grad_norm": 0.11127728223800659, + "learning_rate": 2.970973625051976e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8437784024856798, + "grad_norm": 0.09871470183134079, + "learning_rate": 2.969454858271482e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8442011371562639, + "grad_norm": 0.09460622817277908, + "learning_rate": 2.9679363158094208e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.844623871826848, + "grad_norm": 0.1723061203956604, + "learning_rate": 2.9664179978335504e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8450466064974318, + "grad_norm": 0.10519243031740189, + "learning_rate": 2.9648999045116032e-05, + "loss": 0.3535, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 59990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8454693411680159, + "grad_norm": 0.10015083104372025, + "learning_rate": 2.963382036011284e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8458920758386, + "grad_norm": 0.1116843894124031, + "learning_rate": 2.9618643925002752e-05, + "loss": 0.3698, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.8463148105091838, + "grad_norm": 0.10253959894180298, + "learning_rate": 2.9603469741462363e-05, + "loss": 0.3529, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.846737545179768, + "grad_norm": 0.12169019132852554, + "learning_rate": 2.958829781116798e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 1.847160279850352, + "grad_norm": 0.08182228356599808, + "learning_rate": 2.9573128135795658e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.000317051002938, + "grad_norm": 0.2194492518901825, + "learning_rate": 2.9557960717021238e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.000739785673522, + "grad_norm": 0.18660464882850647, + "learning_rate": 2.9542795556520298e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.001162520344106, + "grad_norm": 0.28729403018951416, + "learning_rate": 2.952763265596816e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.00158525501469, + "grad_norm": 0.2628413438796997, + "learning_rate": 2.9512472017039903e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.002007989685274, + "grad_norm": 0.2500033378601074, + "learning_rate": 2.949731364141035e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.002430724355858, + "grad_norm": 0.28376126289367676, + "learning_rate": 2.9482157530754097e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.002853459026442, + "grad_norm": 0.19589963555335999, + "learning_rate": 2.9467003686745432e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.003276193697026, + "grad_norm": 0.18755470216274261, + "learning_rate": 2.945185211105848e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.00369892836761, + "grad_norm": 0.1755681335926056, + "learning_rate": 2.9436702805367018e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.004121663038194, + "grad_norm": 0.2279110997915268, + "learning_rate": 2.9421555771344644e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.004544397708778, + "grad_norm": 0.22555211186408997, + "learning_rate": 2.9406411010664674e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0049671323793623, + "grad_norm": 0.23816511034965515, + "learning_rate": 2.9391268525000182e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.005389867049946, + "grad_norm": 0.2971150875091553, + "learning_rate": 2.9376128316024003e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.00581260172053, + "grad_norm": 0.42649373412132263, + "learning_rate": 2.9360990385408687e-05, + "loss": 0.3531, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0062353363911143, + "grad_norm": 0.1665913611650467, + "learning_rate": 2.934585473482656e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.006658071061698, + "grad_norm": 0.20427051186561584, + "learning_rate": 2.93307213659497e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.007080805732282, + "grad_norm": 0.18029527366161346, + "learning_rate": 2.9315590280449902e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0075035404028663, + "grad_norm": 0.1921653151512146, + "learning_rate": 2.9300461479998736e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.00792627507345, + "grad_norm": 0.18577773869037628, + "learning_rate": 2.9285334966267504e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.008349009744034, + "grad_norm": 0.22203722596168518, + "learning_rate": 2.927021074092726e-05, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0087717444146183, + "grad_norm": 0.21019189059734344, + "learning_rate": 2.925508880564884e-05, + "loss": 0.3549, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.009194479085202, + "grad_norm": 0.22271183133125305, + "learning_rate": 2.9239969162102753e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0096172137557864, + "grad_norm": 0.22620221972465515, + "learning_rate": 2.922485181195932e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0100399484263702, + "grad_norm": 0.281980961561203, + "learning_rate": 2.9209736756888585e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.010462683096954, + "grad_norm": 0.29113665223121643, + "learning_rate": 2.919462399856032e-05, + "loss": 0.3549, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0108854177675384, + "grad_norm": 0.22208215296268463, + "learning_rate": 2.9179513538644104e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.011308152438122, + "grad_norm": 0.2171613723039627, + "learning_rate": 2.916440537880917e-05, + "loss": 0.3747, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.011730887108706, + "grad_norm": 0.31287142634391785, + "learning_rate": 2.9149299520724566e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0121536217792904, + "grad_norm": 0.2609288692474365, + "learning_rate": 2.9134195966059073e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.012576356449874, + "grad_norm": 0.22911177575588226, + "learning_rate": 2.9119094716481227e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.012999091120458, + "grad_norm": 0.20739519596099854, + "learning_rate": 2.9103995773659254e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0134218257910423, + "grad_norm": 0.20550692081451416, + "learning_rate": 2.9088899139261184e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.013844560461626, + "grad_norm": 0.23627065122127533, + "learning_rate": 2.9073804814954776e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0142672951322105, + "grad_norm": 0.20552393794059753, + "learning_rate": 2.9058712802407528e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0146900298027943, + "grad_norm": 0.18774114549160004, + "learning_rate": 2.904362310328671e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.015112764473378, + "grad_norm": 0.2516428828239441, + "learning_rate": 2.9028535719259265e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0155354991439625, + "grad_norm": 0.2189192920923233, + "learning_rate": 2.9013450651991962e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0159582338145463, + "grad_norm": 0.34744417667388916, + "learning_rate": 2.8998367903151258e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.01638096848513, + "grad_norm": 0.23115183413028717, + "learning_rate": 2.8983287474403376e-05, + "loss": 0.3513, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0168037031557144, + "grad_norm": 0.15985798835754395, + "learning_rate": 2.896820936741429e-05, + "loss": 0.3693, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0172264378262983, + "grad_norm": 0.19340363144874573, + "learning_rate": 2.8953133583849706e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.017649172496882, + "grad_norm": 0.2067916989326477, + "learning_rate": 2.8938060125375076e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0180719071674664, + "grad_norm": 0.2515498697757721, + "learning_rate": 2.8922988993655632e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0184946418380503, + "grad_norm": 0.2615542709827423, + "learning_rate": 2.890792019035624e-05, + "loss": 0.37, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0189173765086346, + "grad_norm": 0.26489824056625366, + "learning_rate": 2.8892853717141648e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0193401111792184, + "grad_norm": 0.18052394688129425, + "learning_rate": 2.887778957567623e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0197628458498023, + "grad_norm": 0.2010137289762497, + "learning_rate": 2.8862727767624175e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0201855805203865, + "grad_norm": 0.21667931973934174, + "learning_rate": 2.8847668294649387e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0206083151909704, + "grad_norm": 0.22750884294509888, + "learning_rate": 2.883261115841552e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0210310498615542, + "grad_norm": 0.28309154510498047, + "learning_rate": 2.881755636058596e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0214537845321385, + "grad_norm": 0.22073446214199066, + "learning_rate": 2.8802503902823872e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0218765192027224, + "grad_norm": 0.20955321192741394, + "learning_rate": 2.878745378679209e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0222992538733062, + "grad_norm": 0.2745480537414551, + "learning_rate": 2.8772406014153262e-05, + "loss": 0.37, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0227219885438905, + "grad_norm": 0.22743460536003113, + "learning_rate": 2.875736058656971e-05, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0231447232144744, + "grad_norm": 0.1696351319551468, + "learning_rate": 2.8742317505703553e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0235674578850587, + "grad_norm": 0.19578619301319122, + "learning_rate": 2.872727677321663e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0239901925556425, + "grad_norm": 0.2275865226984024, + "learning_rate": 2.871223839077053e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0244129272262263, + "grad_norm": 0.24273896217346191, + "learning_rate": 2.8697202360026576e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0248356618968106, + "grad_norm": 0.25843071937561035, + "learning_rate": 2.8682168682645804e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0252583965673945, + "grad_norm": 0.2776300609111786, + "learning_rate": 2.8667137360289032e-05, + "loss": 0.355, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0256811312379783, + "grad_norm": 0.18971092998981476, + "learning_rate": 2.8652108394616795e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0261038659085626, + "grad_norm": 0.21133539080619812, + "learning_rate": 2.8637081787289395e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0265266005791465, + "grad_norm": 0.21808476746082306, + "learning_rate": 2.862205753996681e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0269493352497303, + "grad_norm": 0.2107633352279663, + "learning_rate": 2.860703565430883e-05, + "loss": 0.3563, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0273720699203146, + "grad_norm": 0.2191738337278366, + "learning_rate": 2.859201613197494e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0277948045908984, + "grad_norm": 0.2147776186466217, + "learning_rate": 2.8576998974624402e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0282175392614827, + "grad_norm": 0.22105181217193604, + "learning_rate": 2.8561984183916157e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0286402739320666, + "grad_norm": 0.18640340864658356, + "learning_rate": 2.854697176150893e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0290630086026504, + "grad_norm": 0.23979534208774567, + "learning_rate": 2.8531961709061174e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0294857432732347, + "grad_norm": 0.24514606595039368, + "learning_rate": 2.8516954028231092e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0299084779438186, + "grad_norm": 0.28105291724205017, + "learning_rate": 2.8501948720676618e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0303312126144024, + "grad_norm": 0.24354976415634155, + "learning_rate": 2.848694578805539e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0307539472849867, + "grad_norm": 0.1874832659959793, + "learning_rate": 2.847194523202485e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0311766819555706, + "grad_norm": 0.28910312056541443, + "learning_rate": 2.8456947054242093e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0315994166261544, + "grad_norm": 0.2403900921344757, + "learning_rate": 2.8441951256364024e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0320221512967387, + "grad_norm": 0.1988850235939026, + "learning_rate": 2.842695784004726e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0324448859673225, + "grad_norm": 0.23044128715991974, + "learning_rate": 2.8411966806948155e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.032867620637907, + "grad_norm": 0.23378139734268188, + "learning_rate": 2.8396978158722786e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0332903553084907, + "grad_norm": 0.32210198044776917, + "learning_rate": 2.8381991897026992e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0337130899790745, + "grad_norm": 0.19552962481975555, + "learning_rate": 2.8367008023516362e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.034135824649659, + "grad_norm": 0.16105102002620697, + "learning_rate": 2.8352026539846156e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0345585593202427, + "grad_norm": 0.15970641374588013, + "learning_rate": 2.83370474476714e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0349812939908265, + "grad_norm": 0.222952738404274, + "learning_rate": 2.8322070748646885e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.035404028661411, + "grad_norm": 0.19596566259860992, + "learning_rate": 2.8307096444427116e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0358267633319946, + "grad_norm": 0.18731513619422913, + "learning_rate": 2.8292124536666332e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0362494980025785, + "grad_norm": 0.24307869374752045, + "learning_rate": 2.827715502701851e-05, + "loss": 0.3513, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0366722326731628, + "grad_norm": 0.30042779445648193, + "learning_rate": 2.8262187917137388e-05, + "loss": 0.3696, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0370949673437466, + "grad_norm": 0.1752796620130539, + "learning_rate": 2.824722320867637e-05, + "loss": 0.3709, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.037517702014331, + "grad_norm": 0.2985466420650482, + "learning_rate": 2.8232260903288676e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0379404366849148, + "grad_norm": 0.21144725382328033, + "learning_rate": 2.8217301002627182e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0383631713554986, + "grad_norm": 0.20707112550735474, + "learning_rate": 2.820234350834456e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.038785906026083, + "grad_norm": 0.21636414527893066, + "learning_rate": 2.818738842209319e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0392086406966667, + "grad_norm": 0.16020074486732483, + "learning_rate": 2.8172435745525204e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0396313753672506, + "grad_norm": 0.21981492638587952, + "learning_rate": 2.8157485480292433e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.040054110037835, + "grad_norm": 0.1944180279970169, + "learning_rate": 2.8142537628046506e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 60990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0404768447084187, + "grad_norm": 0.19838182628154755, + "learning_rate": 2.8127592190438688e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0408995793790026, + "grad_norm": 0.24525107443332672, + "learning_rate": 2.811264916912005e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.041322314049587, + "grad_norm": 0.14729271829128265, + "learning_rate": 2.809770856574141e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0417450487201707, + "grad_norm": 0.2061442881822586, + "learning_rate": 2.8082770381953244e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.042167783390755, + "grad_norm": 0.18136656284332275, + "learning_rate": 2.8067834619405815e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.042590518061339, + "grad_norm": 0.1953580677509308, + "learning_rate": 2.805290127974911e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0430132527319227, + "grad_norm": 0.2649862468242645, + "learning_rate": 2.803797036463287e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.043435987402507, + "grad_norm": 0.24500451982021332, + "learning_rate": 2.802304187570649e-05, + "loss": 0.37, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.043858722073091, + "grad_norm": 0.2356261909008026, + "learning_rate": 2.8008115814619197e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0442814567436747, + "grad_norm": 0.17810922861099243, + "learning_rate": 2.7993192183019872e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.044704191414259, + "grad_norm": 0.21541951596736908, + "learning_rate": 2.7978270982557203e-05, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.045126926084843, + "grad_norm": 0.21551452577114105, + "learning_rate": 2.7963352214879512e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0455496607554267, + "grad_norm": 0.2181146740913391, + "learning_rate": 2.7948435881634928e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.045972395426011, + "grad_norm": 0.15827375650405884, + "learning_rate": 2.793352198447129e-05, + "loss": 0.3537, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.046395130096595, + "grad_norm": 0.2738408148288727, + "learning_rate": 2.7918610525036193e-05, + "loss": 0.3552, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.046817864767179, + "grad_norm": 0.34055107831954956, + "learning_rate": 2.7903701504976887e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.047240599437763, + "grad_norm": 0.2059088498353958, + "learning_rate": 2.788879492594042e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0476633341083468, + "grad_norm": 0.2749532461166382, + "learning_rate": 2.7873890789573553e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.048086068778931, + "grad_norm": 0.16830389201641083, + "learning_rate": 2.7858989097522785e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.048508803449515, + "grad_norm": 0.2648961842060089, + "learning_rate": 2.784408985143435e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0489315381200988, + "grad_norm": 0.19552694261074066, + "learning_rate": 2.7829193052954154e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.049354272790683, + "grad_norm": 0.23816749453544617, + "learning_rate": 2.7814298703727926e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.049777007461267, + "grad_norm": 0.1815076321363449, + "learning_rate": 2.779940680540103e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0501997421318507, + "grad_norm": 0.2179591953754425, + "learning_rate": 2.7784517359618617e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.050622476802435, + "grad_norm": 0.29117879271507263, + "learning_rate": 2.7769630368025568e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.051045211473019, + "grad_norm": 0.263729065656662, + "learning_rate": 2.7754745832266466e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.051467946143603, + "grad_norm": 0.30411282181739807, + "learning_rate": 2.7739863753985647e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.051890680814187, + "grad_norm": 0.23332057893276215, + "learning_rate": 2.772498413482717e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.052313415484771, + "grad_norm": 0.20933504402637482, + "learning_rate": 2.7710106976434798e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.052736150155355, + "grad_norm": 0.1863022893667221, + "learning_rate": 2.769523228045206e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.053158884825939, + "grad_norm": 0.23356613516807556, + "learning_rate": 2.7680360048522168e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.053581619496523, + "grad_norm": 0.27393871545791626, + "learning_rate": 2.76654902822881e-05, + "loss": 0.3691, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.054004354167107, + "grad_norm": 0.180935800075531, + "learning_rate": 2.765062298339255e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.054427088837691, + "grad_norm": 0.28071069717407227, + "learning_rate": 2.763575815347794e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.054849823508275, + "grad_norm": 0.22834427654743195, + "learning_rate": 2.7620895794186417e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.055272558178859, + "grad_norm": 0.28925690054893494, + "learning_rate": 2.7606035907159878e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.055695292849443, + "grad_norm": 0.18844468891620636, + "learning_rate": 2.7591178494039882e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0561180275200273, + "grad_norm": 0.19696100056171417, + "learning_rate": 2.7576323556467804e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.056540762190611, + "grad_norm": 0.24377702176570892, + "learning_rate": 2.7561471096084647e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.056963496861195, + "grad_norm": 0.21782904863357544, + "learning_rate": 2.7546621114531223e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0573862315317792, + "grad_norm": 0.3147704005241394, + "learning_rate": 2.7531773613448043e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.057808966202363, + "grad_norm": 0.160898819565773, + "learning_rate": 2.7516928594475323e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.058231700872947, + "grad_norm": 0.2441456913948059, + "learning_rate": 2.7502086059253052e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0586544355435312, + "grad_norm": 0.23272159695625305, + "learning_rate": 2.748724600942088e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.059077170214115, + "grad_norm": 0.23374386131763458, + "learning_rate": 2.7472408446618237e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.059499904884699, + "grad_norm": 0.1716456562280655, + "learning_rate": 2.7457573372484245e-05, + "loss": 0.3522, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.059922639555283, + "grad_norm": 0.2873116731643677, + "learning_rate": 2.74427407886578e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.060345374225867, + "grad_norm": 0.17939786612987518, + "learning_rate": 2.7427910696777444e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0607681088964513, + "grad_norm": 0.18930703401565552, + "learning_rate": 2.7413083098481502e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.061190843567035, + "grad_norm": 0.22161346673965454, + "learning_rate": 2.739825799540801e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.061613578237619, + "grad_norm": 0.26682713627815247, + "learning_rate": 2.738343538919475e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0620363129082033, + "grad_norm": 0.3264729380607605, + "learning_rate": 2.736861528147917e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.062459047578787, + "grad_norm": 0.22584770619869232, + "learning_rate": 2.7353797673898485e-05, + "loss": 0.3557, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.062881782249371, + "grad_norm": 0.2620195746421814, + "learning_rate": 2.7338982568089634e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0633045169199553, + "grad_norm": 0.22584030032157898, + "learning_rate": 2.7324169965689273e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.063727251590539, + "grad_norm": 0.2585848271846771, + "learning_rate": 2.7309359868333794e-05, + "loss": 0.355, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.064149986261123, + "grad_norm": 0.24760310351848602, + "learning_rate": 2.729455227765927e-05, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0645727209317073, + "grad_norm": 0.20094409584999084, + "learning_rate": 2.7279747195301553e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.064995455602291, + "grad_norm": 0.25999754667282104, + "learning_rate": 2.7264944622896155e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0654181902728754, + "grad_norm": 0.2705923020839691, + "learning_rate": 2.725014456207836e-05, + "loss": 0.3549, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0658409249434593, + "grad_norm": 0.21093398332595825, + "learning_rate": 2.723534701448318e-05, + "loss": 0.3547, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.066263659614043, + "grad_norm": 0.22315403819084167, + "learning_rate": 2.7220551981745312e-05, + "loss": 0.3681, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0666863942846274, + "grad_norm": 0.2277369499206543, + "learning_rate": 2.720575946549919e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0671091289552113, + "grad_norm": 0.261420875787735, + "learning_rate": 2.7190969467379014e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.067531863625795, + "grad_norm": 0.2268618792295456, + "learning_rate": 2.7177660623418898e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0679545982963794, + "grad_norm": 0.24150504171848297, + "learning_rate": 2.7162875414239042e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0683773329669632, + "grad_norm": 0.24096421897411346, + "learning_rate": 2.714809272792259e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.068800067637547, + "grad_norm": 0.22159187495708466, + "learning_rate": 2.713331256610263e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0692228023081314, + "grad_norm": 0.22898459434509277, + "learning_rate": 2.7118534930411927e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0696455369787152, + "grad_norm": 0.2335589975118637, + "learning_rate": 2.7103759822483e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0700682716492995, + "grad_norm": 0.2183474451303482, + "learning_rate": 2.7088987243948117e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0704910063198834, + "grad_norm": 0.2596646845340729, + "learning_rate": 2.7074217196439194e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.070913740990467, + "grad_norm": 0.31750184297561646, + "learning_rate": 2.7059449681587922e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0713364756610515, + "grad_norm": 0.16757670044898987, + "learning_rate": 2.7044684701025702e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0717592103316353, + "grad_norm": 0.2446734458208084, + "learning_rate": 2.7029922256383677e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.072181945002219, + "grad_norm": 0.20137053728103638, + "learning_rate": 2.7015162349292634e-05, + "loss": 0.37, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0726046796728035, + "grad_norm": 0.18664519488811493, + "learning_rate": 2.7000404981383155e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0730274143433873, + "grad_norm": 0.1880853921175003, + "learning_rate": 2.6985650154285508e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.073450149013971, + "grad_norm": 0.1715870499610901, + "learning_rate": 2.69708978696297e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0738728836845555, + "grad_norm": 0.24770332872867584, + "learning_rate": 2.6956148129045465e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0742956183551393, + "grad_norm": 0.1574331372976303, + "learning_rate": 2.694140093416219e-05, + "loss": 0.3548, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0747183530257236, + "grad_norm": 0.30997881293296814, + "learning_rate": 2.6926656286609055e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0751410876963075, + "grad_norm": 0.26773494482040405, + "learning_rate": 2.691191418801494e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0755638223668913, + "grad_norm": 0.24886052310466766, + "learning_rate": 2.6897174640008404e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0759865570374756, + "grad_norm": 0.20675428211688995, + "learning_rate": 2.688243764421778e-05, + "loss": 0.3547, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0764092917080594, + "grad_norm": 0.20473529398441315, + "learning_rate": 2.6867703202271077e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0768320263786433, + "grad_norm": 0.2279502898454666, + "learning_rate": 2.685297131579605e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0772547610492276, + "grad_norm": 0.18215280771255493, + "learning_rate": 2.683824198642016e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0776774957198114, + "grad_norm": 0.18076324462890625, + "learning_rate": 2.68235152157706e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0781002303903953, + "grad_norm": 0.15823175013065338, + "learning_rate": 2.6808791005474254e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0785229650609796, + "grad_norm": 0.28583335876464844, + "learning_rate": 2.6794069357157707e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0789456997315634, + "grad_norm": 0.2178484946489334, + "learning_rate": 2.6779350272447317e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0793684344021477, + "grad_norm": 0.27286848425865173, + "learning_rate": 2.6764633752969127e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0797911690727315, + "grad_norm": 0.177617609500885, + "learning_rate": 2.67499198003489e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0802139037433154, + "grad_norm": 0.21541327238082886, + "learning_rate": 2.6735208416212122e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0806366384138997, + "grad_norm": 0.3251809775829315, + "learning_rate": 2.6720499602183996e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0810593730844835, + "grad_norm": 0.23954424262046814, + "learning_rate": 2.6705793359889407e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0814821077550674, + "grad_norm": 0.2172805517911911, + "learning_rate": 2.6691089690953025e-05, + "loss": 0.3561, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0819048424256517, + "grad_norm": 0.4062487781047821, + "learning_rate": 2.6676388596999146e-05, + "loss": 0.3717, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0823275770962355, + "grad_norm": 0.21187372505664825, + "learning_rate": 2.6661690079651846e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 61990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0827503117668194, + "grad_norm": 0.19212310016155243, + "learning_rate": 2.6646994140534914e-05, + "loss": 0.3534, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0831730464374036, + "grad_norm": 0.2638002336025238, + "learning_rate": 2.6632300781271836e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0835957811079875, + "grad_norm": 0.19066816568374634, + "learning_rate": 2.6617610003485803e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.084018515778572, + "grad_norm": 0.17534898221492767, + "learning_rate": 2.6602921808799774e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0844412504491556, + "grad_norm": 0.17951825261116028, + "learning_rate": 2.658823619883633e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0848639851197395, + "grad_norm": 0.24328383803367615, + "learning_rate": 2.657355317521787e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0852867197903238, + "grad_norm": 0.2276500165462494, + "learning_rate": 2.655887273956641e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0857094544609076, + "grad_norm": 0.19034647941589355, + "learning_rate": 2.654419489350375e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0861321891314915, + "grad_norm": 0.1412111520767212, + "learning_rate": 2.652951963865138e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0865549238020757, + "grad_norm": 0.1960137039422989, + "learning_rate": 2.6514846976630514e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0869776584726596, + "grad_norm": 0.25416839122772217, + "learning_rate": 2.650017690906208e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0874003931432434, + "grad_norm": 0.1835961788892746, + "learning_rate": 2.648550943756667e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0878231278138277, + "grad_norm": 0.18683142960071564, + "learning_rate": 2.6470844563764653e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0882458624844116, + "grad_norm": 0.21863335371017456, + "learning_rate": 2.6456182289276087e-05, + "loss": 0.3535, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.088668597154996, + "grad_norm": 0.18637986481189728, + "learning_rate": 2.6441522615720766e-05, + "loss": 0.3551, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0890913318255797, + "grad_norm": 0.27767854928970337, + "learning_rate": 2.6426865544718127e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0895140664961636, + "grad_norm": 0.21143533289432526, + "learning_rate": 2.6412211077887394e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.089936801166748, + "grad_norm": 0.18591158092021942, + "learning_rate": 2.639755921684748e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0903595358373317, + "grad_norm": 0.1941329538822174, + "learning_rate": 2.6382909963217007e-05, + "loss": 0.3549, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0907822705079155, + "grad_norm": 0.18040831387043, + "learning_rate": 2.6368263318614277e-05, + "loss": 0.3533, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0912050051785, + "grad_norm": 0.24480466544628143, + "learning_rate": 2.635361928465736e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0916277398490837, + "grad_norm": 0.20075318217277527, + "learning_rate": 2.633897786296401e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0920504745196675, + "grad_norm": 0.21565182507038116, + "learning_rate": 2.6324339055151685e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.092473209190252, + "grad_norm": 0.1771952509880066, + "learning_rate": 2.6309702862837598e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0928959438608357, + "grad_norm": 0.2817683219909668, + "learning_rate": 2.629506928763859e-05, + "loss": 0.368, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.09331867853142, + "grad_norm": 0.22967812418937683, + "learning_rate": 2.628043833117131e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.093741413202004, + "grad_norm": 0.2954597473144531, + "learning_rate": 2.6265809995052016e-05, + "loss": 0.3707, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0941641478725876, + "grad_norm": 0.21538466215133667, + "learning_rate": 2.6251184280896756e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.094586882543172, + "grad_norm": 0.18994252383708954, + "learning_rate": 2.623656119032126e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.095009617213756, + "grad_norm": 0.2183171510696411, + "learning_rate": 2.6221940724940985e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0954323518843396, + "grad_norm": 0.16453655064105988, + "learning_rate": 2.6207322886371067e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.095855086554924, + "grad_norm": 0.2734546363353729, + "learning_rate": 2.6192707676226375e-05, + "loss": 0.3705, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0962778212255078, + "grad_norm": 0.21554099023342133, + "learning_rate": 2.6178095096121498e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0967005558960916, + "grad_norm": 0.18814332783222198, + "learning_rate": 2.6163485147670698e-05, + "loss": 0.3545, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.097123290566676, + "grad_norm": 0.16024228930473328, + "learning_rate": 2.614887783248795e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0975460252372597, + "grad_norm": 0.23978950083255768, + "learning_rate": 2.613427315218696e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.097968759907844, + "grad_norm": 0.37107372283935547, + "learning_rate": 2.6119671108381156e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.098391494578428, + "grad_norm": 0.17530658841133118, + "learning_rate": 2.610507170268364e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.0988142292490117, + "grad_norm": 0.21535438299179077, + "learning_rate": 2.6090474936707247e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.099236963919596, + "grad_norm": 0.24682220816612244, + "learning_rate": 2.6075880812064528e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.09965969859018, + "grad_norm": 0.22238758206367493, + "learning_rate": 2.606128933036769e-05, + "loss": 0.3561, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1000824332607637, + "grad_norm": 0.1912747025489807, + "learning_rate": 2.6046700493228714e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.100505167931348, + "grad_norm": 0.20964448153972626, + "learning_rate": 2.6032114302259224e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.100927902601932, + "grad_norm": 0.21914717555046082, + "learning_rate": 2.6017530759070603e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1013506372725157, + "grad_norm": 0.19775667786598206, + "learning_rate": 2.600294986527393e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1017733719431, + "grad_norm": 0.1990610659122467, + "learning_rate": 2.598837162247998e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.102196106613684, + "grad_norm": 0.20070740580558777, + "learning_rate": 2.5973796032299248e-05, + "loss": 0.3694, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.102618841284268, + "grad_norm": 0.20689159631729126, + "learning_rate": 2.5959223096341944e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.103041575954852, + "grad_norm": 0.12624400854110718, + "learning_rate": 2.594465281621793e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.103464310625436, + "grad_norm": 0.17353901267051697, + "learning_rate": 2.5930085193536833e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.10388704529602, + "grad_norm": 0.27023616433143616, + "learning_rate": 2.5915520229907995e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.104309779966604, + "grad_norm": 0.22040174901485443, + "learning_rate": 2.590095792694039e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.104732514637188, + "grad_norm": 0.20487220585346222, + "learning_rate": 2.5886398286242765e-05, + "loss": 0.3563, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.105155249307772, + "grad_norm": 0.2067434936761856, + "learning_rate": 2.5871841309423555e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.105577983978356, + "grad_norm": 0.2506355941295624, + "learning_rate": 2.5857286998090918e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.10600071864894, + "grad_norm": 0.20017513632774353, + "learning_rate": 2.584273535385266e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.106423453319524, + "grad_norm": 0.22276777029037476, + "learning_rate": 2.5828186378316356e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.106846187990108, + "grad_norm": 0.17783793807029724, + "learning_rate": 2.581364007308924e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.107268922660692, + "grad_norm": 0.23458345234394073, + "learning_rate": 2.5799096439778297e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.107691657331276, + "grad_norm": 0.2138543576002121, + "learning_rate": 2.5784555479990197e-05, + "loss": 0.369, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.10811439200186, + "grad_norm": 0.236094668507576, + "learning_rate": 2.5770017195331275e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.108537126672444, + "grad_norm": 0.21240198612213135, + "learning_rate": 2.575548158740762e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.108959861343028, + "grad_norm": 0.17024686932563782, + "learning_rate": 2.5740948657825036e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.109382596013612, + "grad_norm": 0.14944888651371002, + "learning_rate": 2.5726418408188956e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.109805330684196, + "grad_norm": 0.20806513726711273, + "learning_rate": 2.5711890840104604e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.11022806535478, + "grad_norm": 0.22594405710697174, + "learning_rate": 2.5697365955176845e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.110650800025364, + "grad_norm": 0.19034965336322784, + "learning_rate": 2.568284375501029e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.111073534695948, + "grad_norm": 0.22950446605682373, + "learning_rate": 2.566832424120923e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.111496269366532, + "grad_norm": 0.25221607089042664, + "learning_rate": 2.565380741537769e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1119190040371163, + "grad_norm": 0.16408662497997284, + "learning_rate": 2.5639293279119346e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1123417387077, + "grad_norm": 0.19870874285697937, + "learning_rate": 2.5624781834037592e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.112764473378284, + "grad_norm": 0.22369013726711273, + "learning_rate": 2.5610273081735546e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1131872080488683, + "grad_norm": 0.2215665578842163, + "learning_rate": 2.559576702381603e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.113609942719452, + "grad_norm": 0.2234027236700058, + "learning_rate": 2.5581263661881554e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.114032677390036, + "grad_norm": 0.22463072836399078, + "learning_rate": 2.5566762997534334e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1144554120606203, + "grad_norm": 0.2280828058719635, + "learning_rate": 2.55522650323763e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.114878146731204, + "grad_norm": 0.35918569564819336, + "learning_rate": 2.553776976800904e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.115300881401788, + "grad_norm": 0.14704473316669464, + "learning_rate": 2.5523277206033914e-05, + "loss": 0.3701, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1157236160723722, + "grad_norm": 0.20945385098457336, + "learning_rate": 2.550878734805191e-05, + "loss": 0.3556, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.116146350742956, + "grad_norm": 0.25943616032600403, + "learning_rate": 2.5494300195663754e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1165690854135404, + "grad_norm": 0.2538602948188782, + "learning_rate": 2.5479815750469894e-05, + "loss": 0.3547, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1169918200841242, + "grad_norm": 0.20123761892318726, + "learning_rate": 2.546533401407044e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.117414554754708, + "grad_norm": 0.22765254974365234, + "learning_rate": 2.5450854988065225e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1178372894252924, + "grad_norm": 0.26229557394981384, + "learning_rate": 2.5436378674053796e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.118260024095876, + "grad_norm": 0.20343278348445892, + "learning_rate": 2.5421905073635337e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.11868275876646, + "grad_norm": 0.1596374660730362, + "learning_rate": 2.54074341884088e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1191054934370444, + "grad_norm": 0.19873248040676117, + "learning_rate": 2.539296601997283e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.119528228107628, + "grad_norm": 0.17997002601623535, + "learning_rate": 2.5378500569925723e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.119950962778212, + "grad_norm": 0.14910754561424255, + "learning_rate": 2.5364037839865518e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1203736974487963, + "grad_norm": 0.19417595863342285, + "learning_rate": 2.534957783138994e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.12079643211938, + "grad_norm": 0.18090198934078217, + "learning_rate": 2.533512054609644e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1212191667899645, + "grad_norm": 0.22689925134181976, + "learning_rate": 2.5320665985582103e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1216419014605483, + "grad_norm": 0.23137834668159485, + "learning_rate": 2.530621415144378e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.122064636131132, + "grad_norm": 0.27699580788612366, + "learning_rate": 2.5291765045277982e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1224873708017165, + "grad_norm": 0.20527683198451996, + "learning_rate": 2.5277318668680943e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1229101054723003, + "grad_norm": 0.24500128626823425, + "learning_rate": 2.5262875023248595e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.123332840142884, + "grad_norm": 0.3010150194168091, + "learning_rate": 2.524843411057652e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1237555748134684, + "grad_norm": 0.2577902376651764, + "learning_rate": 2.5233995932260053e-05, + "loss": 0.3536, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1241783094840523, + "grad_norm": 0.22339169681072235, + "learning_rate": 2.5219560489894233e-05, + "loss": 0.367, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.124601044154636, + "grad_norm": 0.21987882256507874, + "learning_rate": 2.5205127785073725e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 62990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1250237788252204, + "grad_norm": 0.21582050621509552, + "learning_rate": 2.5190697819392966e-05, + "loss": 0.3528, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1254465134958043, + "grad_norm": 0.21207350492477417, + "learning_rate": 2.517627059444606e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1258692481663886, + "grad_norm": 0.18886099755764008, + "learning_rate": 2.5161846111826814e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1262919828369724, + "grad_norm": 0.2710430324077606, + "learning_rate": 2.5147424373128735e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1267147175075563, + "grad_norm": 0.17806220054626465, + "learning_rate": 2.513300537994503e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1271374521781405, + "grad_norm": 0.18559540808200836, + "learning_rate": 2.5118589133868574e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1275601868487244, + "grad_norm": 0.2116641104221344, + "learning_rate": 2.5104175636491955e-05, + "loss": 0.3559, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1279829215193082, + "grad_norm": 0.32000303268432617, + "learning_rate": 2.5089764889407468e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1284056561898925, + "grad_norm": 0.2798571288585663, + "learning_rate": 2.5075356894207102e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1288283908604764, + "grad_norm": 0.20363180339336395, + "learning_rate": 2.5060951652482532e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.12925112553106, + "grad_norm": 0.27302372455596924, + "learning_rate": 2.504654916582514e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1296738602016445, + "grad_norm": 0.1920800507068634, + "learning_rate": 2.503214943582602e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1300965948722284, + "grad_norm": 0.20021003484725952, + "learning_rate": 2.5017752464075896e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1305193295428126, + "grad_norm": 0.19073717296123505, + "learning_rate": 2.500335825216526e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1309420642133965, + "grad_norm": 0.23834742605686188, + "learning_rate": 2.4988966801684244e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1313647988839803, + "grad_norm": 0.2613615095615387, + "learning_rate": 2.4974578114222718e-05, + "loss": 0.3536, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1317875335545646, + "grad_norm": 0.25281521677970886, + "learning_rate": 2.496019219137023e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1322102682251485, + "grad_norm": 0.32291892170906067, + "learning_rate": 2.4945809034716017e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1326330028957323, + "grad_norm": 0.20428188145160675, + "learning_rate": 2.493142864584902e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1330557375663166, + "grad_norm": 0.237855926156044, + "learning_rate": 2.491705102635789e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1334784722369005, + "grad_norm": 0.23609024286270142, + "learning_rate": 2.4902676177830907e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1339012069074847, + "grad_norm": 0.16334213316440582, + "learning_rate": 2.4888304101856113e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1343239415780686, + "grad_norm": 0.22017107903957367, + "learning_rate": 2.487393480002124e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1347466762486524, + "grad_norm": 0.18715688586235046, + "learning_rate": 2.4859568273913654e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1351694109192367, + "grad_norm": 0.2048446089029312, + "learning_rate": 2.484520452512047e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1355921455898206, + "grad_norm": 0.13917899131774902, + "learning_rate": 2.4830843555228488e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1360148802604044, + "grad_norm": 0.15649183094501495, + "learning_rate": 2.481648536582421e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1364376149309887, + "grad_norm": 0.16239048540592194, + "learning_rate": 2.4802129958493776e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1368603496015726, + "grad_norm": 0.2108689248561859, + "learning_rate": 2.478777733482307e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1372830842721564, + "grad_norm": 0.18127919733524323, + "learning_rate": 2.477342749639766e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1377058189427407, + "grad_norm": 0.17122679948806763, + "learning_rate": 2.4759080444802808e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1381285536133245, + "grad_norm": 0.24803103506565094, + "learning_rate": 2.4744736181623467e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1385512882839084, + "grad_norm": 0.23541103303432465, + "learning_rate": 2.4730394708444256e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1389740229544927, + "grad_norm": 0.24534612894058228, + "learning_rate": 2.471605602684951e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1393967576250765, + "grad_norm": 0.24098610877990723, + "learning_rate": 2.470172013842328e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.139819492295661, + "grad_norm": 0.19751018285751343, + "learning_rate": 2.468738704474924e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1402422269662447, + "grad_norm": 0.23409828543663025, + "learning_rate": 2.467305674741081e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1406649616368285, + "grad_norm": 0.2256140410900116, + "learning_rate": 2.4658729247991095e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.141087696307413, + "grad_norm": 0.21931229531764984, + "learning_rate": 2.464440454807288e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1415104309779966, + "grad_norm": 0.2274288386106491, + "learning_rate": 2.4630082649238646e-05, + "loss": 0.3561, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1419331656485805, + "grad_norm": 0.25005608797073364, + "learning_rate": 2.4615763553070574e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.142355900319165, + "grad_norm": 0.21660074591636658, + "learning_rate": 2.4601447261150513e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1427786349897486, + "grad_norm": 0.1825816035270691, + "learning_rate": 2.458713377505999e-05, + "loss": 0.3511, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.143201369660333, + "grad_norm": 0.18121041357517242, + "learning_rate": 2.4572823096380266e-05, + "loss": 0.3676, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1436241043309168, + "grad_norm": 0.2199331521987915, + "learning_rate": 2.4558515226692264e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1440468390015006, + "grad_norm": 0.19686353206634521, + "learning_rate": 2.4544210167576604e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.144469573672085, + "grad_norm": 0.2844889461994171, + "learning_rate": 2.4529907920613605e-05, + "loss": 0.3554, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1448923083426688, + "grad_norm": 0.24416251480579376, + "learning_rate": 2.4515608487383257e-05, + "loss": 0.3547, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1453150430132526, + "grad_norm": 0.2702571153640747, + "learning_rate": 2.4501311869465265e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.145737777683837, + "grad_norm": 0.16987933218479156, + "learning_rate": 2.4487018068438995e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1461605123544207, + "grad_norm": 0.19873283803462982, + "learning_rate": 2.4472727085883484e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1465832470250046, + "grad_norm": 0.1925082951784134, + "learning_rate": 2.4458438923377508e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.147005981695589, + "grad_norm": 0.1696571707725525, + "learning_rate": 2.4444153582499513e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1474287163661727, + "grad_norm": 0.27456367015838623, + "learning_rate": 2.442987106482762e-05, + "loss": 0.3553, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1478514510367566, + "grad_norm": 0.19651685655117035, + "learning_rate": 2.441559137193966e-05, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.148274185707341, + "grad_norm": 0.18923228979110718, + "learning_rate": 2.4401314505413146e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1486969203779247, + "grad_norm": 0.2819032669067383, + "learning_rate": 2.4387040466825246e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.149119655048509, + "grad_norm": 0.24052487313747406, + "learning_rate": 2.4372769257752854e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.149542389719093, + "grad_norm": 0.17448262870311737, + "learning_rate": 2.435850087977256e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1499651243896767, + "grad_norm": 0.2751102149486542, + "learning_rate": 2.4344235334460587e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.150387859060261, + "grad_norm": 0.22443419694900513, + "learning_rate": 2.4329972623392887e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.150810593730845, + "grad_norm": 0.18454334139823914, + "learning_rate": 2.4315712748145098e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1512333284014287, + "grad_norm": 0.23408293724060059, + "learning_rate": 2.4301455710292537e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.151656063072013, + "grad_norm": 0.15617148578166962, + "learning_rate": 2.428720151141023e-05, + "loss": 0.3525, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.152078797742597, + "grad_norm": 0.16484394669532776, + "learning_rate": 2.4272950153072815e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.152501532413181, + "grad_norm": 0.20827864110469818, + "learning_rate": 2.425870163685471e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.152924267083765, + "grad_norm": 0.2351905107498169, + "learning_rate": 2.4244455964329953e-05, + "loss": 0.3549, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.153347001754349, + "grad_norm": 0.3214499056339264, + "learning_rate": 2.4230213137072333e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.153769736424933, + "grad_norm": 0.21654149889945984, + "learning_rate": 2.4215973156655236e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.154192471095517, + "grad_norm": 0.19737419486045837, + "learning_rate": 2.4201736024651794e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1546152057661008, + "grad_norm": 0.14911329746246338, + "learning_rate": 2.4187501742634843e-05, + "loss": 0.3481, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.155037940436685, + "grad_norm": 0.2136392742395401, + "learning_rate": 2.4173270312176828e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.155460675107269, + "grad_norm": 0.24752222001552582, + "learning_rate": 2.4159041734849948e-05, + "loss": 0.3542, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1558834097778528, + "grad_norm": 0.17514784634113312, + "learning_rate": 2.414481601222605e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.156306144448437, + "grad_norm": 0.212669238448143, + "learning_rate": 2.4130593145876695e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.156728879119021, + "grad_norm": 0.2634544372558594, + "learning_rate": 2.4116373137373126e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1571516137896047, + "grad_norm": 0.17895285785198212, + "learning_rate": 2.4102155988286213e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.157574348460189, + "grad_norm": 0.23801104724407196, + "learning_rate": 2.408794170018657e-05, + "loss": 0.3538, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.157997083130773, + "grad_norm": 0.23074860870838165, + "learning_rate": 2.4073730274644506e-05, + "loss": 0.3556, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.158419817801357, + "grad_norm": 0.19139917194843292, + "learning_rate": 2.4059521713229948e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.158842552471941, + "grad_norm": 0.23991118371486664, + "learning_rate": 2.4045316017512554e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.159265287142525, + "grad_norm": 0.17623494565486908, + "learning_rate": 2.4031113189061667e-05, + "loss": 0.3539, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.159688021813109, + "grad_norm": 0.3703693449497223, + "learning_rate": 2.4016913229446293e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.160110756483693, + "grad_norm": 0.16478745639324188, + "learning_rate": 2.400271614023513e-05, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.160533491154277, + "grad_norm": 0.22205586731433868, + "learning_rate": 2.3988521922996586e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.160956225824861, + "grad_norm": 0.2268935889005661, + "learning_rate": 2.3974330579298705e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.161378960495445, + "grad_norm": 0.1500318944454193, + "learning_rate": 2.396014211070921e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1618016951660293, + "grad_norm": 0.1490434855222702, + "learning_rate": 2.3945956518795554e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.162224429836613, + "grad_norm": 0.3250833749771118, + "learning_rate": 2.393177380512484e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.162647164507197, + "grad_norm": 0.22457648813724518, + "learning_rate": 2.391759397126386e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1630698991777813, + "grad_norm": 0.2115565538406372, + "learning_rate": 2.3903417018779105e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.163492633848365, + "grad_norm": 0.14665858447551727, + "learning_rate": 2.388924294923674e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.163915368518949, + "grad_norm": 0.3031982481479645, + "learning_rate": 2.3875071764202563e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1643381031895332, + "grad_norm": 0.25706446170806885, + "learning_rate": 2.386090346524213e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.164760837860117, + "grad_norm": 0.18570294976234436, + "learning_rate": 2.3846738053920614e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.165183572530701, + "grad_norm": 0.24774636328220367, + "learning_rate": 2.3832575531802908e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.165606307201285, + "grad_norm": 0.22482989728450775, + "learning_rate": 2.3818415900453574e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.166029041871869, + "grad_norm": 0.15254326164722443, + "learning_rate": 2.380425916143686e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.166451776542453, + "grad_norm": 0.23252156376838684, + "learning_rate": 2.379010531631668e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.166874511213037, + "grad_norm": 0.18787036836147308, + "learning_rate": 2.377595436665667e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 63990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.167297245883621, + "grad_norm": 0.1878891885280609, + "learning_rate": 2.3761806314020067e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1677199805542053, + "grad_norm": 0.29916027188301086, + "learning_rate": 2.374766115996986e-05, + "loss": 0.3711, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.168142715224789, + "grad_norm": 0.1651177555322647, + "learning_rate": 2.3733518906068702e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.168565449895373, + "grad_norm": 0.265135794878006, + "learning_rate": 2.371937955387889e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1689881845659573, + "grad_norm": 0.21406660974025726, + "learning_rate": 2.3705243104962438e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.169410919236541, + "grad_norm": 0.21451590955257416, + "learning_rate": 2.3691109560881035e-05, + "loss": 0.3708, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.169833653907125, + "grad_norm": 0.19095668196678162, + "learning_rate": 2.3676978923196053e-05, + "loss": 0.355, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1702563885777093, + "grad_norm": 0.22026018798351288, + "learning_rate": 2.3662851193468504e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.170679123248293, + "grad_norm": 0.23807501792907715, + "learning_rate": 2.3648726373259116e-05, + "loss": 0.3504, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1711018579188774, + "grad_norm": 0.1548784226179123, + "learning_rate": 2.363460446412829e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1715245925894613, + "grad_norm": 0.21198508143424988, + "learning_rate": 2.3620485467636107e-05, + "loss": 0.3552, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.171947327260045, + "grad_norm": 0.19486457109451294, + "learning_rate": 2.3606369385342332e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1723700619306294, + "grad_norm": 0.14965446293354034, + "learning_rate": 2.3592256218806365e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1727927966012133, + "grad_norm": 0.2192060351371765, + "learning_rate": 2.357814596958733e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.173215531271797, + "grad_norm": 0.18920719623565674, + "learning_rate": 2.356403863924404e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1736382659423814, + "grad_norm": 0.2656528949737549, + "learning_rate": 2.354993422933492e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1740610006129653, + "grad_norm": 0.23729488253593445, + "learning_rate": 2.353583274141813e-05, + "loss": 0.3543, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.174483735283549, + "grad_norm": 0.21634715795516968, + "learning_rate": 2.3521734177051484e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1749064699541334, + "grad_norm": 0.2590022683143616, + "learning_rate": 2.3507638537792493e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1753292046247172, + "grad_norm": 0.19351695477962494, + "learning_rate": 2.3493545825198326e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.175751939295301, + "grad_norm": 0.1796617954969406, + "learning_rate": 2.3479456040825844e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1761746739658854, + "grad_norm": 0.22390982508659363, + "learning_rate": 2.3465369186231567e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.176597408636469, + "grad_norm": 0.19115564227104187, + "learning_rate": 2.3451285262971667e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1770201433070535, + "grad_norm": 0.2836092710494995, + "learning_rate": 2.3437204272602054e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1774428779776374, + "grad_norm": 0.2760016918182373, + "learning_rate": 2.3423126216678276e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.177865612648221, + "grad_norm": 0.2128637582063675, + "learning_rate": 2.340905109675557e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1782883473188055, + "grad_norm": 0.2242509126663208, + "learning_rate": 2.3394978914388836e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1787110819893893, + "grad_norm": 0.3750990629196167, + "learning_rate": 2.3380909671132667e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.179133816659973, + "grad_norm": 0.22015181183815002, + "learning_rate": 2.3366843368541324e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1795565513305575, + "grad_norm": 0.16233476996421814, + "learning_rate": 2.335278000816874e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1799792860011413, + "grad_norm": 0.18664489686489105, + "learning_rate": 2.3338719591568487e-05, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1804020206717256, + "grad_norm": 0.15567909181118011, + "learning_rate": 2.3324662120293878e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1808247553423095, + "grad_norm": 0.21887381374835968, + "learning_rate": 2.331060759589786e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1812474900128933, + "grad_norm": 0.29336240887641907, + "learning_rate": 2.3296556019933073e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1816702246834776, + "grad_norm": 0.1798657476902008, + "learning_rate": 2.3282507393951824e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1820929593540614, + "grad_norm": 0.23030702769756317, + "learning_rate": 2.326846171950611e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1825156940246453, + "grad_norm": 0.1674424558877945, + "learning_rate": 2.3254418998147543e-05, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1829384286952296, + "grad_norm": 0.2829645872116089, + "learning_rate": 2.324037923142747e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1833611633658134, + "grad_norm": 0.2102688103914261, + "learning_rate": 2.3226342420896922e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1837838980363973, + "grad_norm": 0.17985348403453827, + "learning_rate": 2.321230856810653e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1842066327069816, + "grad_norm": 0.22827473282814026, + "learning_rate": 2.319827767460665e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1846293673775654, + "grad_norm": 0.18222789466381073, + "learning_rate": 2.3184249741947317e-05, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1850521020481493, + "grad_norm": 0.19650296866893768, + "learning_rate": 2.3170224771678223e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1854748367187335, + "grad_norm": 0.1727026104927063, + "learning_rate": 2.315620276534875e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1858975713893174, + "grad_norm": 0.2015468180179596, + "learning_rate": 2.31421837245079e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1863203060599017, + "grad_norm": 0.20580856502056122, + "learning_rate": 2.3128167650704408e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1867430407304855, + "grad_norm": 0.2576209604740143, + "learning_rate": 2.311415454548665e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1871657754010694, + "grad_norm": 0.2653101086616516, + "learning_rate": 2.310014441040271e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1875885100716537, + "grad_norm": 0.2958359122276306, + "learning_rate": 2.3086137247000273e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1880112447422375, + "grad_norm": 0.24787940084934235, + "learning_rate": 2.3072133056826762e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1884339794128214, + "grad_norm": 0.227633535861969, + "learning_rate": 2.305813184142926e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1888567140834057, + "grad_norm": 0.21073941886425018, + "learning_rate": 2.3044133602354472e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1892794487539895, + "grad_norm": 0.21271952986717224, + "learning_rate": 2.3030138341148844e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.189702183424574, + "grad_norm": 0.17835918068885803, + "learning_rate": 2.3016146059358447e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1901249180951576, + "grad_norm": 0.15672892332077026, + "learning_rate": 2.300215675852904e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1905476527657415, + "grad_norm": 0.2696237862110138, + "learning_rate": 2.2988170440206054e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1909703874363258, + "grad_norm": 0.2003515660762787, + "learning_rate": 2.2974187105934598e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1913931221069096, + "grad_norm": 0.18966488540172577, + "learning_rate": 2.2960206757259405e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1918158567774935, + "grad_norm": 0.2523229122161865, + "learning_rate": 2.294622939572495e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1922385914480778, + "grad_norm": 0.2775736153125763, + "learning_rate": 2.2932255022875305e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1926613261186616, + "grad_norm": 0.21816430985927582, + "learning_rate": 2.2918283640254262e-05, + "loss": 0.3558, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1930840607892454, + "grad_norm": 0.17311809957027435, + "learning_rate": 2.290431524940527e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1935067954598297, + "grad_norm": 0.2539565861225128, + "learning_rate": 2.2890349851871444e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1939295301304136, + "grad_norm": 0.1811557412147522, + "learning_rate": 2.2876387449195573e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1943522648009974, + "grad_norm": 0.28116682171821594, + "learning_rate": 2.286242804292013e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1947749994715817, + "grad_norm": 0.18521112203598022, + "learning_rate": 2.2848471634587215e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1951977341421656, + "grad_norm": 0.19142591953277588, + "learning_rate": 2.283451822573861e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.19562046881275, + "grad_norm": 0.3012286126613617, + "learning_rate": 2.2820567817915783e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1960432034833337, + "grad_norm": 0.2270827293395996, + "learning_rate": 2.280662041265988e-05, + "loss": 0.3519, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1964659381539176, + "grad_norm": 0.182038813829422, + "learning_rate": 2.2792676011511686e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.196888672824502, + "grad_norm": 0.1988639533519745, + "learning_rate": 2.2778734616011672e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1973114074950857, + "grad_norm": 0.18083010613918304, + "learning_rate": 2.276479622769999e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1977341421656695, + "grad_norm": 0.14385035634040833, + "learning_rate": 2.275086084811641e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.198156876836254, + "grad_norm": 0.20611606538295746, + "learning_rate": 2.2736928478800412e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1985796115068377, + "grad_norm": 0.21027341485023499, + "learning_rate": 2.2722999121291154e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.199002346177422, + "grad_norm": 0.24175581336021423, + "learning_rate": 2.270907277712741e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.199425080848006, + "grad_norm": 0.2623407542705536, + "learning_rate": 2.2695149447847657e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.1998478155185897, + "grad_norm": 0.21010203659534454, + "learning_rate": 2.2681229134990047e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.200270550189174, + "grad_norm": 0.3131447434425354, + "learning_rate": 2.2667311840092375e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.200693284859758, + "grad_norm": 0.1692415177822113, + "learning_rate": 2.265339756469214e-05, + "loss": 0.3554, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2011160195303416, + "grad_norm": 0.1724984049797058, + "learning_rate": 2.2639486310326435e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.201538754200926, + "grad_norm": 0.19626504182815552, + "learning_rate": 2.262557807853209e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2019614888715098, + "grad_norm": 0.21230466663837433, + "learning_rate": 2.2611672870845567e-05, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2023842235420936, + "grad_norm": 0.2868083119392395, + "learning_rate": 2.2597770688803026e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.202806958212678, + "grad_norm": 0.20464079082012177, + "learning_rate": 2.258387153394024e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2032296928832618, + "grad_norm": 0.1907915621995926, + "learning_rate": 2.2569975407792676e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2036524275538456, + "grad_norm": 0.28336301445961, + "learning_rate": 2.2556082311895505e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.20407516222443, + "grad_norm": 0.20962515473365784, + "learning_rate": 2.2542192247783477e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2044978968950137, + "grad_norm": 0.20237600803375244, + "learning_rate": 2.2528305216991074e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.204920631565598, + "grad_norm": 0.2451973855495453, + "learning_rate": 2.2514421221052434e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.205343366236182, + "grad_norm": 0.28307631611824036, + "learning_rate": 2.250054026150134e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2057661009067657, + "grad_norm": 0.18291166424751282, + "learning_rate": 2.248666233987126e-05, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.20618883557735, + "grad_norm": 0.25217995047569275, + "learning_rate": 2.247278745769532e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.206611570247934, + "grad_norm": 0.19487479329109192, + "learning_rate": 2.2458915616506287e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2070343049185177, + "grad_norm": 0.38670679926872253, + "learning_rate": 2.2445046817836635e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.207457039589102, + "grad_norm": 0.20925308763980865, + "learning_rate": 2.2431181063218444e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.207879774259686, + "grad_norm": 0.221369668841362, + "learning_rate": 2.2417318354183516e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.20830250893027, + "grad_norm": 0.2626230716705322, + "learning_rate": 2.240345869226328e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.208725243600854, + "grad_norm": 0.17251461744308472, + "learning_rate": 2.2389602078988853e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.209147978271438, + "grad_norm": 0.1891750991344452, + "learning_rate": 2.2375748515891005e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 64990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.209570712942022, + "grad_norm": 0.23984849452972412, + "learning_rate": 2.2361898004500176e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.209993447612606, + "grad_norm": 0.24092990159988403, + "learning_rate": 2.2348050546346443e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.21041618228319, + "grad_norm": 0.17973807454109192, + "learning_rate": 2.233420614295955e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.210838916953774, + "grad_norm": 0.26750096678733826, + "learning_rate": 2.232036479586893e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.211261651624358, + "grad_norm": 0.2986236810684204, + "learning_rate": 2.230652650660367e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.211684386294942, + "grad_norm": 0.23074142634868622, + "learning_rate": 2.2292691276692507e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.212107120965526, + "grad_norm": 0.20931626856327057, + "learning_rate": 2.2278859107663853e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.21252985563611, + "grad_norm": 0.1855851113796234, + "learning_rate": 2.226503000104579e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.212952590306694, + "grad_norm": 0.26369303464889526, + "learning_rate": 2.225120395836601e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.213375324977278, + "grad_norm": 0.2239847183227539, + "learning_rate": 2.2237380981151924e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.213798059647862, + "grad_norm": 0.32002782821655273, + "learning_rate": 2.2223561070930605e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.214220794318446, + "grad_norm": 0.21931292116641998, + "learning_rate": 2.2209744229228724e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.21464352898903, + "grad_norm": 0.2810089886188507, + "learning_rate": 2.2195930457572684e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.215066263659614, + "grad_norm": 0.16255128383636475, + "learning_rate": 2.2182119757488508e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.215488998330198, + "grad_norm": 0.1790177971124649, + "learning_rate": 2.21683121305019e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.215911733000782, + "grad_norm": 0.29671621322631836, + "learning_rate": 2.2154507578138227e-05, + "loss": 0.3694, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.216334467671366, + "grad_norm": 0.16204893589019775, + "learning_rate": 2.214070610192248e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.21675720234195, + "grad_norm": 0.15738727152347565, + "learning_rate": 2.212690770337935e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.217179937012534, + "grad_norm": 0.2200344353914261, + "learning_rate": 2.2113112384033174e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2176026716831183, + "grad_norm": 0.2544606924057007, + "learning_rate": 2.2099320145407966e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.218025406353702, + "grad_norm": 0.1919236183166504, + "learning_rate": 2.208553098902734e-05, + "loss": 0.355, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.218448141024286, + "grad_norm": 0.19398672878742218, + "learning_rate": 2.2071744916414644e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2188708756948703, + "grad_norm": 0.23807166516780853, + "learning_rate": 2.2057961929092863e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.219293610365454, + "grad_norm": 0.18586760759353638, + "learning_rate": 2.2044182028584593e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.219716345036038, + "grad_norm": 0.2614770829677582, + "learning_rate": 2.2030405216412146e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2201390797066223, + "grad_norm": 0.18639369308948517, + "learning_rate": 2.2016631494097483e-05, + "loss": 0.3556, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.220561814377206, + "grad_norm": 0.3090548515319824, + "learning_rate": 2.2002860863162213e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.22098454904779, + "grad_norm": 0.2526656985282898, + "learning_rate": 2.1989093325127597e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2214072837183743, + "grad_norm": 0.22322870790958405, + "learning_rate": 2.1975328881514584e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.221830018388958, + "grad_norm": 0.171277716755867, + "learning_rate": 2.1961567533843724e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.222252753059542, + "grad_norm": 0.2495175302028656, + "learning_rate": 2.1947809283635306e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2226754877301262, + "grad_norm": 0.24383747577667236, + "learning_rate": 2.193405413240918e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.22309822240071, + "grad_norm": 0.1773650050163269, + "learning_rate": 2.192030208168494e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2235209570712944, + "grad_norm": 0.22879008948802948, + "learning_rate": 2.190655313298179e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2239436917418782, + "grad_norm": 0.25343480706214905, + "learning_rate": 2.1892807287818608e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.224366426412462, + "grad_norm": 0.27384164929389954, + "learning_rate": 2.187906454771393e-05, + "loss": 0.349, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2247891610830464, + "grad_norm": 0.1806991696357727, + "learning_rate": 2.1865324914185954e-05, + "loss": 0.3551, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.22521189575363, + "grad_norm": 0.21197561919689178, + "learning_rate": 2.1851588388752513e-05, + "loss": 0.3548, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.225634630424214, + "grad_norm": 0.15884321928024292, + "learning_rate": 2.1837854972931087e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2260573650947983, + "grad_norm": 0.24516811966896057, + "learning_rate": 2.182412466823886e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.226480099765382, + "grad_norm": 0.3824820816516876, + "learning_rate": 2.1810397476192635e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2269028344359665, + "grad_norm": 0.23370471596717834, + "learning_rate": 2.1796673398308892e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2273255691065503, + "grad_norm": 0.2398347705602646, + "learning_rate": 2.178295243610376e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.227748303777134, + "grad_norm": 0.2485511600971222, + "learning_rate": 2.176923459109301e-05, + "loss": 0.3682, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2281710384477185, + "grad_norm": 0.29494965076446533, + "learning_rate": 2.1755519864792105e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2285937731183023, + "grad_norm": 0.19384604692459106, + "learning_rate": 2.17418082587161e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.229016507788886, + "grad_norm": 0.15942588448524475, + "learning_rate": 2.1728099774379784e-05, + "loss": 0.3535, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2294392424594704, + "grad_norm": 0.23287732899188995, + "learning_rate": 2.1714394413297523e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2298619771300543, + "grad_norm": 0.2848256528377533, + "learning_rate": 2.1700692176983396e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.230284711800638, + "grad_norm": 0.33254820108413696, + "learning_rate": 2.1686993066951112e-05, + "loss": 0.3544, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2307074464712224, + "grad_norm": 0.3681736886501312, + "learning_rate": 2.1673297084714038e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2311301811418063, + "grad_norm": 0.16320760548114777, + "learning_rate": 2.1659604231785226e-05, + "loss": 0.355, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.23155291581239, + "grad_norm": 0.2726060450077057, + "learning_rate": 2.1645914509677306e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2319756504829744, + "grad_norm": 0.1477041393518448, + "learning_rate": 2.1632227919902633e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2323983851535583, + "grad_norm": 0.2558543086051941, + "learning_rate": 2.1618544463973183e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2328211198241426, + "grad_norm": 0.21741078794002533, + "learning_rate": 2.1604864143400634e-05, + "loss": 0.3547, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2332438544947264, + "grad_norm": 0.21343247592449188, + "learning_rate": 2.159118695969622e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2336665891653102, + "grad_norm": 0.31444990634918213, + "learning_rate": 2.157751291437092e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2340893238358945, + "grad_norm": 0.20797255635261536, + "learning_rate": 2.156384200893532e-05, + "loss": 0.3558, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2345120585064784, + "grad_norm": 0.17786002159118652, + "learning_rate": 2.1550174244899707e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2349347931770622, + "grad_norm": 0.16894139349460602, + "learning_rate": 2.153650962377394e-05, + "loss": 0.3545, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2353575278476465, + "grad_norm": Infinity, + "learning_rate": 2.152421415319634e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2357802625182304, + "grad_norm": 0.20807430148124695, + "learning_rate": 2.1510555507757885e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2362029971888147, + "grad_norm": 0.17182670533657074, + "learning_rate": 2.1496900009606035e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2366257318593985, + "grad_norm": 0.17103730142116547, + "learning_rate": 2.1483247660249368e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2370484665299823, + "grad_norm": 0.22690550982952118, + "learning_rate": 2.1469598461196077e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2374712012005666, + "grad_norm": 0.21815435588359833, + "learning_rate": 2.145595241395403e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2378939358711505, + "grad_norm": 0.24112865328788757, + "learning_rate": 2.1442309520030722e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2383166705417343, + "grad_norm": 0.1633225381374359, + "learning_rate": 2.1428669780933338e-05, + "loss": 0.3546, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2387394052123186, + "grad_norm": 0.2134915292263031, + "learning_rate": 2.1415033198168655e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2391621398829025, + "grad_norm": 0.1971406787633896, + "learning_rate": 2.1401399773243132e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2395848745534863, + "grad_norm": 0.19884061813354492, + "learning_rate": 2.1387769507662892e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2400076092240706, + "grad_norm": 0.1942957639694214, + "learning_rate": 2.1374142402933696e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2404303438946545, + "grad_norm": 0.24055728316307068, + "learning_rate": 2.1360518460560957e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2408530785652383, + "grad_norm": 0.17313097417354584, + "learning_rate": 2.134689768204975e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2412758132358226, + "grad_norm": 0.22388648986816406, + "learning_rate": 2.133328006890478e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2416985479064064, + "grad_norm": 0.21901261806488037, + "learning_rate": 2.1319665622630436e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2421212825769907, + "grad_norm": 0.213637113571167, + "learning_rate": 2.130605434473069e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2425440172475746, + "grad_norm": 0.22769632935523987, + "learning_rate": 2.129244623670925e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2429667519181584, + "grad_norm": 0.24692030251026154, + "learning_rate": 2.127884130006939e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2433894865887427, + "grad_norm": 0.26019978523254395, + "learning_rate": 2.12652395363141e-05, + "loss": 0.3674, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2438122212593266, + "grad_norm": 0.22568872570991516, + "learning_rate": 2.1251640946945994e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2442349559299104, + "grad_norm": 0.18657371401786804, + "learning_rate": 2.1238045533467326e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2446576906004947, + "grad_norm": 0.1857060343027115, + "learning_rate": 2.122445329738004e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2450804252710785, + "grad_norm": 0.25829562544822693, + "learning_rate": 2.1210864240185647e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.245503159941663, + "grad_norm": 0.21428033709526062, + "learning_rate": 2.1197278363385385e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2459258946122467, + "grad_norm": 0.36211466789245605, + "learning_rate": 2.1183695668480114e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2463486292828305, + "grad_norm": 0.18615303933620453, + "learning_rate": 2.117011615697036e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.246771363953415, + "grad_norm": 0.18437841534614563, + "learning_rate": 2.1156539830356237e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2471940986239987, + "grad_norm": 0.14482353627681732, + "learning_rate": 2.114296669013757e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2476168332945825, + "grad_norm": 0.3730587065219879, + "learning_rate": 2.1129396737813816e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.248039567965167, + "grad_norm": 0.1994226723909378, + "learning_rate": 2.1115829974884095e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2484623026357506, + "grad_norm": 0.20917803049087524, + "learning_rate": 2.1102266402847104e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2488850373063345, + "grad_norm": 0.19752323627471924, + "learning_rate": 2.1088706023201273e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.249307771976919, + "grad_norm": 0.1701521873474121, + "learning_rate": 2.107514883744463e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2497305066475026, + "grad_norm": 0.23333558440208435, + "learning_rate": 2.1061594847074874e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2501532413180865, + "grad_norm": 0.20358234643936157, + "learning_rate": 2.104804405358936e-05, + "loss": 0.3544, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2505759759886708, + "grad_norm": 0.16525663435459137, + "learning_rate": 2.1034496458485032e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2509987106592546, + "grad_norm": 0.2150331437587738, + "learning_rate": 2.1020952063258558e-05, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.251421445329839, + "grad_norm": 0.2332477569580078, + "learning_rate": 2.100741086940618e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 65990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2518441800004227, + "grad_norm": 0.26314711570739746, + "learning_rate": 2.0993872878423838e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2522669146710066, + "grad_norm": 0.15695558488368988, + "learning_rate": 2.0980338091807096e-05, + "loss": 0.3675, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.252689649341591, + "grad_norm": 0.27743402123451233, + "learning_rate": 2.096680651105118e-05, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2531123840121747, + "grad_norm": 0.17234009504318237, + "learning_rate": 2.095327813765094e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2535351186827586, + "grad_norm": 0.1690441071987152, + "learning_rate": 2.0939752973100907e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.253957853353343, + "grad_norm": 0.30729278922080994, + "learning_rate": 2.09262310188952e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2543805880239267, + "grad_norm": 0.1690475195646286, + "learning_rate": 2.0912712276527645e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.254803322694511, + "grad_norm": 0.27648332715034485, + "learning_rate": 2.0899196747491655e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.255226057365095, + "grad_norm": 0.23481617867946625, + "learning_rate": 2.0885684433280333e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2556487920356787, + "grad_norm": 0.22734366357326508, + "learning_rate": 2.0872175335386414e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.256071526706263, + "grad_norm": 0.261095255613327, + "learning_rate": 2.0858669455302275e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.256494261376847, + "grad_norm": 0.22888945043087006, + "learning_rate": 2.0845166794519932e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2569169960474307, + "grad_norm": 0.20862267911434174, + "learning_rate": 2.0831667354531077e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.257339730718015, + "grad_norm": 0.19677968323230743, + "learning_rate": 2.081817113682698e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.257762465388599, + "grad_norm": 0.18612295389175415, + "learning_rate": 2.0804678142898636e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2581852000591827, + "grad_norm": 0.19149185717105865, + "learning_rate": 2.0791188374236604e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.258607934729767, + "grad_norm": 0.27917420864105225, + "learning_rate": 2.077770183233114e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.259030669400351, + "grad_norm": 0.2190985381603241, + "learning_rate": 2.076421851867214e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2594534040709346, + "grad_norm": 0.17605677247047424, + "learning_rate": 2.075073843474912e-05, + "loss": 0.3528, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.259876138741519, + "grad_norm": 0.21059811115264893, + "learning_rate": 2.0737261582051286e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.260298873412103, + "grad_norm": 0.2525620460510254, + "learning_rate": 2.0723787962067405e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.260721608082687, + "grad_norm": 0.20417732000350952, + "learning_rate": 2.0710317576285954e-05, + "loss": 0.3528, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.261144342753271, + "grad_norm": 0.19986963272094727, + "learning_rate": 2.0696850426195037e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2615670774238548, + "grad_norm": 0.17978113889694214, + "learning_rate": 2.0683386513282414e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.261989812094439, + "grad_norm": 0.23795287311077118, + "learning_rate": 2.0669925839035437e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.262412546765023, + "grad_norm": 0.22721420228481293, + "learning_rate": 2.065646840494115e-05, + "loss": 0.3542, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2628352814356067, + "grad_norm": 0.18059369921684265, + "learning_rate": 2.064301421248622e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.263258016106191, + "grad_norm": 0.2753121554851532, + "learning_rate": 2.0629563263156987e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.263680750776775, + "grad_norm": 0.14069613814353943, + "learning_rate": 2.0616115558439357e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.264103485447359, + "grad_norm": 0.21681420505046844, + "learning_rate": 2.060267109981895e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.264526220117943, + "grad_norm": 0.18592292070388794, + "learning_rate": 2.0589229888781004e-05, + "loss": 0.3527, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.264948954788527, + "grad_norm": 0.3205487132072449, + "learning_rate": 2.0575791926810385e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.265371689459111, + "grad_norm": 0.23633421957492828, + "learning_rate": 2.0562357215391643e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.265794424129695, + "grad_norm": 0.1953718066215515, + "learning_rate": 2.0548925756008897e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.266217158800279, + "grad_norm": 0.2176944613456726, + "learning_rate": 2.0535497550145984e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.266639893470863, + "grad_norm": 0.20704391598701477, + "learning_rate": 2.0522072599286308e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.267062628141447, + "grad_norm": 0.23011736571788788, + "learning_rate": 2.0508650904912967e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.267485362812031, + "grad_norm": 0.23018226027488708, + "learning_rate": 2.0495232468508686e-05, + "loss": 0.3543, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.267908097482615, + "grad_norm": 0.1497071385383606, + "learning_rate": 2.0481817291555826e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.268330832153199, + "grad_norm": 0.20116956532001495, + "learning_rate": 2.0468405375536386e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.268753566823783, + "grad_norm": 0.2563815712928772, + "learning_rate": 2.0454996721932034e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.269176301494367, + "grad_norm": 0.20557855069637299, + "learning_rate": 2.0441591332224008e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.269599036164951, + "grad_norm": 0.16904209554195404, + "learning_rate": 2.042818920789326e-05, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2700217708355352, + "grad_norm": 0.22188834846019745, + "learning_rate": 2.0414790350420328e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.270444505506119, + "grad_norm": 0.30206531286239624, + "learning_rate": 2.0401394761285415e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.270867240176703, + "grad_norm": 0.23735411465168, + "learning_rate": 2.0388002441968362e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2712899748472872, + "grad_norm": 0.2272021472454071, + "learning_rate": 2.037461339394865e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.271712709517871, + "grad_norm": 0.1826515942811966, + "learning_rate": 2.0361227618705392e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.272135444188455, + "grad_norm": 0.19255253672599792, + "learning_rate": 2.034784511771736e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.272558178859039, + "grad_norm": 0.21753482520580292, + "learning_rate": 2.033446589246293e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.272980913529623, + "grad_norm": 0.21130597591400146, + "learning_rate": 2.0321089944420114e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2734036482002073, + "grad_norm": 0.32547110319137573, + "learning_rate": 2.030771727506659e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.273826382870791, + "grad_norm": 0.23365657031536102, + "learning_rate": 2.0294347885879672e-05, + "loss": 0.3548, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.274249117541375, + "grad_norm": 0.20129454135894775, + "learning_rate": 2.02809817783363e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2746718522119593, + "grad_norm": 0.22024841606616974, + "learning_rate": 2.026761895391306e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.275094586882543, + "grad_norm": 0.24736593663692474, + "learning_rate": 2.0254259414086186e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.275517321553127, + "grad_norm": 0.1979711949825287, + "learning_rate": 2.0240903160331488e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2759400562237113, + "grad_norm": 0.21265992522239685, + "learning_rate": 2.022755019412449e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.276362790894295, + "grad_norm": 0.2093835175037384, + "learning_rate": 2.0214200516940335e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.276785525564879, + "grad_norm": 0.24091441929340363, + "learning_rate": 2.0200854130253748e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2772082602354633, + "grad_norm": 0.15537486970424652, + "learning_rate": 2.0187511035539154e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.277630994906047, + "grad_norm": 0.21977253258228302, + "learning_rate": 2.0174171234270595e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.278053729576631, + "grad_norm": 0.1899709552526474, + "learning_rate": 2.0160834727921736e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2784764642472153, + "grad_norm": 0.287727952003479, + "learning_rate": 2.014750151796591e-05, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.278899198917799, + "grad_norm": 0.2010948210954666, + "learning_rate": 2.0134171605876035e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2793219335883834, + "grad_norm": 0.2224085032939911, + "learning_rate": 2.01208449931247e-05, + "loss": 0.3525, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2797446682589673, + "grad_norm": 0.19964423775672913, + "learning_rate": 2.0107521681184134e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.280167402929551, + "grad_norm": 0.17807653546333313, + "learning_rate": 2.0094201671526197e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2805901376001354, + "grad_norm": 0.2145770639181137, + "learning_rate": 2.008088496562235e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2810128722707192, + "grad_norm": 0.23260942101478577, + "learning_rate": 2.0067571564943733e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2814356069413035, + "grad_norm": 0.18603193759918213, + "learning_rate": 2.0054261470961115e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2818583416118874, + "grad_norm": 0.17993833124637604, + "learning_rate": 2.0040954685144864e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2822810762824712, + "grad_norm": 0.18769805133342743, + "learning_rate": 2.002765120896502e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2827038109530555, + "grad_norm": 0.17234373092651367, + "learning_rate": 2.0014351043891244e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2831265456236394, + "grad_norm": 0.2513400912284851, + "learning_rate": 2.0001054191392832e-05, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.283549280294223, + "grad_norm": 0.15753641724586487, + "learning_rate": 1.998776065293872e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2839720149648075, + "grad_norm": 0.2505359947681427, + "learning_rate": 1.9974470429997483e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2843947496353914, + "grad_norm": 0.27001240849494934, + "learning_rate": 1.9961183524037287e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.284817484305975, + "grad_norm": 0.28128719329833984, + "learning_rate": 1.9947899936525993e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2852402189765595, + "grad_norm": 0.28049904108047485, + "learning_rate": 1.9934619668931042e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2856629536471433, + "grad_norm": 0.22026656568050385, + "learning_rate": 1.992134272271954e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.286085688317727, + "grad_norm": 0.27847158908843994, + "learning_rate": 1.9908069099358224e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2865084229883115, + "grad_norm": 0.17977125942707062, + "learning_rate": 1.9894798800313452e-05, + "loss": 0.3561, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2869311576588953, + "grad_norm": 0.2042986899614334, + "learning_rate": 1.9881531827051224e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.287353892329479, + "grad_norm": 0.2274983525276184, + "learning_rate": 1.9868268181037185e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2877766270000635, + "grad_norm": 0.20208628475666046, + "learning_rate": 1.9855007863736584e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2881993616706473, + "grad_norm": 0.25798895955085754, + "learning_rate": 1.9841750876614296e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2886220963412316, + "grad_norm": 0.22413037717342377, + "learning_rate": 1.982849722113486e-05, + "loss": 0.3522, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2890448310118154, + "grad_norm": 0.18693648278713226, + "learning_rate": 1.9815246898762448e-05, + "loss": 0.3742, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2894675656823993, + "grad_norm": 0.18733637034893036, + "learning_rate": 1.980199991096083e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2898903003529836, + "grad_norm": 0.21347445249557495, + "learning_rate": 1.9788756259193436e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2903130350235674, + "grad_norm": 0.17256920039653778, + "learning_rate": 1.9775515944923324e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2907357696941517, + "grad_norm": 0.171157106757164, + "learning_rate": 1.976227896961319e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2911585043647356, + "grad_norm": 0.20901134610176086, + "learning_rate": 1.974904533472532e-05, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2915812390353194, + "grad_norm": 0.18791645765304565, + "learning_rate": 1.9735815041721688e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2920039737059037, + "grad_norm": 0.22797220945358276, + "learning_rate": 1.972258809206385e-05, + "loss": 0.3541, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2924267083764875, + "grad_norm": 0.21422718465328217, + "learning_rate": 1.9709364487213012e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2928494430470714, + "grad_norm": 0.17805731296539307, + "learning_rate": 1.969614422863002e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2932721777176557, + "grad_norm": 0.20640702545642853, + "learning_rate": 1.9682927317775352e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2936949123882395, + "grad_norm": 0.2737247049808502, + "learning_rate": 1.9669713756109115e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 66990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2941176470588234, + "grad_norm": 0.2071264535188675, + "learning_rate": 1.9656503545091003e-05, + "loss": 0.3507, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2945403817294077, + "grad_norm": 0.15824854373931885, + "learning_rate": 1.9643296686180396e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2949631163999915, + "grad_norm": 0.23434920608997345, + "learning_rate": 1.9630093180836284e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2953858510705754, + "grad_norm": 0.19993902742862701, + "learning_rate": 1.96168930305173e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2958085857411596, + "grad_norm": 0.22106236219406128, + "learning_rate": 1.9603696236681645e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2962313204117435, + "grad_norm": 0.2525586783885956, + "learning_rate": 1.959050280078723e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2966540550823273, + "grad_norm": 0.22018197178840637, + "learning_rate": 1.9577312724291557e-05, + "loss": 0.3508, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2970767897529116, + "grad_norm": 0.20209114253520966, + "learning_rate": 1.9564126008651773e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2974995244234955, + "grad_norm": 0.17663875222206116, + "learning_rate": 1.955094265532461e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2979222590940798, + "grad_norm": 0.22747677564620972, + "learning_rate": 1.9537762665766474e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2983449937646636, + "grad_norm": 0.21222622692584991, + "learning_rate": 1.952458604143339e-05, + "loss": 0.3511, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2987677284352475, + "grad_norm": 0.26393312215805054, + "learning_rate": 1.9511412783781003e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2991904631058317, + "grad_norm": 0.1945323944091797, + "learning_rate": 1.9498242894264603e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.2996131977764156, + "grad_norm": 0.20660588145256042, + "learning_rate": 1.9485076374339067e-05, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.300035932447, + "grad_norm": 0.21813686192035675, + "learning_rate": 1.9471913225458955e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3004586671175837, + "grad_norm": 0.1822614073753357, + "learning_rate": 1.94587534490784e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3008814017881676, + "grad_norm": 0.30329304933547974, + "learning_rate": 1.9445597046651198e-05, + "loss": 0.3546, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.301304136458752, + "grad_norm": 0.19087570905685425, + "learning_rate": 1.943244401963077e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3017268711293357, + "grad_norm": 0.2028197944164276, + "learning_rate": 1.9419294369470153e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3021496057999196, + "grad_norm": 0.2806393504142761, + "learning_rate": 1.940614809762202e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.302572340470504, + "grad_norm": 0.24014447629451752, + "learning_rate": 1.9393005205538676e-05, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3029950751410877, + "grad_norm": 0.18057921528816223, + "learning_rate": 1.9379865694672016e-05, + "loss": 0.3552, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3034178098116715, + "grad_norm": 0.20656567811965942, + "learning_rate": 1.9366729566473624e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.303840544482256, + "grad_norm": 0.20736576616764069, + "learning_rate": 1.9353596822394632e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3042632791528397, + "grad_norm": 0.17709481716156006, + "learning_rate": 1.9340467463885863e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3046860138234235, + "grad_norm": 0.1994362324476242, + "learning_rate": 1.9327341492397748e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.305108748494008, + "grad_norm": 0.1885833591222763, + "learning_rate": 1.9314218909380333e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3055314831645917, + "grad_norm": 0.20651443302631378, + "learning_rate": 1.9301099716283293e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3059542178351755, + "grad_norm": 0.1935814917087555, + "learning_rate": 1.9287983914555963e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.30637695250576, + "grad_norm": 0.20812655985355377, + "learning_rate": 1.9274871505647226e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3067996871763436, + "grad_norm": 0.20793208479881287, + "learning_rate": 1.926176249100567e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.307222421846928, + "grad_norm": 0.2685999870300293, + "learning_rate": 1.9248656872079444e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.307645156517512, + "grad_norm": 0.1925891935825348, + "learning_rate": 1.9235554650316372e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3080678911880956, + "grad_norm": 0.2004387080669403, + "learning_rate": 1.9222455827163883e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.30849062585868, + "grad_norm": 0.2297150194644928, + "learning_rate": 1.920936040406902e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3089133605292638, + "grad_norm": 0.25030842423439026, + "learning_rate": 1.9196268382478493e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.309336095199848, + "grad_norm": 0.25033730268478394, + "learning_rate": 1.918317976383856e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.309758829870432, + "grad_norm": 0.15167899429798126, + "learning_rate": 1.9170094549595168e-05, + "loss": 0.3549, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3101815645410158, + "grad_norm": 0.1684025079011917, + "learning_rate": 1.9157012741193862e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3106042992116, + "grad_norm": 0.2002793699502945, + "learning_rate": 1.9143934340079843e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.311027033882184, + "grad_norm": 0.2787981927394867, + "learning_rate": 1.9130859347697865e-05, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3114497685527677, + "grad_norm": 0.21656908094882965, + "learning_rate": 1.911778776549237e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.311872503223352, + "grad_norm": 0.20335011184215546, + "learning_rate": 1.9104719594907406e-05, + "loss": 0.368, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.312295237893936, + "grad_norm": 0.26617929339408875, + "learning_rate": 1.909165483738665e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3127179725645197, + "grad_norm": 0.24792936444282532, + "learning_rate": 1.907859349437336e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.313140707235104, + "grad_norm": 0.16980113089084625, + "learning_rate": 1.9065535567310465e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.313563441905688, + "grad_norm": 0.1648082733154297, + "learning_rate": 1.905248105764051e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3139861765762717, + "grad_norm": 0.2667747139930725, + "learning_rate": 1.9039429966805637e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.314408911246856, + "grad_norm": 0.23860126733779907, + "learning_rate": 1.9026382296247658e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.31483164591744, + "grad_norm": 0.27367493510246277, + "learning_rate": 1.9013338047407936e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3152543805880237, + "grad_norm": 0.17341727018356323, + "learning_rate": 1.900029722172753e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.315677115258608, + "grad_norm": 0.24128469824790955, + "learning_rate": 1.8987259820647046e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.316099849929192, + "grad_norm": 0.19429367780685425, + "learning_rate": 1.897422584560678e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.316522584599776, + "grad_norm": 0.24155393242835999, + "learning_rate": 1.896119529804662e-05, + "loss": 0.3534, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.31694531927036, + "grad_norm": 0.23655523359775543, + "learning_rate": 1.8948168179406066e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.317368053940944, + "grad_norm": 0.22543518245220184, + "learning_rate": 1.8935144491124263e-05, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.317790788611528, + "grad_norm": 0.3011503219604492, + "learning_rate": 1.892212423463997e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.318213523282112, + "grad_norm": 0.1756749451160431, + "learning_rate": 1.890910741139154e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3186362579526962, + "grad_norm": 0.2203018218278885, + "learning_rate": 1.889609402281699e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.31905899262328, + "grad_norm": 0.256241112947464, + "learning_rate": 1.8883084070353908e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.319481727293864, + "grad_norm": 0.17175717651844025, + "learning_rate": 1.8870077555439546e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.319904461964448, + "grad_norm": 0.18630462884902954, + "learning_rate": 1.885707447951076e-05, + "loss": 0.3548, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.320327196635032, + "grad_norm": 0.25386932492256165, + "learning_rate": 1.8844074844004022e-05, + "loss": 0.3558, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.320749931305616, + "grad_norm": 0.24250704050064087, + "learning_rate": 1.8831078650355434e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3211726659762, + "grad_norm": 0.18480949103832245, + "learning_rate": 1.8818085900000727e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.321595400646784, + "grad_norm": 0.2101181298494339, + "learning_rate": 1.88050965943752e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.322018135317368, + "grad_norm": 0.30468687415122986, + "learning_rate": 1.8792110734913842e-05, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.322440869987952, + "grad_norm": 0.23922991752624512, + "learning_rate": 1.8779128323051198e-05, + "loss": 0.3537, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.322863604658536, + "grad_norm": 0.19575373828411102, + "learning_rate": 1.876614936022147e-05, + "loss": 0.365, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.32328633932912, + "grad_norm": 0.2145448625087738, + "learning_rate": 1.875317384785848e-05, + "loss": 0.3551, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.323709073999704, + "grad_norm": 0.17903009057044983, + "learning_rate": 1.874020178739565e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.324131808670288, + "grad_norm": 0.30392441153526306, + "learning_rate": 1.8727233180266056e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.324554543340872, + "grad_norm": 0.21006184816360474, + "learning_rate": 1.8714268027902326e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.324977278011456, + "grad_norm": 0.18306757509708405, + "learning_rate": 1.8701306331736767e-05, + "loss": 0.3511, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.32540001268204, + "grad_norm": 0.264092355966568, + "learning_rate": 1.868834809320128e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3258227473526243, + "grad_norm": 0.1961873322725296, + "learning_rate": 1.867668863597623e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.326245482023208, + "grad_norm": 0.17757007479667664, + "learning_rate": 1.866373697088142e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.326668216693792, + "grad_norm": 0.3154357671737671, + "learning_rate": 1.8650788767567035e-05, + "loss": 0.3546, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3270909513643763, + "grad_norm": 0.3019065856933594, + "learning_rate": 1.863784402746351e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.32751368603496, + "grad_norm": 0.22180438041687012, + "learning_rate": 1.8624902752000866e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3279364207055444, + "grad_norm": 0.2740565538406372, + "learning_rate": 1.8611964942608773e-05, + "loss": 0.3548, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3283591553761283, + "grad_norm": 0.2540155053138733, + "learning_rate": 1.8599030600716455e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.328781890046712, + "grad_norm": 0.32684680819511414, + "learning_rate": 1.858609972775284e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3292046247172964, + "grad_norm": 0.20562338829040527, + "learning_rate": 1.8573172325146387e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3296273593878802, + "grad_norm": 0.1517927348613739, + "learning_rate": 1.8560248394325235e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.330050094058464, + "grad_norm": 0.2422783076763153, + "learning_rate": 1.8547327936717106e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3304728287290484, + "grad_norm": 0.21732410788536072, + "learning_rate": 1.8534410953749365e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.330895563399632, + "grad_norm": 0.20120130479335785, + "learning_rate": 1.8521497446848962e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.331318298070216, + "grad_norm": 0.21687577664852142, + "learning_rate": 1.8508587417442487e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3317410327408004, + "grad_norm": 0.21327604353427887, + "learning_rate": 1.849568086695615e-05, + "loss": 0.355, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.332163767411384, + "grad_norm": 0.24731090664863586, + "learning_rate": 1.8482777796815754e-05, + "loss": 0.3553, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.332586502081968, + "grad_norm": 0.2190176248550415, + "learning_rate": 1.84698782084467e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3330092367525523, + "grad_norm": 0.18555091321468353, + "learning_rate": 1.8456982103274052e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.333431971423136, + "grad_norm": 0.19202323257923126, + "learning_rate": 1.8444089482722476e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.33385470609372, + "grad_norm": 0.23165397346019745, + "learning_rate": 1.8431200348216238e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3342774407643043, + "grad_norm": 0.1776096671819687, + "learning_rate": 1.8418314701179225e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.334700175434888, + "grad_norm": 0.22862765192985535, + "learning_rate": 1.8405432543034963e-05, + "loss": 0.351, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3351229101054725, + "grad_norm": 0.35069766640663147, + "learning_rate": 1.8392553875206536e-05, + "loss": 0.3686, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3355456447760563, + "grad_norm": 0.21016404032707214, + "learning_rate": 1.8379678699116708e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.33596837944664, + "grad_norm": 0.22823452949523926, + "learning_rate": 1.8366807016187797e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 67990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3363911141172244, + "grad_norm": 0.2862728536128998, + "learning_rate": 1.8353938827841777e-05, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3368138487878083, + "grad_norm": 0.13736116886138916, + "learning_rate": 1.8341074135500218e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3372365834583926, + "grad_norm": 0.20671646296977997, + "learning_rate": 1.8328212940584316e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3376593181289764, + "grad_norm": 0.17703862488269806, + "learning_rate": 1.8315355244514865e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3380820527995603, + "grad_norm": 0.22053460776805878, + "learning_rate": 1.830250104871231e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3385047874701446, + "grad_norm": 0.17473354935646057, + "learning_rate": 1.8289650354596637e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3389275221407284, + "grad_norm": 0.20849648118019104, + "learning_rate": 1.827680316358751e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3393502568113123, + "grad_norm": 0.18515297770500183, + "learning_rate": 1.8263959477104194e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3397729914818965, + "grad_norm": 0.21152333915233612, + "learning_rate": 1.8251119296565528e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3401957261524804, + "grad_norm": 0.2877909541130066, + "learning_rate": 1.8238282623390013e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3406184608230642, + "grad_norm": 0.1652747392654419, + "learning_rate": 1.8225449458995737e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3410411954936485, + "grad_norm": 0.27154576778411865, + "learning_rate": 1.8212619804800424e-05, + "loss": 0.3552, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3414639301642324, + "grad_norm": 0.21860557794570923, + "learning_rate": 1.819979366222136e-05, + "loss": 0.3556, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3418866648348162, + "grad_norm": 0.22660455107688904, + "learning_rate": 1.8186971032675486e-05, + "loss": 0.3522, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3423093995054005, + "grad_norm": 0.21859806776046753, + "learning_rate": 1.8174151917579352e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3427321341759844, + "grad_norm": 0.23179854452610016, + "learning_rate": 1.816133631834911e-05, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.343154868846568, + "grad_norm": 0.19676589965820312, + "learning_rate": 1.814852423640054e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3435776035171525, + "grad_norm": 0.2793646454811096, + "learning_rate": 1.813571567314898e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3440003381877363, + "grad_norm": 0.27027058601379395, + "learning_rate": 1.8122910630009438e-05, + "loss": 0.3505, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3444230728583206, + "grad_norm": 0.18815086781978607, + "learning_rate": 1.8110109108396533e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3448458075289045, + "grad_norm": 0.22260233759880066, + "learning_rate": 1.8097311109724442e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3452685421994883, + "grad_norm": 0.1752859503030777, + "learning_rate": 1.8084516635407e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3456912768700726, + "grad_norm": 0.288114458322525, + "learning_rate": 1.8071725686857638e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3461140115406565, + "grad_norm": 0.1891327202320099, + "learning_rate": 1.80589382654894e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3465367462112408, + "grad_norm": 0.3038761615753174, + "learning_rate": 1.8046154372714935e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3469594808818246, + "grad_norm": 0.22679300606250763, + "learning_rate": 1.803337400994653e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3473822155524084, + "grad_norm": 0.2679145038127899, + "learning_rate": 1.8020597178596026e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3478049502229927, + "grad_norm": 0.24609996378421783, + "learning_rate": 1.8007823880074903e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3482276848935766, + "grad_norm": 0.24093934893608093, + "learning_rate": 1.799505411579427e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3486504195641604, + "grad_norm": 0.19314289093017578, + "learning_rate": 1.7982287887164816e-05, + "loss": 0.3523, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3490731542347447, + "grad_norm": 0.21846865117549896, + "learning_rate": 1.7969525195596865e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3494958889053286, + "grad_norm": 0.20532992482185364, + "learning_rate": 1.795676604250033e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3499186235759124, + "grad_norm": 0.2233097404241562, + "learning_rate": 1.7944010429284758e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3503413582464967, + "grad_norm": 0.19043664634227753, + "learning_rate": 1.7931258357359254e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3507640929170805, + "grad_norm": 0.18143542110919952, + "learning_rate": 1.7918509828132602e-05, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3511868275876644, + "grad_norm": 0.21171468496322632, + "learning_rate": 1.7905764843013124e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3516095622582487, + "grad_norm": 0.15692788362503052, + "learning_rate": 1.78930234034088e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3520322969288325, + "grad_norm": 0.21762503683567047, + "learning_rate": 1.7880285510727197e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3524550315994164, + "grad_norm": 0.21678996086120605, + "learning_rate": 1.78675511663755e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3528777662700007, + "grad_norm": 0.2533007264137268, + "learning_rate": 1.7854820371760506e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3533005009405845, + "grad_norm": 0.2663845121860504, + "learning_rate": 1.7842093128288616e-05, + "loss": 0.3525, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.353723235611169, + "grad_norm": 0.22550082206726074, + "learning_rate": 1.7829369437365805e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3541459702817527, + "grad_norm": 0.2599629759788513, + "learning_rate": 1.7816649300397703e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3545687049523365, + "grad_norm": 0.23063622415065765, + "learning_rate": 1.7803932718789552e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.354991439622921, + "grad_norm": 0.23269593715667725, + "learning_rate": 1.779121969394613e-05, + "loss": 0.3548, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3554141742935046, + "grad_norm": 0.25939038395881653, + "learning_rate": 1.77785102272719e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.355836908964089, + "grad_norm": 0.2589537501335144, + "learning_rate": 1.7765804320170898e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3562596436346728, + "grad_norm": 0.17884038388729095, + "learning_rate": 1.775310197404679e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3566823783052566, + "grad_norm": 0.2360350340604782, + "learning_rate": 1.7740403190302796e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.357105112975841, + "grad_norm": 0.19658054411411285, + "learning_rate": 1.772770797034179e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3575278476464248, + "grad_norm": 0.2439577728509903, + "learning_rate": 1.7715016315566234e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3579505823170086, + "grad_norm": 0.21365061402320862, + "learning_rate": 1.7702328227378217e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.358373316987593, + "grad_norm": 0.22845935821533203, + "learning_rate": 1.768964370717942e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3587960516581767, + "grad_norm": 0.21431277692317963, + "learning_rate": 1.76769627563711e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3592187863287606, + "grad_norm": 0.2520793080329895, + "learning_rate": 1.7664285376354166e-05, + "loss": 0.3543, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.359641520999345, + "grad_norm": 0.2182544767856598, + "learning_rate": 1.765161156852913e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3600642556699287, + "grad_norm": 0.24184976518154144, + "learning_rate": 1.763894133429605e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3604869903405126, + "grad_norm": 0.2927775979042053, + "learning_rate": 1.762627467505466e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.360909725011097, + "grad_norm": 0.21297460794448853, + "learning_rate": 1.761361159220427e-05, + "loss": 0.3503, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3613324596816807, + "grad_norm": 0.26112136244773865, + "learning_rate": 1.7600952087143795e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3617551943522646, + "grad_norm": 0.2550103962421417, + "learning_rate": 1.758829616127175e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.362177929022849, + "grad_norm": 0.1973695605993271, + "learning_rate": 1.7575643815986292e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3626006636934327, + "grad_norm": 0.2026260942220688, + "learning_rate": 1.756299505268512e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.363023398364017, + "grad_norm": 0.18302489817142487, + "learning_rate": 1.755034987276556e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.363446133034601, + "grad_norm": 0.23188425600528717, + "learning_rate": 1.753770827762456e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3638688677051847, + "grad_norm": 0.2827504575252533, + "learning_rate": 1.7525070268658672e-05, + "loss": 0.3668, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.364291602375769, + "grad_norm": 0.334481418132782, + "learning_rate": 1.7512435847264036e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.364714337046353, + "grad_norm": 0.19894558191299438, + "learning_rate": 1.7499805014836407e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.365137071716937, + "grad_norm": 0.23919199407100677, + "learning_rate": 1.748717777277115e-05, + "loss": 0.3704, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.365559806387521, + "grad_norm": 0.20131738483905792, + "learning_rate": 1.7474554122463195e-05, + "loss": 0.3526, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.365982541058105, + "grad_norm": 0.2384619414806366, + "learning_rate": 1.7461934065307127e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.366405275728689, + "grad_norm": 0.3020140528678894, + "learning_rate": 1.744931760269708e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.366828010399273, + "grad_norm": 0.283538818359375, + "learning_rate": 1.7436704736026836e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3672507450698568, + "grad_norm": 0.18505091965198517, + "learning_rate": 1.742409546668977e-05, + "loss": 0.3552, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.367673479740441, + "grad_norm": 0.18933963775634766, + "learning_rate": 1.741148979607885e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.368096214411025, + "grad_norm": 0.2617492973804474, + "learning_rate": 1.7398887725586642e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3685189490816088, + "grad_norm": 0.23649051785469055, + "learning_rate": 1.7386289256605355e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.368941683752193, + "grad_norm": 0.255832701921463, + "learning_rate": 1.737369439052672e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.369364418422777, + "grad_norm": 0.2692866027355194, + "learning_rate": 1.7361103128742134e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3697871530933607, + "grad_norm": 0.22620446979999542, + "learning_rate": 1.734851547264261e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.370209887763945, + "grad_norm": 0.26797163486480713, + "learning_rate": 1.7335931423618683e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.370632622434529, + "grad_norm": 0.21745000779628754, + "learning_rate": 1.732335098306056e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3710553571051127, + "grad_norm": 0.3264535069465637, + "learning_rate": 1.7310774152358035e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.371478091775697, + "grad_norm": 0.2633301019668579, + "learning_rate": 1.72982009329005e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.371900826446281, + "grad_norm": 0.2523590922355652, + "learning_rate": 1.7285631326076918e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.372323561116865, + "grad_norm": 0.218951016664505, + "learning_rate": 1.72730653332759e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.372746295787449, + "grad_norm": 0.31193867325782776, + "learning_rate": 1.7260502955885626e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.373169030458033, + "grad_norm": 0.16725337505340576, + "learning_rate": 1.7247944195293897e-05, + "loss": 0.3714, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.373591765128617, + "grad_norm": 0.196904256939888, + "learning_rate": 1.7235389052888118e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.374014499799201, + "grad_norm": 0.229265958070755, + "learning_rate": 1.7222837530055243e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3744372344697853, + "grad_norm": 0.23094284534454346, + "learning_rate": 1.7210289628181887e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.374859969140369, + "grad_norm": 0.19864507019519806, + "learning_rate": 1.7197745348654254e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.375282703810953, + "grad_norm": 0.28042861819267273, + "learning_rate": 1.7185204692858104e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3757054384815373, + "grad_norm": 0.2654822766780853, + "learning_rate": 1.7172667662178847e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.376128173152121, + "grad_norm": 0.19299523532390594, + "learning_rate": 1.716013425800148e-05, + "loss": 0.354, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.376550907822705, + "grad_norm": 0.25671547651290894, + "learning_rate": 1.7147604481710584e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3769736424932892, + "grad_norm": 0.21390029788017273, + "learning_rate": 1.7135078334690345e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.377396377163873, + "grad_norm": 0.21283097565174103, + "learning_rate": 1.7122555818324586e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.377819111834457, + "grad_norm": 0.20741704106330872, + "learning_rate": 1.711003693399666e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3782418465050412, + "grad_norm": 0.2131756842136383, + "learning_rate": 1.7097521683089545e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 68990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.378664581175625, + "grad_norm": 0.33827704191207886, + "learning_rate": 1.7085010066985846e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.379087315846209, + "grad_norm": 0.17823515832424164, + "learning_rate": 1.7072502087067738e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.379510050516793, + "grad_norm": 0.19946685433387756, + "learning_rate": 1.705999774471701e-05, + "loss": 0.3529, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.379932785187377, + "grad_norm": 0.18653196096420288, + "learning_rate": 1.7047497041315042e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3803555198579613, + "grad_norm": 0.20962375402450562, + "learning_rate": 1.7034999978242805e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.380778254528545, + "grad_norm": 0.16764256358146667, + "learning_rate": 1.70225065568809e-05, + "loss": 0.3532, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.381200989199129, + "grad_norm": 0.2420310378074646, + "learning_rate": 1.701001677860948e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3816237238697133, + "grad_norm": 0.1890764832496643, + "learning_rate": 1.6997530644808297e-05, + "loss": 0.3558, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.382046458540297, + "grad_norm": 0.2945002317428589, + "learning_rate": 1.6985048156856738e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.382469193210881, + "grad_norm": 0.24919362366199493, + "learning_rate": 1.697256931613377e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3828919278814653, + "grad_norm": 0.21547965705394745, + "learning_rate": 1.6960094124017957e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.383314662552049, + "grad_norm": 0.20509763062000275, + "learning_rate": 1.694762258188745e-05, + "loss": 0.3513, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3837373972226334, + "grad_norm": 0.24545414745807648, + "learning_rate": 1.6935154691120037e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3841601318932173, + "grad_norm": 0.21300587058067322, + "learning_rate": 1.6922690453093027e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.384582866563801, + "grad_norm": 0.18833523988723755, + "learning_rate": 1.6910229869183386e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3850056012343854, + "grad_norm": 0.2453426867723465, + "learning_rate": 1.6897772940767686e-05, + "loss": 0.3656, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3854283359049693, + "grad_norm": 0.23539935052394867, + "learning_rate": 1.6885319669222027e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.385851070575553, + "grad_norm": 0.2501591145992279, + "learning_rate": 1.6872870055922162e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3862738052461374, + "grad_norm": 0.23543018102645874, + "learning_rate": 1.6860424102243434e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3866965399167213, + "grad_norm": 0.25315141677856445, + "learning_rate": 1.6847981809560765e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.387119274587305, + "grad_norm": 0.23230020701885223, + "learning_rate": 1.68355431792487e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3875420092578894, + "grad_norm": 0.19653701782226562, + "learning_rate": 1.6823108212681333e-05, + "loss": 0.3552, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3879647439284732, + "grad_norm": 0.33277958631515503, + "learning_rate": 1.6810676911232382e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.388387478599057, + "grad_norm": 0.18210992217063904, + "learning_rate": 1.6798249276275187e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3888102132696414, + "grad_norm": 0.18098410964012146, + "learning_rate": 1.678582530918261e-05, + "loss": 0.3549, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3892329479402252, + "grad_norm": 0.27255529165267944, + "learning_rate": 1.677340501132718e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3896556826108095, + "grad_norm": 0.19549576938152313, + "learning_rate": 1.676098838408099e-05, + "loss": 0.3536, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3900784172813934, + "grad_norm": 0.2776828408241272, + "learning_rate": 1.6748575428815738e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.390501151951977, + "grad_norm": 0.17554055154323578, + "learning_rate": 1.6736166146902684e-05, + "loss": 0.3537, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3909238866225615, + "grad_norm": 0.22350065410137177, + "learning_rate": 1.672376053971272e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3913466212931453, + "grad_norm": 0.24063608050346375, + "learning_rate": 1.6711358608616316e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.391769355963729, + "grad_norm": 0.21015191078186035, + "learning_rate": 1.6698960354983544e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3921920906343135, + "grad_norm": 0.2253197878599167, + "learning_rate": 1.668656578018408e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3926148253048973, + "grad_norm": 0.25306448340415955, + "learning_rate": 1.6674174885587135e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3930375599754816, + "grad_norm": 0.2656734585762024, + "learning_rate": 1.6661787672561587e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3934602946460655, + "grad_norm": 0.2445519119501114, + "learning_rate": 1.6649404142475876e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3938830293166493, + "grad_norm": 0.21513234078884125, + "learning_rate": 1.6637024296698022e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3943057639872336, + "grad_norm": 0.23082682490348816, + "learning_rate": 1.6624648136595655e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3947284986578175, + "grad_norm": 0.3626869022846222, + "learning_rate": 1.6612275663536004e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3951512333284013, + "grad_norm": 0.18947626650333405, + "learning_rate": 1.659990687888587e-05, + "loss": 0.3712, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3955739679989856, + "grad_norm": 0.20527130365371704, + "learning_rate": 1.6587541784011662e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3959967026695694, + "grad_norm": 0.24476772546768188, + "learning_rate": 1.6575180380279398e-05, + "loss": 0.3554, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3964194373401533, + "grad_norm": 0.19858305156230927, + "learning_rate": 1.6562822669054646e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3968421720107376, + "grad_norm": 0.24713271856307983, + "learning_rate": 1.655046865170258e-05, + "loss": 0.3679, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3972649066813214, + "grad_norm": 0.24574795365333557, + "learning_rate": 1.653811832958797e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3976876413519053, + "grad_norm": 0.25547516345977783, + "learning_rate": 1.65257717040752e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3981103760224896, + "grad_norm": 0.2012356072664261, + "learning_rate": 1.651342877652822e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3985331106930734, + "grad_norm": 0.20427387952804565, + "learning_rate": 1.6501089548310577e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3989558453636577, + "grad_norm": 0.19329246878623962, + "learning_rate": 1.648875402078543e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3993785800342415, + "grad_norm": 0.17704588174819946, + "learning_rate": 1.647642219531547e-05, + "loss": 0.3543, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.3998013147048254, + "grad_norm": 0.1915993094444275, + "learning_rate": 1.6464094073263065e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4002240493754097, + "grad_norm": 0.21936166286468506, + "learning_rate": 1.645176965599008e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4006467840459935, + "grad_norm": 0.20903833210468292, + "learning_rate": 1.6439448944858045e-05, + "loss": 0.3561, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4010695187165774, + "grad_norm": 0.22758527100086212, + "learning_rate": 1.6427131941228048e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4014922533871617, + "grad_norm": 0.17986804246902466, + "learning_rate": 1.6414818646460777e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4019149880577455, + "grad_norm": 0.16756562888622284, + "learning_rate": 1.6402509061916505e-05, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.40233772272833, + "grad_norm": 0.1754733920097351, + "learning_rate": 1.639020318895511e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4027604573989136, + "grad_norm": 0.20140352845191956, + "learning_rate": 1.637790102893602e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4031831920694975, + "grad_norm": 0.16214026510715485, + "learning_rate": 1.636560258321829e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4036059267400818, + "grad_norm": 0.338443785905838, + "learning_rate": 1.6353307853160577e-05, + "loss": 0.3521, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4040286614106656, + "grad_norm": 0.18741922080516815, + "learning_rate": 1.634101684012107e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4044513960812495, + "grad_norm": 0.26071420311927795, + "learning_rate": 1.6328729545457594e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4048741307518338, + "grad_norm": 0.18552100658416748, + "learning_rate": 1.6316445970527554e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4052968654224176, + "grad_norm": 0.18551094830036163, + "learning_rate": 1.6304166116687963e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4057196000930015, + "grad_norm": 0.2120252400636673, + "learning_rate": 1.629188998529536e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4061423347635857, + "grad_norm": 0.2455139309167862, + "learning_rate": 1.6279617577705936e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4065650694341696, + "grad_norm": 0.24061474204063416, + "learning_rate": 1.626734889527544e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4069878041047534, + "grad_norm": 0.22931738197803497, + "learning_rate": 1.6255083939359233e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4074105387753377, + "grad_norm": 0.18320302665233612, + "learning_rate": 1.6242822711312255e-05, + "loss": 0.3554, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4078332734459216, + "grad_norm": 0.346408486366272, + "learning_rate": 1.6230565212489e-05, + "loss": 0.3545, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.408256008116506, + "grad_norm": 0.20249976217746735, + "learning_rate": 1.6218311444243594e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4086787427870897, + "grad_norm": 0.2687394618988037, + "learning_rate": 1.620606140792975e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4091014774576736, + "grad_norm": 0.22256655991077423, + "learning_rate": 1.619381510490073e-05, + "loss": 0.3539, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.409524212128258, + "grad_norm": 0.23679134249687195, + "learning_rate": 1.618157253650941e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4099469467988417, + "grad_norm": 0.22516769170761108, + "learning_rate": 1.6169333704108265e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4103696814694255, + "grad_norm": 0.2621196210384369, + "learning_rate": 1.6157098609049336e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.41079241614001, + "grad_norm": 0.1767568439245224, + "learning_rate": 1.6144867252684258e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4112151508105937, + "grad_norm": 0.21579816937446594, + "learning_rate": 1.6132639636364278e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.411637885481178, + "grad_norm": 0.3401988744735718, + "learning_rate": 1.6120415761440177e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.412060620151762, + "grad_norm": 0.2096271961927414, + "learning_rate": 1.6108195629262348e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4124833548223457, + "grad_norm": 0.1989821493625641, + "learning_rate": 1.6095979241180782e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.41290608949293, + "grad_norm": 0.20593692362308502, + "learning_rate": 1.6083766598545048e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.413328824163514, + "grad_norm": 0.2607773244380951, + "learning_rate": 1.6071557702704302e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4137515588340976, + "grad_norm": 0.22780172526836395, + "learning_rate": 1.605935255500729e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.414174293504682, + "grad_norm": 0.2633531391620636, + "learning_rate": 1.6047151156802347e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.414597028175266, + "grad_norm": 0.3068642020225525, + "learning_rate": 1.6034953509437368e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4150197628458496, + "grad_norm": 0.19154316186904907, + "learning_rate": 1.602275961425987e-05, + "loss": 0.3552, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.415442497516434, + "grad_norm": 0.20854578912258148, + "learning_rate": 1.601056947261691e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4158652321870178, + "grad_norm": 0.2561933696269989, + "learning_rate": 1.5998383085855174e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4162879668576016, + "grad_norm": 0.2772531509399414, + "learning_rate": 1.5986200455320917e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.416710701528186, + "grad_norm": 0.2805643081665039, + "learning_rate": 1.5974021582359977e-05, + "loss": 0.3512, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4171334361987697, + "grad_norm": 0.25087180733680725, + "learning_rate": 1.596184646831778e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.417556170869354, + "grad_norm": 0.2619161009788513, + "learning_rate": 1.594967511453936e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.417978905539938, + "grad_norm": 0.1401284784078598, + "learning_rate": 1.593750752236926e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4184016402105217, + "grad_norm": 0.25447413325309753, + "learning_rate": 1.5925343693151695e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.418824374881106, + "grad_norm": 0.22493472695350647, + "learning_rate": 1.591318362823043e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.41924710955169, + "grad_norm": 0.20990173518657684, + "learning_rate": 1.5901027328948785e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4196698442222737, + "grad_norm": 0.4558885991573334, + "learning_rate": 1.5888874796649705e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.420092578892858, + "grad_norm": 0.2891154885292053, + "learning_rate": 1.587672603267571e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.420515313563442, + "grad_norm": 0.24769814312458038, + "learning_rate": 1.5864581038368907e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 69990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.420938048234026, + "grad_norm": 0.2278212606906891, + "learning_rate": 1.5852439815070953e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.42136078290461, + "grad_norm": 0.33043596148490906, + "learning_rate": 1.5840302364123123e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.421783517575194, + "grad_norm": 0.15162217617034912, + "learning_rate": 1.5828168686866272e-05, + "loss": 0.3505, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.422206252245778, + "grad_norm": 0.23934496939182281, + "learning_rate": 1.581603878464083e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.422628986916362, + "grad_norm": 0.16513288021087646, + "learning_rate": 1.5803912658786823e-05, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.423051721586946, + "grad_norm": 0.19609326124191284, + "learning_rate": 1.5791790310643824e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.42347445625753, + "grad_norm": 0.3023439645767212, + "learning_rate": 1.5779671741551028e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.423897190928114, + "grad_norm": 0.3404172956943512, + "learning_rate": 1.5767556952847208e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.424319925598698, + "grad_norm": 0.3422994017601013, + "learning_rate": 1.5756656876352455e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.424742660269282, + "grad_norm": 0.33303841948509216, + "learning_rate": 1.5744549274074466e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.425165394939866, + "grad_norm": 0.26686909794807434, + "learning_rate": 1.5732445456065485e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.42558812961045, + "grad_norm": 0.289299339056015, + "learning_rate": 1.572034542366264e-05, + "loss": 0.3502, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.426010864281034, + "grad_norm": 0.28676778078079224, + "learning_rate": 1.5708249178202654e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.426433598951618, + "grad_norm": 0.258736789226532, + "learning_rate": 1.569615672102183e-05, + "loss": 0.3538, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.426856333622202, + "grad_norm": 0.2417653352022171, + "learning_rate": 1.5684068053456046e-05, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.427279068292786, + "grad_norm": 0.3022714853286743, + "learning_rate": 1.5671983176840756e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.42770180296337, + "grad_norm": 0.22575825452804565, + "learning_rate": 1.565990209251102e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.428124537633954, + "grad_norm": 0.2310343086719513, + "learning_rate": 1.5647824801801424e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.428547272304538, + "grad_norm": 0.26754632592201233, + "learning_rate": 1.5635751306046214e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.428970006975122, + "grad_norm": 0.23737786710262299, + "learning_rate": 1.5623681606579127e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.429392741645706, + "grad_norm": 0.29353567957878113, + "learning_rate": 1.5611615704733553e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.42981547631629, + "grad_norm": 0.2849404811859131, + "learning_rate": 1.5599553601842432e-05, + "loss": 0.3561, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4302382109868743, + "grad_norm": 0.2843199074268341, + "learning_rate": 1.558749529923829e-05, + "loss": 0.352, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.430660945657458, + "grad_norm": 0.3221134841442108, + "learning_rate": 1.5575440798253237e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.431083680328042, + "grad_norm": 0.23008134961128235, + "learning_rate": 1.5563390100218967e-05, + "loss": 0.3481, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4315064149986263, + "grad_norm": 0.24374309182167053, + "learning_rate": 1.5551343206466716e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.43192914966921, + "grad_norm": 0.2522047460079193, + "learning_rate": 1.553930011832735e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.432351884339794, + "grad_norm": 0.24090909957885742, + "learning_rate": 1.5527260837131298e-05, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4327746190103783, + "grad_norm": 0.3012109100818634, + "learning_rate": 1.5515225364208536e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.433197353680962, + "grad_norm": 0.244545578956604, + "learning_rate": 1.550319370088867e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.433620088351546, + "grad_norm": 0.23704153299331665, + "learning_rate": 1.5491165848500855e-05, + "loss": 0.3541, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4340428230221303, + "grad_norm": 0.28047311305999756, + "learning_rate": 1.547914180837385e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.434465557692714, + "grad_norm": 0.29818978905677795, + "learning_rate": 1.546712158183594e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.434888292363298, + "grad_norm": 0.24350214004516602, + "learning_rate": 1.5455105170215046e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4353110270338822, + "grad_norm": 0.30462124943733215, + "learning_rate": 1.544309257483864e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.435733761704466, + "grad_norm": 0.26091718673706055, + "learning_rate": 1.5431083797033784e-05, + "loss": 0.3662, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4361564963750504, + "grad_norm": 0.21707668900489807, + "learning_rate": 1.5419078838127127e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4365792310456342, + "grad_norm": 0.2678673565387726, + "learning_rate": 1.540707769944484e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.437001965716218, + "grad_norm": 0.22090232372283936, + "learning_rate": 1.539508038231274e-05, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4374247003868024, + "grad_norm": 0.22411230206489563, + "learning_rate": 1.5383086888056196e-05, + "loss": 0.3546, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.437847435057386, + "grad_norm": 0.272408127784729, + "learning_rate": 1.5371097218000142e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.43827016972797, + "grad_norm": 0.24510689079761505, + "learning_rate": 1.5359111373469105e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4386929043985544, + "grad_norm": 0.3619689345359802, + "learning_rate": 1.5347129355787187e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.439115639069138, + "grad_norm": 0.27135375142097473, + "learning_rate": 1.5335151166278068e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4395383737397225, + "grad_norm": 0.24307724833488464, + "learning_rate": 1.5323176806265e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4399611084103063, + "grad_norm": 0.2915303111076355, + "learning_rate": 1.5311206277070826e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.44038384308089, + "grad_norm": 0.24138100445270538, + "learning_rate": 1.5299239580017955e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4408065777514745, + "grad_norm": 0.24152788519859314, + "learning_rate": 1.528727671642834e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4412293124220583, + "grad_norm": 0.20820848643779755, + "learning_rate": 1.5275317687623565e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.441652047092642, + "grad_norm": 0.2408948689699173, + "learning_rate": 1.5263362494924766e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4420747817632265, + "grad_norm": 0.3171011507511139, + "learning_rate": 1.5251411139652661e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4424975164338103, + "grad_norm": 0.23132279515266418, + "learning_rate": 1.5239463623127537e-05, + "loss": 0.3523, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.442920251104394, + "grad_norm": 0.22544582188129425, + "learning_rate": 1.5227519946669262e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4433429857749784, + "grad_norm": 0.24714593589305878, + "learning_rate": 1.5215580111597295e-05, + "loss": 0.3549, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4437657204455623, + "grad_norm": 0.25386881828308105, + "learning_rate": 1.5203644119230636e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.444188455116146, + "grad_norm": 0.32882723212242126, + "learning_rate": 1.5191711970887867e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4446111897867304, + "grad_norm": 0.28168928623199463, + "learning_rate": 1.5179783667887165e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4450339244573143, + "grad_norm": 0.27508029341697693, + "learning_rate": 1.5167859211546276e-05, + "loss": 0.354, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4454566591278986, + "grad_norm": 0.3765960931777954, + "learning_rate": 1.5155938603182518e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4458793937984824, + "grad_norm": 0.27937188744544983, + "learning_rate": 1.5144021844112793e-05, + "loss": 0.368, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4463021284690663, + "grad_norm": 0.24086464941501617, + "learning_rate": 1.5132108935653577e-05, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4467248631396505, + "grad_norm": 0.35012203454971313, + "learning_rate": 1.5120199879120883e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4471475978102344, + "grad_norm": 0.2725888192653656, + "learning_rate": 1.5108294675830342e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4475703324808182, + "grad_norm": 0.23608048260211945, + "learning_rate": 1.5096393327097169e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4479930671514025, + "grad_norm": 0.2876698076725006, + "learning_rate": 1.5084495834236096e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4484158018219864, + "grad_norm": 0.3221053183078766, + "learning_rate": 1.5072602198561474e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4488385364925707, + "grad_norm": 0.2857052981853485, + "learning_rate": 1.5060712421387223e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4492612711631545, + "grad_norm": 0.28175023198127747, + "learning_rate": 1.5048826504026825e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4496840058337384, + "grad_norm": 0.25556400418281555, + "learning_rate": 1.5036944447793362e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4501067405043226, + "grad_norm": 0.2143552303314209, + "learning_rate": 1.5025066253999431e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4505294751749065, + "grad_norm": 0.20689848065376282, + "learning_rate": 1.5013191923957265e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4509522098454903, + "grad_norm": 0.26869136095046997, + "learning_rate": 1.500132145897864e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4513749445160746, + "grad_norm": 0.2555190324783325, + "learning_rate": 1.498945486037493e-05, + "loss": 0.3563, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4517976791866585, + "grad_norm": 0.22949759662151337, + "learning_rate": 1.4977592129457024e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4522204138572423, + "grad_norm": 0.27091771364212036, + "learning_rate": 1.4965733267535436e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4526431485278266, + "grad_norm": 0.2599382996559143, + "learning_rate": 1.4953878275920268e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4530658831984105, + "grad_norm": 0.2748048007488251, + "learning_rate": 1.4942027155921118e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4534886178689943, + "grad_norm": 0.23269614577293396, + "learning_rate": 1.493017990884723e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4539113525395786, + "grad_norm": 0.2913151979446411, + "learning_rate": 1.4918336536007388e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4543340872101624, + "grad_norm": 0.2848920226097107, + "learning_rate": 1.4906497038709955e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4547568218807467, + "grad_norm": 0.24385464191436768, + "learning_rate": 1.4894661418262862e-05, + "loss": 0.3558, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4551795565513306, + "grad_norm": 0.2768935263156891, + "learning_rate": 1.488282967597363e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4556022912219144, + "grad_norm": 0.3060908019542694, + "learning_rate": 1.487100181314931e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4560250258924987, + "grad_norm": 0.3185825049877167, + "learning_rate": 1.4859177831096572e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4564477605630826, + "grad_norm": 0.26700422167778015, + "learning_rate": 1.4847357731121608e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4568704952336664, + "grad_norm": 0.29009905457496643, + "learning_rate": 1.4835541514530233e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4572932299042507, + "grad_norm": 0.23043948411941528, + "learning_rate": 1.4823729182627794e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4577159645748345, + "grad_norm": 0.22280216217041016, + "learning_rate": 1.4811920736719226e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.458138699245419, + "grad_norm": 0.32748252153396606, + "learning_rate": 1.4800116178109041e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4585614339160027, + "grad_norm": 0.33648958802223206, + "learning_rate": 1.4788315508101319e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4589841685865865, + "grad_norm": 0.3052650988101959, + "learning_rate": 1.4776518727999694e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.459406903257171, + "grad_norm": 0.21527639031410217, + "learning_rate": 1.4764725839107363e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4598296379277547, + "grad_norm": 0.2514149844646454, + "learning_rate": 1.4752936842727127e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4602523725983385, + "grad_norm": 0.2989913523197174, + "learning_rate": 1.4741151740161335e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.460675107268923, + "grad_norm": 0.26515713334083557, + "learning_rate": 1.4729370532711912e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4610978419395066, + "grad_norm": 0.2564782202243805, + "learning_rate": 1.4717593221680359e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4615205766100905, + "grad_norm": 0.31029212474823, + "learning_rate": 1.470581980836775e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.461943311280675, + "grad_norm": 0.24276621639728546, + "learning_rate": 1.4694050294074685e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4623660459512586, + "grad_norm": 0.25369611382484436, + "learning_rate": 1.4682284680101388e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4627887806218425, + "grad_norm": 0.2732728123664856, + "learning_rate": 1.4670522967747636e-05, + "loss": 0.354, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 70990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4632115152924268, + "grad_norm": 0.24969005584716797, + "learning_rate": 1.4658765158312748e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4636342499630106, + "grad_norm": 0.2939448356628418, + "learning_rate": 1.4647011253095644e-05, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.464056984633595, + "grad_norm": 0.2896196246147156, + "learning_rate": 1.4635261253394799e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4644797193041788, + "grad_norm": 0.28743240237236023, + "learning_rate": 1.462351516050826e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4649024539747626, + "grad_norm": 0.2540375888347626, + "learning_rate": 1.4611772975733667e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.465325188645347, + "grad_norm": 0.20811589062213898, + "learning_rate": 1.460003470036816e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4657479233159307, + "grad_norm": 0.24970147013664246, + "learning_rate": 1.458830033570851e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4661706579865146, + "grad_norm": 0.21734769642353058, + "learning_rate": 1.4576569883051033e-05, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.466593392657099, + "grad_norm": 0.21199725568294525, + "learning_rate": 1.4564843343691637e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4670161273276827, + "grad_norm": 0.2595381438732147, + "learning_rate": 1.4553120718925744e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.467438861998267, + "grad_norm": 0.33631470799446106, + "learning_rate": 1.4541402010048389e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.467861596668851, + "grad_norm": 0.2143462598323822, + "learning_rate": 1.4529687218354176e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4682843313394347, + "grad_norm": 0.22870434820652008, + "learning_rate": 1.451797634513724e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.468707066010019, + "grad_norm": 0.3055882751941681, + "learning_rate": 1.4506269391691307e-05, + "loss": 0.3508, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.469129800680603, + "grad_norm": 0.3114027976989746, + "learning_rate": 1.4494566359309674e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4695525353511867, + "grad_norm": 0.3709700405597687, + "learning_rate": 1.4482867249285203e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.469975270021771, + "grad_norm": 0.2849637567996979, + "learning_rate": 1.4471172062910316e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.470398004692355, + "grad_norm": 0.2629583775997162, + "learning_rate": 1.4459480801477016e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4708207393629387, + "grad_norm": 0.33938851952552795, + "learning_rate": 1.4447793466276826e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.471243474033523, + "grad_norm": 0.363389790058136, + "learning_rate": 1.443611005860091e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.471666208704107, + "grad_norm": 0.2615624666213989, + "learning_rate": 1.4424430579739923e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4720889433746907, + "grad_norm": 0.3268403708934784, + "learning_rate": 1.4412755030984137e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.472511678045275, + "grad_norm": 0.20273953676223755, + "learning_rate": 1.4401083413623368e-05, + "loss": 0.3525, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.472934412715859, + "grad_norm": 0.2164958268404007, + "learning_rate": 1.438941572894701e-05, + "loss": 0.3526, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.473357147386443, + "grad_norm": 0.3355206549167633, + "learning_rate": 1.4377751978244015e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.473779882057027, + "grad_norm": 0.2708030939102173, + "learning_rate": 1.4366092162802908e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4742026167276108, + "grad_norm": 0.23342153429985046, + "learning_rate": 1.435443628391177e-05, + "loss": 0.3683, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.474625351398195, + "grad_norm": 0.3742251992225647, + "learning_rate": 1.4342784342858223e-05, + "loss": 0.3561, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.475048086068779, + "grad_norm": 0.2742066979408264, + "learning_rate": 1.4331136340929507e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4754708207393628, + "grad_norm": 0.21658441424369812, + "learning_rate": 1.4319492279412388e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.475893555409947, + "grad_norm": 0.31598570942878723, + "learning_rate": 1.430785215959322e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.476316290080531, + "grad_norm": 0.2560594379901886, + "learning_rate": 1.4296215982757905e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.476739024751115, + "grad_norm": 0.3467807173728943, + "learning_rate": 1.4284583750191927e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.477161759421699, + "grad_norm": 0.21879376471042633, + "learning_rate": 1.4272955463180298e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.477584494092283, + "grad_norm": 0.259666383266449, + "learning_rate": 1.4261331123007637e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.478007228762867, + "grad_norm": 0.18255533277988434, + "learning_rate": 1.4250872592460918e-05, + "loss": 0.3551, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.478429963433451, + "grad_norm": 0.2422705888748169, + "learning_rate": 1.4239255754819814e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.478852698104035, + "grad_norm": 0.26457276940345764, + "learning_rate": 1.4227642867740527e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.479275432774619, + "grad_norm": 0.28860458731651306, + "learning_rate": 1.4216033932505974e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.479698167445203, + "grad_norm": 0.3129434883594513, + "learning_rate": 1.4204428950398623e-05, + "loss": 0.3561, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.480120902115787, + "grad_norm": 0.24869774281978607, + "learning_rate": 1.4192827922700514e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.480543636786371, + "grad_norm": 0.2834949493408203, + "learning_rate": 1.41812308506932e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.480966371456955, + "grad_norm": 0.3338213860988617, + "learning_rate": 1.416963773565786e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.481389106127539, + "grad_norm": 0.24405960738658905, + "learning_rate": 1.415804857887521e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.481811840798123, + "grad_norm": 0.3072836697101593, + "learning_rate": 1.4146463381625502e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.482234575468707, + "grad_norm": 0.27908143401145935, + "learning_rate": 1.4134882145188594e-05, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4826573101392913, + "grad_norm": 0.2777714133262634, + "learning_rate": 1.412330487084389e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.483080044809875, + "grad_norm": 0.2887359857559204, + "learning_rate": 1.4111731559870356e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.483502779480459, + "grad_norm": 0.300769567489624, + "learning_rate": 1.4100162213546508e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4839255141510432, + "grad_norm": 0.2799198627471924, + "learning_rate": 1.4088596833150463e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.484348248821627, + "grad_norm": 0.26057031750679016, + "learning_rate": 1.4077035419959833e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.484770983492211, + "grad_norm": 0.27632400393486023, + "learning_rate": 1.4065477975251867e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.485193718162795, + "grad_norm": 0.30346083641052246, + "learning_rate": 1.4053924500303305e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.485616452833379, + "grad_norm": 0.27660346031188965, + "learning_rate": 1.4042374996390506e-05, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4860391875039634, + "grad_norm": 0.3094226121902466, + "learning_rate": 1.4030829464789352e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.486461922174547, + "grad_norm": 0.3102957010269165, + "learning_rate": 1.4019287906775314e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.486884656845131, + "grad_norm": 0.22268781065940857, + "learning_rate": 1.4007750323623408e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4873073915157153, + "grad_norm": 0.3271061182022095, + "learning_rate": 1.3996216716608229e-05, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.487730126186299, + "grad_norm": 0.22863616049289703, + "learning_rate": 1.39846870870039e-05, + "loss": 0.3547, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.488152860856883, + "grad_norm": 0.3388855755329132, + "learning_rate": 1.3973161436084108e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4885755955274673, + "grad_norm": 0.2718760073184967, + "learning_rate": 1.3961639765122136e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.488998330198051, + "grad_norm": 0.33181536197662354, + "learning_rate": 1.3950122075390798e-05, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.489421064868635, + "grad_norm": 0.2646145224571228, + "learning_rate": 1.3938608368162482e-05, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4898437995392193, + "grad_norm": 0.27481648325920105, + "learning_rate": 1.392709864470913e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.490266534209803, + "grad_norm": 0.31109434366226196, + "learning_rate": 1.3915592906302244e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.490689268880387, + "grad_norm": 0.2823808491230011, + "learning_rate": 1.39040911542129e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4911120035509713, + "grad_norm": 0.2860516309738159, + "learning_rate": 1.3892593389711694e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.491534738221555, + "grad_norm": 0.2577662765979767, + "learning_rate": 1.3881099614068833e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4919574728921394, + "grad_norm": 0.29350045323371887, + "learning_rate": 1.3869609828554025e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4923802075627233, + "grad_norm": 0.287381112575531, + "learning_rate": 1.3858124034436593e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.492802942233307, + "grad_norm": 0.4038577079772949, + "learning_rate": 1.3846642232985391e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4932256769038914, + "grad_norm": 0.26242756843566895, + "learning_rate": 1.3835164425468833e-05, + "loss": 0.3659, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4936484115744753, + "grad_norm": 0.23982733488082886, + "learning_rate": 1.3823690613154922e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.494071146245059, + "grad_norm": 0.32541900873184204, + "learning_rate": 1.3812220797311149e-05, + "loss": 0.3552, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4944938809156434, + "grad_norm": 0.28673961758613586, + "learning_rate": 1.3800754979204633e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4949166155862272, + "grad_norm": 0.26607373356819153, + "learning_rate": 1.3789293160102023e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4953393502568115, + "grad_norm": 0.23359638452529907, + "learning_rate": 1.3777835341269545e-05, + "loss": 0.3547, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4957620849273954, + "grad_norm": 0.2775583267211914, + "learning_rate": 1.3766381523972938e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.496184819597979, + "grad_norm": 0.20948931574821472, + "learning_rate": 1.3754931709477542e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4966075542685635, + "grad_norm": 0.39325109124183655, + "learning_rate": 1.3743485899048241e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4970302889391474, + "grad_norm": 0.2589523196220398, + "learning_rate": 1.3732044093949498e-05, + "loss": 0.3552, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.497453023609731, + "grad_norm": 0.26594629883766174, + "learning_rate": 1.372060629544527e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4978757582803155, + "grad_norm": 0.2908417880535126, + "learning_rate": 1.3709172504799138e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4982984929508993, + "grad_norm": 0.276393860578537, + "learning_rate": 1.3697742723274214e-05, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.498721227621483, + "grad_norm": 0.23481832444667816, + "learning_rate": 1.3686316952133167e-05, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4991439622920675, + "grad_norm": 0.2524189054965973, + "learning_rate": 1.3674895192638243e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.4995666969626513, + "grad_norm": 0.33094650506973267, + "learning_rate": 1.3663477446051198e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.499989431633235, + "grad_norm": 0.2991441786289215, + "learning_rate": 1.3652063713633401e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5004121663038195, + "grad_norm": 0.286679744720459, + "learning_rate": 1.364065399664572e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5008349009744033, + "grad_norm": 0.313639760017395, + "learning_rate": 1.3629248296348623e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.501257635644987, + "grad_norm": 0.25131547451019287, + "learning_rate": 1.3617846614002123e-05, + "loss": 0.3661, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5016803703155714, + "grad_norm": 0.25361475348472595, + "learning_rate": 1.3606448950865785e-05, + "loss": 0.353, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5021031049861553, + "grad_norm": 0.30004021525382996, + "learning_rate": 1.359505530819874e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5025258396567396, + "grad_norm": 0.3238375186920166, + "learning_rate": 1.3583665687259673e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5029485743273234, + "grad_norm": 0.23791348934173584, + "learning_rate": 1.3572280089306787e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5033713089979077, + "grad_norm": 0.25834646821022034, + "learning_rate": 1.3560898515597909e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5037940436684916, + "grad_norm": 0.23083370923995972, + "learning_rate": 1.354952096739035e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5042167783390754, + "grad_norm": 0.33956921100616455, + "learning_rate": 1.3538147445941029e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5046395130096597, + "grad_norm": 0.19958429038524628, + "learning_rate": 1.3526777952506402e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5050622476802435, + "grad_norm": 0.28420913219451904, + "learning_rate": 1.3515412488342477e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 71990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5054849823508274, + "grad_norm": 0.21803441643714905, + "learning_rate": 1.350405105470482e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5059077170214117, + "grad_norm": 0.27844998240470886, + "learning_rate": 1.349269365284857e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5063304516919955, + "grad_norm": 0.2762557566165924, + "learning_rate": 1.3481340284028365e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5067531863625794, + "grad_norm": 0.2575792968273163, + "learning_rate": 1.3469990949498473e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5071759210331637, + "grad_norm": 0.2775246798992157, + "learning_rate": 1.3458645650512647e-05, + "loss": 0.3556, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5075986557037475, + "grad_norm": 0.3050996661186218, + "learning_rate": 1.3447304388324233e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5080213903743314, + "grad_norm": 0.28475600481033325, + "learning_rate": 1.343596716418613e-05, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5084441250449157, + "grad_norm": 0.34181836247444153, + "learning_rate": 1.3424633979350782e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5088668597154995, + "grad_norm": 0.3284309506416321, + "learning_rate": 1.3413304835070201e-05, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5092895943860833, + "grad_norm": 0.284221351146698, + "learning_rate": 1.3401979732595915e-05, + "loss": 0.3558, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5097123290566676, + "grad_norm": 0.27995556592941284, + "learning_rate": 1.3390658673179046e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5101350637272515, + "grad_norm": 0.34582120180130005, + "learning_rate": 1.3379341658070254e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5105577983978353, + "grad_norm": 0.22714416682720184, + "learning_rate": 1.3368028688519762e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5109805330684196, + "grad_norm": 0.31725630164146423, + "learning_rate": 1.3356719765777314e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5114032677390035, + "grad_norm": 0.39958155155181885, + "learning_rate": 1.334541489109224e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5118260024095878, + "grad_norm": 0.30439263582229614, + "learning_rate": 1.3334114065713416e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5122487370801716, + "grad_norm": 0.3627048134803772, + "learning_rate": 1.3322817290889278e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.512671471750756, + "grad_norm": 0.39804428815841675, + "learning_rate": 1.3311524567867778e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5130942064213397, + "grad_norm": 0.2569292485713959, + "learning_rate": 1.3300235897896457e-05, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5135169410919236, + "grad_norm": 0.3162541389465332, + "learning_rate": 1.3288951282222395e-05, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.513939675762508, + "grad_norm": 0.2502129375934601, + "learning_rate": 1.3277670722092233e-05, + "loss": 0.3536, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5143624104330917, + "grad_norm": 0.2347165197134018, + "learning_rate": 1.326639421875217e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5147851451036756, + "grad_norm": 0.25119727849960327, + "learning_rate": 1.3255121773447916e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.51520787977426, + "grad_norm": 0.24356183409690857, + "learning_rate": 1.3243853387424788e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5156306144448437, + "grad_norm": 0.27842459082603455, + "learning_rate": 1.3232589061927598e-05, + "loss": 0.3663, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5160533491154276, + "grad_norm": 0.29920706152915955, + "learning_rate": 1.3221328798200749e-05, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.516476083786012, + "grad_norm": 0.3034895658493042, + "learning_rate": 1.3210072597488194e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5168988184565957, + "grad_norm": 0.26171720027923584, + "learning_rate": 1.3198820461033422e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5173215531271795, + "grad_norm": 0.3031522333621979, + "learning_rate": 1.3187572390079483e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.517744287797764, + "grad_norm": 0.20189112424850464, + "learning_rate": 1.3176328385868991e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5181670224683477, + "grad_norm": 0.29610052704811096, + "learning_rate": 1.3165088449644053e-05, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5185897571389315, + "grad_norm": 0.30683183670043945, + "learning_rate": 1.3153852582646409e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.519012491809516, + "grad_norm": 0.26974308490753174, + "learning_rate": 1.3142620786117277e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5194352264800997, + "grad_norm": 0.2603539526462555, + "learning_rate": 1.3131393061297464e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5198579611506835, + "grad_norm": 0.28202831745147705, + "learning_rate": 1.3120169409427324e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.520280695821268, + "grad_norm": 0.24253325164318085, + "learning_rate": 1.3108949831746759e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5207034304918516, + "grad_norm": 0.2877110242843628, + "learning_rate": 1.3097734329495215e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.521126165162436, + "grad_norm": 0.2902415990829468, + "learning_rate": 1.3086522903911707e-05, + "loss": 0.3523, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5215488998330198, + "grad_norm": 0.29988738894462585, + "learning_rate": 1.3075315556234757e-05, + "loss": 0.3688, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.521971634503604, + "grad_norm": 0.24732664227485657, + "learning_rate": 1.3064112287702484e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.522394369174188, + "grad_norm": 0.2946760654449463, + "learning_rate": 1.3052913099552516e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5228171038447718, + "grad_norm": 0.2327154576778412, + "learning_rate": 1.3041717993022063e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.523239838515356, + "grad_norm": 0.3347562253475189, + "learning_rate": 1.3030526969347867e-05, + "loss": 0.3545, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.52366257318594, + "grad_norm": 0.2432052046060562, + "learning_rate": 1.3019340029766225e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5240853078565237, + "grad_norm": 0.25331729650497437, + "learning_rate": 1.3008157175512997e-05, + "loss": 0.354, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.524508042527108, + "grad_norm": 0.23164276778697968, + "learning_rate": 1.2996978407823546e-05, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.524930777197692, + "grad_norm": 0.28572747111320496, + "learning_rate": 1.2985803727932827e-05, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5253535118682757, + "grad_norm": 0.2686900794506073, + "learning_rate": 1.2974633137075326e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.52577624653886, + "grad_norm": 0.3493627905845642, + "learning_rate": 1.29634666364851e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.526198981209444, + "grad_norm": 0.36708131432533264, + "learning_rate": 1.2952304227395706e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5266217158800277, + "grad_norm": 0.2715834975242615, + "learning_rate": 1.2941145911040292e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.527044450550612, + "grad_norm": 0.32894977927207947, + "learning_rate": 1.2929991688651539e-05, + "loss": 0.3667, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.527467185221196, + "grad_norm": 0.2979084849357605, + "learning_rate": 1.2918841561461686e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5278899198917797, + "grad_norm": 0.2889907956123352, + "learning_rate": 1.290769553070249e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.528312654562364, + "grad_norm": 0.3222661316394806, + "learning_rate": 1.2896553597605288e-05, + "loss": 0.3687, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.528735389232948, + "grad_norm": 0.22168715298175812, + "learning_rate": 1.2885415763400949e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5291581239035317, + "grad_norm": 0.2906121015548706, + "learning_rate": 1.287428202931989e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.529580858574116, + "grad_norm": 0.24510589241981506, + "learning_rate": 1.28631523965921e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5300035932447, + "grad_norm": 0.2783016264438629, + "learning_rate": 1.2852026866447054e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.530426327915284, + "grad_norm": 0.22164200246334076, + "learning_rate": 1.2840905440113843e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.530849062585868, + "grad_norm": 0.29799631237983704, + "learning_rate": 1.2829788118821046e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5312717972564522, + "grad_norm": 0.28624245524406433, + "learning_rate": 1.2818674903796835e-05, + "loss": 0.3691, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.531694531927036, + "grad_norm": 0.2687772214412689, + "learning_rate": 1.28075657962689e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.53211726659762, + "grad_norm": 0.23319992423057556, + "learning_rate": 1.2796460797464499e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.532540001268204, + "grad_norm": 0.26576584577560425, + "learning_rate": 1.2785359908610412e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.532962735938788, + "grad_norm": 0.293433278799057, + "learning_rate": 1.2774263130932979e-05, + "loss": 0.3534, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.533385470609372, + "grad_norm": 0.23491154611110687, + "learning_rate": 1.2763170465658102e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.533808205279956, + "grad_norm": 0.32538947463035583, + "learning_rate": 1.2752081914011194e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.53423093995054, + "grad_norm": 0.25998175144195557, + "learning_rate": 1.2740997477217215e-05, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.534653674621124, + "grad_norm": 0.2818283438682556, + "learning_rate": 1.2729917156500704e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.535076409291708, + "grad_norm": 0.23685702681541443, + "learning_rate": 1.2718840953085715e-05, + "loss": 0.3695, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.535499143962292, + "grad_norm": 0.2205226868391037, + "learning_rate": 1.2707768868195869e-05, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.535921878632876, + "grad_norm": 0.2287520319223404, + "learning_rate": 1.2696700903054315e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.53634461330346, + "grad_norm": 0.2302890419960022, + "learning_rate": 1.2685637058883776e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.536767347974044, + "grad_norm": 0.2650388479232788, + "learning_rate": 1.2674577336906463e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.537190082644628, + "grad_norm": 0.3479280471801758, + "learning_rate": 1.2663521738344198e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.537612817315212, + "grad_norm": 0.27276456356048584, + "learning_rate": 1.2652470264418276e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.538035551985796, + "grad_norm": 0.3506432771682739, + "learning_rate": 1.2641422916349605e-05, + "loss": 0.3665, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.53845828665638, + "grad_norm": 0.2696276903152466, + "learning_rate": 1.2630379695358602e-05, + "loss": 0.3547, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.538881021326964, + "grad_norm": 0.20670080184936523, + "learning_rate": 1.2619340602665225e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.539303755997548, + "grad_norm": 0.34991446137428284, + "learning_rate": 1.2608305639488999e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5397264906681323, + "grad_norm": 0.25522714853286743, + "learning_rate": 1.2597274807048987e-05, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.540149225338716, + "grad_norm": 0.38462790846824646, + "learning_rate": 1.2586248106563758e-05, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5405719600093004, + "grad_norm": 0.26367729902267456, + "learning_rate": 1.2575225539251472e-05, + "loss": 0.3651, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5409946946798843, + "grad_norm": 0.26099780201911926, + "learning_rate": 1.2564207106329823e-05, + "loss": 0.368, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.541417429350468, + "grad_norm": 0.27140092849731445, + "learning_rate": 1.2553192809016011e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5418401640210524, + "grad_norm": 0.29142817854881287, + "learning_rate": 1.254218264852683e-05, + "loss": 0.3548, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5422628986916362, + "grad_norm": 0.2642383575439453, + "learning_rate": 1.2531176626078583e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.54268563336222, + "grad_norm": 0.3125147819519043, + "learning_rate": 1.2520174742887153e-05, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5431083680328044, + "grad_norm": 0.1983969658613205, + "learning_rate": 1.2509177000167905e-05, + "loss": 0.3684, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5435311027033882, + "grad_norm": 0.2531905770301819, + "learning_rate": 1.24981833991358e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.543953837373972, + "grad_norm": 0.28277459740638733, + "learning_rate": 1.248719394100532e-05, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5443765720445564, + "grad_norm": 0.24076645076274872, + "learning_rate": 1.2476208626990509e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.54479930671514, + "grad_norm": 0.2409163862466812, + "learning_rate": 1.2465227458304912e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.545222041385724, + "grad_norm": 0.2266923189163208, + "learning_rate": 1.2454250436161646e-05, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5456447760563083, + "grad_norm": 0.357410192489624, + "learning_rate": 1.244327756177337e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.546067510726892, + "grad_norm": 0.3282985985279083, + "learning_rate": 1.2432308836352297e-05, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.546490245397476, + "grad_norm": 0.24241392314434052, + "learning_rate": 1.2421344261110135e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5469129800680603, + "grad_norm": 0.26649531722068787, + "learning_rate": 1.2410383837258166e-05, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.547335714738644, + "grad_norm": 0.2771565020084381, + "learning_rate": 1.2399427566007227e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 72990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.547758449409228, + "grad_norm": 0.24443021416664124, + "learning_rate": 1.2388475448567666e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5481811840798123, + "grad_norm": 0.2786020338535309, + "learning_rate": 1.237752748614941e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.548603918750396, + "grad_norm": 0.36126697063446045, + "learning_rate": 1.2366583679961868e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5490266534209804, + "grad_norm": 0.2105107307434082, + "learning_rate": 1.2355644031214053e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5494493880915643, + "grad_norm": 0.276345431804657, + "learning_rate": 1.2344708541114463e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5498721227621486, + "grad_norm": 0.31808334589004517, + "learning_rate": 1.2333777210871177e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5502948574327324, + "grad_norm": 0.25659751892089844, + "learning_rate": 1.2322850041691809e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5507175921033163, + "grad_norm": 0.25194281339645386, + "learning_rate": 1.2311927034783493e-05, + "loss": 0.366, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5511403267739006, + "grad_norm": 0.2859010696411133, + "learning_rate": 1.230100819135292e-05, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5515630614444844, + "grad_norm": 0.32638606429100037, + "learning_rate": 1.2290093512606337e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5519857961150683, + "grad_norm": 0.272087037563324, + "learning_rate": 1.2279182999749472e-05, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5524085307856526, + "grad_norm": 0.2626107633113861, + "learning_rate": 1.226827665398767e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5528312654562364, + "grad_norm": 0.24899664521217346, + "learning_rate": 1.2257374476525746e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5532540001268202, + "grad_norm": 0.27767521142959595, + "learning_rate": 1.224647646856809e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5536767347974045, + "grad_norm": 0.27875813841819763, + "learning_rate": 1.2235582631318642e-05, + "loss": 0.3553, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5540994694679884, + "grad_norm": 0.23753316700458527, + "learning_rate": 1.2224692965980856e-05, + "loss": 0.353, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5545222041385722, + "grad_norm": 0.27338773012161255, + "learning_rate": 1.2213807473757744e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5549449388091565, + "grad_norm": 0.30397042632102966, + "learning_rate": 1.2202926155851852e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5553676734797404, + "grad_norm": 0.2978992164134979, + "learning_rate": 1.2192049013465234e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.555790408150324, + "grad_norm": 0.26749250292778015, + "learning_rate": 1.2181176047799552e-05, + "loss": 0.3515, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5562131428209085, + "grad_norm": 0.24221710860729218, + "learning_rate": 1.2170307260055918e-05, + "loss": 0.3696, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5566358774914923, + "grad_norm": 0.31458553671836853, + "learning_rate": 1.215944265143506e-05, + "loss": 0.3501, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5570586121620766, + "grad_norm": 0.24785885214805603, + "learning_rate": 1.2148582223137201e-05, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5574813468326605, + "grad_norm": 0.3372913897037506, + "learning_rate": 1.2137725976362113e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5579040815032443, + "grad_norm": 0.29507413506507874, + "learning_rate": 1.2126873912309133e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5583268161738286, + "grad_norm": 0.20958639681339264, + "learning_rate": 1.2116026032177074e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5587495508444125, + "grad_norm": 0.2976114749908447, + "learning_rate": 1.2105182337164344e-05, + "loss": 0.3697, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5591722855149968, + "grad_norm": 0.24542410671710968, + "learning_rate": 1.2094342828468864e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5595950201855806, + "grad_norm": 0.3801185190677643, + "learning_rate": 1.2083507507288106e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5600177548561645, + "grad_norm": 0.27629244327545166, + "learning_rate": 1.2072676374819047e-05, + "loss": 0.3658, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5604404895267487, + "grad_norm": 0.2791236937046051, + "learning_rate": 1.2061849432258238e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5608632241973326, + "grad_norm": 0.30102625489234924, + "learning_rate": 1.2051026680801752e-05, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5612859588679164, + "grad_norm": 0.2850607931613922, + "learning_rate": 1.204020812164522e-05, + "loss": 0.37, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5617086935385007, + "grad_norm": 0.27174779772758484, + "learning_rate": 1.2029393755983754e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5621314282090846, + "grad_norm": 0.3381979763507843, + "learning_rate": 1.201858358501205e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5625541628796684, + "grad_norm": 0.29586881399154663, + "learning_rate": 1.2007777609924342e-05, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5629768975502527, + "grad_norm": 0.27921822667121887, + "learning_rate": 1.199697583191438e-05, + "loss": 0.3669, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5633996322208366, + "grad_norm": 0.3825508952140808, + "learning_rate": 1.1986178252175473e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5638223668914204, + "grad_norm": 0.24760784208774567, + "learning_rate": 1.1975384871900425e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5642451015620047, + "grad_norm": 0.30733901262283325, + "learning_rate": 1.1964595692281627e-05, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5646678362325885, + "grad_norm": 0.2667009234428406, + "learning_rate": 1.195381071451096e-05, + "loss": 0.3558, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5650905709031724, + "grad_norm": 0.2740468680858612, + "learning_rate": 1.194302993977987e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5655133055737567, + "grad_norm": 0.2650156319141388, + "learning_rate": 1.1932253369279333e-05, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5659360402443405, + "grad_norm": 0.2573980689048767, + "learning_rate": 1.1921481004199858e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.566358774914925, + "grad_norm": 0.29560062289237976, + "learning_rate": 1.1910712845731497e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5667815095855087, + "grad_norm": 0.2473151981830597, + "learning_rate": 1.1899948895063834e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5672042442560925, + "grad_norm": 0.3170233964920044, + "learning_rate": 1.188918915338596e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.567626978926677, + "grad_norm": 0.24307742714881897, + "learning_rate": 1.1878433621886558e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5680497135972606, + "grad_norm": 0.27636584639549255, + "learning_rate": 1.186768230175378e-05, + "loss": 0.3549, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.568472448267845, + "grad_norm": 0.4228670001029968, + "learning_rate": 1.1856935194175362e-05, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.568895182938429, + "grad_norm": 0.2504519820213318, + "learning_rate": 1.1846192300338554e-05, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5693179176090126, + "grad_norm": 0.31112799048423767, + "learning_rate": 1.1835453621430154e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.569740652279597, + "grad_norm": 0.2194390743970871, + "learning_rate": 1.1824719158636483e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5701633869501808, + "grad_norm": 0.2587570250034332, + "learning_rate": 1.1813988913143404e-05, + "loss": 0.3551, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5705861216207646, + "grad_norm": 0.31479474902153015, + "learning_rate": 1.1803262886136296e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.571008856291349, + "grad_norm": 0.2585769295692444, + "learning_rate": 1.1792541078800102e-05, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5714315909619327, + "grad_norm": 0.2107367068529129, + "learning_rate": 1.1781823492319255e-05, + "loss": 0.3672, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5718543256325166, + "grad_norm": 0.2519958019256592, + "learning_rate": 1.1771110127877766e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.572277060303101, + "grad_norm": 0.30885517597198486, + "learning_rate": 1.176040098665916e-05, + "loss": 0.3549, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5726997949736847, + "grad_norm": 0.26028725504875183, + "learning_rate": 1.17496960698465e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5731225296442686, + "grad_norm": 0.25052520632743835, + "learning_rate": 1.1738995378622386e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.573545264314853, + "grad_norm": 0.291459321975708, + "learning_rate": 1.1728298914168928e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5739679989854367, + "grad_norm": 0.2638775110244751, + "learning_rate": 1.1717606677667792e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5743907336560206, + "grad_norm": 0.20746690034866333, + "learning_rate": 1.1706918670300177e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.574813468326605, + "grad_norm": 0.22220398485660553, + "learning_rate": 1.1696234893246815e-05, + "loss": 0.354, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5752362029971887, + "grad_norm": 0.3083091676235199, + "learning_rate": 1.1685555347687949e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.575658937667773, + "grad_norm": 0.26660412549972534, + "learning_rate": 1.167488003480337e-05, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.576081672338357, + "grad_norm": 0.31852951645851135, + "learning_rate": 1.1664208955772404e-05, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5765044070089407, + "grad_norm": 0.40787214040756226, + "learning_rate": 1.165354211177393e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.576927141679525, + "grad_norm": 0.27220210433006287, + "learning_rate": 1.1642879503986304e-05, + "loss": 0.3514, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.577349876350109, + "grad_norm": 0.272786945104599, + "learning_rate": 1.1632221133587456e-05, + "loss": 0.3551, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.577772611020693, + "grad_norm": 0.29803407192230225, + "learning_rate": 1.1621567001754836e-05, + "loss": 0.3644, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.578195345691277, + "grad_norm": 0.3048146367073059, + "learning_rate": 1.1610917109665436e-05, + "loss": 0.3519, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.578618080361861, + "grad_norm": 0.2793610095977783, + "learning_rate": 1.1600271458495775e-05, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.579040815032445, + "grad_norm": 0.2883415222167969, + "learning_rate": 1.1589630049421884e-05, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.579463549703029, + "grad_norm": 0.2540857493877411, + "learning_rate": 1.157899288361936e-05, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.579886284373613, + "grad_norm": 0.28934216499328613, + "learning_rate": 1.1568359962263286e-05, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.580309019044197, + "grad_norm": 0.24614334106445312, + "learning_rate": 1.1557731286528318e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.580731753714781, + "grad_norm": 0.24508459866046906, + "learning_rate": 1.1547106857588619e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5811544883853648, + "grad_norm": 0.3290275037288666, + "learning_rate": 1.1536486676617908e-05, + "loss": 0.3534, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.581577223055949, + "grad_norm": 0.3108022212982178, + "learning_rate": 1.1525870744789401e-05, + "loss": 0.3552, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.581999957726533, + "grad_norm": 0.2772188186645508, + "learning_rate": 1.1515259063275874e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5824226923971167, + "grad_norm": 0.2360599786043167, + "learning_rate": 1.1504651633249625e-05, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.582845427067701, + "grad_norm": 0.39641454815864563, + "learning_rate": 1.1494048455882472e-05, + "loss": 0.3646, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.583268161738285, + "grad_norm": 0.2910590171813965, + "learning_rate": 1.1483449532345747e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5836908964088687, + "grad_norm": 0.23690345883369446, + "learning_rate": 1.1472854863810362e-05, + "loss": 0.3557, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.584113631079453, + "grad_norm": 0.256264865398407, + "learning_rate": 1.146226445144672e-05, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.584536365750037, + "grad_norm": 0.22138752043247223, + "learning_rate": 1.1451678296424768e-05, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.584959100420621, + "grad_norm": 0.24724504351615906, + "learning_rate": 1.1441096399913975e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.585381835091205, + "grad_norm": 0.27707597613334656, + "learning_rate": 1.1430518763083376e-05, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.585804569761789, + "grad_norm": Infinity, + "learning_rate": 1.142100253292816e-05, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.586227304432373, + "grad_norm": 0.27184173464775085, + "learning_rate": 1.141043299270878e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.586650039102957, + "grad_norm": 0.3497851490974426, + "learning_rate": 1.1399867715557034e-05, + "loss": 0.3652, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5870727737735413, + "grad_norm": 0.3274669349193573, + "learning_rate": 1.1389306702640051e-05, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.587495508444125, + "grad_norm": 0.303342342376709, + "learning_rate": 1.1378749955124557e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.587918243114709, + "grad_norm": 0.2895084619522095, + "learning_rate": 1.1368197474176761e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5883409777852933, + "grad_norm": 0.2536431550979614, + "learning_rate": 1.1357649260962444e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.588763712455877, + "grad_norm": 0.2602287530899048, + "learning_rate": 1.1347105316646856e-05, + "loss": 0.355, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.589186447126461, + "grad_norm": 0.23152463138103485, + "learning_rate": 1.133656564239482e-05, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5896091817970452, + "grad_norm": 0.23950566351413727, + "learning_rate": 1.1326030239370677e-05, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 73990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.590031916467629, + "grad_norm": 0.28022781014442444, + "learning_rate": 1.131549910873831e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.590454651138213, + "grad_norm": 0.27390310168266296, + "learning_rate": 1.1304972251661084e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5908773858087972, + "grad_norm": 0.27512794733047485, + "learning_rate": 1.1294449669301932e-05, + "loss": 0.368, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.591300120479381, + "grad_norm": 0.3230903446674347, + "learning_rate": 1.1283931362823314e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.591722855149965, + "grad_norm": 0.265153706073761, + "learning_rate": 1.1273417333387199e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.592145589820549, + "grad_norm": 0.22918690741062164, + "learning_rate": 1.1262907582155119e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.592568324491133, + "grad_norm": 0.3739636242389679, + "learning_rate": 1.1252402110288069e-05, + "loss": 0.3536, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.592991059161717, + "grad_norm": 0.24341094493865967, + "learning_rate": 1.124190091894664e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.593413793832301, + "grad_norm": 0.28905797004699707, + "learning_rate": 1.1231404009290898e-05, + "loss": 0.353, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.593836528502885, + "grad_norm": 0.2716177701950073, + "learning_rate": 1.1220911382480465e-05, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5942592631734693, + "grad_norm": 0.32758989930152893, + "learning_rate": 1.1210423039674484e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.594681997844053, + "grad_norm": 0.20559120178222656, + "learning_rate": 1.1199938982031622e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.595104732514637, + "grad_norm": 0.25796207785606384, + "learning_rate": 1.1189459210710079e-05, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5955274671852213, + "grad_norm": 0.3032474219799042, + "learning_rate": 1.1178983726867576e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.595950201855805, + "grad_norm": 0.22639667987823486, + "learning_rate": 1.1168512531661374e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5963729365263895, + "grad_norm": 0.22878561913967133, + "learning_rate": 1.1158045626248226e-05, + "loss": 0.3549, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5967956711969733, + "grad_norm": 0.27361971139907837, + "learning_rate": 1.1147583011784429e-05, + "loss": 0.3643, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.597218405867557, + "grad_norm": 0.2660936713218689, + "learning_rate": 1.1137124689425821e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5976411405381414, + "grad_norm": 0.3317101001739502, + "learning_rate": 1.1126670660327749e-05, + "loss": 0.3664, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5980638752087253, + "grad_norm": 0.4218274652957916, + "learning_rate": 1.1116220925645088e-05, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.598486609879309, + "grad_norm": 0.3432953655719757, + "learning_rate": 1.1105775486532255e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5989093445498934, + "grad_norm": 0.23444382846355438, + "learning_rate": 1.1095334344143183e-05, + "loss": 0.3507, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.5993320792204773, + "grad_norm": 0.3127429187297821, + "learning_rate": 1.1084897499631298e-05, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.599754813891061, + "grad_norm": 0.3183389902114868, + "learning_rate": 1.1074464954149605e-05, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6001775485616454, + "grad_norm": 0.30443739891052246, + "learning_rate": 1.1064036708850589e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6006002832322292, + "grad_norm": 0.23836572468280792, + "learning_rate": 1.105361276488629e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.601023017902813, + "grad_norm": 0.3171447217464447, + "learning_rate": 1.1043193123408257e-05, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6014457525733974, + "grad_norm": 0.2899284064769745, + "learning_rate": 1.1032777785567577e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6018684872439812, + "grad_norm": 0.26946237683296204, + "learning_rate": 1.1022366752514846e-05, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.602291221914565, + "grad_norm": 0.34691178798675537, + "learning_rate": 1.1011960025400214e-05, + "loss": 0.3536, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6027139565851494, + "grad_norm": 0.22381474077701569, + "learning_rate": 1.1001557605373298e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.603136691255733, + "grad_norm": 0.2343635857105255, + "learning_rate": 1.0991159493583287e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6035594259263175, + "grad_norm": 0.34166228771209717, + "learning_rate": 1.0980765691178901e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6039821605969014, + "grad_norm": 0.2565763294696808, + "learning_rate": 1.097037619930834e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.604404895267485, + "grad_norm": 0.30642616748809814, + "learning_rate": 1.0959991019119359e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6048276299380695, + "grad_norm": 0.2542300522327423, + "learning_rate": 1.0949610151759232e-05, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6052503646086533, + "grad_norm": 0.33268502354621887, + "learning_rate": 1.0939233598374766e-05, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6056730992792376, + "grad_norm": 0.34131431579589844, + "learning_rate": 1.0928861360112252e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6060958339498215, + "grad_norm": 0.3246873915195465, + "learning_rate": 1.0918493438117554e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6065185686204053, + "grad_norm": 0.281791090965271, + "learning_rate": 1.0908129833536029e-05, + "loss": 0.3545, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6069413032909896, + "grad_norm": 0.24311038851737976, + "learning_rate": 1.089777054751257e-05, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6073640379615735, + "grad_norm": 0.236586794257164, + "learning_rate": 1.0887415581191595e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6077867726321573, + "grad_norm": 0.2991393506526947, + "learning_rate": 1.0877064935717024e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6082095073027416, + "grad_norm": 0.31601154804229736, + "learning_rate": 1.0866718612232312e-05, + "loss": 0.3654, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6086322419733254, + "grad_norm": 0.21062426269054413, + "learning_rate": 1.0856376611880464e-05, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6090549766439093, + "grad_norm": 0.227854922413826, + "learning_rate": 1.0846038935803948e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6094777113144936, + "grad_norm": 0.23567137122154236, + "learning_rate": 1.08357055851448e-05, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6099004459850774, + "grad_norm": 0.2738211452960968, + "learning_rate": 1.0825376561044576e-05, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6103231806556613, + "grad_norm": 0.3358602225780487, + "learning_rate": 1.0815051864644333e-05, + "loss": 0.3545, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6107459153262456, + "grad_norm": 0.49306222796440125, + "learning_rate": 1.080473149708467e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6111686499968294, + "grad_norm": 0.2152244597673416, + "learning_rate": 1.0794415459505703e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6115913846674133, + "grad_norm": 0.30684196949005127, + "learning_rate": 1.078410375304706e-05, + "loss": 0.3465, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6120141193379975, + "grad_norm": 0.21508780121803284, + "learning_rate": 1.0773796378847879e-05, + "loss": 0.3647, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6124368540085814, + "grad_norm": 0.24860484898090363, + "learning_rate": 1.0763493338046853e-05, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6128595886791657, + "grad_norm": 0.2713392674922943, + "learning_rate": 1.0753194631782176e-05, + "loss": 0.3534, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6132823233497495, + "grad_norm": 0.2323845773935318, + "learning_rate": 1.074290026119157e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6137050580203334, + "grad_norm": 0.35261619091033936, + "learning_rate": 1.0732610227412272e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6141277926909177, + "grad_norm": 0.20282572507858276, + "learning_rate": 1.0722324531581057e-05, + "loss": 0.3563, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6145505273615015, + "grad_norm": 0.2546280324459076, + "learning_rate": 1.071204317483418e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.614973262032086, + "grad_norm": 0.33931517601013184, + "learning_rate": 1.0701766158307475e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6153959967026696, + "grad_norm": 0.23484137654304504, + "learning_rate": 1.0691493483136233e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6158187313732535, + "grad_norm": 0.31100568175315857, + "learning_rate": 1.068122515045531e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.616241466043838, + "grad_norm": 0.2905367314815521, + "learning_rate": 1.0670961161399069e-05, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6166642007144216, + "grad_norm": 0.2243664413690567, + "learning_rate": 1.06607015171014e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6170869353850055, + "grad_norm": 0.3641640245914459, + "learning_rate": 1.0650446218695697e-05, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6175096700555898, + "grad_norm": 0.2053585797548294, + "learning_rate": 1.0640195267314906e-05, + "loss": 0.3504, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6179324047261736, + "grad_norm": 0.24305474758148193, + "learning_rate": 1.0629948664091443e-05, + "loss": 0.3561, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6183551393967575, + "grad_norm": 0.207371324300766, + "learning_rate": 1.0619706410157276e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6187778740673417, + "grad_norm": 0.2976485788822174, + "learning_rate": 1.060946850664391e-05, + "loss": 0.3671, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6192006087379256, + "grad_norm": 0.23319436609745026, + "learning_rate": 1.0599234954682313e-05, + "loss": 0.3537, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6196233434085094, + "grad_norm": 0.266701340675354, + "learning_rate": 1.0589005755403025e-05, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6200460780790937, + "grad_norm": 0.2593623101711273, + "learning_rate": 1.0578780909936082e-05, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6204688127496776, + "grad_norm": 0.3034580945968628, + "learning_rate": 1.056856041941106e-05, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6208915474202614, + "grad_norm": 0.2934074103832245, + "learning_rate": 1.0558344284957012e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6213142820908457, + "grad_norm": 0.2843846380710602, + "learning_rate": 1.054813250770254e-05, + "loss": 0.3538, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6217370167614296, + "grad_norm": 0.27995872497558594, + "learning_rate": 1.0537925088775768e-05, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.622159751432014, + "grad_norm": 0.2874550223350525, + "learning_rate": 1.0527722029304332e-05, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6225824861025977, + "grad_norm": 0.22128744423389435, + "learning_rate": 1.0517523330415391e-05, + "loss": 0.3526, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.623005220773182, + "grad_norm": 0.2673306167125702, + "learning_rate": 1.0507328993235593e-05, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.623427955443766, + "grad_norm": 0.26110830903053284, + "learning_rate": 1.0497139018891139e-05, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6238506901143497, + "grad_norm": 0.269621878862381, + "learning_rate": 1.0486953408507754e-05, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.624273424784934, + "grad_norm": 0.2829824686050415, + "learning_rate": 1.047677216321063e-05, + "loss": 0.3653, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.624696159455518, + "grad_norm": 0.2993442416191101, + "learning_rate": 1.0466595284124519e-05, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6251188941261017, + "grad_norm": 0.27315554022789, + "learning_rate": 1.0456422772373697e-05, + "loss": 0.3692, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.625541628796686, + "grad_norm": 0.3106687366962433, + "learning_rate": 1.044625462908193e-05, + "loss": 0.354, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.62596436346727, + "grad_norm": 0.3152344226837158, + "learning_rate": 1.0436090855372516e-05, + "loss": 0.3666, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6263870981378536, + "grad_norm": 0.39663147926330566, + "learning_rate": 1.0425931452368282e-05, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.626809832808438, + "grad_norm": 0.23442384600639343, + "learning_rate": 1.0415776421191541e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.627232567479022, + "grad_norm": 0.2467290610074997, + "learning_rate": 1.0405625762964134e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6276553021496056, + "grad_norm": 0.31222498416900635, + "learning_rate": 1.039547947880743e-05, + "loss": 0.3543, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.62807803682019, + "grad_norm": 0.27841421961784363, + "learning_rate": 1.0385337569842313e-05, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6285007714907738, + "grad_norm": 0.2248561680316925, + "learning_rate": 1.0375200037189182e-05, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6289235061613576, + "grad_norm": 0.23371875286102295, + "learning_rate": 1.0365066881967944e-05, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.629346240831942, + "grad_norm": 0.2682299017906189, + "learning_rate": 1.035493810529805e-05, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6297689755025258, + "grad_norm": 0.28634655475616455, + "learning_rate": 1.0344813708298424e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6301917101731096, + "grad_norm": 0.27384093403816223, + "learning_rate": 1.0334693692087538e-05, + "loss": 0.3554, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.630614444843694, + "grad_norm": 0.2803976833820343, + "learning_rate": 1.0324578057783362e-05, + "loss": 0.3561, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6310371795142777, + "grad_norm": 0.324240118265152, + "learning_rate": 1.0314466806503398e-05, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.631459914184862, + "grad_norm": 0.26150521636009216, + "learning_rate": 1.030435993936465e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.631882648855446, + "grad_norm": 0.2984221875667572, + "learning_rate": 1.0294257457483646e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 74990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.63230538352603, + "grad_norm": 0.2591257691383362, + "learning_rate": 1.0284159361976437e-05, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.632728118196614, + "grad_norm": 0.35732772946357727, + "learning_rate": 1.0274065653958587e-05, + "loss": 0.3689, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.633150852867198, + "grad_norm": 0.32801347970962524, + "learning_rate": 1.0263976334545139e-05, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.633573587537782, + "grad_norm": 0.3117744028568268, + "learning_rate": 1.0253891404850695e-05, + "loss": 0.3551, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.633996322208366, + "grad_norm": 0.2706475555896759, + "learning_rate": 1.0243810865989378e-05, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.63441905687895, + "grad_norm": 0.30028125643730164, + "learning_rate": 1.0233734719074773e-05, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.634841791549534, + "grad_norm": 0.32533952593803406, + "learning_rate": 1.0223662965220021e-05, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.635264526220118, + "grad_norm": 0.2524220943450928, + "learning_rate": 1.0213595605537779e-05, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.635687260890702, + "grad_norm": 0.1937369853258133, + "learning_rate": 1.0203532641140213e-05, + "loss": 0.3538, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.636109995561286, + "grad_norm": 0.2531003952026367, + "learning_rate": 1.0193474073138981e-05, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.63653273023187, + "grad_norm": 0.1742367297410965, + "learning_rate": 1.018341990264528e-05, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.636955464902454, + "grad_norm": 0.32330581545829773, + "learning_rate": 1.0173370130769816e-05, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.637378199573038, + "grad_norm": 0.3325989842414856, + "learning_rate": 1.0163324758622811e-05, + "loss": 0.3677, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.637800934243622, + "grad_norm": 0.27676087617874146, + "learning_rate": 1.0153283787314006e-05, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.638223668914206, + "grad_norm": 0.2964950203895569, + "learning_rate": 1.0143247217952623e-05, + "loss": 0.3673, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.63864640358479, + "grad_norm": 0.2374570071697235, + "learning_rate": 1.0133215051647438e-05, + "loss": 0.3557, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.639069138255374, + "grad_norm": 0.3225710690021515, + "learning_rate": 1.012318728950673e-05, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6394918729259578, + "grad_norm": 0.359254390001297, + "learning_rate": 1.0113163932638275e-05, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.639914607596542, + "grad_norm": 0.24332833290100098, + "learning_rate": 1.0103144982149372e-05, + "loss": 0.3523, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.640337342267126, + "grad_norm": 0.28149282932281494, + "learning_rate": 1.0093130439146836e-05, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.64076007693771, + "grad_norm": 0.2639707922935486, + "learning_rate": 1.0083120304737004e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.641182811608294, + "grad_norm": 0.3135320842266083, + "learning_rate": 1.0073114580025706e-05, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6416055462788783, + "grad_norm": 0.2888941466808319, + "learning_rate": 1.0063113266118312e-05, + "loss": 0.3704, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.642028280949462, + "grad_norm": 0.27668890357017517, + "learning_rate": 1.0053116364119675e-05, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.642451015620046, + "grad_norm": 0.36280953884124756, + "learning_rate": 1.0043123875134153e-05, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6428737502906303, + "grad_norm": 0.30001717805862427, + "learning_rate": 1.0033135800265664e-05, + "loss": 0.3559, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.643296484961214, + "grad_norm": 0.24687126278877258, + "learning_rate": 1.0023152140617598e-05, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.643719219631798, + "grad_norm": 0.2422260344028473, + "learning_rate": 1.0013172897292871e-05, + "loss": 0.3559, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6441419543023823, + "grad_norm": 0.2469017058610916, + "learning_rate": 1.0003198071393916e-05, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.644564688972966, + "grad_norm": 0.3363800644874573, + "learning_rate": 9.99322766402267e-06, + "loss": 0.3675, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.64498742364355, + "grad_norm": 0.27297306060791016, + "learning_rate": 9.983261676280592e-06, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6454101583141343, + "grad_norm": 0.3932640850543976, + "learning_rate": 9.973300109268641e-06, + "loss": 0.351, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.645832892984718, + "grad_norm": 0.33339959383010864, + "learning_rate": 9.963342964087274e-06, + "loss": 0.3675, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.646255627655302, + "grad_norm": 0.2756527364253998, + "learning_rate": 9.953390241836492e-06, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6466783623258863, + "grad_norm": 0.19275811314582825, + "learning_rate": 9.943441943615783e-06, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.64710109699647, + "grad_norm": 0.31058183312416077, + "learning_rate": 9.933498070524172e-06, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.647523831667054, + "grad_norm": 0.26140159368515015, + "learning_rate": 9.923558623660162e-06, + "loss": 0.3646, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6479465663376383, + "grad_norm": 0.25081315636634827, + "learning_rate": 9.913623604121808e-06, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.648369301008222, + "grad_norm": 0.22179006040096283, + "learning_rate": 9.90369301300662e-06, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.648792035678806, + "grad_norm": 0.3679182827472687, + "learning_rate": 9.893766851411668e-06, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6492147703493902, + "grad_norm": 0.3383842706680298, + "learning_rate": 9.883845120433526e-06, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.649637505019974, + "grad_norm": 0.2655051350593567, + "learning_rate": 9.873927821168243e-06, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6500602396905584, + "grad_norm": 0.32016900181770325, + "learning_rate": 9.864014954711415e-06, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.650482974361142, + "grad_norm": 0.2748509645462036, + "learning_rate": 9.854106522158135e-06, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6509057090317265, + "grad_norm": 0.2685069739818573, + "learning_rate": 9.844202524603009e-06, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6513284437023104, + "grad_norm": 0.31626516580581665, + "learning_rate": 9.834302963140163e-06, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.651751178372894, + "grad_norm": 0.2980514168739319, + "learning_rate": 9.824407838863197e-06, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6521739130434785, + "grad_norm": 0.26222261786460876, + "learning_rate": 9.81451715286526e-06, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6525966477140623, + "grad_norm": 0.2349238097667694, + "learning_rate": 9.804630906238993e-06, + "loss": 0.348, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.653019382384646, + "grad_norm": 0.3189358413219452, + "learning_rate": 9.794749100076567e-06, + "loss": 0.3664, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6534421170552305, + "grad_norm": 0.3034505248069763, + "learning_rate": 9.784871735469613e-06, + "loss": 0.3668, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6538648517258143, + "grad_norm": 0.4258655607700348, + "learning_rate": 9.774998813509323e-06, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.654287586396398, + "grad_norm": 0.2592480778694153, + "learning_rate": 9.765130335286387e-06, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6547103210669825, + "grad_norm": 0.3865870535373688, + "learning_rate": 9.755266301890975e-06, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6551330557375663, + "grad_norm": 0.2721419632434845, + "learning_rate": 9.745406714412792e-06, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.65555579040815, + "grad_norm": 0.27019381523132324, + "learning_rate": 9.735551573941054e-06, + "loss": 0.3548, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6559785250787344, + "grad_norm": 0.31335243582725525, + "learning_rate": 9.725700881564475e-06, + "loss": 0.3659, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6564012597493183, + "grad_norm": 0.26653382182121277, + "learning_rate": 9.71585463837129e-06, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.656823994419902, + "grad_norm": 0.2213445007801056, + "learning_rate": 9.706012845449214e-06, + "loss": 0.3549, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6572467290904864, + "grad_norm": 0.2112971395254135, + "learning_rate": 9.696175503885501e-06, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6576694637610703, + "grad_norm": 0.23922723531723022, + "learning_rate": 9.686342614766913e-06, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.658092198431654, + "grad_norm": 0.27499380707740784, + "learning_rate": 9.676514179179686e-06, + "loss": 0.3659, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6585149331022384, + "grad_norm": 0.21636322140693665, + "learning_rate": 9.666690198209599e-06, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6589376677728223, + "grad_norm": 0.2730250656604767, + "learning_rate": 9.656870672941925e-06, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6593604024434065, + "grad_norm": 0.3654884696006775, + "learning_rate": 9.647055604461447e-06, + "loss": 0.3552, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6597831371139904, + "grad_norm": 0.304423987865448, + "learning_rate": 9.637244993852456e-06, + "loss": 0.3558, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6602058717845747, + "grad_norm": 0.21577437222003937, + "learning_rate": 9.627438842198772e-06, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6606286064551585, + "grad_norm": 0.31207844614982605, + "learning_rate": 9.617637150583675e-06, + "loss": 0.3522, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6610513411257424, + "grad_norm": 0.3221481740474701, + "learning_rate": 9.607839920089967e-06, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6614740757963267, + "grad_norm": 0.25619158148765564, + "learning_rate": 9.598047151799982e-06, + "loss": 0.3533, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6618968104669105, + "grad_norm": 0.3585134744644165, + "learning_rate": 9.588258846795556e-06, + "loss": 0.3547, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6623195451374944, + "grad_norm": 0.2856697142124176, + "learning_rate": 9.578475006158006e-06, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6627422798080786, + "grad_norm": 0.28630530834198, + "learning_rate": 9.568695630968188e-06, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6631650144786625, + "grad_norm": 0.2326953411102295, + "learning_rate": 9.558920722306452e-06, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6635877491492463, + "grad_norm": 0.3162124752998352, + "learning_rate": 9.549150281252633e-06, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6640104838198306, + "grad_norm": 0.3065391182899475, + "learning_rate": 9.539384308886112e-06, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6644332184904145, + "grad_norm": 0.34794461727142334, + "learning_rate": 9.529622806285732e-06, + "loss": 0.3656, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6648559531609983, + "grad_norm": 0.2901691794395447, + "learning_rate": 9.519865774529879e-06, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6652786878315826, + "grad_norm": 0.216378316283226, + "learning_rate": 9.510113214696436e-06, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6657014225021665, + "grad_norm": 0.36816170811653137, + "learning_rate": 9.500365127862782e-06, + "loss": 0.3672, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6661241571727503, + "grad_norm": 0.39278993010520935, + "learning_rate": 9.490621515105807e-06, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6665468918433346, + "grad_norm": 0.23863351345062256, + "learning_rate": 9.480882377501926e-06, + "loss": 0.3561, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6669696265139184, + "grad_norm": 0.2419806867837906, + "learning_rate": 9.471147716127016e-06, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6673923611845023, + "grad_norm": 0.33084410429000854, + "learning_rate": 9.461417532056494e-06, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6678150958550866, + "grad_norm": 0.30066975951194763, + "learning_rate": 9.451691826365282e-06, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6682378305256704, + "grad_norm": 0.2799570560455322, + "learning_rate": 9.441970600127781e-06, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6686605651962547, + "grad_norm": 0.29955148696899414, + "learning_rate": 9.432253854417922e-06, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6690832998668386, + "grad_norm": 0.2168508768081665, + "learning_rate": 9.422541590309137e-06, + "loss": 0.3525, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.669506034537423, + "grad_norm": 0.23260486125946045, + "learning_rate": 9.412833808874372e-06, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6699287692080067, + "grad_norm": 0.305113285779953, + "learning_rate": 9.403130511186037e-06, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6703515038785905, + "grad_norm": 0.22176845371723175, + "learning_rate": 9.393431698316085e-06, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.670774238549175, + "grad_norm": 0.2236020565032959, + "learning_rate": 9.38373737133597e-06, + "loss": 0.3549, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6711969732197587, + "grad_norm": 0.25294268131256104, + "learning_rate": 9.374047531316648e-06, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6716197078903425, + "grad_norm": 0.3010854423046112, + "learning_rate": 9.364362179328573e-06, + "loss": 0.3547, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.672042442560927, + "grad_norm": 0.22559510171413422, + "learning_rate": 9.354681316441694e-06, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6724651772315107, + "grad_norm": 0.2184545248746872, + "learning_rate": 9.345004943725482e-06, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6728879119020945, + "grad_norm": 0.36309799551963806, + "learning_rate": 9.33533306224892e-06, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.673310646572679, + "grad_norm": 0.23120364546775818, + "learning_rate": 9.325665673080448e-06, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6737333812432627, + "grad_norm": 0.24693089723587036, + "learning_rate": 9.316002777288064e-06, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6741561159138465, + "grad_norm": 0.23765960335731506, + "learning_rate": 9.306344375939246e-06, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 75990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.674578850584431, + "grad_norm": 0.33461540937423706, + "learning_rate": 9.296690470100971e-06, + "loss": 0.3539, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6750015852550146, + "grad_norm": 0.23361985385417938, + "learning_rate": 9.287041060839735e-06, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6754243199255985, + "grad_norm": 0.28127938508987427, + "learning_rate": 9.277396149221534e-06, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6758470545961828, + "grad_norm": 0.3103938102722168, + "learning_rate": 9.267755736311844e-06, + "loss": 0.3646, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6762697892667666, + "grad_norm": 0.2582213580608368, + "learning_rate": 9.258119823175665e-06, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6766925239373505, + "grad_norm": 0.2766464948654175, + "learning_rate": 9.248488410877487e-06, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6771152586079348, + "grad_norm": 0.27035948634147644, + "learning_rate": 9.238861500481327e-06, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6775379932785186, + "grad_norm": 0.28495678305625916, + "learning_rate": 9.229239093050684e-06, + "loss": 0.3552, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.677960727949103, + "grad_norm": 0.2623845934867859, + "learning_rate": 9.219621189648564e-06, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6783834626196867, + "grad_norm": 0.2681528627872467, + "learning_rate": 9.210968928409237e-06, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.678806197290271, + "grad_norm": 0.27757924795150757, + "learning_rate": 9.201359585588114e-06, + "loss": 0.3656, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.679228931960855, + "grad_norm": 0.25615084171295166, + "learning_rate": 9.19175474987542e-06, + "loss": 0.3545, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6796516666314387, + "grad_norm": 0.2818496525287628, + "learning_rate": 9.18215442233223e-06, + "loss": 0.3534, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.680074401302023, + "grad_norm": 0.29354193806648254, + "learning_rate": 9.172558604019104e-06, + "loss": 0.3554, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.680497135972607, + "grad_norm": 0.26021096110343933, + "learning_rate": 9.162967295996105e-06, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6809198706431907, + "grad_norm": 0.2785954475402832, + "learning_rate": 9.153380499322812e-06, + "loss": 0.3547, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.681342605313775, + "grad_norm": 0.28469833731651306, + "learning_rate": 9.14379821505829e-06, + "loss": 0.3647, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.681765339984359, + "grad_norm": 0.22574619948863983, + "learning_rate": 9.134220444261138e-06, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6821880746549427, + "grad_norm": 0.39192554354667664, + "learning_rate": 9.124647187989394e-06, + "loss": 0.3671, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.682610809325527, + "grad_norm": 0.2811889946460724, + "learning_rate": 9.115078447300657e-06, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.683033543996111, + "grad_norm": 0.2640833854675293, + "learning_rate": 9.105514223252016e-06, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6834562786666947, + "grad_norm": 0.22716835141181946, + "learning_rate": 9.095954516900018e-06, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.683879013337279, + "grad_norm": 0.32272231578826904, + "learning_rate": 9.086399329300766e-06, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.684301748007863, + "grad_norm": 0.3153391480445862, + "learning_rate": 9.076848661509835e-06, + "loss": 0.3667, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6847244826784467, + "grad_norm": 0.3188698887825012, + "learning_rate": 9.067302514582305e-06, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.685147217349031, + "grad_norm": 0.3007587790489197, + "learning_rate": 9.057760889572763e-06, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.685569952019615, + "grad_norm": 0.252264142036438, + "learning_rate": 9.048223787535304e-06, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6859926866901986, + "grad_norm": 0.30954793095588684, + "learning_rate": 9.038691209523487e-06, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.686415421360783, + "grad_norm": 0.30226394534111023, + "learning_rate": 9.029163156590409e-06, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6868381560313668, + "grad_norm": 0.24167996644973755, + "learning_rate": 9.01963962978864e-06, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.687260890701951, + "grad_norm": 0.27644017338752747, + "learning_rate": 9.010120630170277e-06, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.687683625372535, + "grad_norm": 0.2905483841896057, + "learning_rate": 9.000606158786895e-06, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.688106360043119, + "grad_norm": 0.311830997467041, + "learning_rate": 8.991096216689576e-06, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.688529094713703, + "grad_norm": 0.3560536503791809, + "learning_rate": 8.981590804928913e-06, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.688951829384287, + "grad_norm": 0.3949386775493622, + "learning_rate": 8.972089924554993e-06, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.689374564054871, + "grad_norm": 0.259321391582489, + "learning_rate": 8.96259357661739e-06, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.689797298725455, + "grad_norm": 0.3016831874847412, + "learning_rate": 8.95310176216516e-06, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.690220033396039, + "grad_norm": 0.37441691756248474, + "learning_rate": 8.943614482246914e-06, + "loss": 0.3554, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.690642768066623, + "grad_norm": 0.27482661604881287, + "learning_rate": 8.934131737910717e-06, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.691065502737207, + "grad_norm": 0.26688042283058167, + "learning_rate": 8.924653530204152e-06, + "loss": 0.3506, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.691488237407791, + "grad_norm": 0.2472110241651535, + "learning_rate": 8.915179860174294e-06, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.691910972078375, + "grad_norm": 0.2775557041168213, + "learning_rate": 8.905710728867717e-06, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.692333706748959, + "grad_norm": 0.21075287461280823, + "learning_rate": 8.896246137330516e-06, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.692756441419543, + "grad_norm": 0.26950836181640625, + "learning_rate": 8.886786086608229e-06, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.693179176090127, + "grad_norm": 0.38832205533981323, + "learning_rate": 8.877330577745956e-06, + "loss": 0.3682, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.693601910760711, + "grad_norm": 0.2692161202430725, + "learning_rate": 8.867879611788243e-06, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.694024645431295, + "grad_norm": 0.347181111574173, + "learning_rate": 8.85843318977917e-06, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.694447380101879, + "grad_norm": 0.28029865026474, + "learning_rate": 8.848991312762301e-06, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.694870114772463, + "grad_norm": 0.32828205823898315, + "learning_rate": 8.83955398178069e-06, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.695292849443047, + "grad_norm": 0.31559625267982483, + "learning_rate": 8.830121197876928e-06, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.695715584113631, + "grad_norm": 0.29799914360046387, + "learning_rate": 8.820692962093035e-06, + "loss": 0.3688, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.696138318784215, + "grad_norm": 0.2685202956199646, + "learning_rate": 8.811269275470585e-06, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6965610534547992, + "grad_norm": 0.2926424741744995, + "learning_rate": 8.80185013905065e-06, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.696983788125383, + "grad_norm": 0.3164058029651642, + "learning_rate": 8.79243555387374e-06, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6974065227959674, + "grad_norm": 0.28305187821388245, + "learning_rate": 8.783025520979932e-06, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6978292574665512, + "grad_norm": 0.2587366998195648, + "learning_rate": 8.773620041408764e-06, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.698251992137135, + "grad_norm": 0.3202595114707947, + "learning_rate": 8.76421911619928e-06, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6986747268077194, + "grad_norm": 0.26038858294487, + "learning_rate": 8.754822746390029e-06, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.699097461478303, + "grad_norm": 0.34387385845184326, + "learning_rate": 8.745430933019027e-06, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.699520196148887, + "grad_norm": 0.28215673565864563, + "learning_rate": 8.736043677123818e-06, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.6999429308194713, + "grad_norm": 0.3331131339073181, + "learning_rate": 8.726660979741424e-06, + "loss": 0.3659, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.700365665490055, + "grad_norm": 0.27099481225013733, + "learning_rate": 8.717282841908392e-06, + "loss": 0.3534, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.700788400160639, + "grad_norm": 0.30630868673324585, + "learning_rate": 8.707909264660718e-06, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7012111348312233, + "grad_norm": 0.2721039056777954, + "learning_rate": 8.698540249033927e-06, + "loss": 0.3557, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.701633869501807, + "grad_norm": 0.27835893630981445, + "learning_rate": 8.689175796063048e-06, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.702056604172391, + "grad_norm": 0.26720356941223145, + "learning_rate": 8.679815906782567e-06, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7024793388429753, + "grad_norm": 0.23094302415847778, + "learning_rate": 8.670460582226509e-06, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.702902073513559, + "grad_norm": 0.2549624443054199, + "learning_rate": 8.661109823428359e-06, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.703324808184143, + "grad_norm": 0.28834086656570435, + "learning_rate": 8.651763631421128e-06, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7037475428547273, + "grad_norm": 0.35769587755203247, + "learning_rate": 8.642422007237306e-06, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.704170277525311, + "grad_norm": 0.3241812586784363, + "learning_rate": 8.633084951908892e-06, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.704593012195895, + "grad_norm": 0.21060296893119812, + "learning_rate": 8.623752466467343e-06, + "loss": 0.3558, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7050157468664793, + "grad_norm": 0.33785611391067505, + "learning_rate": 8.614424551943672e-06, + "loss": 0.3545, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.705438481537063, + "grad_norm": 0.29966476559638977, + "learning_rate": 8.60510120936831e-06, + "loss": 0.3552, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7058612162076474, + "grad_norm": 0.23856499791145325, + "learning_rate": 8.595782439771255e-06, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7062839508782313, + "grad_norm": 0.19412939250469208, + "learning_rate": 8.58646824418196e-06, + "loss": 0.3528, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7067066855488155, + "grad_norm": 0.273836612701416, + "learning_rate": 8.577158623629388e-06, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7071294202193994, + "grad_norm": 0.2172442525625229, + "learning_rate": 8.567853579141994e-06, + "loss": 0.3539, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7075521548899832, + "grad_norm": 0.2687985897064209, + "learning_rate": 8.55855311174773e-06, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7079748895605675, + "grad_norm": 0.2404228001832962, + "learning_rate": 8.549257222474027e-06, + "loss": 0.3556, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7083976242311514, + "grad_norm": 0.2785215675830841, + "learning_rate": 8.539965912347814e-06, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7088203589017352, + "grad_norm": 0.2640668451786041, + "learning_rate": 8.530679182395528e-06, + "loss": 0.3646, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7092430935723195, + "grad_norm": 0.2642439603805542, + "learning_rate": 8.521397033643091e-06, + "loss": 0.3551, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7096658282429034, + "grad_norm": 0.2338387668132782, + "learning_rate": 8.51211946711593e-06, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.710088562913487, + "grad_norm": 0.23741617798805237, + "learning_rate": 8.502846483838945e-06, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7105112975840715, + "grad_norm": 0.3620752692222595, + "learning_rate": 8.493578084836563e-06, + "loss": 0.3544, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7109340322546553, + "grad_norm": 0.3205115795135498, + "learning_rate": 8.484314271132654e-06, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.711356766925239, + "grad_norm": 0.4645453691482544, + "learning_rate": 8.475055043750619e-06, + "loss": 0.3665, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7117795015958235, + "grad_norm": 0.2603153586387634, + "learning_rate": 8.465800403713365e-06, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7122022362664073, + "grad_norm": 0.24683211743831635, + "learning_rate": 8.456550352043235e-06, + "loss": 0.3531, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.712624970936991, + "grad_norm": 0.3570384979248047, + "learning_rate": 8.447304889762126e-06, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7130477056075755, + "grad_norm": 0.28765109181404114, + "learning_rate": 8.438064017891389e-06, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7134704402781593, + "grad_norm": 0.28126633167266846, + "learning_rate": 8.428827737451894e-06, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.713893174948743, + "grad_norm": 0.3244086802005768, + "learning_rate": 8.419596049464e-06, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7143159096193274, + "grad_norm": 0.26516276597976685, + "learning_rate": 8.410368954947522e-06, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7147386442899113, + "grad_norm": 0.22754809260368347, + "learning_rate": 8.40114645492181e-06, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7151613789604956, + "grad_norm": 0.30417364835739136, + "learning_rate": 8.391928550405692e-06, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7155841136310794, + "grad_norm": 0.23338115215301514, + "learning_rate": 8.382715242417505e-06, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7160068483016637, + "grad_norm": 0.34598177671432495, + "learning_rate": 8.373506531975034e-06, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7164295829722476, + "grad_norm": 0.23511256277561188, + "learning_rate": 8.364302420095593e-06, + "loss": 0.3563, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 76990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7168523176428314, + "grad_norm": 0.3077768385410309, + "learning_rate": 8.355102907795997e-06, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7172750523134157, + "grad_norm": 0.27655723690986633, + "learning_rate": 8.345907996092511e-06, + "loss": 0.365, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7176977869839996, + "grad_norm": 0.2719222903251648, + "learning_rate": 8.336717686000916e-06, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7181205216545834, + "grad_norm": 0.2583894431591034, + "learning_rate": 8.3275319785365e-06, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7185432563251677, + "grad_norm": 0.38953420519828796, + "learning_rate": 8.318350874714014e-06, + "loss": 0.3543, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7189659909957515, + "grad_norm": 0.2760845422744751, + "learning_rate": 8.309174375547724e-06, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7193887256663354, + "grad_norm": 0.2564452290534973, + "learning_rate": 8.300002482051377e-06, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7198114603369197, + "grad_norm": 0.2511036694049835, + "learning_rate": 8.290835195238195e-06, + "loss": 0.3656, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7202341950075035, + "grad_norm": 0.23728545010089874, + "learning_rate": 8.281672516120931e-06, + "loss": 0.3557, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7206569296780874, + "grad_norm": 0.2682809829711914, + "learning_rate": 8.272514445711777e-06, + "loss": 0.3548, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7210796643486717, + "grad_norm": 0.2978539764881134, + "learning_rate": 8.26336098502245e-06, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7215023990192555, + "grad_norm": 0.3309296667575836, + "learning_rate": 8.254212135064165e-06, + "loss": 0.3539, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7219251336898393, + "grad_norm": 0.31451812386512756, + "learning_rate": 8.245067896847602e-06, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7223478683604236, + "grad_norm": 0.36561429500579834, + "learning_rate": 8.235928271382953e-06, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7227706030310075, + "grad_norm": 0.23232749104499817, + "learning_rate": 8.2267932596799e-06, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7231933377015913, + "grad_norm": 0.2910652160644531, + "learning_rate": 8.217662862747582e-06, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7236160723721756, + "grad_norm": 0.3009723722934723, + "learning_rate": 8.208537081594659e-06, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7240388070427595, + "grad_norm": 0.3437750041484833, + "learning_rate": 8.19941591722928e-06, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7244615417133438, + "grad_norm": 0.2808259427547455, + "learning_rate": 8.19029937065907e-06, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7248842763839276, + "grad_norm": 0.29939600825309753, + "learning_rate": 8.18118744289117e-06, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.725307011054512, + "grad_norm": 0.22765551507472992, + "learning_rate": 8.172080134932175e-06, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7257297457250957, + "grad_norm": 0.2380761206150055, + "learning_rate": 8.162977447788206e-06, + "loss": 0.3532, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7261524803956796, + "grad_norm": 0.257631778717041, + "learning_rate": 8.153879382464835e-06, + "loss": 0.3557, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.726575215066264, + "grad_norm": 0.38183024525642395, + "learning_rate": 8.144785939967154e-06, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7269979497368477, + "grad_norm": 0.3762642741203308, + "learning_rate": 8.13569712129974e-06, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7274206844074316, + "grad_norm": 0.32493314146995544, + "learning_rate": 8.126612927466643e-06, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.727843419078016, + "grad_norm": 0.28994041681289673, + "learning_rate": 8.117533359471408e-06, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7282661537485997, + "grad_norm": 0.3655250370502472, + "learning_rate": 8.10845841831709e-06, + "loss": 0.3656, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7286888884191836, + "grad_norm": 0.3114764392375946, + "learning_rate": 8.099388105006195e-06, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.729111623089768, + "grad_norm": 0.2752760052680969, + "learning_rate": 8.090322420540769e-06, + "loss": 0.3563, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7295343577603517, + "grad_norm": 0.2677507698535919, + "learning_rate": 8.081261365922289e-06, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7299570924309355, + "grad_norm": 0.2731553018093109, + "learning_rate": 8.072204942151757e-06, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.73037982710152, + "grad_norm": 0.23941770195960999, + "learning_rate": 8.063153150229652e-06, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7308025617721037, + "grad_norm": 0.31117770075798035, + "learning_rate": 8.054105991155964e-06, + "loss": 0.3557, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7312252964426875, + "grad_norm": 0.25741052627563477, + "learning_rate": 8.045063465930114e-06, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.731648031113272, + "grad_norm": 0.21464528143405914, + "learning_rate": 8.03602557555107e-06, + "loss": 0.3516, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7320707657838557, + "grad_norm": 0.30125346779823303, + "learning_rate": 8.026992321017279e-06, + "loss": 0.3643, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7324935004544395, + "grad_norm": 0.31452813744544983, + "learning_rate": 8.017963703326636e-06, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.732916235125024, + "grad_norm": 0.2480584681034088, + "learning_rate": 8.00893972347656e-06, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7333389697956076, + "grad_norm": 0.32308217883110046, + "learning_rate": 7.999920382463955e-06, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.733761704466192, + "grad_norm": 0.26794350147247314, + "learning_rate": 7.990905681285198e-06, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.734184439136776, + "grad_norm": 0.23470965027809143, + "learning_rate": 7.981895620936164e-06, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.73460717380736, + "grad_norm": 0.2446546107530594, + "learning_rate": 7.972890202412232e-06, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.735029908477944, + "grad_norm": 0.27999797463417053, + "learning_rate": 7.963889426708209e-06, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7354526431485278, + "grad_norm": 0.23607614636421204, + "learning_rate": 7.954893294818472e-06, + "loss": 0.3553, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.735875377819112, + "grad_norm": 0.2871503531932831, + "learning_rate": 7.945901807736799e-06, + "loss": 0.3534, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.736298112489696, + "grad_norm": 0.23538973927497864, + "learning_rate": 7.936914966456528e-06, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7367208471602797, + "grad_norm": 0.24533884227275848, + "learning_rate": 7.927932771970436e-06, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.737143581830864, + "grad_norm": 0.232036754488945, + "learning_rate": 7.918955225270818e-06, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.737566316501448, + "grad_norm": 0.26417359709739685, + "learning_rate": 7.909982327349436e-06, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7379890511720317, + "grad_norm": 0.31632333993911743, + "learning_rate": 7.901014079197561e-06, + "loss": 0.3554, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.738411785842616, + "grad_norm": 0.2970750033855438, + "learning_rate": 7.892050481805913e-06, + "loss": 0.3725, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7388345205132, + "grad_norm": 0.2939678728580475, + "learning_rate": 7.883091536164711e-06, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7392572551837837, + "grad_norm": 0.20400108397006989, + "learning_rate": 7.874137243263679e-06, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.739679989854368, + "grad_norm": 0.24793632328510284, + "learning_rate": 7.865187604092017e-06, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.740102724524952, + "grad_norm": 0.2954871654510498, + "learning_rate": 7.85624261963841e-06, + "loss": 0.3539, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7405254591955357, + "grad_norm": 0.29210934042930603, + "learning_rate": 7.847302290891029e-06, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.74094819386612, + "grad_norm": 0.2943618595600128, + "learning_rate": 7.838366618837528e-06, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.741370928536704, + "grad_norm": 0.3110719323158264, + "learning_rate": 7.829435604465063e-06, + "loss": 0.3532, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7417936632072877, + "grad_norm": 0.25085583329200745, + "learning_rate": 7.820509248760238e-06, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.742216397877872, + "grad_norm": 0.2505871653556824, + "learning_rate": 7.811587552709187e-06, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.742639132548456, + "grad_norm": 0.2639663815498352, + "learning_rate": 7.802670517297483e-06, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.74306186721904, + "grad_norm": 0.23653462529182434, + "learning_rate": 7.793758143510227e-06, + "loss": 0.3548, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.743484601889624, + "grad_norm": 0.2676642835140228, + "learning_rate": 7.78485043233198e-06, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7439073365602082, + "grad_norm": 0.31866589188575745, + "learning_rate": 7.775947384746796e-06, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.744330071230792, + "grad_norm": 0.28224843740463257, + "learning_rate": 7.76704900173823e-06, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.744752805901376, + "grad_norm": 0.33713775873184204, + "learning_rate": 7.758155284289275e-06, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7451755405719602, + "grad_norm": 0.33332735300064087, + "learning_rate": 7.749266233382452e-06, + "loss": 0.3544, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.745598275242544, + "grad_norm": 0.27540674805641174, + "learning_rate": 7.74038184999975e-06, + "loss": 0.37, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.746021009913128, + "grad_norm": 0.31406787037849426, + "learning_rate": 7.731502135122664e-06, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.746443744583712, + "grad_norm": 0.2658708095550537, + "learning_rate": 7.722627089732121e-06, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.746866479254296, + "grad_norm": 0.26492834091186523, + "learning_rate": 7.713756714808579e-06, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.74728921392488, + "grad_norm": 0.2768579125404358, + "learning_rate": 7.704891011331978e-06, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.747711948595464, + "grad_norm": 0.26543527841567993, + "learning_rate": 7.696029980281721e-06, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.748134683266048, + "grad_norm": 0.27996939420700073, + "learning_rate": 7.6871736226367e-06, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.748557417936632, + "grad_norm": 0.24668996036052704, + "learning_rate": 7.678321939375293e-06, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.748980152607216, + "grad_norm": 0.21146300435066223, + "learning_rate": 7.669474931475373e-06, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7494028872778, + "grad_norm": 0.2998998165130615, + "learning_rate": 7.660632599914285e-06, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.749825621948384, + "grad_norm": 0.24689795076847076, + "learning_rate": 7.651794945668867e-06, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.750248356618968, + "grad_norm": 0.32685765624046326, + "learning_rate": 7.642961969715412e-06, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.750671091289552, + "grad_norm": 0.2534070312976837, + "learning_rate": 7.634133673029736e-06, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.751093825960136, + "grad_norm": 0.22022657096385956, + "learning_rate": 7.6253100565870986e-06, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.75151656063072, + "grad_norm": 0.2456173449754715, + "learning_rate": 7.616491121362274e-06, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.751939295301304, + "grad_norm": 0.33958831429481506, + "learning_rate": 7.607676868329511e-06, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7523620299718883, + "grad_norm": 0.25162696838378906, + "learning_rate": 7.598867298462537e-06, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.752784764642472, + "grad_norm": 0.2692852020263672, + "learning_rate": 7.5900624127345554e-06, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7532074993130564, + "grad_norm": 0.2652633488178253, + "learning_rate": 7.581262212118278e-06, + "loss": 0.3662, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7536302339836403, + "grad_norm": 0.27007678151130676, + "learning_rate": 7.572466697585862e-06, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.754052968654224, + "grad_norm": 0.2582533359527588, + "learning_rate": 7.563675870108977e-06, + "loss": 0.3662, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7544757033248084, + "grad_norm": 0.30111366510391235, + "learning_rate": 7.554889730658754e-06, + "loss": 0.3661, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7548984379953922, + "grad_norm": 0.24594268202781677, + "learning_rate": 7.5461082802058155e-06, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.755321172665976, + "grad_norm": 0.31101053953170776, + "learning_rate": 7.53733151972027e-06, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7557439073365604, + "grad_norm": 0.24251754581928253, + "learning_rate": 7.5285594501717074e-06, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7561666420071442, + "grad_norm": 0.3217978775501251, + "learning_rate": 7.519792072529192e-06, + "loss": 0.3507, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.756589376677728, + "grad_norm": 0.23077484965324402, + "learning_rate": 7.511029387761282e-06, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7570121113483124, + "grad_norm": 0.25644299387931824, + "learning_rate": 7.502271396835997e-06, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.757434846018896, + "grad_norm": 0.22205699980258942, + "learning_rate": 7.493518100720864e-06, + "loss": 0.3523, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.75785758068948, + "grad_norm": 0.26006314158439636, + "learning_rate": 7.4847695003828545e-06, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7582803153600643, + "grad_norm": 0.2543211579322815, + "learning_rate": 7.476025596788461e-06, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.758703050030648, + "grad_norm": 0.22300681471824646, + "learning_rate": 7.467286390903638e-06, + "loss": 0.3563, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 77990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.759125784701232, + "grad_norm": 0.27597326040267944, + "learning_rate": 7.458551883693821e-06, + "loss": 0.365, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7595485193718163, + "grad_norm": 0.3051154613494873, + "learning_rate": 7.4498220761239415e-06, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7599712540424, + "grad_norm": 0.2622944712638855, + "learning_rate": 7.441096969158378e-06, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.760393988712984, + "grad_norm": 0.317008376121521, + "learning_rate": 7.432376563761018e-06, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7608167233835683, + "grad_norm": 0.26005667448043823, + "learning_rate": 7.423660860895226e-06, + "loss": 0.3487, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.761239458054152, + "grad_norm": 0.2543475031852722, + "learning_rate": 7.414949861523851e-06, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7616621927247365, + "grad_norm": 0.19529160857200623, + "learning_rate": 7.4062435666092e-06, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7620849273953203, + "grad_norm": 0.30369290709495544, + "learning_rate": 7.397541977113076e-06, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7625076620659046, + "grad_norm": 0.273782342672348, + "learning_rate": 7.388845093996766e-06, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7629303967364884, + "grad_norm": 0.3069462776184082, + "learning_rate": 7.380152918221045e-06, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7633531314070723, + "grad_norm": 0.25475794076919556, + "learning_rate": 7.371465450746135e-06, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7637758660776566, + "grad_norm": 0.3326147198677063, + "learning_rate": 7.362782692531761e-06, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7641986007482404, + "grad_norm": 0.2603188455104828, + "learning_rate": 7.35410464453713e-06, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7646213354188243, + "grad_norm": 0.23760242760181427, + "learning_rate": 7.3454313077209235e-06, + "loss": 0.3652, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7650440700894086, + "grad_norm": 0.2631063163280487, + "learning_rate": 7.336762683041315e-06, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7654668047599924, + "grad_norm": 0.3085807263851166, + "learning_rate": 7.3280987714559215e-06, + "loss": 0.3688, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7658895394305762, + "grad_norm": 0.3178880512714386, + "learning_rate": 7.319439573921883e-06, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7663122741011605, + "grad_norm": 0.27771732211112976, + "learning_rate": 7.311650327445768e-06, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7667350087717444, + "grad_norm": 0.3883077800273895, + "learning_rate": 7.303000089244283e-06, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7671577434423282, + "grad_norm": 0.3296222984790802, + "learning_rate": 7.29435456786684e-06, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7675804781129125, + "grad_norm": 0.23772361874580383, + "learning_rate": 7.285713764268542e-06, + "loss": 0.3521, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7680032127834964, + "grad_norm": 0.34080934524536133, + "learning_rate": 7.2770776794039375e-06, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.76842594745408, + "grad_norm": 0.21472840011119843, + "learning_rate": 7.268446314227084e-06, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7688486821246645, + "grad_norm": 0.28896915912628174, + "learning_rate": 7.259819669691498e-06, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7692714167952484, + "grad_norm": 0.37847405672073364, + "learning_rate": 7.251197746750194e-06, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.769694151465832, + "grad_norm": 0.2512413263320923, + "learning_rate": 7.242580546355643e-06, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7701168861364165, + "grad_norm": 0.27002617716789246, + "learning_rate": 7.2339680694598235e-06, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7705396208070003, + "grad_norm": 0.27418097853660583, + "learning_rate": 7.225360317014146e-06, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7709623554775846, + "grad_norm": 0.3000577688217163, + "learning_rate": 7.21675728996955e-06, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7713850901481685, + "grad_norm": 0.29041457176208496, + "learning_rate": 7.208158989276409e-06, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7718078248187528, + "grad_norm": 0.2535685896873474, + "learning_rate": 7.1995654158846015e-06, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7722305594893366, + "grad_norm": 0.3610752820968628, + "learning_rate": 7.1909765707434855e-06, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7726532941599205, + "grad_norm": 0.28740474581718445, + "learning_rate": 7.1823924548018785e-06, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7730760288305047, + "grad_norm": 0.25785207748413086, + "learning_rate": 7.173813069008101e-06, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7734987635010886, + "grad_norm": 0.23003482818603516, + "learning_rate": 7.16523841430991e-06, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7739214981716724, + "grad_norm": 0.23129227757453918, + "learning_rate": 7.1566684916545775e-06, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7743442328422567, + "grad_norm": 0.28025493025779724, + "learning_rate": 7.148103301988845e-06, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7747669675128406, + "grad_norm": 0.2598220109939575, + "learning_rate": 7.1395428462589284e-06, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7751897021834244, + "grad_norm": 0.2590217888355255, + "learning_rate": 7.130987125410504e-06, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7756124368540087, + "grad_norm": 0.2208261787891388, + "learning_rate": 7.12243614038875e-06, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7760351715245926, + "grad_norm": 0.22728946805000305, + "learning_rate": 7.113889892138309e-06, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7764579061951764, + "grad_norm": 0.27524644136428833, + "learning_rate": 7.105348381603311e-06, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7768806408657607, + "grad_norm": 0.3332954943180084, + "learning_rate": 7.096811609727333e-06, + "loss": 0.3561, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7773033755363445, + "grad_norm": 0.23503436148166656, + "learning_rate": 7.088279577453466e-06, + "loss": 0.3671, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7777261102069284, + "grad_norm": 0.35434406995773315, + "learning_rate": 7.0797522857242504e-06, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7781488448775127, + "grad_norm": 0.2694382667541504, + "learning_rate": 7.071229735481721e-06, + "loss": 0.3556, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7785715795480965, + "grad_norm": 0.23775243759155273, + "learning_rate": 7.062711927667398e-06, + "loss": 0.3517, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7789943142186804, + "grad_norm": 0.2811669409275055, + "learning_rate": 7.0541988632222275e-06, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7794170488892647, + "grad_norm": 0.24925313889980316, + "learning_rate": 7.045690543086686e-06, + "loss": 0.3556, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7798397835598485, + "grad_norm": 0.2444884479045868, + "learning_rate": 7.037186968200693e-06, + "loss": 0.3517, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.780262518230433, + "grad_norm": 0.2988017201423645, + "learning_rate": 7.028688139503664e-06, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7806852529010166, + "grad_norm": 0.31754401326179504, + "learning_rate": 7.020194057934476e-06, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.781107987571601, + "grad_norm": 0.40647369623184204, + "learning_rate": 7.0117047244314895e-06, + "loss": 0.3544, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.781530722242185, + "grad_norm": 0.20913803577423096, + "learning_rate": 7.003220139932542e-06, + "loss": 0.3536, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7819534569127686, + "grad_norm": 0.292589396238327, + "learning_rate": 6.994740305374942e-06, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.782376191583353, + "grad_norm": 0.3629024624824524, + "learning_rate": 6.986265221695481e-06, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7827989262539368, + "grad_norm": 0.3472800552845001, + "learning_rate": 6.977794889830413e-06, + "loss": 0.3539, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7832216609245206, + "grad_norm": 0.4057736396789551, + "learning_rate": 6.969329310715456e-06, + "loss": 0.3665, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.783644395595105, + "grad_norm": 0.2441154420375824, + "learning_rate": 6.960868485285838e-06, + "loss": 0.3673, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7840671302656887, + "grad_norm": 0.2802465260028839, + "learning_rate": 6.952412414476233e-06, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7844898649362726, + "grad_norm": 0.2931550145149231, + "learning_rate": 6.943961099220808e-06, + "loss": 0.3665, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.784912599606857, + "grad_norm": 0.339110404253006, + "learning_rate": 6.935514540453197e-06, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7853353342774407, + "grad_norm": 0.27339693903923035, + "learning_rate": 6.927072739106516e-06, + "loss": 0.3654, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7857580689480246, + "grad_norm": 0.30428603291511536, + "learning_rate": 6.918635696113329e-06, + "loss": 0.3677, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.786180803618609, + "grad_norm": 0.3230220377445221, + "learning_rate": 6.910203412405714e-06, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7866035382891927, + "grad_norm": 0.23700040578842163, + "learning_rate": 6.90177588891518e-06, + "loss": 0.3509, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7870262729597766, + "grad_norm": 0.36944133043289185, + "learning_rate": 6.893353126572738e-06, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.787449007630361, + "grad_norm": 0.24845048785209656, + "learning_rate": 6.884935126308884e-06, + "loss": 0.3661, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7878717423009447, + "grad_norm": 0.29697132110595703, + "learning_rate": 6.87652188905355e-06, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7882944769715285, + "grad_norm": 0.2941305339336395, + "learning_rate": 6.868113415736183e-06, + "loss": 0.3659, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.788717211642113, + "grad_norm": 0.2825191915035248, + "learning_rate": 6.859709707285683e-06, + "loss": 0.3506, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7891399463126967, + "grad_norm": 0.25810152292251587, + "learning_rate": 6.851310764630409e-06, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.789562680983281, + "grad_norm": 0.2922617495059967, + "learning_rate": 6.8429165886982116e-06, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.789985415653865, + "grad_norm": 0.2561768591403961, + "learning_rate": 6.834527180416434e-06, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.790408150324449, + "grad_norm": 0.2795711159706116, + "learning_rate": 6.826142540711844e-06, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.790830884995033, + "grad_norm": 0.30200624465942383, + "learning_rate": 6.817762670510719e-06, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.791253619665617, + "grad_norm": 0.24971184134483337, + "learning_rate": 6.809387570738801e-06, + "loss": 0.3492, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.791676354336201, + "grad_norm": 0.26198622584342957, + "learning_rate": 6.801017242321317e-06, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.792099089006785, + "grad_norm": 0.3354981243610382, + "learning_rate": 6.7926516861829325e-06, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.792521823677369, + "grad_norm": 0.2632620632648468, + "learning_rate": 6.78429090324782e-06, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.792944558347953, + "grad_norm": 0.2504403591156006, + "learning_rate": 6.775934894439606e-06, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.793367293018537, + "grad_norm": 0.2315555214881897, + "learning_rate": 6.767583660681404e-06, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7937900276891208, + "grad_norm": 0.2979613244533539, + "learning_rate": 6.75923720289579e-06, + "loss": 0.3667, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.794212762359705, + "grad_norm": 0.23218359053134918, + "learning_rate": 6.7508955220048074e-06, + "loss": 0.3506, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.794635497030289, + "grad_norm": 0.2761923372745514, + "learning_rate": 6.742558618929979e-06, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7950582317008728, + "grad_norm": 0.30186066031455994, + "learning_rate": 6.734226494592316e-06, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.795480966371457, + "grad_norm": 0.2655470073223114, + "learning_rate": 6.725899149912257e-06, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.795903701042041, + "grad_norm": 0.24160370230674744, + "learning_rate": 6.717576585809759e-06, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7963264357126247, + "grad_norm": 0.3064635097980499, + "learning_rate": 6.709258803204227e-06, + "loss": 0.3656, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.796749170383209, + "grad_norm": 0.3599874973297119, + "learning_rate": 6.700945803014547e-06, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.797171905053793, + "grad_norm": 0.2537328004837036, + "learning_rate": 6.6926375861590676e-06, + "loss": 0.365, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7975946397243767, + "grad_norm": 0.30504265427589417, + "learning_rate": 6.684334153555633e-06, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.798017374394961, + "grad_norm": 0.31154313683509827, + "learning_rate": 6.6760355061215255e-06, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.798440109065545, + "grad_norm": 0.36358100175857544, + "learning_rate": 6.667741644773501e-06, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.798862843736129, + "grad_norm": 0.29398614168167114, + "learning_rate": 6.65945257042781e-06, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.799285578406713, + "grad_norm": 0.2867151200771332, + "learning_rate": 6.651168284000164e-06, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.7997083130772973, + "grad_norm": 0.29739195108413696, + "learning_rate": 6.642888786405743e-06, + "loss": 0.3491, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.800131047747881, + "grad_norm": 0.25195711851119995, + "learning_rate": 6.634614078559209e-06, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.800553782418465, + "grad_norm": 0.30121150612831116, + "learning_rate": 6.626344161374687e-06, + "loss": 0.3706, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8009765170890493, + "grad_norm": 0.27467966079711914, + "learning_rate": 6.6180790357657505e-06, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 78990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.801399251759633, + "grad_norm": 0.34678149223327637, + "learning_rate": 6.6098187026454875e-06, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.801821986430217, + "grad_norm": 0.2847349941730499, + "learning_rate": 6.6015631629264165e-06, + "loss": 0.3665, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8022447211008013, + "grad_norm": 0.25087445974349976, + "learning_rate": 6.593312417520553e-06, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.802667455771385, + "grad_norm": 0.43185439705848694, + "learning_rate": 6.585066467339368e-06, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.803090190441969, + "grad_norm": 0.2675507664680481, + "learning_rate": 6.57682531329381e-06, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8035129251125532, + "grad_norm": 0.2823222279548645, + "learning_rate": 6.568588956294303e-06, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.803935659783137, + "grad_norm": 0.23730742931365967, + "learning_rate": 6.560357397250738e-06, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.804358394453721, + "grad_norm": 0.27262362837791443, + "learning_rate": 6.5521306370724565e-06, + "loss": 0.3561, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.804781129124305, + "grad_norm": 0.2878972589969635, + "learning_rate": 6.543908676668287e-06, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.805203863794889, + "grad_norm": 0.2601111829280853, + "learning_rate": 6.535691516946541e-06, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.805626598465473, + "grad_norm": 0.2517690658569336, + "learning_rate": 6.527479158814964e-06, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.806049333136057, + "grad_norm": 0.274471253156662, + "learning_rate": 6.519271603180804e-06, + "loss": 0.3651, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.806472067806641, + "grad_norm": 0.28986939787864685, + "learning_rate": 6.511068850950769e-06, + "loss": 0.3543, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.806894802477225, + "grad_norm": 0.27232202887535095, + "learning_rate": 6.502870903031033e-06, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.807317537147809, + "grad_norm": 0.27161386609077454, + "learning_rate": 6.494677760327228e-06, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.807740271818393, + "grad_norm": 0.23807238042354584, + "learning_rate": 6.486489423744479e-06, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8081630064889773, + "grad_norm": 0.27229174971580505, + "learning_rate": 6.478305894187364e-06, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.808585741159561, + "grad_norm": 0.3014351427555084, + "learning_rate": 6.470127172559931e-06, + "loss": 0.3652, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8090084758301455, + "grad_norm": 0.2314908802509308, + "learning_rate": 6.461953259765719e-06, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8094312105007293, + "grad_norm": 0.3007337152957916, + "learning_rate": 6.453784156707687e-06, + "loss": 0.3661, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.809853945171313, + "grad_norm": 0.29621925950050354, + "learning_rate": 6.445619864288304e-06, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8102766798418974, + "grad_norm": 0.28770118951797485, + "learning_rate": 6.437460383409516e-06, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8106994145124813, + "grad_norm": 0.2917441129684448, + "learning_rate": 6.429305714972688e-06, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.811122149183065, + "grad_norm": 0.2848264276981354, + "learning_rate": 6.421155859878691e-06, + "loss": 0.3653, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8115448838536494, + "grad_norm": 0.3114973306655884, + "learning_rate": 6.413010819027865e-06, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8119676185242333, + "grad_norm": 0.18615379929542542, + "learning_rate": 6.40487059332e-06, + "loss": 0.3486, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.812390353194817, + "grad_norm": 0.2103259265422821, + "learning_rate": 6.396735183654379e-06, + "loss": 0.3674, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8128130878654014, + "grad_norm": 0.41372957825660706, + "learning_rate": 6.388604590929714e-06, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8132358225359853, + "grad_norm": 0.41969868540763855, + "learning_rate": 6.380478816044233e-06, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.813658557206569, + "grad_norm": 0.27526143193244934, + "learning_rate": 6.372357859895578e-06, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8140812918771534, + "grad_norm": 0.2277250736951828, + "learning_rate": 6.364241723380904e-06, + "loss": 0.3694, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8145040265477372, + "grad_norm": 0.22531458735466003, + "learning_rate": 6.356130407396815e-06, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.814926761218321, + "grad_norm": 0.25421780347824097, + "learning_rate": 6.3480239128393906e-06, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8153494958889054, + "grad_norm": 0.23639144003391266, + "learning_rate": 6.339922240604163e-06, + "loss": 0.3558, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.815772230559489, + "grad_norm": 0.23082658648490906, + "learning_rate": 6.331825391586149e-06, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8161949652300735, + "grad_norm": 0.2712261974811554, + "learning_rate": 6.323733366679813e-06, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8166176999006574, + "grad_norm": 0.20522171258926392, + "learning_rate": 6.315646166779115e-06, + "loss": 0.355, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.817040434571241, + "grad_norm": 0.41501277685165405, + "learning_rate": 6.307563792777438e-06, + "loss": 0.3679, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8174631692418255, + "grad_norm": 0.2426774799823761, + "learning_rate": 6.299486245567676e-06, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8178859039124093, + "grad_norm": 0.22953350841999054, + "learning_rate": 6.291413526042167e-06, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8183086385829936, + "grad_norm": 0.30405187606811523, + "learning_rate": 6.283345635092719e-06, + "loss": 0.3515, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8187313732535775, + "grad_norm": 0.317634254693985, + "learning_rate": 6.275282573610613e-06, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8191541079241613, + "grad_norm": 0.24670182168483734, + "learning_rate": 6.267224342486605e-06, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8195768425947456, + "grad_norm": 0.37496694922447205, + "learning_rate": 6.259170942610876e-06, + "loss": 0.3684, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8199995772653295, + "grad_norm": 0.3023803234100342, + "learning_rate": 6.251122374873125e-06, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8204223119359133, + "grad_norm": 0.3564065992832184, + "learning_rate": 6.243078640162469e-06, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8208450466064976, + "grad_norm": 0.3735743761062622, + "learning_rate": 6.235039739367538e-06, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8212677812770814, + "grad_norm": 0.26267480850219727, + "learning_rate": 6.2270056733763905e-06, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8216905159476653, + "grad_norm": 0.22876697778701782, + "learning_rate": 6.218976443076574e-06, + "loss": 0.3651, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8221132506182496, + "grad_norm": 0.21102608740329742, + "learning_rate": 6.210952049355106e-06, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8225359852888334, + "grad_norm": 0.2679338753223419, + "learning_rate": 6.202932493098434e-06, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8229587199594173, + "grad_norm": 0.2226077914237976, + "learning_rate": 6.194917775192505e-06, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8233814546300016, + "grad_norm": 0.2388579398393631, + "learning_rate": 6.186907896522715e-06, + "loss": 0.3558, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8238041893005854, + "grad_norm": 0.23890264332294464, + "learning_rate": 6.178902857973956e-06, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8242269239711693, + "grad_norm": 0.25940898060798645, + "learning_rate": 6.170902660430528e-06, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8246496586417535, + "grad_norm": 0.278273344039917, + "learning_rate": 6.162907304776244e-06, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8250723933123374, + "grad_norm": 0.33198392391204834, + "learning_rate": 6.154916791894367e-06, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8254951279829217, + "grad_norm": 0.27876463532447815, + "learning_rate": 6.146931122667638e-06, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8259178626535055, + "grad_norm": 0.2648533582687378, + "learning_rate": 6.1389502979782185e-06, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8263405973240894, + "grad_norm": 0.29123297333717346, + "learning_rate": 6.130974318707794e-06, + "loss": 0.3563, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8267633319946737, + "grad_norm": 0.29339125752449036, + "learning_rate": 6.123003185737475e-06, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8271860666652575, + "grad_norm": 0.2549331784248352, + "learning_rate": 6.115036899947846e-06, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.827608801335842, + "grad_norm": 0.31743958592414856, + "learning_rate": 6.1070754622189805e-06, + "loss": 0.3543, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8280315360064257, + "grad_norm": 0.26782602071762085, + "learning_rate": 6.099118873430365e-06, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8284542706770095, + "grad_norm": 0.23639322817325592, + "learning_rate": 6.091167134461007e-06, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.828877005347594, + "grad_norm": 0.3981369137763977, + "learning_rate": 6.083220246189325e-06, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8292997400181776, + "grad_norm": 0.2609368562698364, + "learning_rate": 6.07527820949324e-06, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8297224746887615, + "grad_norm": 0.2184845507144928, + "learning_rate": 6.067341025250128e-06, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8301452093593458, + "grad_norm": 0.263813316822052, + "learning_rate": 6.059408694336821e-06, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8305679440299296, + "grad_norm": 0.3105104863643646, + "learning_rate": 6.051481217629618e-06, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8309906787005135, + "grad_norm": 0.3492213189601898, + "learning_rate": 6.0435585960042955e-06, + "loss": 0.3535, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8314134133710978, + "grad_norm": 0.2997484803199768, + "learning_rate": 6.035640830336076e-06, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8318361480416816, + "grad_norm": 0.3111365735530853, + "learning_rate": 6.0277279214996544e-06, + "loss": 0.3548, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8322588827122654, + "grad_norm": 0.2592752277851105, + "learning_rate": 6.019819870369165e-06, + "loss": 0.352, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8326816173828497, + "grad_norm": 0.25985005497932434, + "learning_rate": 6.011916677818241e-06, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8331043520534336, + "grad_norm": 0.307467520236969, + "learning_rate": 6.004018344719964e-06, + "loss": 0.368, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8335270867240174, + "grad_norm": 0.39338499307632446, + "learning_rate": 5.996124871946879e-06, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8339498213946017, + "grad_norm": 0.2511376738548279, + "learning_rate": 5.988236260370994e-06, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8343725560651856, + "grad_norm": 0.24462281167507172, + "learning_rate": 5.98035251086379e-06, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.83479529073577, + "grad_norm": 0.34459444880485535, + "learning_rate": 5.972473624296182e-06, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8352180254063537, + "grad_norm": 0.296822190284729, + "learning_rate": 5.964599601538584e-06, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8356407600769375, + "grad_norm": 0.2720862030982971, + "learning_rate": 5.95673044346084e-06, + "loss": 0.3646, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.836063494747522, + "grad_norm": 0.29036763310432434, + "learning_rate": 5.94886615093227e-06, + "loss": 0.3652, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8364862294181057, + "grad_norm": 0.2282905876636505, + "learning_rate": 5.941006724821674e-06, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.83690896408869, + "grad_norm": 0.3303196430206299, + "learning_rate": 5.933152165997286e-06, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.837331698759274, + "grad_norm": 0.2855634093284607, + "learning_rate": 5.925302475326821e-06, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8377544334298577, + "grad_norm": 0.2543388605117798, + "learning_rate": 5.9174576536774605e-06, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.838177168100442, + "grad_norm": 0.2897675633430481, + "learning_rate": 5.909617701915815e-06, + "loss": 0.3552, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.838599902771026, + "grad_norm": 0.30884549021720886, + "learning_rate": 5.901782620907992e-06, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8390226374416097, + "grad_norm": 0.3001437783241272, + "learning_rate": 5.89395241151956e-06, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.839445372112194, + "grad_norm": 0.25690850615501404, + "learning_rate": 5.886127074615516e-06, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.839868106782778, + "grad_norm": 0.34780019521713257, + "learning_rate": 5.8783066110603524e-06, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8402908414533616, + "grad_norm": 0.24840426445007324, + "learning_rate": 5.8704910217180056e-06, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.840713576123946, + "grad_norm": 0.2699888348579407, + "learning_rate": 5.862680307451901e-06, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8411363107945298, + "grad_norm": 0.25711655616760254, + "learning_rate": 5.854874469124871e-06, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8415590454651136, + "grad_norm": 0.27800261974334717, + "learning_rate": 5.84707350759926e-06, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.841981780135698, + "grad_norm": 0.2942667305469513, + "learning_rate": 5.839277423736861e-06, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8424045148062818, + "grad_norm": 0.2759470045566559, + "learning_rate": 5.831486218398907e-06, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8428272494768656, + "grad_norm": 0.30065616965293884, + "learning_rate": 5.823699892446138e-06, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.84324998414745, + "grad_norm": 0.3006386458873749, + "learning_rate": 5.815918446738689e-06, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 79990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 2.8436727188180337, + "grad_norm": 0.24733510613441467, + "learning_rate": 5.808141882136208e-06, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.000422734670584, + "grad_norm": 0.1523246020078659, + "learning_rate": 5.800370199497807e-06, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.000845469341168, + "grad_norm": 0.1248946487903595, + "learning_rate": 5.792603399682001e-06, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.001268204011752, + "grad_norm": 0.10601712763309479, + "learning_rate": 5.7848414835468266e-06, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.001690938682336, + "grad_norm": 0.10299349576234818, + "learning_rate": 5.77708445194976e-06, + "loss": 0.3553, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.00211367335292, + "grad_norm": 0.11754082143306732, + "learning_rate": 5.7693323057477265e-06, + "loss": 0.3644, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.002536408023504, + "grad_norm": 0.1237531304359436, + "learning_rate": 5.76158504579713e-06, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0029591426940883, + "grad_norm": 0.11532095074653625, + "learning_rate": 5.753842672953835e-06, + "loss": 0.3664, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.003381877364672, + "grad_norm": 0.10186051577329636, + "learning_rate": 5.746105188073142e-06, + "loss": 0.3658, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.003804612035256, + "grad_norm": 0.11163158714771271, + "learning_rate": 5.738372592009817e-06, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0042273467058402, + "grad_norm": 0.1308857947587967, + "learning_rate": 5.730644885618114e-06, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.004650081376424, + "grad_norm": 0.09983784705400467, + "learning_rate": 5.722922069751718e-06, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.005072816047008, + "grad_norm": 0.10843901336193085, + "learning_rate": 5.7152041452637915e-06, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0054955507175922, + "grad_norm": 0.10588307678699493, + "learning_rate": 5.707491113006941e-06, + "loss": 0.3535, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.005918285388176, + "grad_norm": 0.11752831935882568, + "learning_rate": 5.699782973833262e-06, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.00634102005876, + "grad_norm": 0.13362480700016022, + "learning_rate": 5.692079728594257e-06, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.006763754729344, + "grad_norm": 0.09682904183864594, + "learning_rate": 5.6843813781409515e-06, + "loss": 0.3538, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.007186489399928, + "grad_norm": 0.10981963574886322, + "learning_rate": 5.6766879233237645e-06, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0076092240705123, + "grad_norm": 0.10059976577758789, + "learning_rate": 5.668999364992628e-06, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.008031958741096, + "grad_norm": 0.1308048814535141, + "learning_rate": 5.6613157039969055e-06, + "loss": 0.3658, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.00845469341168, + "grad_norm": 0.15060535073280334, + "learning_rate": 5.65363694118543e-06, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0088774280822643, + "grad_norm": 0.103150874376297, + "learning_rate": 5.645963077406491e-06, + "loss": 0.3552, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.009300162752848, + "grad_norm": 0.11820525676012039, + "learning_rate": 5.638294113507847e-06, + "loss": 0.3658, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.009722897423432, + "grad_norm": 0.09447792917490005, + "learning_rate": 5.630630050336677e-06, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0101456320940163, + "grad_norm": 0.13128693401813507, + "learning_rate": 5.622970888739659e-06, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0105683667646, + "grad_norm": 0.09286212176084518, + "learning_rate": 5.615316629562933e-06, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.010991101435184, + "grad_norm": 0.09855347126722336, + "learning_rate": 5.607667273652051e-06, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0114138361057683, + "grad_norm": 0.10127601772546768, + "learning_rate": 5.600022821852064e-06, + "loss": 0.3533, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.011836570776352, + "grad_norm": 0.0929175391793251, + "learning_rate": 5.592383275007473e-06, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0122593054469364, + "grad_norm": 0.11423037946224213, + "learning_rate": 5.5847486339622445e-06, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0126820401175203, + "grad_norm": 0.11325959861278534, + "learning_rate": 5.577118899559775e-06, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.013104774788104, + "grad_norm": 0.10781975090503693, + "learning_rate": 5.569494072642939e-06, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0135275094586884, + "grad_norm": 0.11050856858491898, + "learning_rate": 5.5618741540540755e-06, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0139502441292723, + "grad_norm": 0.09896565228700638, + "learning_rate": 5.554259144634966e-06, + "loss": 0.3653, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.014372978799856, + "grad_norm": 0.11548376083374023, + "learning_rate": 5.5466490452268645e-06, + "loss": 0.354, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0147957134704404, + "grad_norm": 0.10432232916355133, + "learning_rate": 5.539043856670462e-06, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0152184481410242, + "grad_norm": 0.10498601943254471, + "learning_rate": 5.531443579805923e-06, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.015641182811608, + "grad_norm": 0.11251366883516312, + "learning_rate": 5.523848215472877e-06, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0160639174821924, + "grad_norm": 0.09880644828081131, + "learning_rate": 5.516257764510379e-06, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0164866521527762, + "grad_norm": 0.12109941244125366, + "learning_rate": 5.508672227756973e-06, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0169093868233605, + "grad_norm": 0.11197292059659958, + "learning_rate": 5.501091606050646e-06, + "loss": 0.366, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0173321214939444, + "grad_norm": 0.10054219514131546, + "learning_rate": 5.493515900228846e-06, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.017754856164528, + "grad_norm": 0.105469711124897, + "learning_rate": 5.485945111128482e-06, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0181775908351125, + "grad_norm": 0.13311149179935455, + "learning_rate": 5.478379239585913e-06, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0186003255056963, + "grad_norm": 0.10755143314599991, + "learning_rate": 5.470818286436957e-06, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.01902306017628, + "grad_norm": 0.11098781228065491, + "learning_rate": 5.463262252516871e-06, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0194457948468645, + "grad_norm": 0.1151871606707573, + "learning_rate": 5.4557111386603965e-06, + "loss": 0.3559, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0198685295174483, + "grad_norm": 0.1008569747209549, + "learning_rate": 5.448164945701723e-06, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.020291264188032, + "grad_norm": 0.11951179802417755, + "learning_rate": 5.440623674474488e-06, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0207139988586165, + "grad_norm": 0.11019152402877808, + "learning_rate": 5.4330873258117974e-06, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0211367335292003, + "grad_norm": 0.11932173371315002, + "learning_rate": 5.425555900546214e-06, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0215594681997846, + "grad_norm": 0.11030244082212448, + "learning_rate": 5.4180293995097335e-06, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0219822028703685, + "grad_norm": 0.10601644963026047, + "learning_rate": 5.410507823533839e-06, + "loss": 0.366, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0224049375409523, + "grad_norm": 0.12627093493938446, + "learning_rate": 5.402991173449435e-06, + "loss": 0.3644, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0228276722115366, + "grad_norm": 0.1187230795621872, + "learning_rate": 5.39547945008691e-06, + "loss": 0.3543, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0232504068821204, + "grad_norm": 0.09296689927577972, + "learning_rate": 5.387972654276102e-06, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0236731415527043, + "grad_norm": 0.11008428037166595, + "learning_rate": 5.380470786846304e-06, + "loss": 0.3551, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0240958762232886, + "grad_norm": 0.1346094012260437, + "learning_rate": 5.3729738486262556e-06, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0245186108938724, + "grad_norm": 0.11687738448381424, + "learning_rate": 5.3654818404441696e-06, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0249413455644563, + "grad_norm": 0.09938216209411621, + "learning_rate": 5.3579947631276935e-06, + "loss": 0.3554, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0253640802350406, + "grad_norm": 0.09166499227285385, + "learning_rate": 5.350512617503939e-06, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0257868149056244, + "grad_norm": 0.10721787065267563, + "learning_rate": 5.343035404399488e-06, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0262095495762087, + "grad_norm": 0.1033380925655365, + "learning_rate": 5.3355631246403415e-06, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0266322842467925, + "grad_norm": 0.09937775880098343, + "learning_rate": 5.328095779051989e-06, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0270550189173764, + "grad_norm": 0.11123290657997131, + "learning_rate": 5.32063336845936e-06, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0274777535879607, + "grad_norm": 0.09726149588823318, + "learning_rate": 5.3131758936868615e-06, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0279004882585445, + "grad_norm": 0.09479101002216339, + "learning_rate": 5.3057233555583065e-06, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0283232229291284, + "grad_norm": 0.10519138723611832, + "learning_rate": 5.298275754897003e-06, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0287459575997127, + "grad_norm": 0.09856962412595749, + "learning_rate": 5.29083309252571e-06, + "loss": 0.3672, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0291686922702965, + "grad_norm": 0.11845693737268448, + "learning_rate": 5.2833953692666194e-06, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0295914269408803, + "grad_norm": 0.12814901769161224, + "learning_rate": 5.275962585941419e-06, + "loss": 0.3551, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0300141616114646, + "grad_norm": 0.11025506258010864, + "learning_rate": 5.268534743371189e-06, + "loss": 0.3558, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0304368962820485, + "grad_norm": 0.12346432358026505, + "learning_rate": 5.2611118423765145e-06, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0308596309526328, + "grad_norm": 0.11890149861574173, + "learning_rate": 5.253693883777427e-06, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0312823656232166, + "grad_norm": 0.12762656807899475, + "learning_rate": 5.246280868393388e-06, + "loss": 0.3656, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0317051002938005, + "grad_norm": 0.09883593022823334, + "learning_rate": 5.238872797043332e-06, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0321278349643848, + "grad_norm": 0.11919166147708893, + "learning_rate": 5.231469670545647e-06, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0325505696349686, + "grad_norm": 0.10136095434427261, + "learning_rate": 5.224071489718169e-06, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0329733043055525, + "grad_norm": 0.11674763262271881, + "learning_rate": 5.216678255378193e-06, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0333960389761367, + "grad_norm": 0.10646986216306686, + "learning_rate": 5.2092899683424676e-06, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0338187736467206, + "grad_norm": 0.12654195725917816, + "learning_rate": 5.201906629427195e-06, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0342415083173044, + "grad_norm": 0.0969071164727211, + "learning_rate": 5.194528239448005e-06, + "loss": 0.3528, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0346642429878887, + "grad_norm": 0.115419402718544, + "learning_rate": 5.187154799220018e-06, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0350869776584726, + "grad_norm": 0.1024632602930069, + "learning_rate": 5.179786309557793e-06, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.035509712329057, + "grad_norm": 0.11798956245183945, + "learning_rate": 5.172422771275348e-06, + "loss": 0.3647, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0359324469996407, + "grad_norm": 0.09672509878873825, + "learning_rate": 5.165064185186136e-06, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0363551816702246, + "grad_norm": 0.09850779920816422, + "learning_rate": 5.157710552103079e-06, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.036777916340809, + "grad_norm": 0.09970993548631668, + "learning_rate": 5.1503618728385665e-06, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0372006510113927, + "grad_norm": 0.1219617947936058, + "learning_rate": 5.143018148204398e-06, + "loss": 0.3688, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0376233856819765, + "grad_norm": 0.12548278272151947, + "learning_rate": 5.135679379011849e-06, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.038046120352561, + "grad_norm": 0.11015117913484573, + "learning_rate": 5.128345566071657e-06, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0384688550231447, + "grad_norm": 0.10170382261276245, + "learning_rate": 5.121016710194004e-06, + "loss": 0.3558, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0388915896937285, + "grad_norm": 0.10430293530225754, + "learning_rate": 5.1136928121885195e-06, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.039314324364313, + "grad_norm": 0.12139978259801865, + "learning_rate": 5.1063738728642896e-06, + "loss": 0.3671, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0397370590348967, + "grad_norm": 0.10266652703285217, + "learning_rate": 5.099059893029867e-06, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.040159793705481, + "grad_norm": 0.1255132257938385, + "learning_rate": 5.091750873493217e-06, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.040582528376065, + "grad_norm": 0.08738598227500916, + "learning_rate": 5.0844468150617945e-06, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0410052630466486, + "grad_norm": 0.1072571650147438, + "learning_rate": 5.077147718542507e-06, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.041427997717233, + "grad_norm": 0.11940891295671463, + "learning_rate": 5.069853584741674e-06, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.041850732387817, + "grad_norm": 0.11914606392383575, + "learning_rate": 5.062564414465104e-06, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 80990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0422734670584006, + "grad_norm": 0.10670697689056396, + "learning_rate": 5.055280208518049e-06, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.042696201728985, + "grad_norm": 0.11213622987270355, + "learning_rate": 5.048000967705208e-06, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0431189363995688, + "grad_norm": 0.10583041608333588, + "learning_rate": 5.040726692830744e-06, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0435416710701526, + "grad_norm": 0.12704290449619293, + "learning_rate": 5.033457384698243e-06, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.043964405740737, + "grad_norm": 0.11987229436635971, + "learning_rate": 5.0261930441107665e-06, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0443871404113207, + "grad_norm": 0.10359755158424377, + "learning_rate": 5.018933671870835e-06, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.044809875081905, + "grad_norm": 0.10862299799919128, + "learning_rate": 5.011679268780384e-06, + "loss": 0.3485, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.045232609752489, + "grad_norm": 0.13587209582328796, + "learning_rate": 5.004429835640834e-06, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0456553444230727, + "grad_norm": 0.11060182005167007, + "learning_rate": 4.997185373253038e-06, + "loss": 0.365, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.046078079093657, + "grad_norm": 0.09922589361667633, + "learning_rate": 4.989945882417329e-06, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.046500813764241, + "grad_norm": 0.10073840618133545, + "learning_rate": 4.982711363933434e-06, + "loss": 0.3515, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0469235484348247, + "grad_norm": 0.1082117110490799, + "learning_rate": 4.97548181860058e-06, + "loss": 0.3646, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.047346283105409, + "grad_norm": 0.12246713787317276, + "learning_rate": 4.968257247217434e-06, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.047769017775993, + "grad_norm": 0.10055743157863617, + "learning_rate": 4.961037650582107e-06, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0481917524465767, + "grad_norm": 0.1100146621465683, + "learning_rate": 4.953823029492171e-06, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.048614487117161, + "grad_norm": 0.11657445132732391, + "learning_rate": 4.9466133847446195e-06, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.049037221787745, + "grad_norm": 0.10232459008693695, + "learning_rate": 4.939408717135924e-06, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.049459956458329, + "grad_norm": 0.12092990428209305, + "learning_rate": 4.932209027462015e-06, + "loss": 0.3676, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.049882691128913, + "grad_norm": 0.10009041428565979, + "learning_rate": 4.9250143165182335e-06, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.050305425799497, + "grad_norm": 0.10718832165002823, + "learning_rate": 4.917824585099406e-06, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.050728160470081, + "grad_norm": 0.10988004505634308, + "learning_rate": 4.910639833999792e-06, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.051150895140665, + "grad_norm": 0.1396501511335373, + "learning_rate": 4.90346006401311e-06, + "loss": 0.3666, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.051573629811249, + "grad_norm": 0.10951828956604004, + "learning_rate": 4.8962852759325164e-06, + "loss": 0.3684, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.051996364481833, + "grad_norm": 0.11158096045255661, + "learning_rate": 4.889115470550648e-06, + "loss": 0.3684, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.052419099152417, + "grad_norm": 0.1044619083404541, + "learning_rate": 4.8819506486595445e-06, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.052841833823001, + "grad_norm": 0.12530940771102905, + "learning_rate": 4.874790811050711e-06, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.053264568493585, + "grad_norm": 0.1045951172709465, + "learning_rate": 4.8676359585151265e-06, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.053687303164169, + "grad_norm": 0.1380660980939865, + "learning_rate": 4.8604860918431975e-06, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.054110037834753, + "grad_norm": 0.11536575853824615, + "learning_rate": 4.853341211824786e-06, + "loss": 0.3537, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.054532772505337, + "grad_norm": 0.1060611829161644, + "learning_rate": 4.846201319249194e-06, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.054955507175921, + "grad_norm": 0.10630341619253159, + "learning_rate": 4.8390664149051965e-06, + "loss": 0.3647, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.055378241846505, + "grad_norm": 0.11658594757318497, + "learning_rate": 4.831936499580986e-06, + "loss": 0.3664, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.055800976517089, + "grad_norm": 0.09544280916452408, + "learning_rate": 4.824811574064225e-06, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.056223711187673, + "grad_norm": 0.1164543628692627, + "learning_rate": 4.817691639142008e-06, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.056646445858257, + "grad_norm": 0.1181173026561737, + "learning_rate": 4.810576695600899e-06, + "loss": 0.3643, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.057069180528841, + "grad_norm": 0.12688657641410828, + "learning_rate": 4.803466744226898e-06, + "loss": 0.3647, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.057491915199425, + "grad_norm": 0.1292998194694519, + "learning_rate": 4.796361785805453e-06, + "loss": 0.3546, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.057914649870009, + "grad_norm": 0.1322363168001175, + "learning_rate": 4.789261821121466e-06, + "loss": 0.3661, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.058337384540593, + "grad_norm": 0.09767744690179825, + "learning_rate": 4.782166850959291e-06, + "loss": 0.3517, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0587601192111773, + "grad_norm": 0.1273125559091568, + "learning_rate": 4.7750768761027135e-06, + "loss": 0.3644, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.059182853881761, + "grad_norm": 0.11791028082370758, + "learning_rate": 4.767991897334972e-06, + "loss": 0.3665, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.059605588552345, + "grad_norm": 0.09948500245809555, + "learning_rate": 4.760911915438787e-06, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0600283232229293, + "grad_norm": 0.13922983407974243, + "learning_rate": 4.75383693119626e-06, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.060451057893513, + "grad_norm": 0.10623703896999359, + "learning_rate": 4.746766945389003e-06, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.060873792564097, + "grad_norm": 0.11034159362316132, + "learning_rate": 4.7397019587980425e-06, + "loss": 0.3554, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0612965272346813, + "grad_norm": 0.11411680281162262, + "learning_rate": 4.732641972203877e-06, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.061719261905265, + "grad_norm": 0.10587763041257858, + "learning_rate": 4.725586986386416e-06, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.062141996575849, + "grad_norm": 0.08428416401147842, + "learning_rate": 4.718537002125051e-06, + "loss": 0.3521, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0625647312464332, + "grad_norm": 0.10715455561876297, + "learning_rate": 4.711492020198599e-06, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.062987465917017, + "grad_norm": 0.10857054591178894, + "learning_rate": 4.704452041385343e-06, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0634102005876014, + "grad_norm": 0.09067583084106445, + "learning_rate": 4.697417066463011e-06, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0638329352581852, + "grad_norm": 0.0995609238743782, + "learning_rate": 4.6903870962087485e-06, + "loss": 0.3556, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.064255669928769, + "grad_norm": 0.09668145328760147, + "learning_rate": 4.683362131399183e-06, + "loss": 0.353, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0646784045993534, + "grad_norm": 0.11535310745239258, + "learning_rate": 4.676342172810383e-06, + "loss": 0.3651, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.065101139269937, + "grad_norm": 0.09837060421705246, + "learning_rate": 4.669327221217845e-06, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.065523873940521, + "grad_norm": 0.11059394478797913, + "learning_rate": 4.662317277396528e-06, + "loss": 0.3545, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0659466086111054, + "grad_norm": 0.10111658275127411, + "learning_rate": 4.655312342120832e-06, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.066369343281689, + "grad_norm": 0.13590508699417114, + "learning_rate": 4.6483124161646185e-06, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.066792077952273, + "grad_norm": 0.11603836715221405, + "learning_rate": 4.641317500301173e-06, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0672148126228573, + "grad_norm": 0.12270884960889816, + "learning_rate": 4.634327595303251e-06, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.067637547293441, + "grad_norm": 0.12711556255817413, + "learning_rate": 4.627342701943033e-06, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0680602819640255, + "grad_norm": 0.10272988677024841, + "learning_rate": 4.620362820992142e-06, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0684830166346093, + "grad_norm": 0.11423898488283157, + "learning_rate": 4.613387953221671e-06, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.068905751305193, + "grad_norm": 0.11453207582235336, + "learning_rate": 4.6064180994021485e-06, + "loss": 0.3686, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0693284859757775, + "grad_norm": 0.11792844533920288, + "learning_rate": 4.599453260303549e-06, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0697512206463613, + "grad_norm": 0.11329293251037598, + "learning_rate": 4.592493436695289e-06, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.070173955316945, + "grad_norm": 0.10326626896858215, + "learning_rate": 4.585538629346242e-06, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0705966899875294, + "grad_norm": 0.0970868244767189, + "learning_rate": 4.578588839024706e-06, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0710194246581133, + "grad_norm": 0.1262325644493103, + "learning_rate": 4.571644066498459e-06, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.071442159328697, + "grad_norm": 0.12011207640171051, + "learning_rate": 4.564704312534679e-06, + "loss": 0.3543, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0718648939992814, + "grad_norm": 0.09233388304710388, + "learning_rate": 4.557769577900028e-06, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0722876286698653, + "grad_norm": 0.09715259820222855, + "learning_rate": 4.5508398633605955e-06, + "loss": 0.3672, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0727103633404496, + "grad_norm": 0.10978258401155472, + "learning_rate": 4.5439151696819285e-06, + "loss": 0.355, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0731330980110334, + "grad_norm": 0.10170488059520721, + "learning_rate": 4.536995497629004e-06, + "loss": 0.3664, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0735558326816173, + "grad_norm": 0.10805223882198334, + "learning_rate": 4.530080847966267e-06, + "loss": 0.3664, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0739785673522015, + "grad_norm": 0.09208887815475464, + "learning_rate": 4.523171221457578e-06, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0744013020227854, + "grad_norm": 0.09961234033107758, + "learning_rate": 4.516266618866255e-06, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0748240366933692, + "grad_norm": 0.10657251626253128, + "learning_rate": 4.509367040955082e-06, + "loss": 0.3546, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0752467713639535, + "grad_norm": 0.12821529805660248, + "learning_rate": 4.5024724884862476e-06, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0756695060345374, + "grad_norm": 0.11509158462285995, + "learning_rate": 4.495582962221417e-06, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.076092240705121, + "grad_norm": 0.10705316066741943, + "learning_rate": 4.488698462921687e-06, + "loss": 0.3553, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0765149753757055, + "grad_norm": 0.09582885354757309, + "learning_rate": 4.48181899134762e-06, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0769377100462894, + "grad_norm": 0.10612348467111588, + "learning_rate": 4.474944548259175e-06, + "loss": 0.3714, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0773604447168736, + "grad_norm": 0.09949025511741638, + "learning_rate": 4.468075134415805e-06, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0777831793874575, + "grad_norm": 0.12442309409379959, + "learning_rate": 4.461210750576378e-06, + "loss": 0.3559, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0782059140580413, + "grad_norm": 0.11698231846094131, + "learning_rate": 4.454351397499229e-06, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0786286487286256, + "grad_norm": 0.1030348688364029, + "learning_rate": 4.44749707594212e-06, + "loss": 0.3658, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0790513833992095, + "grad_norm": 0.11837390065193176, + "learning_rate": 4.440647786662255e-06, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0794741180697933, + "grad_norm": 0.1318058967590332, + "learning_rate": 4.433803530416297e-06, + "loss": 0.3549, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0798968527403776, + "grad_norm": 0.1295020431280136, + "learning_rate": 4.426964307960346e-06, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0803195874109615, + "grad_norm": 0.11840829253196716, + "learning_rate": 4.420130120049931e-06, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0807423220815453, + "grad_norm": 0.10718177258968353, + "learning_rate": 4.413300967440048e-06, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0811650567521296, + "grad_norm": 0.12133599072694778, + "learning_rate": 4.406476850885122e-06, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0815877914227134, + "grad_norm": 0.09591041505336761, + "learning_rate": 4.399657771139038e-06, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0820105260932977, + "grad_norm": 0.09642042964696884, + "learning_rate": 4.392843728955109e-06, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0824332607638816, + "grad_norm": 0.09878113865852356, + "learning_rate": 4.386034725086096e-06, + "loss": 0.3665, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0828559954344654, + "grad_norm": 0.1144447922706604, + "learning_rate": 4.3792307602842085e-06, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0832787301050497, + "grad_norm": 0.11906454712152481, + "learning_rate": 4.372431835301077e-06, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0837014647756336, + "grad_norm": 0.10305213928222656, + "learning_rate": 4.365637950887802e-06, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0841241994462174, + "grad_norm": 0.12169712036848068, + "learning_rate": 4.358849107794921e-06, + "loss": 0.3665, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 81990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0845469341168017, + "grad_norm": 0.10272271931171417, + "learning_rate": 4.352065306772407e-06, + "loss": 0.367, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0849696687873855, + "grad_norm": 0.10364782065153122, + "learning_rate": 4.345286548569683e-06, + "loss": 0.3674, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.08539240345797, + "grad_norm": 0.11394933611154556, + "learning_rate": 4.338512833935615e-06, + "loss": 0.3563, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0858151381285537, + "grad_norm": 0.09744058549404144, + "learning_rate": 4.331744163618512e-06, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0862378727991375, + "grad_norm": 0.09695398807525635, + "learning_rate": 4.3249805383661135e-06, + "loss": 0.3537, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.086660607469722, + "grad_norm": 0.10266648232936859, + "learning_rate": 4.318221958925605e-06, + "loss": 0.3554, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0870833421403057, + "grad_norm": 0.12456762790679932, + "learning_rate": 4.3114684260436355e-06, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0875060768108895, + "grad_norm": 0.13469870388507843, + "learning_rate": 4.3047199404662675e-06, + "loss": 0.3559, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.087928811481474, + "grad_norm": 0.11687690764665604, + "learning_rate": 4.2979765029390324e-06, + "loss": 0.3681, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0883515461520576, + "grad_norm": 0.09021880477666855, + "learning_rate": 4.291238114206886e-06, + "loss": 0.3505, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0887742808226415, + "grad_norm": 0.14719808101654053, + "learning_rate": 4.284504775014236e-06, + "loss": 0.3549, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.089197015493226, + "grad_norm": 0.12959499657154083, + "learning_rate": 4.277776486104917e-06, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0896197501638096, + "grad_norm": 0.10539942979812622, + "learning_rate": 4.271053248222229e-06, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0900424848343935, + "grad_norm": 0.10854099690914154, + "learning_rate": 4.264335062108904e-06, + "loss": 0.3499, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0904652195049778, + "grad_norm": 0.11905502527952194, + "learning_rate": 4.257621928507094e-06, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0908879541755616, + "grad_norm": 0.09299279749393463, + "learning_rate": 4.250913848158422e-06, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.091310688846146, + "grad_norm": 0.1055336445569992, + "learning_rate": 4.244210821803951e-06, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0917334235167298, + "grad_norm": 0.11964215338230133, + "learning_rate": 4.237512850184172e-06, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0921561581873136, + "grad_norm": 0.1006922498345375, + "learning_rate": 4.230819934039032e-06, + "loss": 0.3713, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.092578892857898, + "grad_norm": 0.10125339776277542, + "learning_rate": 4.224132074107895e-06, + "loss": 0.3647, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0930016275284817, + "grad_norm": 0.1098286584019661, + "learning_rate": 4.217449271129592e-06, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0934243621990656, + "grad_norm": 0.10427585989236832, + "learning_rate": 4.210771525842383e-06, + "loss": 0.3672, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.09384709686965, + "grad_norm": 0.1247074156999588, + "learning_rate": 4.204098838983983e-06, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0942698315402337, + "grad_norm": 0.10978814214468002, + "learning_rate": 4.19743121129152e-06, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.094692566210818, + "grad_norm": 0.11206142604351044, + "learning_rate": 4.190768643501585e-06, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.095115300881402, + "grad_norm": 0.11799365282058716, + "learning_rate": 4.184111136350222e-06, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0955380355519857, + "grad_norm": 0.11161308735609055, + "learning_rate": 4.177458690572872e-06, + "loss": 0.3548, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.09596077022257, + "grad_norm": 0.09831337630748749, + "learning_rate": 4.170811306904459e-06, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.096383504893154, + "grad_norm": 0.10546303540468216, + "learning_rate": 4.164168986079331e-06, + "loss": 0.3643, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0968062395637377, + "grad_norm": 0.09609152376651764, + "learning_rate": 4.157531728831282e-06, + "loss": 0.3559, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.097228974234322, + "grad_norm": 0.11156036704778671, + "learning_rate": 4.150899535893538e-06, + "loss": 0.3669, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.097651708904906, + "grad_norm": 0.11962047964334488, + "learning_rate": 4.144272407998784e-06, + "loss": 0.3526, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0980744435754897, + "grad_norm": 0.11810597032308578, + "learning_rate": 4.137650345879113e-06, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.098497178246074, + "grad_norm": 0.11553143709897995, + "learning_rate": 4.1310333502660945e-06, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.098919912916658, + "grad_norm": 0.12196386605501175, + "learning_rate": 4.124421421890706e-06, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.0993426475872417, + "grad_norm": 0.09829060733318329, + "learning_rate": 4.117814561483385e-06, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.099765382257826, + "grad_norm": 0.10083173960447311, + "learning_rate": 4.111212769774009e-06, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.10018811692841, + "grad_norm": 0.09950599819421768, + "learning_rate": 4.104616047491894e-06, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.100610851598994, + "grad_norm": 0.10261036455631256, + "learning_rate": 4.0980243953657845e-06, + "loss": 0.3695, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.101033586269578, + "grad_norm": 0.10475487262010574, + "learning_rate": 4.091437814123888e-06, + "loss": 0.3559, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1014563209401618, + "grad_norm": 0.09931155294179916, + "learning_rate": 4.084856304493828e-06, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.101879055610746, + "grad_norm": 0.11543694883584976, + "learning_rate": 4.0782798672026665e-06, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.10230179028133, + "grad_norm": 0.10322713851928711, + "learning_rate": 4.071708502976929e-06, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1027245249519138, + "grad_norm": 0.0958469957113266, + "learning_rate": 4.065142212542567e-06, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.103147259622498, + "grad_norm": 0.11566828191280365, + "learning_rate": 4.058580996624961e-06, + "loss": 0.3643, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.103569994293082, + "grad_norm": 0.08702944219112396, + "learning_rate": 4.05202485594896e-06, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.103992728963666, + "grad_norm": 0.12434764206409454, + "learning_rate": 4.045473791238829e-06, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.10441546363425, + "grad_norm": 0.10128818452358246, + "learning_rate": 4.038927803218262e-06, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.104838198304834, + "grad_norm": 0.0872817188501358, + "learning_rate": 4.032386892610424e-06, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.105260932975418, + "grad_norm": 0.10678096115589142, + "learning_rate": 4.025851060137903e-06, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.105683667646002, + "grad_norm": 0.11681125313043594, + "learning_rate": 4.019320306522711e-06, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.106106402316586, + "grad_norm": 0.13395749032497406, + "learning_rate": 4.012794632486322e-06, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.10652913698717, + "grad_norm": 0.0928216278553009, + "learning_rate": 4.006274038749641e-06, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.106951871657754, + "grad_norm": 0.11854377388954163, + "learning_rate": 3.99975852603301e-06, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.107374606328338, + "grad_norm": 0.10490284115076065, + "learning_rate": 3.993248095056223e-06, + "loss": 0.3651, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.107797340998922, + "grad_norm": 0.09046804904937744, + "learning_rate": 3.98674274653848e-06, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.108220075669506, + "grad_norm": 0.10665866732597351, + "learning_rate": 3.980242481198449e-06, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.10864281034009, + "grad_norm": 0.11945503205060959, + "learning_rate": 3.973747299754227e-06, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.109065545010674, + "grad_norm": 0.10885261744260788, + "learning_rate": 3.967257202923364e-06, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.109488279681258, + "grad_norm": 0.09282349050045013, + "learning_rate": 3.9607721914228065e-06, + "loss": 0.3663, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1099110143518423, + "grad_norm": 0.10483716428279877, + "learning_rate": 3.954292265968984e-06, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.110333749022426, + "grad_norm": 0.1417795568704605, + "learning_rate": 3.947817427277756e-06, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.11075648369301, + "grad_norm": 0.12152040004730225, + "learning_rate": 3.941347676064383e-06, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1111792183635942, + "grad_norm": 0.11482692509889603, + "learning_rate": 3.934883013043611e-06, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.111601953034178, + "grad_norm": 0.09579096734523773, + "learning_rate": 3.928423438929607e-06, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.112024687704762, + "grad_norm": 0.10604561865329742, + "learning_rate": 3.921968954435962e-06, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.112447422375346, + "grad_norm": 0.11824272572994232, + "learning_rate": 3.915519560275721e-06, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.11287015704593, + "grad_norm": 0.1036282330751419, + "learning_rate": 3.909075257161371e-06, + "loss": 0.3528, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1132928917165144, + "grad_norm": 0.11356998980045319, + "learning_rate": 3.902636045804814e-06, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.113715626387098, + "grad_norm": 0.11238189041614532, + "learning_rate": 3.896201926917409e-06, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.114138361057682, + "grad_norm": 0.09933710098266602, + "learning_rate": 3.889772901209937e-06, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1145610957282663, + "grad_norm": 0.11034074425697327, + "learning_rate": 3.883348969392636e-06, + "loss": 0.3523, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.11498383039885, + "grad_norm": 0.09951896220445633, + "learning_rate": 3.876930132175166e-06, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.115406565069434, + "grad_norm": 0.10062923282384872, + "learning_rate": 3.8705163902666316e-06, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1158292997400183, + "grad_norm": 0.09485418349504471, + "learning_rate": 3.864107744375567e-06, + "loss": 0.3664, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.116252034410602, + "grad_norm": 0.11752889305353165, + "learning_rate": 3.8577041952099655e-06, + "loss": 0.3523, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.116674769081186, + "grad_norm": 0.102942556142807, + "learning_rate": 3.8513057434772235e-06, + "loss": 0.366, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1170975037517703, + "grad_norm": 0.11280204355716705, + "learning_rate": 3.8449123898841865e-06, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.117520238422354, + "grad_norm": 0.10448126494884491, + "learning_rate": 3.8385241351371445e-06, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.117942973092938, + "grad_norm": 0.11758572608232498, + "learning_rate": 3.8321409799418284e-06, + "loss": 0.3668, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1183657077635223, + "grad_norm": 0.098630890250206, + "learning_rate": 3.825762925003396e-06, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.118788442434106, + "grad_norm": 0.11589750647544861, + "learning_rate": 3.819389971026444e-06, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1192111771046904, + "grad_norm": 0.11385764181613922, + "learning_rate": 3.8130221187150095e-06, + "loss": 0.3662, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1196339117752743, + "grad_norm": 0.10046626627445221, + "learning_rate": 3.8066593687725473e-06, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.120056646445858, + "grad_norm": 0.11710578948259354, + "learning_rate": 3.800301721901989e-06, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1204793811164424, + "grad_norm": 0.09301894158124924, + "learning_rate": 3.793949178805645e-06, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1209021157870263, + "grad_norm": 0.12695367634296417, + "learning_rate": 3.787601740185309e-06, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.12132485045761, + "grad_norm": 0.08854811638593674, + "learning_rate": 3.781259406742199e-06, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1217475851281944, + "grad_norm": 0.09520964324474335, + "learning_rate": 3.7749221791769652e-06, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1221703197987782, + "grad_norm": 0.11893545836210251, + "learning_rate": 3.768590058189686e-06, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1225930544693625, + "grad_norm": 0.10478578507900238, + "learning_rate": 3.762263044479897e-06, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1230157891399464, + "grad_norm": 0.11246010661125183, + "learning_rate": 3.7559411387465438e-06, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.12343852381053, + "grad_norm": 0.10737476497888565, + "learning_rate": 3.7496243416880183e-06, + "loss": 0.3503, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1238612584811145, + "grad_norm": 0.09469365328550339, + "learning_rate": 3.7433126540021677e-06, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1242839931516984, + "grad_norm": 0.11413243412971497, + "learning_rate": 3.7370060763862347e-06, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.124706727822282, + "grad_norm": 0.13132187724113464, + "learning_rate": 3.730704609536928e-06, + "loss": 0.369, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1251294624928665, + "grad_norm": 0.09998204559087753, + "learning_rate": 3.724408254150391e-06, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1255521971634503, + "grad_norm": 0.11532704532146454, + "learning_rate": 3.71811701092219e-06, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.125974931834034, + "grad_norm": 0.11814186722040176, + "learning_rate": 3.711830880547329e-06, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1263976665046185, + "grad_norm": 0.10861798375844955, + "learning_rate": 3.705549863720248e-06, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 82990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1268204011752023, + "grad_norm": 0.1164318323135376, + "learning_rate": 3.699273961134825e-06, + "loss": 0.3659, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.127243135845786, + "grad_norm": 0.10603933781385422, + "learning_rate": 3.6930031734843775e-06, + "loss": 0.3558, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1276658705163705, + "grad_norm": 0.10672251135110855, + "learning_rate": 3.6867375014616514e-06, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1280886051869543, + "grad_norm": 0.08992306888103485, + "learning_rate": 3.6804769457588207e-06, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1285113398575386, + "grad_norm": 0.12011739611625671, + "learning_rate": 3.6742215070675044e-06, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1289340745281224, + "grad_norm": 0.09250015020370483, + "learning_rate": 3.6679711860787615e-06, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1293568091987063, + "grad_norm": 0.12097031623125076, + "learning_rate": 3.6617259834830662e-06, + "loss": 0.3496, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1297795438692906, + "grad_norm": 0.11797837167978287, + "learning_rate": 3.65548589997034e-06, + "loss": 0.3511, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1302022785398744, + "grad_norm": 0.09820786118507385, + "learning_rate": 3.649250936229942e-06, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1306250132104583, + "grad_norm": 0.08645977079868317, + "learning_rate": 3.64302109295066e-06, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1310477478810426, + "grad_norm": 0.13692456483840942, + "learning_rate": 3.6367963708207163e-06, + "loss": 0.3688, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1314704825516264, + "grad_norm": 0.10924520343542099, + "learning_rate": 3.6305767705277827e-06, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1318932172222107, + "grad_norm": 0.10544862598180771, + "learning_rate": 3.6243622927589318e-06, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1323159518927945, + "grad_norm": 0.10390780121088028, + "learning_rate": 3.618152938200692e-06, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1327386865633784, + "grad_norm": 0.136841282248497, + "learning_rate": 3.611948707539026e-06, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1331614212339627, + "grad_norm": 0.11755307018756866, + "learning_rate": 3.6057496014593293e-06, + "loss": 0.3672, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1335841559045465, + "grad_norm": 0.09702187776565552, + "learning_rate": 3.5995556206464333e-06, + "loss": 0.3662, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1340068905751304, + "grad_norm": 0.09192138910293579, + "learning_rate": 3.5933667657845895e-06, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1344296252457147, + "grad_norm": 0.11346664279699326, + "learning_rate": 3.587183037557501e-06, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1348523599162985, + "grad_norm": 0.10859084129333496, + "learning_rate": 3.581004436648305e-06, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1352750945868824, + "grad_norm": 0.09712924808263779, + "learning_rate": 3.5748309637395506e-06, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1356978292574667, + "grad_norm": 0.1149146556854248, + "learning_rate": 3.5686626195132357e-06, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1361205639280505, + "grad_norm": 0.11161921173334122, + "learning_rate": 3.5624994046507874e-06, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1365432985986343, + "grad_norm": 0.10247299075126648, + "learning_rate": 3.5563413198330732e-06, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1369660332692186, + "grad_norm": 0.10526075214147568, + "learning_rate": 3.5501883657403868e-06, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1373887679398025, + "grad_norm": 0.11141068488359451, + "learning_rate": 3.5440405430524628e-06, + "loss": 0.3651, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1378115026103868, + "grad_norm": 0.10022301971912384, + "learning_rate": 3.5378978524484627e-06, + "loss": 0.3643, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1382342372809706, + "grad_norm": 0.13936278223991394, + "learning_rate": 3.5317602946069783e-06, + "loss": 0.3661, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1386569719515545, + "grad_norm": 0.10801060497760773, + "learning_rate": 3.525627870206033e-06, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1390797066221388, + "grad_norm": 0.11088043451309204, + "learning_rate": 3.5195005799231075e-06, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1395024412927226, + "grad_norm": 0.09600705653429031, + "learning_rate": 3.513378424435071e-06, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1399251759633064, + "grad_norm": 0.09027500450611115, + "learning_rate": 3.507261404418266e-06, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1403479106338907, + "grad_norm": 0.10556026548147202, + "learning_rate": 3.501149520548447e-06, + "loss": 0.3554, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1407706453044746, + "grad_norm": 0.14073459804058075, + "learning_rate": 3.495042773500806e-06, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.141193379975059, + "grad_norm": 0.10689658671617508, + "learning_rate": 3.4889411639499768e-06, + "loss": 0.3536, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1416161146456427, + "grad_norm": 0.1049795150756836, + "learning_rate": 3.482844692570003e-06, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1420388493162266, + "grad_norm": 0.1139625608921051, + "learning_rate": 3.4767533600343795e-06, + "loss": 0.3507, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.142461583986811, + "grad_norm": 0.09686768800020218, + "learning_rate": 3.4706671670160285e-06, + "loss": 0.3556, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1428843186573947, + "grad_norm": 0.10574439913034439, + "learning_rate": 3.4645861141873125e-06, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1433070533279786, + "grad_norm": 0.10131373256444931, + "learning_rate": 3.4585102022200058e-06, + "loss": 0.3653, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.143729787998563, + "grad_norm": 0.09746529161930084, + "learning_rate": 3.452439431785326e-06, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1441525226691467, + "grad_norm": 0.10051785409450531, + "learning_rate": 3.446373803553937e-06, + "loss": 0.3644, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1445752573397305, + "grad_norm": 0.08548030257225037, + "learning_rate": 3.440313318195898e-06, + "loss": 0.3556, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.144997992010315, + "grad_norm": 0.13352932035923004, + "learning_rate": 3.4342579763807446e-06, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1454207266808987, + "grad_norm": 0.12119831889867783, + "learning_rate": 3.4282077787774146e-06, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1458434613514825, + "grad_norm": 0.1072508916258812, + "learning_rate": 3.4221627260542844e-06, + "loss": 0.3549, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.146266196022067, + "grad_norm": 0.11015790700912476, + "learning_rate": 3.416122818879164e-06, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1466889306926507, + "grad_norm": 0.12029817700386047, + "learning_rate": 3.410088057919303e-06, + "loss": 0.3561, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.147111665363235, + "grad_norm": 0.10923074185848236, + "learning_rate": 3.404058443841357e-06, + "loss": 0.365, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.147534400033819, + "grad_norm": 0.08859322220087051, + "learning_rate": 3.3980339773114487e-06, + "loss": 0.3644, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1479571347044026, + "grad_norm": 0.09348300099372864, + "learning_rate": 3.3920146589950963e-06, + "loss": 0.3561, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.148379869374987, + "grad_norm": 0.12327638268470764, + "learning_rate": 3.386000489557267e-06, + "loss": 0.3541, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1488026040455708, + "grad_norm": 0.10675467550754547, + "learning_rate": 3.3799914696623693e-06, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1492253387161546, + "grad_norm": 0.1254352629184723, + "learning_rate": 3.373987599974221e-06, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.149648073386739, + "grad_norm": 0.11745137721300125, + "learning_rate": 3.367988881156092e-06, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1500708080573228, + "grad_norm": 0.10163547843694687, + "learning_rate": 3.361995313870675e-06, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.150493542727907, + "grad_norm": 0.11226019263267517, + "learning_rate": 3.3560068987800843e-06, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.150916277398491, + "grad_norm": 0.09473676234483719, + "learning_rate": 3.3500236365458626e-06, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1513390120690747, + "grad_norm": 0.09683308005332947, + "learning_rate": 3.344045527829004e-06, + "loss": 0.3507, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.151761746739659, + "grad_norm": 0.11332429200410843, + "learning_rate": 3.3380725732899243e-06, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.152184481410243, + "grad_norm": 0.1047583743929863, + "learning_rate": 3.3321047735884627e-06, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1526072160808267, + "grad_norm": 0.10033160448074341, + "learning_rate": 3.326142129383891e-06, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.153029950751411, + "grad_norm": 0.10181707888841629, + "learning_rate": 3.3201846413349334e-06, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.153452685421995, + "grad_norm": 0.12902018427848816, + "learning_rate": 3.3142323100997018e-06, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1538754200925787, + "grad_norm": 0.12010544538497925, + "learning_rate": 3.3082851363357757e-06, + "loss": 0.3664, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.154298154763163, + "grad_norm": 0.10573292523622513, + "learning_rate": 3.302343120700152e-06, + "loss": 0.3552, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.154720889433747, + "grad_norm": 0.12801702320575714, + "learning_rate": 3.29640626384925e-06, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1551436241043307, + "grad_norm": 0.09944334626197815, + "learning_rate": 3.290474566438928e-06, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.155566358774915, + "grad_norm": 0.10595155507326126, + "learning_rate": 3.2845480291244733e-06, + "loss": 0.3522, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.155989093445499, + "grad_norm": 0.09780077636241913, + "learning_rate": 3.2786266525606015e-06, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.156411828116083, + "grad_norm": 0.09608149528503418, + "learning_rate": 3.2727104374014717e-06, + "loss": 0.3523, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.156834562786667, + "grad_norm": 0.10098965466022491, + "learning_rate": 3.2667993843006396e-06, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.157257297457251, + "grad_norm": 0.10976491868495941, + "learning_rate": 3.260893493911121e-06, + "loss": 0.3548, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.157680032127835, + "grad_norm": 0.12959299981594086, + "learning_rate": 3.2549927668853565e-06, + "loss": 0.3664, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.158102766798419, + "grad_norm": 0.11890706419944763, + "learning_rate": 3.249097203875212e-06, + "loss": 0.3534, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.158525501469003, + "grad_norm": 0.13808132708072662, + "learning_rate": 3.2432068055319673e-06, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.158948236139587, + "grad_norm": 0.11769885569810867, + "learning_rate": 3.237321572506358e-06, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.159370970810171, + "grad_norm": 0.11484089493751526, + "learning_rate": 3.2314415054485457e-06, + "loss": 0.3551, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.159793705480755, + "grad_norm": 0.09239485114812851, + "learning_rate": 3.22556660500809e-06, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.160216440151339, + "grad_norm": 0.0929921418428421, + "learning_rate": 3.2196968718340213e-06, + "loss": 0.3538, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.160639174821923, + "grad_norm": 0.1141209527850151, + "learning_rate": 3.2138323065747768e-06, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.161061909492507, + "grad_norm": 0.11907503008842468, + "learning_rate": 3.2079729098782206e-06, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.161484644163091, + "grad_norm": 0.11034678667783737, + "learning_rate": 3.202118682391664e-06, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.161907378833675, + "grad_norm": 0.1144934892654419, + "learning_rate": 3.196269624761833e-06, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.162330113504259, + "grad_norm": 0.12343272566795349, + "learning_rate": 3.1904257376348724e-06, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.162752848174843, + "grad_norm": 0.11360954493284225, + "learning_rate": 3.1845870216563876e-06, + "loss": 0.3725, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.163175582845427, + "grad_norm": 0.1349467635154724, + "learning_rate": 3.1787534774713743e-06, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.163598317516011, + "grad_norm": 0.11683741211891174, + "learning_rate": 3.1729251057242835e-06, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.164021052186595, + "grad_norm": 0.13243815302848816, + "learning_rate": 3.167101907058989e-06, + "loss": 0.3557, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.164443786857179, + "grad_norm": 0.10918988287448883, + "learning_rate": 3.161283882118793e-06, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.164866521527763, + "grad_norm": 0.11697351187467575, + "learning_rate": 3.15547103154642e-06, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.165289256198347, + "grad_norm": 0.11397526413202286, + "learning_rate": 3.149663355984034e-06, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1657119908689313, + "grad_norm": 0.08453447371721268, + "learning_rate": 3.1438608560732162e-06, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.166134725539515, + "grad_norm": 0.1456601917743683, + "learning_rate": 3.138063532454977e-06, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.166557460210099, + "grad_norm": 0.13219799101352692, + "learning_rate": 3.1322713857697585e-06, + "loss": 0.3663, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1669801948806833, + "grad_norm": 0.09652557224035263, + "learning_rate": 3.1264844166574325e-06, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.167402929551267, + "grad_norm": 0.119762122631073, + "learning_rate": 3.1207026257573048e-06, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.167825664221851, + "grad_norm": 0.0880596935749054, + "learning_rate": 3.1149260137080914e-06, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1682483988924353, + "grad_norm": 0.09367838501930237, + "learning_rate": 3.109154581147955e-06, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.168671133563019, + "grad_norm": 0.10192928463220596, + "learning_rate": 3.103388328714474e-06, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 83990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1690938682336034, + "grad_norm": 0.11770544946193695, + "learning_rate": 3.0976272570446495e-06, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1695166029041872, + "grad_norm": 0.10309958457946777, + "learning_rate": 3.0918713667749344e-06, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.169939337574771, + "grad_norm": 0.10112018138170242, + "learning_rate": 3.0861206585411805e-06, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1703620722453554, + "grad_norm": 0.10438515990972519, + "learning_rate": 3.0803751329786858e-06, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1707848069159392, + "grad_norm": 0.11017291992902756, + "learning_rate": 3.074634790722164e-06, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.171207541586523, + "grad_norm": 0.12489500641822815, + "learning_rate": 3.068899632405775e-06, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1716302762571074, + "grad_norm": 0.1100999116897583, + "learning_rate": 3.06316965866309e-06, + "loss": 0.3552, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.172053010927691, + "grad_norm": 0.1047070324420929, + "learning_rate": 3.0574448701270965e-06, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.172475745598275, + "grad_norm": 0.08906829357147217, + "learning_rate": 3.0517252674302332e-06, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1728984802688593, + "grad_norm": 0.12648719549179077, + "learning_rate": 3.0460108512043617e-06, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.173321214939443, + "grad_norm": 0.11459321528673172, + "learning_rate": 3.0403016220807655e-06, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.173743949610027, + "grad_norm": 0.10953362286090851, + "learning_rate": 3.034597580690146e-06, + "loss": 0.3686, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1741666842806113, + "grad_norm": 0.10464169830083847, + "learning_rate": 3.0288987276626378e-06, + "loss": 0.3516, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.174589418951195, + "grad_norm": 0.10811000317335129, + "learning_rate": 3.0232050636278208e-06, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1750121536217795, + "grad_norm": 0.11178943514823914, + "learning_rate": 3.0175165892146693e-06, + "loss": 0.3561, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1754348882923633, + "grad_norm": 0.14822730422019958, + "learning_rate": 3.0118333050516035e-06, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.175857622962947, + "grad_norm": 0.1089983582496643, + "learning_rate": 3.0061552117664703e-06, + "loss": 0.3561, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1762803576335314, + "grad_norm": 0.09589004516601562, + "learning_rate": 3.000482309986541e-06, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1767030923041153, + "grad_norm": 0.11657485365867615, + "learning_rate": 2.9948146003385135e-06, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.177125826974699, + "grad_norm": 0.11398564279079437, + "learning_rate": 2.9891520834485154e-06, + "loss": 0.3544, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1775485616452834, + "grad_norm": 0.10917269438505173, + "learning_rate": 2.983494759942085e-06, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1779712963158673, + "grad_norm": 0.11300209164619446, + "learning_rate": 2.9778426304442107e-06, + "loss": 0.3549, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1783940309864516, + "grad_norm": 0.1057644784450531, + "learning_rate": 2.972195695579277e-06, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1788167656570354, + "grad_norm": 0.12734556198120117, + "learning_rate": 2.9665539559711297e-06, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1792395003276193, + "grad_norm": 0.12413902580738068, + "learning_rate": 2.9609174122430137e-06, + "loss": 0.3652, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1796622349982036, + "grad_norm": 0.10472843050956726, + "learning_rate": 2.9552860650176095e-06, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1800849696687874, + "grad_norm": 0.11134059727191925, + "learning_rate": 2.94965991491703e-06, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1805077043393712, + "grad_norm": 0.13012637197971344, + "learning_rate": 2.944038962562806e-06, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1809304390099555, + "grad_norm": 0.1014445498585701, + "learning_rate": 2.938423208575897e-06, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1813531736805394, + "grad_norm": 0.09975551813840866, + "learning_rate": 2.9328126535766776e-06, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1817759083511232, + "grad_norm": 0.12444489449262619, + "learning_rate": 2.9272072981849587e-06, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1821986430217075, + "grad_norm": 0.09062996506690979, + "learning_rate": 2.9216071430199776e-06, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1826213776922914, + "grad_norm": 0.11677880585193634, + "learning_rate": 2.9160121887004012e-06, + "loss": 0.3553, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.183044112362875, + "grad_norm": 0.0973840281367302, + "learning_rate": 2.9104224358443066e-06, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1834668470334595, + "grad_norm": 0.10153448581695557, + "learning_rate": 2.9048378850692117e-06, + "loss": 0.3515, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1838895817040433, + "grad_norm": 0.10727284103631973, + "learning_rate": 2.8992585369920554e-06, + "loss": 0.3707, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1843123163746276, + "grad_norm": 0.11182466894388199, + "learning_rate": 2.8936843922291847e-06, + "loss": 0.3651, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1847350510452115, + "grad_norm": 0.09164273738861084, + "learning_rate": 2.888115451396406e-06, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1851577857157953, + "grad_norm": 0.11509303748607635, + "learning_rate": 2.8825517151089166e-06, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1855805203863796, + "grad_norm": 0.0977163165807724, + "learning_rate": 2.876993183981358e-06, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1860032550569635, + "grad_norm": 0.09650690108537674, + "learning_rate": 2.8714398586277947e-06, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1864259897275473, + "grad_norm": 0.0981847494840622, + "learning_rate": 2.865891739661708e-06, + "loss": 0.3661, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1868487243981316, + "grad_norm": 0.10440757125616074, + "learning_rate": 2.8603488276960245e-06, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1872714590687155, + "grad_norm": 0.12486783415079117, + "learning_rate": 2.8548111233430653e-06, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1876941937392997, + "grad_norm": 0.13244372606277466, + "learning_rate": 2.8492786272145967e-06, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1881169284098836, + "grad_norm": 0.12003055214881897, + "learning_rate": 2.8437513399218018e-06, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1885396630804674, + "grad_norm": 0.10200183838605881, + "learning_rate": 2.8382292620753036e-06, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1889623977510517, + "grad_norm": 0.08990033715963364, + "learning_rate": 2.832712394285125e-06, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1893851324216356, + "grad_norm": 0.10662620514631271, + "learning_rate": 2.8272007371607235e-06, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1898078670922194, + "grad_norm": 0.11361958086490631, + "learning_rate": 2.8216942913109947e-06, + "loss": 0.3664, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1902306017628037, + "grad_norm": 0.11838813871145248, + "learning_rate": 2.816193057344241e-06, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1906533364333876, + "grad_norm": 0.11352438479661942, + "learning_rate": 2.8106970358681927e-06, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1910760711039714, + "grad_norm": 0.09597624838352203, + "learning_rate": 2.8052062274900036e-06, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1914988057745557, + "grad_norm": 0.12277199327945709, + "learning_rate": 2.7997206328162606e-06, + "loss": 0.3527, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1919215404451395, + "grad_norm": 0.10902358591556549, + "learning_rate": 2.7942402524529676e-06, + "loss": 0.3557, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1923442751157234, + "grad_norm": 0.10910855978727341, + "learning_rate": 2.7887650870055624e-06, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1927670097863077, + "grad_norm": 0.12307683378458023, + "learning_rate": 2.783295137078873e-06, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1931897444568915, + "grad_norm": 0.11966792494058609, + "learning_rate": 2.777830403277204e-06, + "loss": 0.3742, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.193612479127476, + "grad_norm": 0.10439316928386688, + "learning_rate": 2.772370886204234e-06, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1940352137980597, + "grad_norm": 0.10960353910923004, + "learning_rate": 2.7669165864630974e-06, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1944579484686435, + "grad_norm": 0.11367065459489822, + "learning_rate": 2.7614675046563345e-06, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.194880683139228, + "grad_norm": 0.10163185000419617, + "learning_rate": 2.7560236413859244e-06, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1953034178098116, + "grad_norm": 0.09799762070178986, + "learning_rate": 2.7505849972532583e-06, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1957261524803955, + "grad_norm": 0.11646436899900436, + "learning_rate": 2.7451515728591613e-06, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.19614888715098, + "grad_norm": 0.11604702472686768, + "learning_rate": 2.739723368803859e-06, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1965716218215636, + "grad_norm": 0.11070554703474045, + "learning_rate": 2.7343003856870384e-06, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.196994356492148, + "grad_norm": 0.1077062115073204, + "learning_rate": 2.728882624107759e-06, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1974170911627318, + "grad_norm": 0.10789674520492554, + "learning_rate": 2.7234700846645534e-06, + "loss": 0.3652, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1978398258333156, + "grad_norm": 0.08885697275400162, + "learning_rate": 2.718062767955348e-06, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1982625605039, + "grad_norm": 0.155824214220047, + "learning_rate": 2.7126606745774996e-06, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1986852951744837, + "grad_norm": 0.10163003951311111, + "learning_rate": 2.707263805127791e-06, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1991080298450676, + "grad_norm": 0.10278850793838501, + "learning_rate": 2.7018721602024342e-06, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.199530764515652, + "grad_norm": 0.10038980096578598, + "learning_rate": 2.6964857403970423e-06, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.1999534991862357, + "grad_norm": 0.13927799463272095, + "learning_rate": 2.6911045463066663e-06, + "loss": 0.3523, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2003762338568196, + "grad_norm": 0.10870685428380966, + "learning_rate": 2.6857285785257704e-06, + "loss": 0.3653, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.200798968527404, + "grad_norm": 0.09696473926305771, + "learning_rate": 2.680357837648262e-06, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2012217031979877, + "grad_norm": 0.10436976701021194, + "learning_rate": 2.67499232426745e-06, + "loss": 0.3558, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2016444378685716, + "grad_norm": 0.11289708316326141, + "learning_rate": 2.6696320389760778e-06, + "loss": 0.3529, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.202067172539156, + "grad_norm": 0.1062619760632515, + "learning_rate": 2.6642769823663093e-06, + "loss": 0.3661, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2024899072097397, + "grad_norm": 0.12114892899990082, + "learning_rate": 2.6589271550297224e-06, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.202912641880324, + "grad_norm": 0.1201259195804596, + "learning_rate": 2.6535825575573215e-06, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.203335376550908, + "grad_norm": 0.10201510041952133, + "learning_rate": 2.6482431905395457e-06, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2037581112214917, + "grad_norm": 0.12507887184619904, + "learning_rate": 2.6429090545662336e-06, + "loss": 0.366, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.204180845892076, + "grad_norm": 0.12246356159448624, + "learning_rate": 2.637580150226665e-06, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.20460358056266, + "grad_norm": 0.1472168266773224, + "learning_rate": 2.6322564781095295e-06, + "loss": 0.3656, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2050263152332437, + "grad_norm": 0.14048168063163757, + "learning_rate": 2.6269380388029507e-06, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.205449049903828, + "grad_norm": 0.11195366084575653, + "learning_rate": 2.6216248328944705e-06, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.205871784574412, + "grad_norm": 0.08803091943264008, + "learning_rate": 2.616316860971035e-06, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.206294519244996, + "grad_norm": 0.10259804874658585, + "learning_rate": 2.6110141236190377e-06, + "loss": 0.352, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.20671725391558, + "grad_norm": 0.1120024248957634, + "learning_rate": 2.6057166214242758e-06, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.207139988586164, + "grad_norm": 0.1024867594242096, + "learning_rate": 2.6004243549719866e-06, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.207562723256748, + "grad_norm": 0.10784466564655304, + "learning_rate": 2.595137324846808e-06, + "loss": 0.3675, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.207985457927332, + "grad_norm": 0.10588018596172333, + "learning_rate": 2.5898555316328066e-06, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2084081925979158, + "grad_norm": 0.10544314980506897, + "learning_rate": 2.5845789759134876e-06, + "loss": 0.3533, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2088309272685, + "grad_norm": 0.1177460253238678, + "learning_rate": 2.57930765827174e-06, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.209253661939084, + "grad_norm": 0.12643662095069885, + "learning_rate": 2.5740415792899097e-06, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2096763966096677, + "grad_norm": 0.11052107810974121, + "learning_rate": 2.5687807395497587e-06, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.210099131280252, + "grad_norm": 0.127157062292099, + "learning_rate": 2.5635251396324443e-06, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.210521865950836, + "grad_norm": 0.10632462054491043, + "learning_rate": 2.55827478011858e-06, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2109446006214197, + "grad_norm": 0.1212998554110527, + "learning_rate": 2.5530296615881855e-06, + "loss": 0.3686, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 84990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.211367335292004, + "grad_norm": 0.1492234617471695, + "learning_rate": 2.54778978462068e-06, + "loss": 0.368, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.211790069962588, + "grad_norm": 0.1151144951581955, + "learning_rate": 2.542555149794945e-06, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.212212804633172, + "grad_norm": 0.15245382487773895, + "learning_rate": 2.5373257576892404e-06, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.212635539303756, + "grad_norm": 0.11024574935436249, + "learning_rate": 2.532101608881282e-06, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.21305827397434, + "grad_norm": 0.09660177677869797, + "learning_rate": 2.5268827039481856e-06, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.213481008644924, + "grad_norm": 0.1076725646853447, + "learning_rate": 2.5216690434664957e-06, + "loss": 0.3677, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.213903743315508, + "grad_norm": 0.09233919531106949, + "learning_rate": 2.5164606280121794e-06, + "loss": 0.3646, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.214326477986092, + "grad_norm": 0.10486886650323868, + "learning_rate": 2.5112574581606263e-06, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.214749212656676, + "grad_norm": 0.12417053431272507, + "learning_rate": 2.5060595344866323e-06, + "loss": 0.3694, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.21517194732726, + "grad_norm": 0.11014200001955032, + "learning_rate": 2.5008668575644213e-06, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2155946819978443, + "grad_norm": 0.10167574882507324, + "learning_rate": 2.4956794279676345e-06, + "loss": 0.3542, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.216017416668428, + "grad_norm": 0.10744424909353256, + "learning_rate": 2.490497246269352e-06, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.216440151339012, + "grad_norm": 0.1035485714673996, + "learning_rate": 2.4853203130420442e-06, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2168628860095962, + "grad_norm": 0.11302728205919266, + "learning_rate": 2.4801486288576314e-06, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.21728562068018, + "grad_norm": 0.0979243591427803, + "learning_rate": 2.474982194287434e-06, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.217708355350764, + "grad_norm": 0.09869988262653351, + "learning_rate": 2.4698210099022014e-06, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2181310900213482, + "grad_norm": 0.1129160076379776, + "learning_rate": 2.4646650762720935e-06, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.218553824691932, + "grad_norm": 0.13442644476890564, + "learning_rate": 2.459514393966711e-06, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.218976559362516, + "grad_norm": 0.12018177658319473, + "learning_rate": 2.454368963555037e-06, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2193992940331, + "grad_norm": 0.14434972405433655, + "learning_rate": 2.449228785605512e-06, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.219822028703684, + "grad_norm": 0.1340024173259735, + "learning_rate": 2.4440938606859864e-06, + "loss": 0.3692, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2202447633742683, + "grad_norm": 0.13748455047607422, + "learning_rate": 2.438964189363713e-06, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.220667498044852, + "grad_norm": 0.10345939546823502, + "learning_rate": 2.433839772205393e-06, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.221090232715436, + "grad_norm": 0.10267113894224167, + "learning_rate": 2.428720609777113e-06, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2215129673860203, + "grad_norm": 0.09812238067388535, + "learning_rate": 2.42360670264441e-06, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.221935702056604, + "grad_norm": 0.0922931507229805, + "learning_rate": 2.4184980513722198e-06, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.222358436727188, + "grad_norm": 0.11281381547451019, + "learning_rate": 2.41339465652492e-06, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2227811713977723, + "grad_norm": 0.10440373420715332, + "learning_rate": 2.40829651866627e-06, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.223203906068356, + "grad_norm": 0.09908762574195862, + "learning_rate": 2.4032036383594914e-06, + "loss": 0.3497, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.22362664073894, + "grad_norm": 0.1027647852897644, + "learning_rate": 2.39811601616719e-06, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2240493754095243, + "grad_norm": 0.1487075835466385, + "learning_rate": 2.3930336526514275e-06, + "loss": 0.3695, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.224472110080108, + "grad_norm": 0.10410419851541519, + "learning_rate": 2.387956548373638e-06, + "loss": 0.3561, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2248948447506924, + "grad_norm": 0.09189613163471222, + "learning_rate": 2.3828847038947054e-06, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2253175794212763, + "grad_norm": 0.10336057096719742, + "learning_rate": 2.3778181197749383e-06, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.22574031409186, + "grad_norm": 0.11118049919605255, + "learning_rate": 2.372756796574044e-06, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2261630487624444, + "grad_norm": 0.13823041319847107, + "learning_rate": 2.3677007348511636e-06, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2265857834330283, + "grad_norm": 0.13797758519649506, + "learning_rate": 2.3626499351648403e-06, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.227008518103612, + "grad_norm": 0.10654763132333755, + "learning_rate": 2.3576043980730546e-06, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2274312527741964, + "grad_norm": 0.09417349845170975, + "learning_rate": 2.3525641241331888e-06, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2278539874447802, + "grad_norm": 0.11892693489789963, + "learning_rate": 2.3475291139020583e-06, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.228276722115364, + "grad_norm": 0.11660292744636536, + "learning_rate": 2.342499367935891e-06, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2286994567859484, + "grad_norm": 0.09995485097169876, + "learning_rate": 2.3374748867903307e-06, + "loss": 0.3533, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2291221914565322, + "grad_norm": 0.12355124205350876, + "learning_rate": 2.3324556710204448e-06, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2295449261271165, + "grad_norm": 0.10003965348005295, + "learning_rate": 2.3274417211807174e-06, + "loss": 0.3549, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2299676607977004, + "grad_norm": 0.10680094361305237, + "learning_rate": 2.3224330378250447e-06, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.230390395468284, + "grad_norm": 0.1063949316740036, + "learning_rate": 2.317429621506756e-06, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2308131301388685, + "grad_norm": 0.1169922947883606, + "learning_rate": 2.31243147277857e-06, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2312358648094524, + "grad_norm": 0.10486500710248947, + "learning_rate": 2.3074385921926567e-06, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.231658599480036, + "grad_norm": 0.10094503313302994, + "learning_rate": 2.3024509803005858e-06, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2320813341506205, + "grad_norm": 0.10226593166589737, + "learning_rate": 2.297468637653349e-06, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2325040688212043, + "grad_norm": 0.08995571732521057, + "learning_rate": 2.292491564801358e-06, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.232926803491788, + "grad_norm": 0.12156754732131958, + "learning_rate": 2.2875197622944435e-06, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2333495381623725, + "grad_norm": 0.11465311795473099, + "learning_rate": 2.2825532306818386e-06, + "loss": 0.3661, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2337722728329563, + "grad_norm": 0.1010330468416214, + "learning_rate": 2.277591970512222e-06, + "loss": 0.3554, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2341950075035406, + "grad_norm": 0.10155104845762253, + "learning_rate": 2.2726359823336598e-06, + "loss": 0.3544, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2346177421741245, + "grad_norm": 0.09630978107452393, + "learning_rate": 2.267685266693653e-06, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2350404768447083, + "grad_norm": 0.11840330809354782, + "learning_rate": 2.2627398241391205e-06, + "loss": 0.3539, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2354632115152926, + "grad_norm": 0.11584962159395218, + "learning_rate": 2.2577996552163914e-06, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2358859461858764, + "grad_norm": 0.14552854001522064, + "learning_rate": 2.2528647604712295e-06, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2363086808564603, + "grad_norm": 0.1108233630657196, + "learning_rate": 2.247935140448787e-06, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2367314155270446, + "grad_norm": 0.09554221481084824, + "learning_rate": 2.2430107956936508e-06, + "loss": 0.3543, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2371541501976284, + "grad_norm": 0.09880156069993973, + "learning_rate": 2.23809172674983e-06, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2375768848682123, + "grad_norm": 0.11659755557775497, + "learning_rate": 2.233177934160752e-06, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2379996195387966, + "grad_norm": 0.11512094736099243, + "learning_rate": 2.2282694184692255e-06, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2384223542093804, + "grad_norm": 0.11229123175144196, + "learning_rate": 2.2233661802175285e-06, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2388450888799647, + "grad_norm": 0.10443108528852463, + "learning_rate": 2.218468219947323e-06, + "loss": 0.3529, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2392678235505485, + "grad_norm": 0.09978324919939041, + "learning_rate": 2.2135755381997027e-06, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2396905582211324, + "grad_norm": 0.09319782257080078, + "learning_rate": 2.2086881355151633e-06, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2401132928917167, + "grad_norm": 0.10307757556438446, + "learning_rate": 2.203806012433629e-06, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2405360275623005, + "grad_norm": 0.1281629204750061, + "learning_rate": 2.1989291694944403e-06, + "loss": 0.3684, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2409587622328844, + "grad_norm": 0.10411323606967926, + "learning_rate": 2.1940576072363497e-06, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2413814969034687, + "grad_norm": 0.11880529671907425, + "learning_rate": 2.1891913261975317e-06, + "loss": 0.3681, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2418042315740525, + "grad_norm": 0.10027006268501282, + "learning_rate": 2.1843303269155677e-06, + "loss": 0.3536, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2422269662446364, + "grad_norm": 0.10218115150928497, + "learning_rate": 2.1794746099274733e-06, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2426497009152206, + "grad_norm": 0.11425536870956421, + "learning_rate": 2.1746241757696574e-06, + "loss": 0.3544, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2430724355858045, + "grad_norm": 0.12624718248844147, + "learning_rate": 2.1697790249779636e-06, + "loss": 0.3683, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.243495170256389, + "grad_norm": 0.10200080275535583, + "learning_rate": 2.1649391580876423e-06, + "loss": 0.3658, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2439179049269726, + "grad_norm": 0.10800783336162567, + "learning_rate": 2.1601045756333647e-06, + "loss": 0.3687, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2443406395975565, + "grad_norm": 0.11233137547969818, + "learning_rate": 2.1552752781492157e-06, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2447633742681408, + "grad_norm": 0.10395548492670059, + "learning_rate": 2.150451266168707e-06, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2451861089387246, + "grad_norm": 0.09445767104625702, + "learning_rate": 2.1456325402247455e-06, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2456088436093085, + "grad_norm": 0.09856030344963074, + "learning_rate": 2.1408191008496725e-06, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2460315782798927, + "grad_norm": 0.12621469795703888, + "learning_rate": 2.136010948575229e-06, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2464543129504766, + "grad_norm": 0.11583548039197922, + "learning_rate": 2.13120808393259e-06, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2468770476210604, + "grad_norm": 0.10696325451135635, + "learning_rate": 2.1264105074523365e-06, + "loss": 0.3559, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2472997822916447, + "grad_norm": 0.12442493438720703, + "learning_rate": 2.1216182196644616e-06, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2477225169622286, + "grad_norm": 0.10034463554620743, + "learning_rate": 2.1168312210983865e-06, + "loss": 0.3696, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.248145251632813, + "grad_norm": 0.10476464778184891, + "learning_rate": 2.112049512282943e-06, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2485679863033967, + "grad_norm": 0.0943712592124939, + "learning_rate": 2.107273093746359e-06, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2489907209739806, + "grad_norm": 0.10424279421567917, + "learning_rate": 2.102501966016318e-06, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.249413455644565, + "grad_norm": 0.13013094663619995, + "learning_rate": 2.097736129619876e-06, + "loss": 0.3525, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2498361903151487, + "grad_norm": 0.12116508185863495, + "learning_rate": 2.092975585083529e-06, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2502589249857325, + "grad_norm": 0.13685689866542816, + "learning_rate": 2.088220332933194e-06, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.250681659656317, + "grad_norm": 0.09434591978788376, + "learning_rate": 2.083470373694185e-06, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2511043943269007, + "grad_norm": 0.10017138719558716, + "learning_rate": 2.078725707891249e-06, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.251527128997485, + "grad_norm": 0.08578740805387497, + "learning_rate": 2.0739863360485222e-06, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.251949863668069, + "grad_norm": 0.12100805342197418, + "learning_rate": 2.0692522586895857e-06, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2523725983386527, + "grad_norm": 0.10923408716917038, + "learning_rate": 2.0645234763374155e-06, + "loss": 0.3671, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.252795333009237, + "grad_norm": 0.08608725666999817, + "learning_rate": 2.0597999895144213e-06, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.253218067679821, + "grad_norm": 0.09147503226995468, + "learning_rate": 2.055081798742403e-06, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 85990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2536408023504046, + "grad_norm": 0.11695092916488647, + "learning_rate": 2.0503689045425934e-06, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.254063537020989, + "grad_norm": 0.11291204392910004, + "learning_rate": 2.0456613074356368e-06, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.254486271691573, + "grad_norm": 0.08678264170885086, + "learning_rate": 2.0409590079415954e-06, + "loss": 0.3556, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2549090063621566, + "grad_norm": 0.10487481206655502, + "learning_rate": 2.0362620065799308e-06, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.255331741032741, + "grad_norm": 0.11819268763065338, + "learning_rate": 2.0315703038695345e-06, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2557544757033248, + "grad_norm": 0.11723897606134415, + "learning_rate": 2.0268839003287132e-06, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2561772103739086, + "grad_norm": 0.11461301147937775, + "learning_rate": 2.0222027964751756e-06, + "loss": 0.3546, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.256599945044493, + "grad_norm": 0.11838391423225403, + "learning_rate": 2.0175269928260687e-06, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2570226797150768, + "grad_norm": 0.09240944683551788, + "learning_rate": 2.0128564898979187e-06, + "loss": 0.3538, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2574454143856606, + "grad_norm": 0.09746381640434265, + "learning_rate": 2.0081912882066954e-06, + "loss": 0.3506, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.257868149056245, + "grad_norm": 0.11671920865774155, + "learning_rate": 2.0035313882677707e-06, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2582908837268287, + "grad_norm": 0.11113519966602325, + "learning_rate": 1.998876790595927e-06, + "loss": 0.3521, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.258713618397413, + "grad_norm": 0.0977865606546402, + "learning_rate": 1.994227495705381e-06, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.259136353067997, + "grad_norm": 0.12142278999090195, + "learning_rate": 1.9895835041097376e-06, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2595590877385807, + "grad_norm": 0.14189524948596954, + "learning_rate": 1.984944816322032e-06, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.259981822409165, + "grad_norm": 0.12281966954469681, + "learning_rate": 1.9803114328547146e-06, + "loss": 0.3651, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.260404557079749, + "grad_norm": 0.1294793337583542, + "learning_rate": 1.975683354219643e-06, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.260827291750333, + "grad_norm": 0.09306836873292923, + "learning_rate": 1.9710605809280858e-06, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.261250026420917, + "grad_norm": 0.12182030081748962, + "learning_rate": 1.966443113490729e-06, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.261672761091501, + "grad_norm": 0.1233694776892662, + "learning_rate": 1.961830952417676e-06, + "loss": 0.3559, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.262095495762085, + "grad_norm": 0.10907920449972153, + "learning_rate": 1.957224098218441e-06, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.262518230432669, + "grad_norm": 0.11241703480482101, + "learning_rate": 1.952622551401956e-06, + "loss": 0.3545, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.262940965103253, + "grad_norm": 0.11132419854402542, + "learning_rate": 1.9480263124765585e-06, + "loss": 0.3498, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.263363699773837, + "grad_norm": 0.13126635551452637, + "learning_rate": 1.943435381950015e-06, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.263786434444421, + "grad_norm": 0.1258988380432129, + "learning_rate": 1.938849760329475e-06, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.264209169115005, + "grad_norm": 0.11234536021947861, + "learning_rate": 1.934269448121545e-06, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.264631903785589, + "grad_norm": 0.15630847215652466, + "learning_rate": 1.929694445832203e-06, + "loss": 0.3538, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.265054638456173, + "grad_norm": 0.11725788563489914, + "learning_rate": 1.9251247539668613e-06, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.265477373126757, + "grad_norm": 0.10169960558414459, + "learning_rate": 1.92056037303035e-06, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.265900107797341, + "grad_norm": 0.14233630895614624, + "learning_rate": 1.9160013035268987e-06, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.266322842467925, + "grad_norm": 0.11114580184221268, + "learning_rate": 1.9114475459601657e-06, + "loss": 0.3514, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2667455771385088, + "grad_norm": 0.12976165115833282, + "learning_rate": 1.9068991008332094e-06, + "loss": 0.3688, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.267168311809093, + "grad_norm": 0.12187295407056808, + "learning_rate": 1.9023559686485004e-06, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.267591046479677, + "grad_norm": 0.08938663452863693, + "learning_rate": 1.8978181499079373e-06, + "loss": 0.3523, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.268013781150261, + "grad_norm": 0.10807134211063385, + "learning_rate": 1.893285645112819e-06, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.268436515820845, + "grad_norm": 0.13179193437099457, + "learning_rate": 1.8887584547638504e-06, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.268859250491429, + "grad_norm": 0.11746007949113846, + "learning_rate": 1.8842365793611705e-06, + "loss": 0.3666, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.269281985162013, + "grad_norm": 0.11494182795286179, + "learning_rate": 1.8797200194043185e-06, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.269704719832597, + "grad_norm": 0.1110086441040039, + "learning_rate": 1.875208775392251e-06, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2701274545031813, + "grad_norm": 0.10933975875377655, + "learning_rate": 1.8707028478233247e-06, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.270550189173765, + "grad_norm": 0.1003277599811554, + "learning_rate": 1.8662022371953247e-06, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.270972923844349, + "grad_norm": 0.1004534587264061, + "learning_rate": 1.8617069440054368e-06, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2713956585149333, + "grad_norm": 0.10590460151433945, + "learning_rate": 1.857216968750275e-06, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.271818393185517, + "grad_norm": 0.11667834967374802, + "learning_rate": 1.8527323119258587e-06, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.272241127856101, + "grad_norm": 0.11729994416236877, + "learning_rate": 1.8482529740275979e-06, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2726638625266853, + "grad_norm": 0.10868220031261444, + "learning_rate": 1.843778955550346e-06, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.273086597197269, + "grad_norm": 0.09082154929637909, + "learning_rate": 1.8393102569883636e-06, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.273509331867853, + "grad_norm": 0.12750057876110077, + "learning_rate": 1.8348468788353058e-06, + "loss": 0.3732, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2739320665384373, + "grad_norm": 0.1142381802201271, + "learning_rate": 1.8303888215842502e-06, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.274354801209021, + "grad_norm": 0.11596769094467163, + "learning_rate": 1.8259360857276975e-06, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.274777535879605, + "grad_norm": 0.10932992398738861, + "learning_rate": 1.821488671757543e-06, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2752002705501893, + "grad_norm": 0.10553571581840515, + "learning_rate": 1.8170465801651103e-06, + "loss": 0.3515, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.275623005220773, + "grad_norm": 0.09841123968362808, + "learning_rate": 1.8126098114411072e-06, + "loss": 0.3492, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.276045739891357, + "grad_norm": 0.15577931702136993, + "learning_rate": 1.8081783660756968e-06, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2764684745619412, + "grad_norm": 0.10005783289670944, + "learning_rate": 1.8037522445584098e-06, + "loss": 0.3514, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.276891209232525, + "grad_norm": 0.09524083882570267, + "learning_rate": 1.799331447378222e-06, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2773139439031094, + "grad_norm": 0.08958212286233902, + "learning_rate": 1.7949159750234977e-06, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.277736678573693, + "grad_norm": 0.10013040900230408, + "learning_rate": 1.7905058279820308e-06, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.278159413244277, + "grad_norm": 0.0921129658818245, + "learning_rate": 1.7861010067410145e-06, + "loss": 0.3673, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2785821479148614, + "grad_norm": 0.1075829416513443, + "learning_rate": 1.781701511787065e-06, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.279004882585445, + "grad_norm": 0.12788018584251404, + "learning_rate": 1.7773073436061937e-06, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2794276172560295, + "grad_norm": 0.10818632692098618, + "learning_rate": 1.7733571470356202e-06, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2798503519266133, + "grad_norm": 0.09713885933160782, + "learning_rate": 1.7689731010604837e-06, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.280273086597197, + "grad_norm": 0.08994700014591217, + "learning_rate": 1.7645943832645784e-06, + "loss": 0.3558, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2806958212677815, + "grad_norm": 0.13819073140621185, + "learning_rate": 1.7602209941316006e-06, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2811185559383653, + "grad_norm": 0.1217389926314354, + "learning_rate": 1.7558529341447083e-06, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.281541290608949, + "grad_norm": 0.09219576418399811, + "learning_rate": 1.751490203786449e-06, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2819640252795335, + "grad_norm": 0.12325670570135117, + "learning_rate": 1.7471328035387702e-06, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2823867599501173, + "grad_norm": 0.11488538980484009, + "learning_rate": 1.742780733883048e-06, + "loss": 0.3669, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.282809494620701, + "grad_norm": 0.12802155315876007, + "learning_rate": 1.7384339953000707e-06, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2832322292912854, + "grad_norm": 0.08792394399642944, + "learning_rate": 1.734092588270031e-06, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2836549639618693, + "grad_norm": 0.10789870470762253, + "learning_rate": 1.7297565132725236e-06, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.284077698632453, + "grad_norm": 0.10646877437829971, + "learning_rate": 1.725425770786565e-06, + "loss": 0.354, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2845004333030374, + "grad_norm": 0.09753011167049408, + "learning_rate": 1.721100361290584e-06, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2849231679736213, + "grad_norm": 0.13996130228042603, + "learning_rate": 1.7167802852624203e-06, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.285345902644205, + "grad_norm": 0.10901742428541183, + "learning_rate": 1.7124655431793257e-06, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2857686373147894, + "grad_norm": 0.1185331717133522, + "learning_rate": 1.7081561355179465e-06, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2861913719853733, + "grad_norm": 0.09787381440401077, + "learning_rate": 1.7038520627543574e-06, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2866141066559575, + "grad_norm": 0.13524243235588074, + "learning_rate": 1.699553325364045e-06, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2870368413265414, + "grad_norm": 0.09788890182971954, + "learning_rate": 1.6952599238218846e-06, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2874595759971252, + "grad_norm": 0.11169235408306122, + "learning_rate": 1.6909718586021805e-06, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2878823106677095, + "grad_norm": 0.10280909389257431, + "learning_rate": 1.6866891301786537e-06, + "loss": 0.3702, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2883050453382934, + "grad_norm": 0.10186376422643661, + "learning_rate": 1.6824117390244199e-06, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2887277800088777, + "grad_norm": 0.11245307326316833, + "learning_rate": 1.6781396856120069e-06, + "loss": 0.3663, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2891505146794615, + "grad_norm": 0.120211161673069, + "learning_rate": 1.6738729704133705e-06, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2895732493500454, + "grad_norm": 0.1192639023065567, + "learning_rate": 1.6696115938998557e-06, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2899959840206296, + "grad_norm": 0.11480138450860977, + "learning_rate": 1.6653555565422196e-06, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2904187186912135, + "grad_norm": 0.12152085453271866, + "learning_rate": 1.6611048588106358e-06, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2908414533617973, + "grad_norm": 0.10232239216566086, + "learning_rate": 1.6568595011746956e-06, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2912641880323816, + "grad_norm": 0.10561666637659073, + "learning_rate": 1.6526194841033848e-06, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2916869227029655, + "grad_norm": 0.09975261241197586, + "learning_rate": 1.6483848080651122e-06, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2921096573735493, + "grad_norm": 0.09317562729120255, + "learning_rate": 1.6441554735276975e-06, + "loss": 0.3556, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2925323920441336, + "grad_norm": 0.11690711975097656, + "learning_rate": 1.6399314809583454e-06, + "loss": 0.3529, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2929551267147175, + "grad_norm": 0.12859822809696198, + "learning_rate": 1.6357128308237046e-06, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2933778613853013, + "grad_norm": 0.10908562690019608, + "learning_rate": 1.631499523589808e-06, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2938005960558856, + "grad_norm": 0.13021890819072723, + "learning_rate": 1.6272915597221162e-06, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2942233307264694, + "grad_norm": 0.09620825946331024, + "learning_rate": 1.6230889396854798e-06, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2946460653970533, + "grad_norm": 0.10248015075922012, + "learning_rate": 1.6188916639441832e-06, + "loss": 0.3543, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2950688000676376, + "grad_norm": 0.10938766598701477, + "learning_rate": 1.614699732961905e-06, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2954915347382214, + "grad_norm": 0.09560249745845795, + "learning_rate": 1.6105131472017366e-06, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 86990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2959142694088057, + "grad_norm": 0.10817702114582062, + "learning_rate": 1.606331907126174e-06, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2963370040793896, + "grad_norm": 0.0935860276222229, + "learning_rate": 1.6021560131971258e-06, + "loss": 0.355, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2967597387499734, + "grad_norm": 0.11021450906991959, + "learning_rate": 1.5979854658759285e-06, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2971824734205577, + "grad_norm": 0.12075947970151901, + "learning_rate": 1.5938202656232858e-06, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2976052080911415, + "grad_norm": 0.1035563200712204, + "learning_rate": 1.589660412899352e-06, + "loss": 0.3553, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.298027942761726, + "grad_norm": 0.13785366714000702, + "learning_rate": 1.5855059081636647e-06, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2984506774323097, + "grad_norm": 0.13932166993618011, + "learning_rate": 1.5813567518751959e-06, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2988734121028935, + "grad_norm": 0.10585059970617294, + "learning_rate": 1.5772129444923011e-06, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.299296146773478, + "grad_norm": 0.12141478061676025, + "learning_rate": 1.5730744864727475e-06, + "loss": 0.3559, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.2997188814440617, + "grad_norm": 0.10106194764375687, + "learning_rate": 1.5689413782737306e-06, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3001416161146455, + "grad_norm": 0.10534578561782837, + "learning_rate": 1.5648136203518404e-06, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.30056435078523, + "grad_norm": 0.11995046585798264, + "learning_rate": 1.5606912131630792e-06, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3009870854558137, + "grad_norm": 0.11516968905925751, + "learning_rate": 1.5565741571628546e-06, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3014098201263975, + "grad_norm": 0.1404808759689331, + "learning_rate": 1.5524624528059916e-06, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.301832554796982, + "grad_norm": 0.10014116764068604, + "learning_rate": 1.5483561005467162e-06, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3022552894675656, + "grad_norm": 0.12198803573846817, + "learning_rate": 1.5442551008386595e-06, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3026780241381495, + "grad_norm": 0.1177566647529602, + "learning_rate": 1.5401594541348707e-06, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3031007588087338, + "grad_norm": 0.10546384006738663, + "learning_rate": 1.5360691608878042e-06, + "loss": 0.3679, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3035234934793176, + "grad_norm": 0.1095609962940216, + "learning_rate": 1.5319842215493262e-06, + "loss": 0.3545, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3039462281499015, + "grad_norm": 0.1326664239168167, + "learning_rate": 1.527904636570704e-06, + "loss": 0.3677, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3043689628204858, + "grad_norm": 0.10290572792291641, + "learning_rate": 1.5238304064026266e-06, + "loss": 0.355, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3047916974910696, + "grad_norm": 0.11073851585388184, + "learning_rate": 1.5197615314951786e-06, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.305214432161654, + "grad_norm": 0.11468800157308578, + "learning_rate": 1.5156980122978449e-06, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3056371668322377, + "grad_norm": 0.12031232565641403, + "learning_rate": 1.5116398492595384e-06, + "loss": 0.3542, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3060599015028216, + "grad_norm": 0.09612184762954712, + "learning_rate": 1.5075870428285788e-06, + "loss": 0.364, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.306482636173406, + "grad_norm": 0.09683016687631607, + "learning_rate": 1.5035395934526796e-06, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3069053708439897, + "grad_norm": 0.12974566221237183, + "learning_rate": 1.4994975015789726e-06, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.307328105514574, + "grad_norm": 0.10861220955848694, + "learning_rate": 1.4954607676540056e-06, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.307750840185158, + "grad_norm": 0.10015501081943512, + "learning_rate": 1.491429392123711e-06, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3081735748557417, + "grad_norm": 0.1134454682469368, + "learning_rate": 1.487403375433455e-06, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.308596309526326, + "grad_norm": 0.100718192756176, + "learning_rate": 1.4833827180279814e-06, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.30901904419691, + "grad_norm": 0.1427217423915863, + "learning_rate": 1.4793674203514797e-06, + "loss": 0.3643, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3094417788674937, + "grad_norm": 0.1181027889251709, + "learning_rate": 1.475357482847517e-06, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.309864513538078, + "grad_norm": 0.10620643198490143, + "learning_rate": 1.4713529059590835e-06, + "loss": 0.3647, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.310287248208662, + "grad_norm": 0.12593887746334076, + "learning_rate": 1.4673536901285701e-06, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3107099828792457, + "grad_norm": 0.10566359758377075, + "learning_rate": 1.4633598357977896e-06, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.31113271754983, + "grad_norm": 0.11602848023176193, + "learning_rate": 1.4593713434079337e-06, + "loss": 0.3559, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.311555452220414, + "grad_norm": 0.09726407378911972, + "learning_rate": 1.4553882133996278e-06, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3119781868909977, + "grad_norm": 0.11589604616165161, + "learning_rate": 1.451410446212903e-06, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.312400921561582, + "grad_norm": 0.10160373151302338, + "learning_rate": 1.4474380422871802e-06, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.312823656232166, + "grad_norm": 0.14377592504024506, + "learning_rate": 1.4434710020612973e-06, + "loss": 0.355, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.31324639090275, + "grad_norm": 0.13462059199810028, + "learning_rate": 1.4395093259735094e-06, + "loss": 0.3548, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.313669125573334, + "grad_norm": 0.12003999203443527, + "learning_rate": 1.435553014461477e-06, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3140918602439178, + "grad_norm": 0.09508813172578812, + "learning_rate": 1.4316020679622455e-06, + "loss": 0.3661, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.314514594914502, + "grad_norm": 0.11511321365833282, + "learning_rate": 1.4276564869122933e-06, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.314937329585086, + "grad_norm": 0.10469274967908859, + "learning_rate": 1.423716271747494e-06, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3153600642556698, + "grad_norm": 0.10433505475521088, + "learning_rate": 1.4197814229031382e-06, + "loss": 0.3559, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.315782798926254, + "grad_norm": 0.08967577666044235, + "learning_rate": 1.415851940813906e-06, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.316205533596838, + "grad_norm": 0.10810808092355728, + "learning_rate": 1.4119278259138946e-06, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.316628268267422, + "grad_norm": 0.15663276612758636, + "learning_rate": 1.4080090786366185e-06, + "loss": 0.3669, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.317051002938006, + "grad_norm": 0.13567538559436798, + "learning_rate": 1.4040956994149924e-06, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.31747373760859, + "grad_norm": 0.1082153171300888, + "learning_rate": 1.4001876886813202e-06, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.317896472279174, + "grad_norm": 0.11182795464992523, + "learning_rate": 1.3962850468673406e-06, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.318319206949758, + "grad_norm": 0.10166703164577484, + "learning_rate": 1.3923877744041746e-06, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.318741941620342, + "grad_norm": 0.09675919264554977, + "learning_rate": 1.3884958717223729e-06, + "loss": 0.354, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.319164676290926, + "grad_norm": 0.1093243956565857, + "learning_rate": 1.38460933925188e-06, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.31958741096151, + "grad_norm": 0.12754587829113007, + "learning_rate": 1.380728177422047e-06, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.320010145632094, + "grad_norm": 0.10950154066085815, + "learning_rate": 1.3768523866616367e-06, + "loss": 0.3525, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.320432880302678, + "grad_norm": 0.10675719380378723, + "learning_rate": 1.3729819673988008e-06, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.320855614973262, + "grad_norm": 0.1055559292435646, + "learning_rate": 1.369116920061131e-06, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.321278349643846, + "grad_norm": 0.08985098451375961, + "learning_rate": 1.3652572450755963e-06, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.32170108431443, + "grad_norm": 0.10629637539386749, + "learning_rate": 1.3614029428685892e-06, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.322123818985014, + "grad_norm": 0.09886962175369263, + "learning_rate": 1.357554013865897e-06, + "loss": 0.3643, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3225465536555983, + "grad_norm": 0.10012736916542053, + "learning_rate": 1.353710458492724e-06, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.322969288326182, + "grad_norm": 0.11987382918596268, + "learning_rate": 1.349872277173675e-06, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.323392022996766, + "grad_norm": 0.10955885052680969, + "learning_rate": 1.3460394703327606e-06, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3238147576673502, + "grad_norm": 0.09134525805711746, + "learning_rate": 1.3422120383933923e-06, + "loss": 0.3533, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.324237492337934, + "grad_norm": 0.13382919132709503, + "learning_rate": 1.3383899817783984e-06, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.324660227008518, + "grad_norm": 0.11662974208593369, + "learning_rate": 1.3345733009100082e-06, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3250829616791022, + "grad_norm": 0.10573703795671463, + "learning_rate": 1.3307619962098615e-06, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.325505696349686, + "grad_norm": 0.1434873640537262, + "learning_rate": 1.326956068099e-06, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3259284310202704, + "grad_norm": 0.10284318774938583, + "learning_rate": 1.3231555169978816e-06, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.326351165690854, + "grad_norm": 0.10053572058677673, + "learning_rate": 1.3193603433263424e-06, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.326773900361438, + "grad_norm": 0.11921489983797073, + "learning_rate": 1.315570547503653e-06, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3271966350320223, + "grad_norm": 0.10813061147928238, + "learning_rate": 1.311786129948478e-06, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.327619369702606, + "grad_norm": 0.10670316964387894, + "learning_rate": 1.3080070910788888e-06, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.32804210437319, + "grad_norm": 0.11054617911577225, + "learning_rate": 1.3042334313123626e-06, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3284648390437743, + "grad_norm": 0.11389025300741196, + "learning_rate": 1.3004651510657884e-06, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.328887573714358, + "grad_norm": 0.11463514715433121, + "learning_rate": 1.2967022507554493e-06, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.329310308384942, + "grad_norm": 0.09762617200613022, + "learning_rate": 1.292944730797052e-06, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3297330430555263, + "grad_norm": 0.1493247151374817, + "learning_rate": 1.2891925916056813e-06, + "loss": 0.3547, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.33015577772611, + "grad_norm": 0.13785907626152039, + "learning_rate": 1.2854458335958552e-06, + "loss": 0.3521, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.330578512396694, + "grad_norm": 0.12735123932361603, + "learning_rate": 1.2817044571814873e-06, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3310012470672783, + "grad_norm": 0.10920757055282593, + "learning_rate": 1.2779684627758803e-06, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.331423981737862, + "grad_norm": 0.10476209968328476, + "learning_rate": 1.2742378507917707e-06, + "loss": 0.3525, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3318467164084464, + "grad_norm": 0.09610897302627563, + "learning_rate": 1.2705126216412788e-06, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3322694510790303, + "grad_norm": 0.12542656064033508, + "learning_rate": 1.2667927757359476e-06, + "loss": 0.3654, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.332692185749614, + "grad_norm": 0.11715077608823776, + "learning_rate": 1.2630783134867096e-06, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3331149204201984, + "grad_norm": 0.11260583251714706, + "learning_rate": 1.259369235303909e-06, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3335376550907823, + "grad_norm": 0.13680265843868256, + "learning_rate": 1.2556655415972952e-06, + "loss": 0.3561, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.333960389761366, + "grad_norm": 0.0844261422753334, + "learning_rate": 1.251967232776019e-06, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3343831244319504, + "grad_norm": 0.10206592828035355, + "learning_rate": 1.2482743092486538e-06, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3348058591025342, + "grad_norm": 0.14495225250720978, + "learning_rate": 1.2445867714231507e-06, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3352285937731185, + "grad_norm": 0.09206939488649368, + "learning_rate": 1.2409046197068841e-06, + "loss": 0.355, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3356513284437024, + "grad_norm": 0.11349248141050339, + "learning_rate": 1.2372278545066284e-06, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3360740631142862, + "grad_norm": 0.08907965570688248, + "learning_rate": 1.2335564762285644e-06, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3364967977848705, + "grad_norm": 0.11363344639539719, + "learning_rate": 1.2298904852782734e-06, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3369195324554544, + "grad_norm": 0.12779274582862854, + "learning_rate": 1.2262298820607477e-06, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.337342267126038, + "grad_norm": 0.10484647750854492, + "learning_rate": 1.2225746669803807e-06, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3377650017966225, + "grad_norm": 0.11626499891281128, + "learning_rate": 1.2189248404409715e-06, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 87990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3381877364672063, + "grad_norm": 0.09217232465744019, + "learning_rate": 1.215280402845731e-06, + "loss": 0.3554, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.33861047113779, + "grad_norm": 0.09553053230047226, + "learning_rate": 1.2116413545972593e-06, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3390332058083745, + "grad_norm": 0.13344787061214447, + "learning_rate": 1.2080076960975628e-06, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3394559404789583, + "grad_norm": 0.11790820211172104, + "learning_rate": 1.2043794277480701e-06, + "loss": 0.3556, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.339878675149542, + "grad_norm": 0.10981932282447815, + "learning_rate": 1.2007565499495998e-06, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3403014098201265, + "grad_norm": 0.11363859474658966, + "learning_rate": 1.1971390631023816e-06, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3407241444907103, + "grad_norm": 0.11851304024457932, + "learning_rate": 1.1935269676060402e-06, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3411468791612946, + "grad_norm": 0.10994300991296768, + "learning_rate": 1.189920263859623e-06, + "loss": 0.35, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3415696138318784, + "grad_norm": 0.08812606334686279, + "learning_rate": 1.1863189522615558e-06, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3419923485024623, + "grad_norm": 0.09477672725915909, + "learning_rate": 1.1827230332096929e-06, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3424150831730466, + "grad_norm": 0.10309144109487534, + "learning_rate": 1.1791325071012716e-06, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3428378178436304, + "grad_norm": 0.10439348220825195, + "learning_rate": 1.1755473743329582e-06, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3432605525142143, + "grad_norm": 0.12368718534708023, + "learning_rate": 1.1719676353007968e-06, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3436832871847986, + "grad_norm": 0.10184065997600555, + "learning_rate": 1.1683932904002602e-06, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3441060218553824, + "grad_norm": 0.10587786138057709, + "learning_rate": 1.164824340026205e-06, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3445287565259667, + "grad_norm": 0.13832394778728485, + "learning_rate": 1.1612607845729096e-06, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3449514911965506, + "grad_norm": 0.09646216034889221, + "learning_rate": 1.1577026244340317e-06, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3453742258671344, + "grad_norm": 0.13728667795658112, + "learning_rate": 1.1541498600026623e-06, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3457969605377187, + "grad_norm": 0.12618520855903625, + "learning_rate": 1.1506024916712822e-06, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3462196952083025, + "grad_norm": 0.10702238231897354, + "learning_rate": 1.1470605198317663e-06, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3466424298788864, + "grad_norm": 0.125213161110878, + "learning_rate": 1.1435239448754132e-06, + "loss": 0.355, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3470651645494707, + "grad_norm": 0.10230310261249542, + "learning_rate": 1.1399927671929046e-06, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3474878992200545, + "grad_norm": 0.09812687337398529, + "learning_rate": 1.136466987174356e-06, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3479106338906384, + "grad_norm": 0.09764697402715683, + "learning_rate": 1.1329466052092453e-06, + "loss": 0.3643, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3483333685612227, + "grad_norm": 0.1092756912112236, + "learning_rate": 1.1294316216864886e-06, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3487561032318065, + "grad_norm": 0.09757382422685623, + "learning_rate": 1.1259220369943868e-06, + "loss": 0.3523, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3491788379023903, + "grad_norm": 0.10823287814855576, + "learning_rate": 1.1224178515206573e-06, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3496015725729746, + "grad_norm": 0.09455700218677521, + "learning_rate": 1.1189190656524185e-06, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3500243072435585, + "grad_norm": 0.12334947288036346, + "learning_rate": 1.1154256797761776e-06, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3504470419141428, + "grad_norm": 0.11020775139331818, + "learning_rate": 1.1119376942778537e-06, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3508697765847266, + "grad_norm": 0.1123390644788742, + "learning_rate": 1.1084551095427886e-06, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3512925112553105, + "grad_norm": 0.09732818603515625, + "learning_rate": 1.104977925955697e-06, + "loss": 0.3517, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3517152459258948, + "grad_norm": 0.10107895731925964, + "learning_rate": 1.1015061439007102e-06, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3521379805964786, + "grad_norm": 0.12414014339447021, + "learning_rate": 1.098039763761366e-06, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3525607152670625, + "grad_norm": 0.119242824614048, + "learning_rate": 1.094578785920608e-06, + "loss": 0.3553, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3529834499376467, + "grad_norm": 0.10683377087116241, + "learning_rate": 1.0911232107607694e-06, + "loss": 0.3676, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3534061846082306, + "grad_norm": 0.10522927343845367, + "learning_rate": 1.0876730386636003e-06, + "loss": 0.3556, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.353828919278815, + "grad_norm": 0.11520703881978989, + "learning_rate": 1.0842282700102457e-06, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3542516539493987, + "grad_norm": 0.08713746815919876, + "learning_rate": 1.0807889051812515e-06, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3546743886199826, + "grad_norm": 0.10595320165157318, + "learning_rate": 1.0773549445565744e-06, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.355097123290567, + "grad_norm": 0.10714545100927353, + "learning_rate": 1.0739263885155727e-06, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3555198579611507, + "grad_norm": 0.12041866779327393, + "learning_rate": 1.0705032374370094e-06, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3559425926317346, + "grad_norm": 0.10121575742959976, + "learning_rate": 1.067085491699038e-06, + "loss": 0.3553, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.356365327302319, + "grad_norm": 0.12577742338180542, + "learning_rate": 1.0636731516792342e-06, + "loss": 0.3697, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3567880619729027, + "grad_norm": 0.1400633156299591, + "learning_rate": 1.0602662177545575e-06, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3572107966434865, + "grad_norm": 0.10396068543195724, + "learning_rate": 1.0568646903013845e-06, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.357633531314071, + "grad_norm": 0.11650323867797852, + "learning_rate": 1.053468569695487e-06, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3580562659846547, + "grad_norm": 0.10570424050092697, + "learning_rate": 1.0500778563120372e-06, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3584790006552385, + "grad_norm": 0.12151902914047241, + "learning_rate": 1.0466925505256131e-06, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.358901735325823, + "grad_norm": 0.11215509474277496, + "learning_rate": 1.0433126527102045e-06, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3593244699964067, + "grad_norm": 0.11855512112379074, + "learning_rate": 1.0399381632391958e-06, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.359747204666991, + "grad_norm": 0.09909716993570328, + "learning_rate": 1.0365690824853668e-06, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.360169939337575, + "grad_norm": 0.08904041349887848, + "learning_rate": 1.0332054108209088e-06, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3605926740081586, + "grad_norm": 0.11234572529792786, + "learning_rate": 1.0298471486174133e-06, + "loss": 0.3673, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.361015408678743, + "grad_norm": 0.1542762964963913, + "learning_rate": 1.0264942962458834e-06, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.361438143349327, + "grad_norm": 0.11323047429323196, + "learning_rate": 1.0231468540766954e-06, + "loss": 0.3551, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3618608780199106, + "grad_norm": 0.10961014032363892, + "learning_rate": 1.019804822479664e-06, + "loss": 0.3682, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.362283612690495, + "grad_norm": 0.10817579925060272, + "learning_rate": 1.016468201823989e-06, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3627063473610788, + "grad_norm": 0.12439766526222229, + "learning_rate": 1.0131369924782696e-06, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.363129082031663, + "grad_norm": 0.11563335359096527, + "learning_rate": 1.0098111948105116e-06, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.363551816702247, + "grad_norm": 0.11046774685382843, + "learning_rate": 1.006490809188121e-06, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3639745513728307, + "grad_norm": 0.09979134052991867, + "learning_rate": 1.0031758359779098e-06, + "loss": 0.3529, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.364397286043415, + "grad_norm": 0.126149982213974, + "learning_rate": 9.998662755460907e-07, + "loss": 0.3703, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.364820020713999, + "grad_norm": 0.10475330054759979, + "learning_rate": 9.965621282582828e-07, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3652427553845827, + "grad_norm": 0.12776301801204681, + "learning_rate": 9.932633944794878e-07, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.365665490055167, + "grad_norm": 0.15789641439914703, + "learning_rate": 9.899700745741313e-07, + "loss": 0.354, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.366088224725751, + "grad_norm": 0.13081614673137665, + "learning_rate": 9.86682168906039e-07, + "loss": 0.3545, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3665109593963347, + "grad_norm": 0.10872527956962585, + "learning_rate": 9.833996778384258e-07, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.366933694066919, + "grad_norm": 0.11948160082101822, + "learning_rate": 9.801226017339126e-07, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.367356428737503, + "grad_norm": 0.10366534441709518, + "learning_rate": 9.768509409545268e-07, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3677791634080867, + "grad_norm": 0.09392853826284409, + "learning_rate": 9.735846958617012e-07, + "loss": 0.3556, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.368201898078671, + "grad_norm": 0.12756240367889404, + "learning_rate": 9.703238668162528e-07, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.368624632749255, + "grad_norm": 0.11588818579912186, + "learning_rate": 9.670684541784325e-07, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.369047367419839, + "grad_norm": 0.11234644055366516, + "learning_rate": 9.638184583078525e-07, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.369470102090423, + "grad_norm": 0.1221565380692482, + "learning_rate": 9.605738795635532e-07, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.369892836761007, + "grad_norm": 0.09870100021362305, + "learning_rate": 9.573347183039648e-07, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.370315571431591, + "grad_norm": 0.0948696956038475, + "learning_rate": 9.544241054155355e-07, + "loss": 0.3671, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.370738306102175, + "grad_norm": 0.10068465769290924, + "learning_rate": 9.511952383622569e-07, + "loss": 0.3644, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.371161040772759, + "grad_norm": 0.11050383746623993, + "learning_rate": 9.479717898297658e-07, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.371583775443343, + "grad_norm": 0.11825679987668991, + "learning_rate": 9.447537601741718e-07, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.372006510113927, + "grad_norm": 0.10091434419155121, + "learning_rate": 9.415411497509796e-07, + "loss": 0.3539, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3724292447845112, + "grad_norm": 0.09859151393175125, + "learning_rate": 9.383339589150775e-07, + "loss": 0.3559, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.372851979455095, + "grad_norm": 0.10005994141101837, + "learning_rate": 9.351321880207875e-07, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.373274714125679, + "grad_norm": 0.08535583317279816, + "learning_rate": 9.319358374218101e-07, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.373697448796263, + "grad_norm": 0.12004393339157104, + "learning_rate": 9.287449074712462e-07, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.374120183466847, + "grad_norm": 0.10533779114484787, + "learning_rate": 9.255593985216083e-07, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.374542918137431, + "grad_norm": 0.12821900844573975, + "learning_rate": 9.223793109248091e-07, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.374965652808015, + "grad_norm": 0.1107846274971962, + "learning_rate": 9.192046450321568e-07, + "loss": 0.3515, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.375388387478599, + "grad_norm": 0.14462034404277802, + "learning_rate": 9.160354011943595e-07, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.375811122149183, + "grad_norm": 0.11471831053495407, + "learning_rate": 9.128715797615373e-07, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.376233856819767, + "grad_norm": 0.08811358362436295, + "learning_rate": 9.097131810831938e-07, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.376656591490351, + "grad_norm": 0.10655159503221512, + "learning_rate": 9.065602055082612e-07, + "loss": 0.3644, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.377079326160935, + "grad_norm": 0.11072466522455215, + "learning_rate": 9.034126533850385e-07, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.377502060831519, + "grad_norm": 0.12271154671907425, + "learning_rate": 9.002705250612476e-07, + "loss": 0.3559, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.377924795502103, + "grad_norm": 0.10823298990726471, + "learning_rate": 8.971338208840052e-07, + "loss": 0.3652, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3783475301726873, + "grad_norm": 0.13020622730255127, + "learning_rate": 8.940025411998343e-07, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.378770264843271, + "grad_norm": 0.11570342630147934, + "learning_rate": 8.908766863546469e-07, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.379192999513855, + "grad_norm": 0.10266716033220291, + "learning_rate": 8.877562566937669e-07, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3796157341844393, + "grad_norm": 0.10957985371351242, + "learning_rate": 8.84641252561913e-07, + "loss": 0.355, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.380038468855023, + "grad_norm": 0.09715583920478821, + "learning_rate": 8.815316743032043e-07, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 88990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.380461203525607, + "grad_norm": 0.10897105187177658, + "learning_rate": 8.784275222611604e-07, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3808839381961913, + "grad_norm": 0.10968326777219772, + "learning_rate": 8.753287967787072e-07, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.381306672866775, + "grad_norm": 0.10578370839357376, + "learning_rate": 8.722354981981707e-07, + "loss": 0.3669, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3817294075373594, + "grad_norm": 0.14190222322940826, + "learning_rate": 8.691476268612664e-07, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3821521422079432, + "grad_norm": 0.12979063391685486, + "learning_rate": 8.660651831091271e-07, + "loss": 0.3554, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.382574876878527, + "grad_norm": 0.1087869182229042, + "learning_rate": 8.629881672822638e-07, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3829976115491114, + "grad_norm": 0.10882148146629333, + "learning_rate": 8.599165797206099e-07, + "loss": 0.3563, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3834203462196952, + "grad_norm": 0.08533572405576706, + "learning_rate": 8.568504207634886e-07, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.383843080890279, + "grad_norm": 0.127577543258667, + "learning_rate": 8.537896907496235e-07, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3842658155608634, + "grad_norm": 0.11476845294237137, + "learning_rate": 8.507343900171327e-07, + "loss": 0.3538, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.384688550231447, + "grad_norm": 0.1125326007604599, + "learning_rate": 8.476845189035521e-07, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.385111284902031, + "grad_norm": 0.11546861380338669, + "learning_rate": 8.446400777458063e-07, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3855340195726153, + "grad_norm": 0.08860640227794647, + "learning_rate": 8.41601066880221e-07, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.385956754243199, + "grad_norm": 0.1184566542506218, + "learning_rate": 8.385674866425164e-07, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.386379488913783, + "grad_norm": 0.10696568340063095, + "learning_rate": 8.355393373678188e-07, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3868022235843673, + "grad_norm": 0.09931330382823944, + "learning_rate": 8.325166193906553e-07, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.387224958254951, + "grad_norm": 0.12146598100662231, + "learning_rate": 8.29499333044953e-07, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3876476929255355, + "grad_norm": 0.12019597738981247, + "learning_rate": 8.264874786640342e-07, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3880704275961193, + "grad_norm": 0.08845622837543488, + "learning_rate": 8.234810565806328e-07, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.388493162266703, + "grad_norm": 0.11790802329778671, + "learning_rate": 8.204800671268664e-07, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3889158969372875, + "grad_norm": 0.08858910948038101, + "learning_rate": 8.174845106342643e-07, + "loss": 0.3513, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3893386316078713, + "grad_norm": 0.10153213888406754, + "learning_rate": 8.144943874337452e-07, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.389761366278455, + "grad_norm": 0.10613299161195755, + "learning_rate": 8.115096978556446e-07, + "loss": 0.3533, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3901841009490394, + "grad_norm": 0.11341740936040878, + "learning_rate": 8.085304422296769e-07, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3906068356196233, + "grad_norm": 0.10676609724760056, + "learning_rate": 8.05556620884973e-07, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3910295702902076, + "grad_norm": 0.10593468695878983, + "learning_rate": 8.02588234150059e-07, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3914523049607914, + "grad_norm": 0.10802337527275085, + "learning_rate": 7.996252823528505e-07, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3918750396313753, + "grad_norm": 0.1053975373506546, + "learning_rate": 7.9666776582068e-07, + "loss": 0.3663, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3922977743019596, + "grad_norm": 0.11431518942117691, + "learning_rate": 7.937156848802585e-07, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3927205089725434, + "grad_norm": 0.11070626974105835, + "learning_rate": 7.907690398577195e-07, + "loss": 0.3553, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3931432436431272, + "grad_norm": 0.12318029254674911, + "learning_rate": 7.878278310785747e-07, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3935659783137115, + "grad_norm": 0.08693493902683258, + "learning_rate": 7.848920588677644e-07, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3939887129842954, + "grad_norm": 0.1017991453409195, + "learning_rate": 7.819617235495847e-07, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3944114476548792, + "grad_norm": 0.125751793384552, + "learning_rate": 7.790368254477709e-07, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3948341823254635, + "grad_norm": 0.11162877827882767, + "learning_rate": 7.761173648854425e-07, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3952569169960474, + "grad_norm": 0.11073944717645645, + "learning_rate": 7.732033421851082e-07, + "loss": 0.3667, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.395679651666631, + "grad_norm": 0.111997090280056, + "learning_rate": 7.702947576686936e-07, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3961023863372155, + "grad_norm": 0.10502658039331436, + "learning_rate": 7.673916116575142e-07, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3965251210077994, + "grad_norm": 0.104624904692173, + "learning_rate": 7.644939044722854e-07, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3969478556783836, + "grad_norm": 0.11640224605798721, + "learning_rate": 7.616016364331291e-07, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3973705903489675, + "grad_norm": 0.10952044278383255, + "learning_rate": 7.587148078595563e-07, + "loss": 0.3655, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3977933250195513, + "grad_norm": 0.11903035640716553, + "learning_rate": 7.55833419070473e-07, + "loss": 0.3559, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3982160596901356, + "grad_norm": 0.12908390164375305, + "learning_rate": 7.529574703842079e-07, + "loss": 0.3647, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3986387943607195, + "grad_norm": 0.10874100774526596, + "learning_rate": 7.500869621184514e-07, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3990615290313033, + "grad_norm": 0.10056973993778229, + "learning_rate": 7.472218945903331e-07, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3994842637018876, + "grad_norm": 0.13912762701511383, + "learning_rate": 7.443622681163554e-07, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.3999069983724715, + "grad_norm": 0.12613625824451447, + "learning_rate": 7.415080830124266e-07, + "loss": 0.3553, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4003297330430557, + "grad_norm": 0.11328399181365967, + "learning_rate": 7.386593395938557e-07, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4007524677136396, + "grad_norm": 0.10955154150724411, + "learning_rate": 7.358160381753576e-07, + "loss": 0.3671, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4011752023842234, + "grad_norm": 0.10762911289930344, + "learning_rate": 7.329781790710255e-07, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4015979370548077, + "grad_norm": 0.09879492968320847, + "learning_rate": 7.301457625943587e-07, + "loss": 0.3539, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4020206717253916, + "grad_norm": 0.09905239939689636, + "learning_rate": 7.273187890582733e-07, + "loss": 0.3512, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4024434063959754, + "grad_norm": 0.10173781961202621, + "learning_rate": 7.244972587750643e-07, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4028661410665597, + "grad_norm": 0.10902294516563416, + "learning_rate": 7.216811720564376e-07, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4032888757371436, + "grad_norm": 0.11880140751600266, + "learning_rate": 7.188705292134834e-07, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4037116104077274, + "grad_norm": 0.10395684838294983, + "learning_rate": 7.160653305567033e-07, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4041343450783117, + "grad_norm": 0.12379021942615509, + "learning_rate": 7.132655763959939e-07, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4045570797488955, + "grad_norm": 0.10679204761981964, + "learning_rate": 7.104712670406522e-07, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4049798144194794, + "grad_norm": 0.11129704117774963, + "learning_rate": 7.0768240279937e-07, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4054025490900637, + "grad_norm": 0.0877891331911087, + "learning_rate": 7.048989839802289e-07, + "loss": 0.3551, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4058252837606475, + "grad_norm": 0.11166001856327057, + "learning_rate": 7.021210108907328e-07, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.406248018431232, + "grad_norm": 0.10527089983224869, + "learning_rate": 6.99348483837764e-07, + "loss": 0.354, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4066707531018157, + "grad_norm": 0.10115140676498413, + "learning_rate": 6.965814031276052e-07, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4070934877723995, + "grad_norm": 0.103690966963768, + "learning_rate": 6.938197690659509e-07, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.407516222442984, + "grad_norm": 0.12855622172355652, + "learning_rate": 6.910635819578737e-07, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4079389571135676, + "grad_norm": 0.10348877310752869, + "learning_rate": 6.883128421078633e-07, + "loss": 0.3523, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4083616917841515, + "grad_norm": 0.11632906645536423, + "learning_rate": 6.855675498197989e-07, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.408784426454736, + "grad_norm": 0.11468525230884552, + "learning_rate": 6.828277053969545e-07, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4092071611253196, + "grad_norm": 0.11669173091650009, + "learning_rate": 6.800933091420048e-07, + "loss": 0.3553, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.409629895795904, + "grad_norm": 0.11514151841402054, + "learning_rate": 6.773643613570302e-07, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4100526304664878, + "grad_norm": 0.112629234790802, + "learning_rate": 6.746408623435063e-07, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4104753651370716, + "grad_norm": 0.11054021120071411, + "learning_rate": 6.719228124022869e-07, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.410898099807656, + "grad_norm": 0.10973858088254929, + "learning_rate": 6.69210211833654e-07, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4113208344782397, + "grad_norm": 0.10767678171396255, + "learning_rate": 6.665030609372736e-07, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4117435691488236, + "grad_norm": 0.11050905287265778, + "learning_rate": 6.638013600122061e-07, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.412166303819408, + "grad_norm": 0.09452743828296661, + "learning_rate": 6.611051093569131e-07, + "loss": 0.3565, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4125890384899917, + "grad_norm": 0.11554065346717834, + "learning_rate": 6.584143092692674e-07, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4130117731605756, + "grad_norm": 0.0964326336979866, + "learning_rate": 6.557289600465088e-07, + "loss": 0.3561, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.41343450783116, + "grad_norm": 0.13553644716739655, + "learning_rate": 6.530490619853003e-07, + "loss": 0.3644, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4138572425017437, + "grad_norm": 0.12424715608358383, + "learning_rate": 6.503746153816992e-07, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4142799771723276, + "grad_norm": 0.12334781140089035, + "learning_rate": 6.477056205311527e-07, + "loss": 0.3651, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.414702711842912, + "grad_norm": 0.11786612868309021, + "learning_rate": 6.450420777285138e-07, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4151254465134957, + "grad_norm": 0.13793550431728363, + "learning_rate": 6.423839872680304e-07, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.41554818118408, + "grad_norm": 0.0884370431303978, + "learning_rate": 6.397313494433399e-07, + "loss": 0.3552, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.415970915854664, + "grad_norm": 0.11130383610725403, + "learning_rate": 6.370841645474912e-07, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4163936505252477, + "grad_norm": 0.10762582719326019, + "learning_rate": 6.344424328729281e-07, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.416816385195832, + "grad_norm": 0.10730849206447601, + "learning_rate": 6.318061547114729e-07, + "loss": 0.3528, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.417239119866416, + "grad_norm": 0.10183140635490417, + "learning_rate": 6.291753303543701e-07, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4176618545369997, + "grad_norm": 0.09384392201900482, + "learning_rate": 6.265499600922542e-07, + "loss": 0.3648, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.418084589207584, + "grad_norm": 0.11451227217912674, + "learning_rate": 6.239300442151541e-07, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.418507323878168, + "grad_norm": 0.140297994017601, + "learning_rate": 6.213155830124884e-07, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.418930058548752, + "grad_norm": 0.11326835304498672, + "learning_rate": 6.187065767730982e-07, + "loss": 0.3557, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.419352793219336, + "grad_norm": 0.15950913727283478, + "learning_rate": 6.161030257851974e-07, + "loss": 0.3653, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.41977552788992, + "grad_norm": 0.09889830648899078, + "learning_rate": 6.135049303364004e-07, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.420198262560504, + "grad_norm": 0.11856921762228012, + "learning_rate": 6.109122907137332e-07, + "loss": 0.3551, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.420620997231088, + "grad_norm": 0.14353002607822418, + "learning_rate": 6.083251072036e-07, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4210437319016718, + "grad_norm": 0.0849485769867897, + "learning_rate": 6.057433800918167e-07, + "loss": 0.3517, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.421466466572256, + "grad_norm": 0.12606360018253326, + "learning_rate": 6.031671096635994e-07, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.42188920124284, + "grad_norm": 0.1234479695558548, + "learning_rate": 6.005962962035428e-07, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4223119359134238, + "grad_norm": 0.10995946079492569, + "learning_rate": 5.980309399956585e-07, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 89990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.422734670584008, + "grad_norm": 0.11848143488168716, + "learning_rate": 5.954710413233367e-07, + "loss": 0.3659, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.423157405254592, + "grad_norm": 0.11545474082231522, + "learning_rate": 5.929166004693842e-07, + "loss": 0.3523, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4235801399251757, + "grad_norm": 0.10364442318677902, + "learning_rate": 5.903676177159922e-07, + "loss": 0.3545, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.42400287459576, + "grad_norm": 0.09755828976631165, + "learning_rate": 5.878240933447521e-07, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.424425609266344, + "grad_norm": 0.13961169123649597, + "learning_rate": 5.852860276366445e-07, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.424848343936928, + "grad_norm": 0.11178848892450333, + "learning_rate": 5.827534208720675e-07, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.425271078607512, + "grad_norm": 0.09264565259218216, + "learning_rate": 5.802262733307973e-07, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.425693813278096, + "grad_norm": 0.09647703915834427, + "learning_rate": 5.77704585292016e-07, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.42611654794868, + "grad_norm": 0.12594571709632874, + "learning_rate": 5.751883570342897e-07, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.426539282619264, + "grad_norm": 0.11671227216720581, + "learning_rate": 5.726775888356018e-07, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.426962017289848, + "grad_norm": 0.1177118718624115, + "learning_rate": 5.701722809733135e-07, + "loss": 0.3545, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.427384751960432, + "grad_norm": 0.10378707200288773, + "learning_rate": 5.676724337242035e-07, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.427807486631016, + "grad_norm": 0.10414203256368637, + "learning_rate": 5.65178047364423e-07, + "loss": 0.3505, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4282302213016003, + "grad_norm": 0.11328674107789993, + "learning_rate": 5.626891221695352e-07, + "loss": 0.367, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.428652955972184, + "grad_norm": 0.10346713662147522, + "learning_rate": 5.602056584145032e-07, + "loss": 0.3502, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.429075690642768, + "grad_norm": 0.12079237401485443, + "learning_rate": 5.577276563736744e-07, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4294984253133523, + "grad_norm": 0.10596983134746552, + "learning_rate": 5.552551163207964e-07, + "loss": 0.3539, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.429921159983936, + "grad_norm": 0.1266157627105713, + "learning_rate": 5.527880385290174e-07, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.43034389465452, + "grad_norm": 0.13249537348747253, + "learning_rate": 5.503264232708805e-07, + "loss": 0.3547, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4307666293251042, + "grad_norm": 0.12539143860340118, + "learning_rate": 5.478702708183292e-07, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.431189363995688, + "grad_norm": 0.11380508542060852, + "learning_rate": 5.454195814427021e-07, + "loss": 0.3534, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.431612098666272, + "grad_norm": 0.10781680792570114, + "learning_rate": 5.429743554147215e-07, + "loss": 0.3521, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.432034833336856, + "grad_norm": 0.0936770960688591, + "learning_rate": 5.405345930045269e-07, + "loss": 0.3553, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.43245756800744, + "grad_norm": 0.12883618474006653, + "learning_rate": 5.381002944816304e-07, + "loss": 0.3551, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.432880302678024, + "grad_norm": 0.10838532447814941, + "learning_rate": 5.356714601149671e-07, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.433303037348608, + "grad_norm": 0.09914236515760422, + "learning_rate": 5.332480901728443e-07, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.433725772019192, + "grad_norm": 0.09305386245250702, + "learning_rate": 5.308301849229869e-07, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4341485066897763, + "grad_norm": 0.10351357609033585, + "learning_rate": 5.284177446325034e-07, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.43457124136036, + "grad_norm": 0.10768196731805801, + "learning_rate": 5.260107695678973e-07, + "loss": 0.3675, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.434993976030944, + "grad_norm": 0.1033562421798706, + "learning_rate": 5.236092599950782e-07, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4354167107015283, + "grad_norm": 0.11646769195795059, + "learning_rate": 5.212132161793337e-07, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.435839445372112, + "grad_norm": 0.10045039653778076, + "learning_rate": 5.188226383853689e-07, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.436262180042696, + "grad_norm": 0.10726740211248398, + "learning_rate": 5.164375268772726e-07, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4366849147132803, + "grad_norm": 0.10201476514339447, + "learning_rate": 5.140578819185337e-07, + "loss": 0.3552, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.437107649383864, + "grad_norm": 0.10424003005027771, + "learning_rate": 5.116837037720423e-07, + "loss": 0.367, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4375303840544484, + "grad_norm": 0.10967638343572617, + "learning_rate": 5.093149927000718e-07, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4379531187250323, + "grad_norm": 0.12576670944690704, + "learning_rate": 5.06951748964296e-07, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.438375853395616, + "grad_norm": 0.11080148816108704, + "learning_rate": 5.045939728257953e-07, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4387985880662004, + "grad_norm": 0.09521294385194778, + "learning_rate": 5.022416645450334e-07, + "loss": 0.3633, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4392213227367843, + "grad_norm": 0.09403475373983383, + "learning_rate": 4.998948243818746e-07, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.439644057407368, + "grad_norm": 0.11885688453912735, + "learning_rate": 4.975534525955783e-07, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4400667920779524, + "grad_norm": 0.10629577934741974, + "learning_rate": 4.952175494448042e-07, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4404895267485363, + "grad_norm": 0.11557453870773315, + "learning_rate": 4.928871151875958e-07, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.44091226141912, + "grad_norm": 0.11131192743778229, + "learning_rate": 4.905621500814139e-07, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4413349960897044, + "grad_norm": 0.10252323001623154, + "learning_rate": 4.88242654383092e-07, + "loss": 0.3553, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4417577307602882, + "grad_norm": 0.092289038002491, + "learning_rate": 4.85928628348875e-07, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.442180465430872, + "grad_norm": 0.12130726873874664, + "learning_rate": 4.83620072234392e-07, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4426032001014564, + "grad_norm": 0.12802593410015106, + "learning_rate": 4.813169862946831e-07, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.44302593477204, + "grad_norm": 0.1010117158293724, + "learning_rate": 4.790193707841673e-07, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4434486694426245, + "grad_norm": 0.10894417017698288, + "learning_rate": 4.767272259566691e-07, + "loss": 0.3502, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4438714041132084, + "grad_norm": 0.1260586380958557, + "learning_rate": 4.7444055206540825e-07, + "loss": 0.3663, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.444294138783792, + "grad_norm": 0.11305016279220581, + "learning_rate": 4.7215934936298833e-07, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4447168734543765, + "grad_norm": 0.13200640678405762, + "learning_rate": 4.698836181014299e-07, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4451396081249603, + "grad_norm": 0.09669926762580872, + "learning_rate": 4.676133585321374e-07, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.445562342795544, + "grad_norm": 0.11563310027122498, + "learning_rate": 4.6534857090590467e-07, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4459850774661285, + "grad_norm": 0.1042291447520256, + "learning_rate": 4.630892554729316e-07, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4464078121367123, + "grad_norm": 0.1520799845457077, + "learning_rate": 4.6083541248280737e-07, + "loss": 0.3662, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4468305468072966, + "grad_norm": 0.1042601615190506, + "learning_rate": 4.5858704218452173e-07, + "loss": 0.3643, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4472532814778805, + "grad_norm": 0.097703717648983, + "learning_rate": 4.563441448264538e-07, + "loss": 0.3525, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4476760161484643, + "grad_norm": 0.09242242574691772, + "learning_rate": 4.541067206563776e-07, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4480987508190486, + "grad_norm": 0.11550085246562958, + "learning_rate": 4.5187476992147314e-07, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4485214854896324, + "grad_norm": 0.11405012756586075, + "learning_rate": 4.4964829286829877e-07, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4489442201602163, + "grad_norm": 0.11898912489414215, + "learning_rate": 4.4742728974283e-07, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4493669548308006, + "grad_norm": 0.09645283967256546, + "learning_rate": 4.452117607904205e-07, + "loss": 0.3682, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4497896895013844, + "grad_norm": 0.14281730353832245, + "learning_rate": 4.4300170625582447e-07, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4502124241719683, + "grad_norm": 0.12192203849554062, + "learning_rate": 4.407971263831912e-07, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4506351588425526, + "grad_norm": 0.09538878500461578, + "learning_rate": 4.38598021416059e-07, + "loss": 0.3537, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4510578935131364, + "grad_norm": 0.1007314920425415, + "learning_rate": 4.364043915973726e-07, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4514806281837203, + "grad_norm": 0.11020541936159134, + "learning_rate": 4.342162371694658e-07, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4519033628543045, + "grad_norm": 0.11676766723394394, + "learning_rate": 4.320335583740731e-07, + "loss": 0.355, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4523260975248884, + "grad_norm": 0.09623955935239792, + "learning_rate": 4.298563554523127e-07, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4527488321954727, + "grad_norm": 0.1257479190826416, + "learning_rate": 4.276846286447145e-07, + "loss": 0.3559, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4531715668660565, + "grad_norm": 0.11072058230638504, + "learning_rate": 4.255183781911809e-07, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4535943015366404, + "grad_norm": 0.11388764530420303, + "learning_rate": 4.2335760433102613e-07, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4540170362072247, + "grad_norm": 0.11600778251886368, + "learning_rate": 4.212023073029647e-07, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4544397708778085, + "grad_norm": 0.12017619609832764, + "learning_rate": 4.1905248734507853e-07, + "loss": 0.3676, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4548625055483924, + "grad_norm": 0.08892545104026794, + "learning_rate": 4.169081446948775e-07, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4552852402189767, + "grad_norm": 0.12871742248535156, + "learning_rate": 4.1476927958924994e-07, + "loss": 0.367, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4557079748895605, + "grad_norm": 0.1120435819029808, + "learning_rate": 4.1263589226447354e-07, + "loss": 0.365, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.456130709560145, + "grad_norm": 0.10793851315975189, + "learning_rate": 4.1050798295623193e-07, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4565534442307286, + "grad_norm": 0.11278434097766876, + "learning_rate": 4.0838555189959825e-07, + "loss": 0.3678, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4569761789013125, + "grad_norm": 0.10868996381759644, + "learning_rate": 4.0626859932904604e-07, + "loss": 0.3494, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4573989135718968, + "grad_norm": 0.11122975498437881, + "learning_rate": 4.041571254784382e-07, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4578216482424806, + "grad_norm": 0.1107703372836113, + "learning_rate": 4.0205113058102707e-07, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4582443829130645, + "grad_norm": 0.10907492786645889, + "learning_rate": 3.9995061486947094e-07, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4586671175836488, + "grad_norm": 0.12878604233264923, + "learning_rate": 3.978555785758231e-07, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4590898522542326, + "grad_norm": 0.1171470358967781, + "learning_rate": 3.957660219315207e-07, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4595125869248164, + "grad_norm": 0.09423330426216125, + "learning_rate": 3.9368194516739566e-07, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4599353215954007, + "grad_norm": 0.11455272138118744, + "learning_rate": 3.9160334851368605e-07, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4603580562659846, + "grad_norm": 0.14423483610153198, + "learning_rate": 3.8953023220002494e-07, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4607807909365684, + "grad_norm": 0.10056940466165543, + "learning_rate": 3.874625964554235e-07, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4612035256071527, + "grad_norm": 0.11535909026861191, + "learning_rate": 3.8540044150829903e-07, + "loss": 0.3663, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4616262602777366, + "grad_norm": 0.12552987039089203, + "learning_rate": 3.833437675864748e-07, + "loss": 0.3537, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.462048994948321, + "grad_norm": 0.11063595861196518, + "learning_rate": 3.812925749171359e-07, + "loss": 0.3619, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4624717296189047, + "grad_norm": 0.1059623584151268, + "learning_rate": 3.7924686372690087e-07, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4628944642894885, + "grad_norm": 0.12394015491008759, + "learning_rate": 3.772066342417446e-07, + "loss": 0.3541, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.463317198960073, + "grad_norm": 0.10470841825008392, + "learning_rate": 3.7517188668707014e-07, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4637399336306567, + "grad_norm": 0.0921901986002922, + "learning_rate": 3.731426212876532e-07, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4641626683012405, + "grad_norm": 0.12596887350082397, + "learning_rate": 3.7111883826767e-07, + "loss": 0.3656, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.464585402971825, + "grad_norm": 0.15081609785556793, + "learning_rate": 3.691005378506973e-07, + "loss": 0.3537, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 90990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4650081376424087, + "grad_norm": 0.09134108573198318, + "learning_rate": 3.670877202597012e-07, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.465430872312993, + "grad_norm": 0.132229283452034, + "learning_rate": 3.6508038571703706e-07, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.465853606983577, + "grad_norm": 0.10533016175031662, + "learning_rate": 3.630785344444609e-07, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4662763416541607, + "grad_norm": 0.10356292128562927, + "learning_rate": 3.6108216666311814e-07, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.466699076324745, + "grad_norm": 0.10295750200748444, + "learning_rate": 3.590912825935544e-07, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.467121810995329, + "grad_norm": 0.1002407893538475, + "learning_rate": 3.5710588245571055e-07, + "loss": 0.3532, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4675445456659126, + "grad_norm": 0.12490545213222504, + "learning_rate": 3.5512596646891104e-07, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.467967280336497, + "grad_norm": 0.14268940687179565, + "learning_rate": 3.5315153485188657e-07, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4683900150070808, + "grad_norm": 0.11716294288635254, + "learning_rate": 3.511825878227515e-07, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4688127496776646, + "grad_norm": 0.11378806829452515, + "learning_rate": 3.4921912559902626e-07, + "loss": 0.3502, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.469235484348249, + "grad_norm": 0.1170380637049675, + "learning_rate": 3.4726114839761514e-07, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4696582190188328, + "grad_norm": 0.1060662493109703, + "learning_rate": 3.453086564348118e-07, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4700809536894166, + "grad_norm": 0.09837742894887924, + "learning_rate": 3.43361649926327e-07, + "loss": 0.3513, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.470503688360001, + "grad_norm": 0.10145172476768494, + "learning_rate": 3.4142012908723877e-07, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4709264230305847, + "grad_norm": 0.10432492941617966, + "learning_rate": 3.39484094132031e-07, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.471349157701169, + "grad_norm": 0.12022513151168823, + "learning_rate": 3.3755354527459373e-07, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.471771892371753, + "grad_norm": 0.12128941714763641, + "learning_rate": 3.3562848272818416e-07, + "loss": 0.3673, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4721946270423367, + "grad_norm": 0.13967815041542053, + "learning_rate": 3.3370890670547663e-07, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.472617361712921, + "grad_norm": 0.09201038628816605, + "learning_rate": 3.317948174185237e-07, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.473040096383505, + "grad_norm": 0.1387704312801361, + "learning_rate": 3.298862150787896e-07, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.473462831054089, + "grad_norm": 0.1386033296585083, + "learning_rate": 3.279830998971112e-07, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.473885565724673, + "grad_norm": 0.13100019097328186, + "learning_rate": 3.260854720837314e-07, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.474308300395257, + "grad_norm": 0.09982665628194809, + "learning_rate": 3.2419333184828815e-07, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.474731035065841, + "grad_norm": 0.09874990582466125, + "learning_rate": 3.224950976882968e-07, + "loss": 0.3532, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.475153769736425, + "grad_norm": 0.10835470259189606, + "learning_rate": 3.2061338442630486e-07, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.475576504407009, + "grad_norm": 0.12141257524490356, + "learning_rate": 3.187371593467547e-07, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.475999239077593, + "grad_norm": 0.1053297370672226, + "learning_rate": 3.168664226569307e-07, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.476421973748177, + "grad_norm": 0.11991740018129349, + "learning_rate": 3.150011745634784e-07, + "loss": 0.3549, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.476844708418761, + "grad_norm": 0.11079537123441696, + "learning_rate": 3.131414152724721e-07, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.477267443089345, + "grad_norm": 0.09772443771362305, + "learning_rate": 3.1128714498935285e-07, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.477690177759929, + "grad_norm": 0.09113850444555283, + "learning_rate": 3.094383639189735e-07, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.478112912430513, + "grad_norm": 0.1403668075799942, + "learning_rate": 3.075950722655707e-07, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.478535647101097, + "grad_norm": 0.1073421910405159, + "learning_rate": 3.057572702327705e-07, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.478958381771681, + "grad_norm": 0.12446584552526474, + "learning_rate": 3.0392495802360477e-07, + "loss": 0.365, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4793811164422648, + "grad_norm": 0.12329819053411484, + "learning_rate": 3.0209813584049507e-07, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.479803851112849, + "grad_norm": 0.1063227504491806, + "learning_rate": 3.0027680388524105e-07, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.480226585783433, + "grad_norm": 0.12507116794586182, + "learning_rate": 2.984609623590651e-07, + "loss": 0.3515, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.480649320454017, + "grad_norm": 0.09822031110525131, + "learning_rate": 2.9665061146255113e-07, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.481072055124601, + "grad_norm": 0.10669213533401489, + "learning_rate": 2.948457513957059e-07, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.481494789795185, + "grad_norm": 0.1030283272266388, + "learning_rate": 2.9304638235791435e-07, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.481917524465769, + "grad_norm": 0.10708016157150269, + "learning_rate": 2.9125250454795085e-07, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.482340259136353, + "grad_norm": 0.10194991528987885, + "learning_rate": 2.894641181639901e-07, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4827629938069373, + "grad_norm": 0.10671535134315491, + "learning_rate": 2.8768122340359636e-07, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.483185728477521, + "grad_norm": 0.12329309433698654, + "learning_rate": 2.859038204637343e-07, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.483608463148105, + "grad_norm": 0.10501214116811752, + "learning_rate": 2.84131909540758e-07, + "loss": 0.3663, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4840311978186893, + "grad_norm": 0.10027166455984116, + "learning_rate": 2.823654908304107e-07, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.484453932489273, + "grad_norm": 0.1084509789943695, + "learning_rate": 2.80604564527831e-07, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.484876667159857, + "grad_norm": 0.10447623580694199, + "learning_rate": 2.788491308275576e-07, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4852994018304413, + "grad_norm": 0.09929949045181274, + "learning_rate": 2.770991899235131e-07, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.485722136501025, + "grad_norm": 0.10651985555887222, + "learning_rate": 2.753547420090152e-07, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.486144871171609, + "grad_norm": 0.10525155812501907, + "learning_rate": 2.736157872767764e-07, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4865676058421933, + "grad_norm": 0.10460832715034485, + "learning_rate": 2.718823259189096e-07, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.486990340512777, + "grad_norm": 0.11258503049612045, + "learning_rate": 2.7015435812690613e-07, + "loss": 0.3557, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.487413075183361, + "grad_norm": 0.11183301359415054, + "learning_rate": 2.684318840916633e-07, + "loss": 0.3505, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4878358098539453, + "grad_norm": 0.11632508784532547, + "learning_rate": 2.6671490400346223e-07, + "loss": 0.361, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.488258544524529, + "grad_norm": 0.11937867105007172, + "learning_rate": 2.650034180519845e-07, + "loss": 0.355, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.488681279195113, + "grad_norm": 0.1101418137550354, + "learning_rate": 2.6329742642630106e-07, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4891040138656972, + "grad_norm": 0.09456237405538559, + "learning_rate": 2.6159692931487237e-07, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.489526748536281, + "grad_norm": 0.09935254603624344, + "learning_rate": 2.5990192690555916e-07, + "loss": 0.3554, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4899494832068654, + "grad_norm": 0.1172834262251854, + "learning_rate": 2.582124193856117e-07, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4903722178774492, + "grad_norm": 0.09091950207948685, + "learning_rate": 2.565284069416696e-07, + "loss": 0.3656, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.490794952548033, + "grad_norm": 0.0916210412979126, + "learning_rate": 2.548498897597784e-07, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4912176872186174, + "grad_norm": 0.1061248779296875, + "learning_rate": 2.531768680253566e-07, + "loss": 0.3576, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.491640421889201, + "grad_norm": 0.11450686305761337, + "learning_rate": 2.5150934192323394e-07, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4920631565597855, + "grad_norm": 0.09141802042722702, + "learning_rate": 2.498473116376188e-07, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4924858912303693, + "grad_norm": 0.13036802411079407, + "learning_rate": 2.4819077735212527e-07, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.492908625900953, + "grad_norm": 0.10680390149354935, + "learning_rate": 2.4653973924974596e-07, + "loss": 0.3654, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4933313605715375, + "grad_norm": 0.09731248766183853, + "learning_rate": 2.448941975128849e-07, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4937540952421213, + "grad_norm": 0.11832616478204727, + "learning_rate": 2.432541523233245e-07, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.494176829912705, + "grad_norm": 0.11519182473421097, + "learning_rate": 2.41619603862242e-07, + "loss": 0.3556, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4945995645832895, + "grad_norm": 0.09153961390256882, + "learning_rate": 2.3999055231020973e-07, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4950222992538733, + "grad_norm": 0.11878925561904907, + "learning_rate": 2.383669978471892e-07, + "loss": 0.3642, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.495445033924457, + "grad_norm": 0.1054602712392807, + "learning_rate": 2.367489406525425e-07, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4958677685950414, + "grad_norm": 0.1055767834186554, + "learning_rate": 2.351363809050211e-07, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4962905032656253, + "grad_norm": 0.12060358375310898, + "learning_rate": 2.335293187827603e-07, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.496713237936209, + "grad_norm": 0.12697429955005646, + "learning_rate": 2.3192775446330695e-07, + "loss": 0.3527, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4971359726067934, + "grad_norm": 0.09154585003852844, + "learning_rate": 2.3033168812357507e-07, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4975587072773773, + "grad_norm": 0.09629922360181808, + "learning_rate": 2.2874111993989587e-07, + "loss": 0.3552, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.497981441947961, + "grad_norm": 0.11815176159143448, + "learning_rate": 2.2715605008798435e-07, + "loss": 0.3696, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4984041766185454, + "grad_norm": 0.10856788605451584, + "learning_rate": 2.2557647874293376e-07, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4988269112891293, + "grad_norm": 0.09991713613271713, + "learning_rate": 2.2400240607925448e-07, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4992496459597136, + "grad_norm": 0.09725439548492432, + "learning_rate": 2.224338322708297e-07, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.4996723806302974, + "grad_norm": 0.1068696454167366, + "learning_rate": 2.2087075749094854e-07, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5000951153008812, + "grad_norm": 0.10176991671323776, + "learning_rate": 2.19313181912284e-07, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5005178499714655, + "grad_norm": 0.12522479891777039, + "learning_rate": 2.17761105706904e-07, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5009405846420494, + "grad_norm": 0.10660164058208466, + "learning_rate": 2.1621452904627136e-07, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5013633193126337, + "grad_norm": 0.10970480740070343, + "learning_rate": 2.146734521012439e-07, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5017860539832175, + "grad_norm": 0.10153599083423615, + "learning_rate": 2.1313787504205763e-07, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5022087886538014, + "grad_norm": 0.1018524095416069, + "learning_rate": 2.1160779803836017e-07, + "loss": 0.3535, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5026315233243857, + "grad_norm": 0.12109001725912094, + "learning_rate": 2.1008322125917744e-07, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5030542579949695, + "grad_norm": 0.11515356600284576, + "learning_rate": 2.085641448729303e-07, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5034769926655533, + "grad_norm": 0.1207016333937645, + "learning_rate": 2.0705056904744003e-07, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5038997273361376, + "grad_norm": 0.10267564654350281, + "learning_rate": 2.055424939499062e-07, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5043224620067215, + "grad_norm": 0.09764846414327621, + "learning_rate": 2.0403991974694003e-07, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5047451966773053, + "grad_norm": 0.11559835076332092, + "learning_rate": 2.025428466045254e-07, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5051679313478896, + "grad_norm": 0.10419715940952301, + "learning_rate": 2.0105127468805217e-07, + "loss": 0.3569, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5055906660184735, + "grad_norm": 0.12077189981937408, + "learning_rate": 1.9956520416229419e-07, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5060134006890573, + "grad_norm": 0.1370617300271988, + "learning_rate": 1.9808463519142007e-07, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5064361353596416, + "grad_norm": 0.10564082860946655, + "learning_rate": 1.9660956793899344e-07, + "loss": 0.3549, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5068588700302255, + "grad_norm": 0.09886042773723602, + "learning_rate": 1.9514000256796727e-07, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 91990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5072816047008093, + "grad_norm": 0.096245676279068, + "learning_rate": 1.9367593924068395e-07, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5077043393713936, + "grad_norm": 0.10005001723766327, + "learning_rate": 1.922173781188863e-07, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5081270740419774, + "grad_norm": 0.10064556449651718, + "learning_rate": 1.9076431936370654e-07, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5085498087125617, + "grad_norm": 0.11619777232408524, + "learning_rate": 1.893167631356607e-07, + "loss": 0.3596, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5089725433831456, + "grad_norm": 0.09411630034446716, + "learning_rate": 1.8787470959466537e-07, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5093952780537294, + "grad_norm": 0.08851435035467148, + "learning_rate": 1.8643815890003191e-07, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5098180127243137, + "grad_norm": 0.09892331808805466, + "learning_rate": 1.8500711121045012e-07, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5102407473948976, + "grad_norm": 0.10261942446231842, + "learning_rate": 1.835815666840157e-07, + "loss": 0.35, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.510663482065482, + "grad_norm": 0.11151175945997238, + "learning_rate": 1.8216152547821385e-07, + "loss": 0.3647, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5110862167360657, + "grad_norm": 0.1259998083114624, + "learning_rate": 1.807469877499135e-07, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5115089514066495, + "grad_norm": 0.10495734959840775, + "learning_rate": 1.7933795365538963e-07, + "loss": 0.3688, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.511931686077234, + "grad_norm": 0.0990443080663681, + "learning_rate": 1.7793442335028998e-07, + "loss": 0.3558, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5123544207478177, + "grad_norm": 0.09488427639007568, + "learning_rate": 1.7653639698967938e-07, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5127771554184015, + "grad_norm": 0.09490164369344711, + "learning_rate": 1.7514387472798987e-07, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.513199890088986, + "grad_norm": 0.11101995408535004, + "learning_rate": 1.737568567190595e-07, + "loss": 0.3657, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5136226247595697, + "grad_norm": 0.12815964221954346, + "learning_rate": 1.7237534311611014e-07, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5140453594301535, + "grad_norm": 0.09615283459424973, + "learning_rate": 1.709993340717697e-07, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.514468094100738, + "grad_norm": 0.09628140181303024, + "learning_rate": 1.6962882973803884e-07, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5148908287713216, + "grad_norm": 0.10145691782236099, + "learning_rate": 1.6826383026632976e-07, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5153135634419055, + "grad_norm": 0.13341332972049713, + "learning_rate": 1.6690433580743293e-07, + "loss": 0.3653, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5157362981124898, + "grad_norm": 0.10572512447834015, + "learning_rate": 1.6555034651152823e-07, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5161590327830736, + "grad_norm": 0.11665776371955872, + "learning_rate": 1.6420186252820157e-07, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5165817674536575, + "grad_norm": 0.11297620087862015, + "learning_rate": 1.6285888400642267e-07, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5170045021242418, + "grad_norm": 0.09763596951961517, + "learning_rate": 1.6152141109455065e-07, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5174272367948256, + "grad_norm": 0.10647560656070709, + "learning_rate": 1.6018944394033397e-07, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.51784997146541, + "grad_norm": 0.12406311184167862, + "learning_rate": 1.5886298269092713e-07, + "loss": 0.3647, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5182727061359937, + "grad_norm": 0.10345445573329926, + "learning_rate": 1.575420274928574e-07, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5186954408065776, + "grad_norm": 0.0992983877658844, + "learning_rate": 1.5622657849206356e-07, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.519118175477162, + "grad_norm": 0.11799334734678268, + "learning_rate": 1.5491663583385717e-07, + "loss": 0.3561, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5195409101477457, + "grad_norm": 0.11106571555137634, + "learning_rate": 1.5361219966295026e-07, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.51996364481833, + "grad_norm": 0.1277833729982376, + "learning_rate": 1.523132701234553e-07, + "loss": 0.3654, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.520386379488914, + "grad_norm": 0.10517584532499313, + "learning_rate": 1.510198473588631e-07, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5208091141594977, + "grad_norm": 0.10369572043418884, + "learning_rate": 1.4973193151205934e-07, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.521231848830082, + "grad_norm": 0.10780161619186401, + "learning_rate": 1.484495227253191e-07, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.521654583500666, + "grad_norm": 0.1006104126572609, + "learning_rate": 1.471726211403235e-07, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5220773181712497, + "grad_norm": 0.12373150885105133, + "learning_rate": 1.459012268981208e-07, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.522500052841834, + "grad_norm": 0.12912334501743317, + "learning_rate": 1.4463534013917645e-07, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.522922787512418, + "grad_norm": 0.1212938129901886, + "learning_rate": 1.433749610033286e-07, + "loss": 0.3643, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5233455221830017, + "grad_norm": 0.13442200422286987, + "learning_rate": 1.4212008962981583e-07, + "loss": 0.357, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.523768256853586, + "grad_norm": 0.09013635665178299, + "learning_rate": 1.4087072615726615e-07, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.52419099152417, + "grad_norm": 0.10896851867437363, + "learning_rate": 1.3962687072369694e-07, + "loss": 0.3557, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5246137261947537, + "grad_norm": 0.09753019362688065, + "learning_rate": 1.3838852346652608e-07, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.525036460865338, + "grad_norm": 0.10733480751514435, + "learning_rate": 1.3715568452255522e-07, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.525459195535922, + "grad_norm": 0.11621741205453873, + "learning_rate": 1.3592835402796989e-07, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5258819302065056, + "grad_norm": 0.1121358796954155, + "learning_rate": 1.347065321183616e-07, + "loss": 0.3541, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.52630466487709, + "grad_norm": 0.11296884715557098, + "learning_rate": 1.3349021892870573e-07, + "loss": 0.3652, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.526727399547674, + "grad_norm": 0.10223450511693954, + "learning_rate": 1.3227941459337811e-07, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.527150134218258, + "grad_norm": 0.10535154491662979, + "learning_rate": 1.3107411924612735e-07, + "loss": 0.3627, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.527572868888842, + "grad_norm": 0.11386307328939438, + "learning_rate": 1.2987433302011908e-07, + "loss": 0.3654, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5279956035594258, + "grad_norm": 0.10220169275999069, + "learning_rate": 1.2868005604788068e-07, + "loss": 0.3653, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.52841833823001, + "grad_norm": 0.10542961210012436, + "learning_rate": 1.274912884613566e-07, + "loss": 0.3673, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.528841072900594, + "grad_norm": 0.08763300627470016, + "learning_rate": 1.2630803039186402e-07, + "loss": 0.353, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.529263807571178, + "grad_norm": 0.12197844684123993, + "learning_rate": 1.251302819701261e-07, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.529686542241762, + "grad_norm": 0.11722242832183838, + "learning_rate": 1.239580433262555e-07, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.530109276912346, + "grad_norm": 0.08502458035945892, + "learning_rate": 1.2279131458973748e-07, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.53053201158293, + "grad_norm": 0.09933581948280334, + "learning_rate": 1.2163009588948006e-07, + "loss": 0.351, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.530954746253514, + "grad_norm": 0.11061685532331467, + "learning_rate": 1.2047438735375283e-07, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.531377480924098, + "grad_norm": 0.11485497653484344, + "learning_rate": 1.19324189110237e-07, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.531800215594682, + "grad_norm": 0.1099918931722641, + "learning_rate": 1.1817950128598653e-07, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.532222950265266, + "grad_norm": 0.11580216139554977, + "learning_rate": 1.1704032400747245e-07, + "loss": 0.3659, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.53264568493585, + "grad_norm": 0.12200535088777542, + "learning_rate": 1.1590665740053297e-07, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.533068419606434, + "grad_norm": 0.12306876480579376, + "learning_rate": 1.1477850159040126e-07, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.533491154277018, + "grad_norm": 0.09639657288789749, + "learning_rate": 1.1365585670172208e-07, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.533913888947602, + "grad_norm": 0.10392292588949203, + "learning_rate": 1.1253872285850176e-07, + "loss": 0.3667, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.534336623618186, + "grad_norm": 0.10821636766195297, + "learning_rate": 1.1142710018415825e-07, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.53475935828877, + "grad_norm": 0.1100778877735138, + "learning_rate": 1.1032098880149889e-07, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.535182092959354, + "grad_norm": 0.09881186485290527, + "learning_rate": 1.0922038883270924e-07, + "loss": 0.368, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.535604827629938, + "grad_norm": 0.10563471913337708, + "learning_rate": 1.0812530039938096e-07, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.536027562300522, + "grad_norm": 0.10862407833337784, + "learning_rate": 1.0703572362249503e-07, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5364502969711062, + "grad_norm": 0.12317362427711487, + "learning_rate": 1.059516586224052e-07, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.53687303164169, + "grad_norm": 0.1362549066543579, + "learning_rate": 1.048731055188823e-07, + "loss": 0.3557, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.537295766312274, + "grad_norm": 0.09347543865442276, + "learning_rate": 1.0380006443106993e-07, + "loss": 0.3562, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5377185009828582, + "grad_norm": 0.12347140908241272, + "learning_rate": 1.0273253547751216e-07, + "loss": 0.3678, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.538141235653442, + "grad_norm": 0.10190429538488388, + "learning_rate": 1.0167051877614243e-07, + "loss": 0.3504, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5385639703240264, + "grad_norm": 0.10659221559762955, + "learning_rate": 1.0061401444428354e-07, + "loss": 0.3649, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.53898670499461, + "grad_norm": 0.11179859936237335, + "learning_rate": 9.956302259864214e-08, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.539409439665194, + "grad_norm": 0.10402395576238632, + "learning_rate": 9.851754335533647e-08, + "loss": 0.3645, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5398321743357783, + "grad_norm": 0.11428465694189072, + "learning_rate": 9.747757682985192e-08, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.540254909006362, + "grad_norm": 0.11176520586013794, + "learning_rate": 9.644312313707993e-08, + "loss": 0.3636, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.540677643676946, + "grad_norm": 0.09254579991102219, + "learning_rate": 9.541418239130129e-08, + "loss": 0.3498, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5411003783475303, + "grad_norm": 0.10028686374425888, + "learning_rate": 9.439075470617508e-08, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.541523113018114, + "grad_norm": 0.10800550132989883, + "learning_rate": 9.3372840194772e-08, + "loss": 0.3668, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.541945847688698, + "grad_norm": 0.10036197304725647, + "learning_rate": 9.236043896954094e-08, + "loss": 0.3552, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5423685823592823, + "grad_norm": 0.11787360906600952, + "learning_rate": 9.135355114232025e-08, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.542791317029866, + "grad_norm": 0.1124798133969307, + "learning_rate": 9.035217682434871e-08, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.54321405170045, + "grad_norm": 0.11876571923494339, + "learning_rate": 8.93563161262434e-08, + "loss": 0.3624, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5436367863710343, + "grad_norm": 0.12017911672592163, + "learning_rate": 8.836596915802742e-08, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.544059521041618, + "grad_norm": 0.11439058929681778, + "learning_rate": 8.738113602909658e-08, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.544482255712202, + "grad_norm": 0.09868721663951874, + "learning_rate": 8.640181684825277e-08, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5449049903827863, + "grad_norm": 0.09330840408802032, + "learning_rate": 8.542801172368165e-08, + "loss": 0.3584, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.54532772505337, + "grad_norm": 0.10513084381818771, + "learning_rate": 8.445972076296382e-08, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5457504597239544, + "grad_norm": 0.10262434184551239, + "learning_rate": 8.349694407306374e-08, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5461731943945383, + "grad_norm": 0.11540938168764114, + "learning_rate": 8.253968176034632e-08, + "loss": 0.3548, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.546595929065122, + "grad_norm": 0.1388172209262848, + "learning_rate": 8.158793393056585e-08, + "loss": 0.3628, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5470186637357064, + "grad_norm": 0.11076226830482483, + "learning_rate": 8.06417006888549e-08, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5474413984062902, + "grad_norm": 0.11268085986375809, + "learning_rate": 7.970098213974652e-08, + "loss": 0.3658, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5478641330768745, + "grad_norm": 0.1272452473640442, + "learning_rate": 7.87657783871687e-08, + "loss": 0.3539, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5482868677474584, + "grad_norm": 0.1196521446108818, + "learning_rate": 7.783608953443322e-08, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5487096024180422, + "grad_norm": 0.12672175467014313, + "learning_rate": 7.691191568424128e-08, + "loss": 0.3537, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5491323370886265, + "grad_norm": 0.10554368048906326, + "learning_rate": 7.599325693870007e-08, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 92990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5495550717592104, + "grad_norm": 0.11163350939750671, + "learning_rate": 7.508011339927845e-08, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.549977806429794, + "grad_norm": 0.12447045743465424, + "learning_rate": 7.417248516686792e-08, + "loss": 0.362, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5504005411003785, + "grad_norm": 0.12234809249639511, + "learning_rate": 7.327037234172718e-08, + "loss": 0.3696, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5508232757709624, + "grad_norm": 0.10698001831769943, + "learning_rate": 7.237377502352094e-08, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.551246010441546, + "grad_norm": 0.11829589307308197, + "learning_rate": 7.148269331129221e-08, + "loss": 0.358, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5516687451121305, + "grad_norm": 0.114902064204216, + "learning_rate": 7.059712730348444e-08, + "loss": 0.3517, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5520914797827143, + "grad_norm": 0.09210319817066193, + "learning_rate": 6.971707709792497e-08, + "loss": 0.3553, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.552514214453298, + "grad_norm": 0.10633233189582825, + "learning_rate": 6.8842542791836e-08, + "loss": 0.3526, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5529369491238825, + "grad_norm": 0.12323427200317383, + "learning_rate": 6.797352448182914e-08, + "loss": 0.3623, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5533596837944663, + "grad_norm": 0.11826176196336746, + "learning_rate": 6.71100222639054e-08, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.55378241846505, + "grad_norm": 0.1138303354382515, + "learning_rate": 6.625203623346065e-08, + "loss": 0.354, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5542051531356345, + "grad_norm": 0.09954869747161865, + "learning_rate": 6.539956648527468e-08, + "loss": 0.3556, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5546278878062183, + "grad_norm": 0.11545317620038986, + "learning_rate": 6.455261311352768e-08, + "loss": 0.3666, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5550506224768026, + "grad_norm": 0.10089591890573502, + "learning_rate": 6.371117621177814e-08, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5554733571473864, + "grad_norm": 0.10177202522754669, + "learning_rate": 6.287525587298504e-08, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5558960918179707, + "grad_norm": 0.10644425451755524, + "learning_rate": 6.204485218949119e-08, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5563188264885546, + "grad_norm": 0.09067561477422714, + "learning_rate": 6.12199652530343e-08, + "loss": 0.356, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5567415611591384, + "grad_norm": 0.13002145290374756, + "learning_rate": 6.04005951547415e-08, + "loss": 0.3709, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5571642958297227, + "grad_norm": 0.12702281773090363, + "learning_rate": 5.958674198512926e-08, + "loss": 0.3615, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5575870305003066, + "grad_norm": 0.1256522536277771, + "learning_rate": 5.877840583410343e-08, + "loss": 0.3581, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5580097651708904, + "grad_norm": 0.10873854905366898, + "learning_rate": 5.7975586790970374e-08, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5584324998414747, + "grad_norm": 0.09704042226076126, + "learning_rate": 5.7178284944414686e-08, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5588552345120585, + "grad_norm": 0.1450384110212326, + "learning_rate": 5.6386500382510365e-08, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5592779691826424, + "grad_norm": 0.10534929484128952, + "learning_rate": 5.560023319273744e-08, + "loss": 0.3703, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5597007038532267, + "grad_norm": 0.12384098768234253, + "learning_rate": 5.481948346194865e-08, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5601234385238105, + "grad_norm": 0.12714819610118866, + "learning_rate": 5.404425127639723e-08, + "loss": 0.3631, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5605461731943944, + "grad_norm": 0.1261363923549652, + "learning_rate": 5.3274536721725775e-08, + "loss": 0.3699, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5609689078649787, + "grad_norm": 0.10320067405700684, + "learning_rate": 5.2510339882971825e-08, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5613916425355625, + "grad_norm": 0.10143525898456573, + "learning_rate": 5.175166084454564e-08, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5618143772061464, + "grad_norm": 0.11348342150449753, + "learning_rate": 5.099849969026904e-08, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5622371118767306, + "grad_norm": 0.10737266391515732, + "learning_rate": 5.025085650333661e-08, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5626598465473145, + "grad_norm": 0.1049749031662941, + "learning_rate": 4.9508731366354475e-08, + "loss": 0.3534, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5630825812178983, + "grad_norm": 0.10705693066120148, + "learning_rate": 4.877212436129597e-08, + "loss": 0.3561, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5635053158884826, + "grad_norm": 0.1041068285703659, + "learning_rate": 4.804103556954043e-08, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5639280505590665, + "grad_norm": 0.0914791077375412, + "learning_rate": 4.731546507185103e-08, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5643507852296508, + "grad_norm": 0.09650910645723343, + "learning_rate": 4.659541294838587e-08, + "loss": 0.3586, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5647735199002346, + "grad_norm": 0.10694479197263718, + "learning_rate": 4.588087927868689e-08, + "loss": 0.3635, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.565196254570819, + "grad_norm": 0.09081555157899857, + "learning_rate": 4.517186414169094e-08, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5656189892414027, + "grad_norm": 0.09764540940523148, + "learning_rate": 4.446836761572981e-08, + "loss": 0.3672, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5660417239119866, + "grad_norm": 0.09240083396434784, + "learning_rate": 4.3770389778513556e-08, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.566464458582571, + "grad_norm": 0.11152216792106628, + "learning_rate": 4.3077930707147165e-08, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5668871932531547, + "grad_norm": 0.11883712559938431, + "learning_rate": 4.2390990478136105e-08, + "loss": 0.3625, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5673099279237386, + "grad_norm": 0.134576678276062, + "learning_rate": 4.1709569167358575e-08, + "loss": 0.3573, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.567732662594323, + "grad_norm": 0.11177273094654083, + "learning_rate": 4.103366685010435e-08, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5681553972649067, + "grad_norm": 0.13701848685741425, + "learning_rate": 4.036328360103037e-08, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5685781319354906, + "grad_norm": 0.10456906259059906, + "learning_rate": 3.9698419494205165e-08, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.569000866606075, + "grad_norm": 0.11482881009578705, + "learning_rate": 3.903907460306444e-08, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5694236012766587, + "grad_norm": 0.1316891461610794, + "learning_rate": 3.838524900046103e-08, + "loss": 0.3616, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5698463359472425, + "grad_norm": 0.0970022976398468, + "learning_rate": 3.77369427586205e-08, + "loss": 0.3567, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.570269070617827, + "grad_norm": 0.1399657428264618, + "learning_rate": 3.709415594915777e-08, + "loss": 0.3566, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5706918052884107, + "grad_norm": 0.10519219189882278, + "learning_rate": 3.645688864308827e-08, + "loss": 0.3575, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5711145399589945, + "grad_norm": 0.09522317349910736, + "learning_rate": 3.582514091080569e-08, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.571537274629579, + "grad_norm": 0.10133116692304611, + "learning_rate": 3.519891282210974e-08, + "loss": 0.3614, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5719600093001627, + "grad_norm": 0.10342449694871902, + "learning_rate": 3.457820444617288e-08, + "loss": 0.3618, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5723827439707465, + "grad_norm": 0.12033414095640182, + "learning_rate": 3.396301585156803e-08, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.572805478641331, + "grad_norm": 0.13351590931415558, + "learning_rate": 3.335334710626303e-08, + "loss": 0.3611, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5732282133119146, + "grad_norm": 0.12627911567687988, + "learning_rate": 3.274919827759848e-08, + "loss": 0.3585, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.573650947982499, + "grad_norm": 0.10667847096920013, + "learning_rate": 3.215056943232098e-08, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.574073682653083, + "grad_norm": 0.10860628634691238, + "learning_rate": 3.1557460636566506e-08, + "loss": 0.3546, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.574496417323667, + "grad_norm": 0.12033797055482864, + "learning_rate": 3.0969871955849325e-08, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.574919151994251, + "grad_norm": 0.09757328033447266, + "learning_rate": 3.038780345508419e-08, + "loss": 0.3591, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5753418866648348, + "grad_norm": 0.10850946605205536, + "learning_rate": 2.9811255198580747e-08, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.575764621335419, + "grad_norm": 0.10227686166763306, + "learning_rate": 2.9240227250015852e-08, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93620 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.576187356006003, + "grad_norm": 0.10836614668369293, + "learning_rate": 2.8674719672489027e-08, + "loss": 0.3621, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93630 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5766100906765868, + "grad_norm": 0.11309939622879028, + "learning_rate": 2.811473252846142e-08, + "loss": 0.3539, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93640 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.577032825347171, + "grad_norm": 0.11038028448820114, + "learning_rate": 2.7560265879800207e-08, + "loss": 0.3557, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93650 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.577455560017755, + "grad_norm": 0.10419828444719315, + "learning_rate": 2.7011319787756396e-08, + "loss": 0.3605, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93660 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5778782946883387, + "grad_norm": 0.10243642330169678, + "learning_rate": 2.6467894312975916e-08, + "loss": 0.3517, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93670 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.578301029358923, + "grad_norm": 0.10635053366422653, + "learning_rate": 2.592998951549408e-08, + "loss": 0.3546, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93680 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.578723764029507, + "grad_norm": 0.10602924227714539, + "learning_rate": 2.539760545473002e-08, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93690 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5791464987000907, + "grad_norm": 0.10254249721765518, + "learning_rate": 2.487074218949781e-08, + "loss": 0.3536, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93700 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.579569233370675, + "grad_norm": 0.11235740780830383, + "learning_rate": 2.434939977800088e-08, + "loss": 0.3637, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93710 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.579991968041259, + "grad_norm": 0.09863253682851791, + "learning_rate": 2.383357827783206e-08, + "loss": 0.3653, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93720 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5804147027118427, + "grad_norm": 0.14667902886867523, + "learning_rate": 2.332327774597909e-08, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93730 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.580837437382427, + "grad_norm": 0.10334224253892899, + "learning_rate": 2.2818498238813547e-08, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93740 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.581260172053011, + "grad_norm": 0.1168394684791565, + "learning_rate": 2.2319239812101933e-08, + "loss": 0.3554, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93750 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5816829067235947, + "grad_norm": 0.1036597415804863, + "learning_rate": 2.182550252099458e-08, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93760 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.582105641394179, + "grad_norm": 0.10444650053977966, + "learning_rate": 2.133728642003674e-08, + "loss": 0.355, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93770 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.582528376064763, + "grad_norm": 0.12061912566423416, + "learning_rate": 2.08545915631575e-08, + "loss": 0.3517, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93780 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.582951110735347, + "grad_norm": 0.1024804636836052, + "learning_rate": 2.0377418003697522e-08, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93790 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.583373845405931, + "grad_norm": 0.12866011261940002, + "learning_rate": 1.9905765794353547e-08, + "loss": 0.3652, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93800 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5837965800765152, + "grad_norm": 0.11433824151754379, + "learning_rate": 1.9439634987239442e-08, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93810 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.584219314747099, + "grad_norm": 0.12380384653806686, + "learning_rate": 1.8979025633841797e-08, + "loss": 0.3578, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93820 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.584642049417683, + "grad_norm": 0.12163959443569183, + "learning_rate": 1.8523937785053234e-08, + "loss": 0.3594, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93830 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5850647840882672, + "grad_norm": 0.1253044307231903, + "learning_rate": 1.807437149115021e-08, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93840 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.585487518758851, + "grad_norm": 0.10522466897964478, + "learning_rate": 1.7630326801787446e-08, + "loss": 0.3609, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93850 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.585910253429435, + "grad_norm": 0.11790837347507477, + "learning_rate": 1.719180376602014e-08, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93860 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.586332988100019, + "grad_norm": 0.11256470531225204, + "learning_rate": 1.680185408771684e-08, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93870 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.586755722770603, + "grad_norm": 0.10804030299186707, + "learning_rate": 1.6373822326753995e-08, + "loss": 0.3597, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93880 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.587178457441187, + "grad_norm": 0.09946746379137039, + "learning_rate": 1.59513123581978e-08, + "loss": 0.3531, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93890 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.587601192111771, + "grad_norm": 0.1357773244380951, + "learning_rate": 1.5534324228727582e-08, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93900 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.588023926782355, + "grad_norm": 0.13633233308792114, + "learning_rate": 1.512285798440094e-08, + "loss": 0.3668, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93910 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.588446661452939, + "grad_norm": 0.0954364538192749, + "learning_rate": 1.4716913670687061e-08, + "loss": 0.3561, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93920 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.588869396123523, + "grad_norm": 0.10312850028276443, + "learning_rate": 1.4316491332416748e-08, + "loss": 0.3669, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93930 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.589292130794107, + "grad_norm": 0.11706611514091492, + "learning_rate": 1.3921591013837942e-08, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93940 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.589714865464691, + "grad_norm": 0.11648543924093246, + "learning_rate": 1.3532212758565754e-08, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93950 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.590137600135275, + "grad_norm": 0.11465286463499069, + "learning_rate": 1.3148356609621326e-08, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93960 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.590560334805859, + "grad_norm": 0.12062661349773407, + "learning_rate": 1.2770022609409626e-08, + "loss": 0.3506, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93970 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.590983069476443, + "grad_norm": 0.12166398763656616, + "learning_rate": 1.2397210799725002e-08, + "loss": 0.3589, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93980 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.591405804147027, + "grad_norm": 0.1284291297197342, + "learning_rate": 1.2029921221751173e-08, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 93990 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.591828538817611, + "grad_norm": 0.10517995804548264, + "learning_rate": 1.1668153916061242e-08, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94000 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5922512734881953, + "grad_norm": 0.10223699361085892, + "learning_rate": 1.131190892262879e-08, + "loss": 0.3592, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94010 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.592674008158779, + "grad_norm": 0.09700525552034378, + "learning_rate": 1.0961186280805669e-08, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94020 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5930967428293634, + "grad_norm": 0.1396346390247345, + "learning_rate": 1.0615986029333114e-08, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94030 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5935194774999473, + "grad_norm": 0.1176207885146141, + "learning_rate": 1.0276308206352836e-08, + "loss": 0.3577, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94040 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.593942212170531, + "grad_norm": 0.11855370551347733, + "learning_rate": 9.942152849379271e-09, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94050 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5943649468411154, + "grad_norm": 0.10889440029859543, + "learning_rate": 9.613519995338439e-09, + "loss": 0.3534, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94060 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5947876815116993, + "grad_norm": 0.10756544023752213, + "learning_rate": 9.290409680523527e-09, + "loss": 0.3632, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94070 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.595210416182283, + "grad_norm": 0.1323046088218689, + "learning_rate": 8.972821940639309e-09, + "loss": 0.3579, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94080 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5956331508528674, + "grad_norm": 0.1218147948384285, + "learning_rate": 8.660756810768834e-09, + "loss": 0.3603, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94090 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5960558855234512, + "grad_norm": 0.11229364573955536, + "learning_rate": 8.354214325384524e-09, + "loss": 0.3574, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94100 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.596478620194035, + "grad_norm": 0.11682156473398209, + "learning_rate": 8.053194518348183e-09, + "loss": 0.3593, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94110 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5969013548646194, + "grad_norm": 0.13120746612548828, + "learning_rate": 7.757697422916543e-09, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94120 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.597324089535203, + "grad_norm": 0.09252487868070602, + "learning_rate": 7.46772307173571e-09, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94130 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.597746824205787, + "grad_norm": 0.13458864390850067, + "learning_rate": 7.18327149683562e-09, + "loss": 0.3626, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94140 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5981695588763714, + "grad_norm": 0.0982547178864479, + "learning_rate": 6.904342729641133e-09, + "loss": 0.3588, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94150 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.598592293546955, + "grad_norm": 0.08926671743392944, + "learning_rate": 6.630936800972043e-09, + "loss": 0.3638, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94160 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.599015028217539, + "grad_norm": 0.10484462976455688, + "learning_rate": 6.363053741020864e-09, + "loss": 0.3599, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94170 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.5994377628881233, + "grad_norm": 0.13946717977523804, + "learning_rate": 6.100693579391692e-09, + "loss": 0.3601, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94180 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.599860497558707, + "grad_norm": 0.09647223353385925, + "learning_rate": 5.8438563450669006e-09, + "loss": 0.3555, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94190 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.600283232229291, + "grad_norm": 0.12461600452661514, + "learning_rate": 5.592542066412687e-09, + "loss": 0.3558, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94200 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6007059668998753, + "grad_norm": 0.12045758962631226, + "learning_rate": 5.3467507711957296e-09, + "loss": 0.3582, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94210 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.601128701570459, + "grad_norm": 0.09201820194721222, + "learning_rate": 5.106482486572084e-09, + "loss": 0.3639, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94220 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6015514362410435, + "grad_norm": 0.09926457703113556, + "learning_rate": 4.871737239081631e-09, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94230 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6019741709116273, + "grad_norm": 0.105511873960495, + "learning_rate": 4.64251505465918e-09, + "loss": 0.3607, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94240 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6023969055822116, + "grad_norm": 0.0900980606675148, + "learning_rate": 4.418815958623368e-09, + "loss": 0.3604, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94250 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6028196402527954, + "grad_norm": 0.127242311835289, + "learning_rate": 4.20063997569331e-09, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94260 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6032423749233793, + "grad_norm": 0.09898936003446579, + "learning_rate": 3.9879871299608465e-09, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94270 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6036651095939636, + "grad_norm": 0.10764959454536438, + "learning_rate": 3.780857444929398e-09, + "loss": 0.3606, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94280 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6040878442645474, + "grad_norm": 0.10095633566379547, + "learning_rate": 3.57925094347511e-09, + "loss": 0.3491, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94290 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6045105789351313, + "grad_norm": 0.10740885138511658, + "learning_rate": 3.383167647874608e-09, + "loss": 0.3602, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94300 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6049333136057156, + "grad_norm": 0.14028073847293854, + "learning_rate": 3.1926075797827917e-09, + "loss": 0.363, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94310 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6053560482762994, + "grad_norm": 0.11474642157554626, + "learning_rate": 3.0075707602550407e-09, + "loss": 0.3516, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94320 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6057787829468833, + "grad_norm": 0.11175432801246643, + "learning_rate": 2.8280572097361125e-09, + "loss": 0.3641, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94330 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6062015176174675, + "grad_norm": 0.10266385227441788, + "learning_rate": 2.6540669480490387e-09, + "loss": 0.3583, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94340 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6066242522880514, + "grad_norm": 0.09747838973999023, + "learning_rate": 2.4855999944173313e-09, + "loss": 0.3572, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94350 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6070469869586352, + "grad_norm": 0.11723248660564423, + "learning_rate": 2.3226563674594302e-09, + "loss": 0.3595, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94360 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6074697216292195, + "grad_norm": 0.10641251504421234, + "learning_rate": 2.1652360851665e-09, + "loss": 0.3691, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94370 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6078924562998034, + "grad_norm": 0.09124258160591125, + "learning_rate": 2.0133391649301838e-09, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94380 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.608315190970387, + "grad_norm": 0.10655014961957932, + "learning_rate": 1.866965623537054e-09, + "loss": 0.3568, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94390 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6087379256409715, + "grad_norm": 0.10684570670127869, + "learning_rate": 1.7261154771575083e-09, + "loss": 0.3617, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94400 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6091606603115554, + "grad_norm": 0.11073987931013107, + "learning_rate": 1.5907887413457722e-09, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94410 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.609583394982139, + "grad_norm": 0.10007400810718536, + "learning_rate": 1.4609854310509986e-09, + "loss": 0.3532, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94420 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6100061296527235, + "grad_norm": 0.10517394542694092, + "learning_rate": 1.3367055606172685e-09, + "loss": 0.3564, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94430 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6104288643233073, + "grad_norm": 0.12493422627449036, + "learning_rate": 1.21794914377249e-09, + "loss": 0.3557, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94440 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6108515989938916, + "grad_norm": 0.08815211802721024, + "learning_rate": 1.1047161936339478e-09, + "loss": 0.3546, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94450 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6112743336644755, + "grad_norm": 0.1106037124991417, + "learning_rate": 9.970067227138558e-10, + "loss": 0.359, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94460 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6116970683350598, + "grad_norm": 0.1057862639427185, + "learning_rate": 8.948207429138045e-10, + "loss": 0.3518, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94470 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6121198030056436, + "grad_norm": 0.11397109925746918, + "learning_rate": 7.981582655136599e-10, + "loss": 0.3612, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94480 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6125425376762275, + "grad_norm": 0.13173116743564606, + "learning_rate": 7.070193011937676e-10, + "loss": 0.3634, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94490 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6129652723468118, + "grad_norm": 0.11786304414272308, + "learning_rate": 6.214038600294014e-10, + "loss": 0.3691, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94500 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6133880070173956, + "grad_norm": 0.11473164707422256, + "learning_rate": 5.413119514796617e-10, + "loss": 0.3587, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94510 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6138107416879794, + "grad_norm": 0.09856856614351273, + "learning_rate": 4.667435843819234e-10, + "loss": 0.3524, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94520 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6142334763585637, + "grad_norm": 0.11705709248781204, + "learning_rate": 3.9769876697959283e-10, + "loss": 0.3571, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94530 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6146562110291476, + "grad_norm": 0.09081391245126724, + "learning_rate": 3.34177506899902e-10, + "loss": 0.3598, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94540 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6150789456997314, + "grad_norm": 0.09620621055364609, + "learning_rate": 2.7617981115946046e-10, + "loss": 0.3613, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94550 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6155016803703157, + "grad_norm": 0.08778068423271179, + "learning_rate": 2.2370568616980614e-10, + "loss": 0.3557, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94560 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6159244150408996, + "grad_norm": 0.10248315334320068, + "learning_rate": 1.7675513772075214e-10, + "loss": 0.3608, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94570 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6163471497114834, + "grad_norm": 0.10253268480300903, + "learning_rate": 1.3532817100814222e-10, + "loss": 0.3622, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94580 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6167698843820677, + "grad_norm": 0.13053113222122192, + "learning_rate": 9.9424790594993e-11, + "loss": 0.3653, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94590 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6171926190526515, + "grad_norm": 0.12059774994850159, + "learning_rate": 6.904500046145401e-11, + "loss": 0.3629, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94600 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6176153537232354, + "grad_norm": 0.11743923276662827, + "learning_rate": 4.41888039548477e-11, + "loss": 0.36, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94610 + }, + { + "data/cache_hit_ratio": 0.0, + "epoch": 3.6180380883938197, + "grad_norm": 0.09406457841396332, + "learning_rate": 2.4856203828527157e-11, + "loss": 0.3533, + "memory_allocated_GB": 3.6032471656799316, + "memory_reserved_GB": 54.42578125, + "step": 94620 + } + ], + "logging_steps": 10, + "max_steps": 94620, + "num_input_tokens_seen": 0, + "num_train_epochs": 4, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +}