| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.997824139255088, | |
| "eval_steps": 500, | |
| "global_step": 9765, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02559836170485089, | |
| "grad_norm": 0.7305641763872902, | |
| "learning_rate": 1.9897593445980545e-05, | |
| "loss": 0.24, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05119672340970178, | |
| "grad_norm": 0.05103642085416945, | |
| "learning_rate": 1.9795186891961087e-05, | |
| "loss": 0.0049, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.07679508511455267, | |
| "grad_norm": 0.030274497499991383, | |
| "learning_rate": 1.969278033794163e-05, | |
| "loss": 0.0017, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.10239344681940356, | |
| "grad_norm": 0.029756392410786685, | |
| "learning_rate": 1.9590373783922173e-05, | |
| "loss": 0.0011, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.12799180852425446, | |
| "grad_norm": 0.02549118945457913, | |
| "learning_rate": 1.9487967229902716e-05, | |
| "loss": 0.0008, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.15359017022910534, | |
| "grad_norm": 0.02438470537104193, | |
| "learning_rate": 1.9385560675883256e-05, | |
| "loss": 0.0008, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.17918853193395623, | |
| "grad_norm": 0.016258758537083494, | |
| "learning_rate": 1.9283154121863802e-05, | |
| "loss": 0.0007, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.20478689363880712, | |
| "grad_norm": 0.014976187170917211, | |
| "learning_rate": 1.9180747567844345e-05, | |
| "loss": 0.0005, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.230385255343658, | |
| "grad_norm": 0.014196620668655414, | |
| "learning_rate": 1.9078341013824884e-05, | |
| "loss": 0.0005, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.2559836170485089, | |
| "grad_norm": 0.01865887251990293, | |
| "learning_rate": 1.897593445980543e-05, | |
| "loss": 0.0004, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.2815819787533598, | |
| "grad_norm": 0.016349380331586796, | |
| "learning_rate": 1.887352790578597e-05, | |
| "loss": 0.0004, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.3071803404582107, | |
| "grad_norm": 0.01413169547355441, | |
| "learning_rate": 1.8771121351766516e-05, | |
| "loss": 0.0003, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.33277870216306155, | |
| "grad_norm": 0.010152573483527069, | |
| "learning_rate": 1.866871479774706e-05, | |
| "loss": 0.0003, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.35837706386791246, | |
| "grad_norm": 0.01017225834937972, | |
| "learning_rate": 1.85663082437276e-05, | |
| "loss": 0.0003, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.3839754255727633, | |
| "grad_norm": 0.013000431764113899, | |
| "learning_rate": 1.8463901689708145e-05, | |
| "loss": 0.0003, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.40957378727761423, | |
| "grad_norm": 0.013254090310897974, | |
| "learning_rate": 1.8361495135688684e-05, | |
| "loss": 0.0003, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.43517214898246515, | |
| "grad_norm": 0.00966168025347855, | |
| "learning_rate": 1.8259088581669227e-05, | |
| "loss": 0.0002, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.460770510687316, | |
| "grad_norm": 0.00712828374097157, | |
| "learning_rate": 1.815668202764977e-05, | |
| "loss": 0.0002, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.4863688723921669, | |
| "grad_norm": 0.010211960398621855, | |
| "learning_rate": 1.8054275473630313e-05, | |
| "loss": 0.0002, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.5119672340970178, | |
| "grad_norm": 0.01261082529443916, | |
| "learning_rate": 1.7951868919610856e-05, | |
| "loss": 0.0002, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5375655958018687, | |
| "grad_norm": 0.011854166152885242, | |
| "learning_rate": 1.78494623655914e-05, | |
| "loss": 0.0002, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.5631639575067195, | |
| "grad_norm": 0.009916438252277934, | |
| "learning_rate": 1.7747055811571942e-05, | |
| "loss": 0.0003, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.5887623192115704, | |
| "grad_norm": 0.005025129187771019, | |
| "learning_rate": 1.7644649257552485e-05, | |
| "loss": 0.0002, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.6143606809164214, | |
| "grad_norm": 0.006788101186805052, | |
| "learning_rate": 1.7542242703533028e-05, | |
| "loss": 0.0002, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.6399590426212722, | |
| "grad_norm": 0.00978494920049853, | |
| "learning_rate": 1.743983614951357e-05, | |
| "loss": 0.0002, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.6655574043261231, | |
| "grad_norm": 0.009494329535464946, | |
| "learning_rate": 1.7337429595494113e-05, | |
| "loss": 0.0002, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.6911557660309741, | |
| "grad_norm": 0.010405998997878094, | |
| "learning_rate": 1.7235023041474656e-05, | |
| "loss": 0.0002, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.7167541277358249, | |
| "grad_norm": 0.01406008137282546, | |
| "learning_rate": 1.71326164874552e-05, | |
| "loss": 0.0001, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.7423524894406758, | |
| "grad_norm": 0.012511809648905668, | |
| "learning_rate": 1.7030209933435742e-05, | |
| "loss": 0.0002, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.7679508511455266, | |
| "grad_norm": 0.014402924650339832, | |
| "learning_rate": 1.6927803379416285e-05, | |
| "loss": 0.0002, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.7935492128503776, | |
| "grad_norm": 0.007985015692090758, | |
| "learning_rate": 1.6825396825396828e-05, | |
| "loss": 0.0002, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.8191475745552285, | |
| "grad_norm": 0.016922696684847503, | |
| "learning_rate": 1.6722990271377367e-05, | |
| "loss": 0.0002, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.8447459362600793, | |
| "grad_norm": 0.0058610303905484145, | |
| "learning_rate": 1.6620583717357914e-05, | |
| "loss": 0.0002, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.8703442979649303, | |
| "grad_norm": 0.005758710688055935, | |
| "learning_rate": 1.6518177163338457e-05, | |
| "loss": 0.0001, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.8959426596697811, | |
| "grad_norm": 0.010005518642531934, | |
| "learning_rate": 1.6415770609318996e-05, | |
| "loss": 0.0001, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.921541021374632, | |
| "grad_norm": 0.006740034442277339, | |
| "learning_rate": 1.6313364055299542e-05, | |
| "loss": 0.0001, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.9471393830794829, | |
| "grad_norm": 0.0062403985576606705, | |
| "learning_rate": 1.6210957501280082e-05, | |
| "loss": 0.0001, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.9727377447843338, | |
| "grad_norm": 0.006225396199669411, | |
| "learning_rate": 1.6108550947260625e-05, | |
| "loss": 0.0001, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.9983361064891847, | |
| "grad_norm": 0.006345423633281202, | |
| "learning_rate": 1.600614439324117e-05, | |
| "loss": 0.0001, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.0235504927684629, | |
| "grad_norm": 0.00782413794335222, | |
| "learning_rate": 1.590373783922171e-05, | |
| "loss": 0.0001, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.0491488544733136, | |
| "grad_norm": 0.004640264761208562, | |
| "learning_rate": 1.5801331285202253e-05, | |
| "loss": 0.0001, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.0747472161781646, | |
| "grad_norm": 0.004837968806765287, | |
| "learning_rate": 1.5698924731182796e-05, | |
| "loss": 0.0001, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.1003455778830156, | |
| "grad_norm": 0.003966873491178343, | |
| "learning_rate": 1.559651817716334e-05, | |
| "loss": 0.0001, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.1259439395878663, | |
| "grad_norm": 0.007280756408676898, | |
| "learning_rate": 1.5494111623143882e-05, | |
| "loss": 0.0001, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.1515423012927173, | |
| "grad_norm": 0.007676063330830094, | |
| "learning_rate": 1.5391705069124425e-05, | |
| "loss": 0.0001, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.177140662997568, | |
| "grad_norm": 0.00759666513908814, | |
| "learning_rate": 1.5289298515104968e-05, | |
| "loss": 0.0001, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.202739024702419, | |
| "grad_norm": 0.006094073180808279, | |
| "learning_rate": 1.518689196108551e-05, | |
| "loss": 0.0001, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.22833738640727, | |
| "grad_norm": 0.009992171934592443, | |
| "learning_rate": 1.5084485407066054e-05, | |
| "loss": 0.0001, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.253935748112121, | |
| "grad_norm": 0.004425939861516632, | |
| "learning_rate": 1.4982078853046595e-05, | |
| "loss": 0.0001, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.2795341098169717, | |
| "grad_norm": 0.00618221779159838, | |
| "learning_rate": 1.487967229902714e-05, | |
| "loss": 0.0001, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.3051324715218227, | |
| "grad_norm": 0.0037721408282199286, | |
| "learning_rate": 1.477726574500768e-05, | |
| "loss": 0.0001, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.3307308332266734, | |
| "grad_norm": 0.006148728469232912, | |
| "learning_rate": 1.4674859190988225e-05, | |
| "loss": 0.0001, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.3563291949315244, | |
| "grad_norm": 0.00661518038661282, | |
| "learning_rate": 1.4572452636968768e-05, | |
| "loss": 0.0001, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.3819275566363753, | |
| "grad_norm": 0.0036588312853667437, | |
| "learning_rate": 1.447004608294931e-05, | |
| "loss": 0.0001, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.407525918341226, | |
| "grad_norm": 0.005484459005497015, | |
| "learning_rate": 1.4367639528929854e-05, | |
| "loss": 0.0001, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.433124280046077, | |
| "grad_norm": 0.010402616539983395, | |
| "learning_rate": 1.4265232974910395e-05, | |
| "loss": 0.0001, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.4587226417509278, | |
| "grad_norm": 0.007335666061283071, | |
| "learning_rate": 1.4162826420890938e-05, | |
| "loss": 0.0001, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.4843210034557788, | |
| "grad_norm": 0.006550005824502188, | |
| "learning_rate": 1.4060419866871483e-05, | |
| "loss": 0.0001, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.5099193651606297, | |
| "grad_norm": 0.0027811575400146435, | |
| "learning_rate": 1.3958013312852024e-05, | |
| "loss": 0.0001, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.5355177268654807, | |
| "grad_norm": 0.006308963330505965, | |
| "learning_rate": 1.3855606758832567e-05, | |
| "loss": 0.0001, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.5611160885703315, | |
| "grad_norm": 0.006401332782035864, | |
| "learning_rate": 1.3753200204813108e-05, | |
| "loss": 0.0001, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.5867144502751824, | |
| "grad_norm": 0.004718742517696451, | |
| "learning_rate": 1.3650793650793652e-05, | |
| "loss": 0.0001, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.6123128119800332, | |
| "grad_norm": 0.003877950477268835, | |
| "learning_rate": 1.3548387096774194e-05, | |
| "loss": 0.0001, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.6379111736848841, | |
| "grad_norm": 0.0063083392896106745, | |
| "learning_rate": 1.3445980542754738e-05, | |
| "loss": 0.0001, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.6635095353897351, | |
| "grad_norm": 0.006412039922690925, | |
| "learning_rate": 1.3343573988735281e-05, | |
| "loss": 0.0001, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.689107897094586, | |
| "grad_norm": 0.0029627793040849877, | |
| "learning_rate": 1.3241167434715822e-05, | |
| "loss": 0.0001, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.7147062587994368, | |
| "grad_norm": 0.002164481803452725, | |
| "learning_rate": 1.3138760880696367e-05, | |
| "loss": 0.0001, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.7403046205042876, | |
| "grad_norm": 0.004111311446657877, | |
| "learning_rate": 1.3036354326676908e-05, | |
| "loss": 0.0001, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.7659029822091385, | |
| "grad_norm": 0.0024071410600000186, | |
| "learning_rate": 1.2933947772657451e-05, | |
| "loss": 0.0001, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.7915013439139895, | |
| "grad_norm": 0.00428027777175206, | |
| "learning_rate": 1.2831541218637992e-05, | |
| "loss": 0.0001, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.8170997056188405, | |
| "grad_norm": 0.0035937450146907115, | |
| "learning_rate": 1.2729134664618537e-05, | |
| "loss": 0.0001, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.8426980673236912, | |
| "grad_norm": 0.007360372295628917, | |
| "learning_rate": 1.262672811059908e-05, | |
| "loss": 0.0001, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.8682964290285422, | |
| "grad_norm": 0.004225210869508024, | |
| "learning_rate": 1.2524321556579622e-05, | |
| "loss": 0.0001, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.893894790733393, | |
| "grad_norm": 0.00344941681643163, | |
| "learning_rate": 1.2421915002560165e-05, | |
| "loss": 0.0001, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.919493152438244, | |
| "grad_norm": 0.0036839082828609084, | |
| "learning_rate": 1.2319508448540707e-05, | |
| "loss": 0.0001, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.945091514143095, | |
| "grad_norm": 0.009934710271474315, | |
| "learning_rate": 1.2217101894521251e-05, | |
| "loss": 0.0001, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.9706898758479459, | |
| "grad_norm": 0.0024663729558732648, | |
| "learning_rate": 1.2114695340501794e-05, | |
| "loss": 0.0001, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.9962882375527966, | |
| "grad_norm": 0.003817898440024657, | |
| "learning_rate": 1.2012288786482335e-05, | |
| "loss": 0.0001, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 2.021502623832075, | |
| "grad_norm": 0.0031639489328900696, | |
| "learning_rate": 1.190988223246288e-05, | |
| "loss": 0.0001, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 2.0471009855369258, | |
| "grad_norm": 0.002020596329737904, | |
| "learning_rate": 1.1807475678443421e-05, | |
| "loss": 0.0001, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.0726993472417767, | |
| "grad_norm": 0.0041297671592259375, | |
| "learning_rate": 1.1705069124423964e-05, | |
| "loss": 0.0001, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.0982977089466273, | |
| "grad_norm": 0.0030187753698489852, | |
| "learning_rate": 1.1602662570404507e-05, | |
| "loss": 0.0001, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.1238960706514782, | |
| "grad_norm": 0.006719688660763743, | |
| "learning_rate": 1.150025601638505e-05, | |
| "loss": 0.0001, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.149494432356329, | |
| "grad_norm": 0.007455082822481147, | |
| "learning_rate": 1.1397849462365593e-05, | |
| "loss": 0.0001, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.17509279406118, | |
| "grad_norm": 0.0020929393058777236, | |
| "learning_rate": 1.1295442908346135e-05, | |
| "loss": 0.0001, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.200691155766031, | |
| "grad_norm": 0.004647943941373522, | |
| "learning_rate": 1.1193036354326678e-05, | |
| "loss": 0.0001, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.2262895174708817, | |
| "grad_norm": 0.002919096778092067, | |
| "learning_rate": 1.109062980030722e-05, | |
| "loss": 0.0001, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 2.2518878791757326, | |
| "grad_norm": 0.0022980302252630044, | |
| "learning_rate": 1.0988223246287764e-05, | |
| "loss": 0.0001, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.2774862408805836, | |
| "grad_norm": 0.0007777129612344223, | |
| "learning_rate": 1.0885816692268305e-05, | |
| "loss": 0.0001, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 2.3030846025854346, | |
| "grad_norm": 0.001856334209823885, | |
| "learning_rate": 1.0783410138248848e-05, | |
| "loss": 0.0001, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.3286829642902855, | |
| "grad_norm": 0.009562277401713636, | |
| "learning_rate": 1.0681003584229393e-05, | |
| "loss": 0.0115, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 2.354281325995136, | |
| "grad_norm": 0.008162550932912013, | |
| "learning_rate": 1.0578597030209934e-05, | |
| "loss": 0.0001, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 2.379879687699987, | |
| "grad_norm": 0.006086517667163692, | |
| "learning_rate": 1.0476190476190477e-05, | |
| "loss": 0.0001, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 2.405478049404838, | |
| "grad_norm": 0.0027507057924501116, | |
| "learning_rate": 1.037378392217102e-05, | |
| "loss": 0.0001, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 2.431076411109689, | |
| "grad_norm": 0.005652923712553444, | |
| "learning_rate": 1.0271377368151563e-05, | |
| "loss": 0.0001, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 2.45667477281454, | |
| "grad_norm": 0.0022548738506609723, | |
| "learning_rate": 1.0168970814132104e-05, | |
| "loss": 0.0001, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 2.482273134519391, | |
| "grad_norm": 0.0034968078517645545, | |
| "learning_rate": 1.0066564260112648e-05, | |
| "loss": 0.0001, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 2.507871496224242, | |
| "grad_norm": 0.0029725026316189704, | |
| "learning_rate": 9.96415770609319e-06, | |
| "loss": 0.0001, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 2.5334698579290924, | |
| "grad_norm": 0.002417471051371214, | |
| "learning_rate": 9.861751152073733e-06, | |
| "loss": 0.0, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 2.5590682196339434, | |
| "grad_norm": 0.0037793013717612994, | |
| "learning_rate": 9.759344598054277e-06, | |
| "loss": 0.0, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.5846665813387943, | |
| "grad_norm": 0.0020282875993942345, | |
| "learning_rate": 9.65693804403482e-06, | |
| "loss": 0.0, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 2.6102649430436453, | |
| "grad_norm": 0.006653751475623009, | |
| "learning_rate": 9.554531490015361e-06, | |
| "loss": 0.0, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 2.6358633047484963, | |
| "grad_norm": 0.0006177303859045243, | |
| "learning_rate": 9.452124935995904e-06, | |
| "loss": 0.0, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 2.661461666453347, | |
| "grad_norm": 0.001574016672401847, | |
| "learning_rate": 9.349718381976447e-06, | |
| "loss": 0.0, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 2.6870600281581978, | |
| "grad_norm": 0.002722823477162341, | |
| "learning_rate": 9.24731182795699e-06, | |
| "loss": 0.0, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 2.7126583898630487, | |
| "grad_norm": 0.0031352467257030996, | |
| "learning_rate": 9.144905273937533e-06, | |
| "loss": 0.0, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 2.7382567515678997, | |
| "grad_norm": 0.002855241030593728, | |
| "learning_rate": 9.042498719918076e-06, | |
| "loss": 0.0, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 2.7638551132727507, | |
| "grad_norm": 0.001616961495391244, | |
| "learning_rate": 8.940092165898619e-06, | |
| "loss": 0.0, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 2.789453474977601, | |
| "grad_norm": 0.005071888691549564, | |
| "learning_rate": 8.837685611879161e-06, | |
| "loss": 0.0001, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 2.815051836682452, | |
| "grad_norm": 0.0029692529220848914, | |
| "learning_rate": 8.735279057859704e-06, | |
| "loss": 0.0, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.840650198387303, | |
| "grad_norm": 0.0004172545224757254, | |
| "learning_rate": 8.632872503840246e-06, | |
| "loss": 0.0, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 2.866248560092154, | |
| "grad_norm": 0.008642812223624062, | |
| "learning_rate": 8.530465949820788e-06, | |
| "loss": 0.0, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 2.891846921797005, | |
| "grad_norm": 0.0027205512248527812, | |
| "learning_rate": 8.428059395801333e-06, | |
| "loss": 0.0001, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 2.9174452835018556, | |
| "grad_norm": 0.0019007511180201269, | |
| "learning_rate": 8.325652841781874e-06, | |
| "loss": 0.0, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 2.943043645206707, | |
| "grad_norm": 0.0011932680335579203, | |
| "learning_rate": 8.223246287762417e-06, | |
| "loss": 0.0, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 2.9686420069115576, | |
| "grad_norm": 0.0017683528985724605, | |
| "learning_rate": 8.12083973374296e-06, | |
| "loss": 0.0, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 2.9942403686164085, | |
| "grad_norm": 0.0010172759283751088, | |
| "learning_rate": 8.018433179723503e-06, | |
| "loss": 0.0, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 3.0194547548956865, | |
| "grad_norm": 0.0035687260604544088, | |
| "learning_rate": 7.916026625704046e-06, | |
| "loss": 0.0, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 3.0450531166005375, | |
| "grad_norm": 0.0017674951945728509, | |
| "learning_rate": 7.813620071684589e-06, | |
| "loss": 0.0, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 3.0706514783053884, | |
| "grad_norm": 0.001417796923981359, | |
| "learning_rate": 7.711213517665132e-06, | |
| "loss": 0.0, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 3.0962498400102394, | |
| "grad_norm": 0.001392466393900218, | |
| "learning_rate": 7.6088069636456744e-06, | |
| "loss": 0.0, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 3.1218482017150904, | |
| "grad_norm": 0.0010317131953453697, | |
| "learning_rate": 7.5064004096262165e-06, | |
| "loss": 0.0, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 3.1474465634199413, | |
| "grad_norm": 0.002671567960001357, | |
| "learning_rate": 7.403993855606759e-06, | |
| "loss": 0.0, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 3.173044925124792, | |
| "grad_norm": 0.0010993308683270022, | |
| "learning_rate": 7.301587301587301e-06, | |
| "loss": 0.0, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 3.198643286829643, | |
| "grad_norm": 0.0011792521113742672, | |
| "learning_rate": 7.199180747567845e-06, | |
| "loss": 0.0, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 3.224241648534494, | |
| "grad_norm": 0.002377211855286283, | |
| "learning_rate": 7.096774193548388e-06, | |
| "loss": 0.0, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 3.2498400102393448, | |
| "grad_norm": 0.0013709631857059255, | |
| "learning_rate": 6.994367639528931e-06, | |
| "loss": 0.0, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 3.2754383719441957, | |
| "grad_norm": 0.0010686686441566805, | |
| "learning_rate": 6.891961085509473e-06, | |
| "loss": 0.0, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 3.3010367336490463, | |
| "grad_norm": 0.0013656971047177361, | |
| "learning_rate": 6.789554531490016e-06, | |
| "loss": 0.0, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 3.3266350953538972, | |
| "grad_norm": 0.0030691234388346175, | |
| "learning_rate": 6.687147977470559e-06, | |
| "loss": 0.0, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 3.352233457058748, | |
| "grad_norm": 0.0016958004183478155, | |
| "learning_rate": 6.584741423451101e-06, | |
| "loss": 0.0, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 3.377831818763599, | |
| "grad_norm": 0.002679660529985062, | |
| "learning_rate": 6.4823348694316445e-06, | |
| "loss": 0.0, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 3.40343018046845, | |
| "grad_norm": 0.0009123671464204819, | |
| "learning_rate": 6.379928315412187e-06, | |
| "loss": 0.0, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 3.4290285421733007, | |
| "grad_norm": 0.0011492363622442438, | |
| "learning_rate": 6.2775217613927295e-06, | |
| "loss": 0.0, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 3.4546269038781516, | |
| "grad_norm": 0.0008232117328172145, | |
| "learning_rate": 6.175115207373272e-06, | |
| "loss": 0.0, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 3.4802252655830026, | |
| "grad_norm": 0.0022449544565699437, | |
| "learning_rate": 6.072708653353815e-06, | |
| "loss": 0.0, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 3.5058236272878536, | |
| "grad_norm": 0.0015276485422571994, | |
| "learning_rate": 5.970302099334357e-06, | |
| "loss": 0.0, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 3.5314219889927045, | |
| "grad_norm": 0.0011827584209431721, | |
| "learning_rate": 5.867895545314901e-06, | |
| "loss": 0.0, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 3.5570203506975555, | |
| "grad_norm": 0.003917245208198937, | |
| "learning_rate": 5.765488991295444e-06, | |
| "loss": 0.0, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 3.5826187124024065, | |
| "grad_norm": 0.0016006551963278512, | |
| "learning_rate": 5.663082437275986e-06, | |
| "loss": 0.0, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 3.608217074107257, | |
| "grad_norm": 0.0006324168438582504, | |
| "learning_rate": 5.560675883256529e-06, | |
| "loss": 0.0, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 3.633815435812108, | |
| "grad_norm": 0.0019190937453439484, | |
| "learning_rate": 5.458269329237072e-06, | |
| "loss": 0.0, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 3.659413797516959, | |
| "grad_norm": 0.0014235404292782222, | |
| "learning_rate": 5.355862775217614e-06, | |
| "loss": 0.0, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 3.68501215922181, | |
| "grad_norm": 0.002036273934913596, | |
| "learning_rate": 5.253456221198157e-06, | |
| "loss": 0.0, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 3.710610520926661, | |
| "grad_norm": 0.001317406088761277, | |
| "learning_rate": 5.1510496671787e-06, | |
| "loss": 0.0, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 3.7362088826315114, | |
| "grad_norm": 0.0036103172590374257, | |
| "learning_rate": 5.0486431131592425e-06, | |
| "loss": 0.0, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 3.7618072443363624, | |
| "grad_norm": 0.004126819254015297, | |
| "learning_rate": 4.946236559139785e-06, | |
| "loss": 0.0, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 3.7874056060412133, | |
| "grad_norm": 0.0016332834938483983, | |
| "learning_rate": 4.843830005120328e-06, | |
| "loss": 0.0, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 3.8130039677460643, | |
| "grad_norm": 0.0004651327688046216, | |
| "learning_rate": 4.741423451100871e-06, | |
| "loss": 0.0, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 3.8386023294509153, | |
| "grad_norm": 0.0014442256648981874, | |
| "learning_rate": 4.639016897081414e-06, | |
| "loss": 0.0, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 3.864200691155766, | |
| "grad_norm": 0.0044845026985442255, | |
| "learning_rate": 4.536610343061956e-06, | |
| "loss": 0.0, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 3.889799052860617, | |
| "grad_norm": 0.0018809502401269417, | |
| "learning_rate": 4.434203789042499e-06, | |
| "loss": 0.0, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 3.9153974145654677, | |
| "grad_norm": 0.0038926669396500383, | |
| "learning_rate": 4.331797235023042e-06, | |
| "loss": 0.0, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 3.9409957762703187, | |
| "grad_norm": 0.004267638189231195, | |
| "learning_rate": 4.229390681003585e-06, | |
| "loss": 0.0, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 3.9665941379751697, | |
| "grad_norm": 0.00238145784225052, | |
| "learning_rate": 4.126984126984127e-06, | |
| "loss": 0.0, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 3.99219249968002, | |
| "grad_norm": 0.0014756463100693462, | |
| "learning_rate": 4.0245775729646705e-06, | |
| "loss": 0.0, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 4.017406885959298, | |
| "grad_norm": 0.0014856407081637794, | |
| "learning_rate": 3.9221710189452126e-06, | |
| "loss": 0.0, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 4.04300524766415, | |
| "grad_norm": 0.0023551139030403893, | |
| "learning_rate": 3.8197644649257554e-06, | |
| "loss": 0.0, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 4.068603609369, | |
| "grad_norm": 0.0011981597110419454, | |
| "learning_rate": 3.7173579109062983e-06, | |
| "loss": 0.0, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 4.0942019710738515, | |
| "grad_norm": 0.0028843508232214783, | |
| "learning_rate": 3.6149513568868412e-06, | |
| "loss": 0.0, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 4.119800332778702, | |
| "grad_norm": 0.0018843735220208588, | |
| "learning_rate": 3.5125448028673837e-06, | |
| "loss": 0.0, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 4.1453986944835535, | |
| "grad_norm": 0.0009664312361068302, | |
| "learning_rate": 3.4101382488479266e-06, | |
| "loss": 0.0, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 4.170997056188404, | |
| "grad_norm": 0.0007573250297130035, | |
| "learning_rate": 3.3077316948284695e-06, | |
| "loss": 0.0, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 4.1965954178932545, | |
| "grad_norm": 0.0011501400163836143, | |
| "learning_rate": 3.205325140809012e-06, | |
| "loss": 0.0, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 4.222193779598106, | |
| "grad_norm": 0.001612883833598221, | |
| "learning_rate": 3.1029185867895553e-06, | |
| "loss": 0.0, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 4.2477921413029565, | |
| "grad_norm": 0.0018641211713703483, | |
| "learning_rate": 3.0005120327700977e-06, | |
| "loss": 0.0, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 4.273390503007808, | |
| "grad_norm": 0.0009646184071954378, | |
| "learning_rate": 2.89810547875064e-06, | |
| "loss": 0.0, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 4.298988864712658, | |
| "grad_norm": 0.0009154804456547847, | |
| "learning_rate": 2.7956989247311827e-06, | |
| "loss": 0.0, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 4.324587226417509, | |
| "grad_norm": 0.0020826965736280532, | |
| "learning_rate": 2.693292370711726e-06, | |
| "loss": 0.0, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 4.35018558812236, | |
| "grad_norm": 0.0011199777755488004, | |
| "learning_rate": 2.5908858166922684e-06, | |
| "loss": 0.0, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 4.375783949827211, | |
| "grad_norm": 0.0008422274985046506, | |
| "learning_rate": 2.4884792626728113e-06, | |
| "loss": 0.0, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 4.401382311532062, | |
| "grad_norm": 0.0006334420803463363, | |
| "learning_rate": 2.386072708653354e-06, | |
| "loss": 0.0, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 4.426980673236913, | |
| "grad_norm": 0.001586741258322779, | |
| "learning_rate": 2.2836661546338967e-06, | |
| "loss": 0.0, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 4.452579034941763, | |
| "grad_norm": 0.0009727630299961603, | |
| "learning_rate": 2.1812596006144396e-06, | |
| "loss": 0.0, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 4.478177396646615, | |
| "grad_norm": 0.0018829318719699433, | |
| "learning_rate": 2.078853046594982e-06, | |
| "loss": 0.0, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 4.503775758351465, | |
| "grad_norm": 0.0010542211381998285, | |
| "learning_rate": 1.976446492575525e-06, | |
| "loss": 0.0, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 4.529374120056317, | |
| "grad_norm": 0.0008019247102530823, | |
| "learning_rate": 1.8740399385560678e-06, | |
| "loss": 0.0, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 4.554972481761167, | |
| "grad_norm": 0.0025552327229986455, | |
| "learning_rate": 1.7716333845366105e-06, | |
| "loss": 0.0, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 4.580570843466019, | |
| "grad_norm": 0.003395542465106546, | |
| "learning_rate": 1.6692268305171534e-06, | |
| "loss": 0.0, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 4.606169205170869, | |
| "grad_norm": 0.0013482230913774654, | |
| "learning_rate": 1.5668202764976959e-06, | |
| "loss": 0.0, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 4.63176756687572, | |
| "grad_norm": 0.0015382731152315148, | |
| "learning_rate": 1.4644137224782387e-06, | |
| "loss": 0.0, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 4.657365928580571, | |
| "grad_norm": 0.0019761534195212524, | |
| "learning_rate": 1.3620071684587816e-06, | |
| "loss": 0.0, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 4.682964290285422, | |
| "grad_norm": 0.00038991149426242233, | |
| "learning_rate": 1.259600614439324e-06, | |
| "loss": 0.0, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 4.708562651990272, | |
| "grad_norm": 0.002601115466918085, | |
| "learning_rate": 1.157194060419867e-06, | |
| "loss": 0.0, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 4.7341610136951235, | |
| "grad_norm": 0.002305779477709061, | |
| "learning_rate": 1.0547875064004097e-06, | |
| "loss": 0.0, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 4.759759375399974, | |
| "grad_norm": 0.0014673554584682805, | |
| "learning_rate": 9.523809523809525e-07, | |
| "loss": 0.0, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 4.7853577371048255, | |
| "grad_norm": 0.0032349538713087054, | |
| "learning_rate": 8.499743983614952e-07, | |
| "loss": 0.0, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 4.810956098809676, | |
| "grad_norm": 0.000820434912366508, | |
| "learning_rate": 7.475678443420379e-07, | |
| "loss": 0.0, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 4.836554460514527, | |
| "grad_norm": 0.0018281915833400296, | |
| "learning_rate": 6.451612903225807e-07, | |
| "loss": 0.0, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 4.862152822219378, | |
| "grad_norm": 0.001079014832848559, | |
| "learning_rate": 5.427547363031235e-07, | |
| "loss": 0.0, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 4.8877511839242285, | |
| "grad_norm": 0.0009643703560768136, | |
| "learning_rate": 4.4034818228366616e-07, | |
| "loss": 0.0, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 4.91334954562908, | |
| "grad_norm": 0.0012576968463295232, | |
| "learning_rate": 3.3794162826420895e-07, | |
| "loss": 0.0, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 4.93894790733393, | |
| "grad_norm": 0.0017081309137603447, | |
| "learning_rate": 2.355350742447517e-07, | |
| "loss": 0.0, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 4.964546269038782, | |
| "grad_norm": 0.0014382920472094229, | |
| "learning_rate": 1.3312852022529444e-07, | |
| "loss": 0.0, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 4.990144630743632, | |
| "grad_norm": 0.0014054516528993024, | |
| "learning_rate": 3.0721966205837177e-08, | |
| "loss": 0.0, | |
| "step": 9750 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 9765, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.471170805648589e+16, | |
| "train_batch_size": 6, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |