{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.916590284142988,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0018331805682859762,
      "grad_norm": 0.07204689830541611,
      "learning_rate": 4e-05,
      "loss": 0.02,
      "step": 1
    },
    {
      "epoch": 0.0036663611365719525,
      "grad_norm": 0.0971730649471283,
      "learning_rate": 8e-05,
      "loss": 0.037,
      "step": 2
    },
    {
      "epoch": 0.005499541704857928,
      "grad_norm": 0.0824146643280983,
      "learning_rate": 0.00012,
      "loss": 0.0258,
      "step": 3
    },
    {
      "epoch": 0.007332722273143905,
      "grad_norm": 0.08816614001989365,
      "learning_rate": 0.00016,
      "loss": 0.0266,
      "step": 4
    },
    {
      "epoch": 0.00916590284142988,
      "grad_norm": 0.09286794066429138,
      "learning_rate": 0.0002,
      "loss": 0.0329,
      "step": 5
    },
    {
      "epoch": 0.010999083409715857,
      "grad_norm": 0.10284189879894257,
      "learning_rate": 0.00019981566820276498,
      "loss": 0.0338,
      "step": 6
    },
    {
      "epoch": 0.012832263978001834,
      "grad_norm": 0.07772455364465714,
      "learning_rate": 0.00019963133640552995,
      "loss": 0.0238,
      "step": 7
    },
    {
      "epoch": 0.01466544454628781,
      "grad_norm": 0.10870955139398575,
      "learning_rate": 0.00019944700460829492,
      "loss": 0.0302,
      "step": 8
    },
    {
      "epoch": 0.016498625114573784,
      "grad_norm": 0.10848188400268555,
      "learning_rate": 0.00019926267281105992,
      "loss": 0.0314,
      "step": 9
    },
    {
      "epoch": 0.01833180568285976,
      "grad_norm": 0.10071466863155365,
      "learning_rate": 0.0001990783410138249,
      "loss": 0.0281,
      "step": 10
    },
    {
      "epoch": 0.02016498625114574,
      "grad_norm": 0.08322206884622574,
      "learning_rate": 0.00019889400921658986,
      "loss": 0.0236,
      "step": 11
    },
    {
      "epoch": 0.021998166819431713,
      "grad_norm": 0.12429114431142807,
      "learning_rate": 0.00019870967741935483,
      "loss": 0.0488,
      "step": 12
    },
    {
      "epoch": 0.02383134738771769,
      "grad_norm": 0.08638562262058258,
      "learning_rate": 0.0001985253456221198,
      "loss": 0.0217,
      "step": 13
    },
    {
      "epoch": 0.025664527956003668,
      "grad_norm": 0.07736796885728836,
      "learning_rate": 0.0001983410138248848,
      "loss": 0.0223,
      "step": 14
    },
    {
      "epoch": 0.027497708524289642,
      "grad_norm": 0.08814552426338196,
      "learning_rate": 0.00019815668202764977,
      "loss": 0.0314,
      "step": 15
    },
    {
      "epoch": 0.02933088909257562,
      "grad_norm": 0.09797844290733337,
      "learning_rate": 0.00019797235023041477,
      "loss": 0.0341,
      "step": 16
    },
    {
      "epoch": 0.031164069660861594,
      "grad_norm": 0.11390747874975204,
      "learning_rate": 0.00019778801843317974,
      "loss": 0.0329,
      "step": 17
    },
    {
      "epoch": 0.03299725022914757,
      "grad_norm": 0.12596647441387177,
      "learning_rate": 0.00019760368663594472,
      "loss": 0.0479,
      "step": 18
    },
    {
      "epoch": 0.034830430797433545,
      "grad_norm": 0.08672292530536652,
      "learning_rate": 0.00019741935483870969,
      "loss": 0.0242,
      "step": 19
    },
    {
      "epoch": 0.03666361136571952,
      "grad_norm": 0.07240811735391617,
      "learning_rate": 0.00019723502304147466,
      "loss": 0.023,
      "step": 20
    },
    {
      "epoch": 0.0384967919340055,
      "grad_norm": 0.09740838408470154,
      "learning_rate": 0.00019705069124423966,
      "loss": 0.0325,
      "step": 21
    },
    {
      "epoch": 0.04032997250229148,
      "grad_norm": 0.10614212602376938,
      "learning_rate": 0.00019686635944700463,
      "loss": 0.0415,
      "step": 22
    },
    {
      "epoch": 0.04216315307057745,
      "grad_norm": 0.06703123450279236,
      "learning_rate": 0.0001966820276497696,
      "loss": 0.0201,
      "step": 23
    },
    {
      "epoch": 0.043996333638863426,
      "grad_norm": 0.11590456962585449,
      "learning_rate": 0.00019649769585253457,
      "loss": 0.0444,
      "step": 24
    },
    {
      "epoch": 0.045829514207149404,
      "grad_norm": 0.08663053065538406,
      "learning_rate": 0.00019631336405529954,
      "loss": 0.0331,
      "step": 25
    },
    {
      "epoch": 0.04766269477543538,
      "grad_norm": 0.0977555587887764,
      "learning_rate": 0.0001961290322580645,
      "loss": 0.0262,
      "step": 26
    },
    {
      "epoch": 0.04949587534372136,
      "grad_norm": 0.07877817749977112,
      "learning_rate": 0.0001959447004608295,
      "loss": 0.0309,
      "step": 27
    },
    {
      "epoch": 0.051329055912007336,
      "grad_norm": 0.09205110371112823,
      "learning_rate": 0.00019576036866359448,
      "loss": 0.0363,
      "step": 28
    },
    {
      "epoch": 0.05316223648029331,
      "grad_norm": 0.07161393016576767,
      "learning_rate": 0.00019557603686635945,
      "loss": 0.0231,
      "step": 29
    },
    {
      "epoch": 0.054995417048579284,
      "grad_norm": 0.1110185980796814,
      "learning_rate": 0.00019539170506912442,
      "loss": 0.0414,
      "step": 30
    },
    {
      "epoch": 0.05682859761686526,
      "grad_norm": 0.08546338975429535,
      "learning_rate": 0.0001952073732718894,
      "loss": 0.0297,
      "step": 31
    },
    {
      "epoch": 0.05866177818515124,
      "grad_norm": 0.08454198390245438,
      "learning_rate": 0.00019502304147465436,
      "loss": 0.0244,
      "step": 32
    },
    {
      "epoch": 0.06049495875343722,
      "grad_norm": 0.09889410436153412,
      "learning_rate": 0.00019483870967741936,
      "loss": 0.0334,
      "step": 33
    },
    {
      "epoch": 0.06232813932172319,
      "grad_norm": 0.10116388648748398,
      "learning_rate": 0.00019465437788018433,
      "loss": 0.0309,
      "step": 34
    },
    {
      "epoch": 0.06416131989000917,
      "grad_norm": 0.12611277401447296,
      "learning_rate": 0.00019447004608294933,
      "loss": 0.0511,
      "step": 35
    },
    {
      "epoch": 0.06599450045829514,
      "grad_norm": 0.11593794077634811,
      "learning_rate": 0.0001942857142857143,
      "loss": 0.046,
      "step": 36
    },
    {
      "epoch": 0.06782768102658111,
      "grad_norm": 0.09269952028989792,
      "learning_rate": 0.00019410138248847927,
      "loss": 0.0269,
      "step": 37
    },
    {
      "epoch": 0.06966086159486709,
      "grad_norm": 0.08128319680690765,
      "learning_rate": 0.00019391705069124425,
      "loss": 0.0267,
      "step": 38
    },
    {
      "epoch": 0.07149404216315307,
      "grad_norm": 0.16776973009109497,
      "learning_rate": 0.00019373271889400924,
      "loss": 0.0333,
      "step": 39
    },
    {
      "epoch": 0.07332722273143905,
      "grad_norm": 0.09216058254241943,
      "learning_rate": 0.00019354838709677422,
      "loss": 0.03,
      "step": 40
    },
    {
      "epoch": 0.07516040329972502,
      "grad_norm": 0.0869499146938324,
      "learning_rate": 0.00019336405529953919,
      "loss": 0.0227,
      "step": 41
    },
    {
      "epoch": 0.076993583868011,
      "grad_norm": 0.08851771056652069,
      "learning_rate": 0.00019317972350230416,
      "loss": 0.0269,
      "step": 42
    },
    {
      "epoch": 0.07882676443629698,
      "grad_norm": 0.07213577628135681,
      "learning_rate": 0.00019299539170506913,
      "loss": 0.0271,
      "step": 43
    },
    {
      "epoch": 0.08065994500458296,
      "grad_norm": 0.08532075583934784,
      "learning_rate": 0.0001928110599078341,
      "loss": 0.0343,
      "step": 44
    },
    {
      "epoch": 0.08249312557286893,
      "grad_norm": 0.1002303957939148,
      "learning_rate": 0.0001926267281105991,
      "loss": 0.0421,
      "step": 45
    },
    {
      "epoch": 0.0843263061411549,
      "grad_norm": 0.10179378092288971,
      "learning_rate": 0.00019244239631336407,
      "loss": 0.0472,
      "step": 46
    },
    {
      "epoch": 0.08615948670944087,
      "grad_norm": 0.08771562576293945,
      "learning_rate": 0.00019225806451612904,
      "loss": 0.0329,
      "step": 47
    },
    {
      "epoch": 0.08799266727772685,
      "grad_norm": 0.0852821096777916,
      "learning_rate": 0.000192073732718894,
      "loss": 0.0271,
      "step": 48
    },
    {
      "epoch": 0.08982584784601283,
      "grad_norm": 0.0855298638343811,
      "learning_rate": 0.00019188940092165898,
      "loss": 0.0366,
      "step": 49
    },
    {
      "epoch": 0.09165902841429881,
      "grad_norm": 0.10242363810539246,
      "learning_rate": 0.00019170506912442395,
      "loss": 0.0409,
      "step": 50
    },
    {
      "epoch": 0.09349220898258478,
      "grad_norm": 0.07894504070281982,
      "learning_rate": 0.00019152073732718895,
      "loss": 0.0261,
      "step": 51
    },
    {
      "epoch": 0.09532538955087076,
      "grad_norm": 0.0891820639371872,
      "learning_rate": 0.00019133640552995392,
      "loss": 0.0371,
      "step": 52
    },
    {
      "epoch": 0.09715857011915674,
      "grad_norm": 0.09067510813474655,
      "learning_rate": 0.0001911520737327189,
      "loss": 0.0369,
      "step": 53
    },
    {
      "epoch": 0.09899175068744272,
      "grad_norm": 0.07762423902750015,
      "learning_rate": 0.0001909677419354839,
      "loss": 0.0267,
      "step": 54
    },
    {
      "epoch": 0.1008249312557287,
      "grad_norm": 0.09637603908777237,
      "learning_rate": 0.00019078341013824886,
      "loss": 0.0384,
      "step": 55
    },
    {
      "epoch": 0.10265811182401467,
      "grad_norm": 0.08040502667427063,
      "learning_rate": 0.00019059907834101383,
      "loss": 0.0291,
      "step": 56
    },
    {
      "epoch": 0.10449129239230064,
      "grad_norm": 0.07859490811824799,
      "learning_rate": 0.00019041474654377883,
      "loss": 0.0307,
      "step": 57
    },
    {
      "epoch": 0.10632447296058661,
      "grad_norm": 0.08587613701820374,
      "learning_rate": 0.0001902304147465438,
      "loss": 0.0327,
      "step": 58
    },
    {
      "epoch": 0.10815765352887259,
      "grad_norm": 0.09821908921003342,
      "learning_rate": 0.00019004608294930877,
      "loss": 0.0302,
      "step": 59
    },
    {
      "epoch": 0.10999083409715857,
      "grad_norm": 0.09804884344339371,
      "learning_rate": 0.00018986175115207375,
      "loss": 0.0397,
      "step": 60
    },
    {
      "epoch": 0.11182401466544455,
      "grad_norm": 0.1171504408121109,
      "learning_rate": 0.00018967741935483872,
      "loss": 0.0406,
      "step": 61
    },
    {
      "epoch": 0.11365719523373052,
      "grad_norm": 0.09860570728778839,
      "learning_rate": 0.0001894930875576037,
      "loss": 0.0381,
      "step": 62
    },
    {
      "epoch": 0.1154903758020165,
      "grad_norm": 0.10370708256959915,
      "learning_rate": 0.00018930875576036869,
      "loss": 0.0419,
      "step": 63
    },
    {
      "epoch": 0.11732355637030248,
      "grad_norm": 0.06204281374812126,
      "learning_rate": 0.00018912442396313366,
      "loss": 0.0219,
      "step": 64
    },
    {
      "epoch": 0.11915673693858846,
      "grad_norm": 0.09292633086442947,
      "learning_rate": 0.00018894009216589863,
      "loss": 0.0293,
      "step": 65
    },
    {
      "epoch": 0.12098991750687443,
      "grad_norm": 0.10509534925222397,
      "learning_rate": 0.0001887557603686636,
      "loss": 0.04,
      "step": 66
    },
    {
      "epoch": 0.1228230980751604,
      "grad_norm": 0.08757118135690689,
      "learning_rate": 0.00018857142857142857,
      "loss": 0.0295,
      "step": 67
    },
    {
      "epoch": 0.12465627864344637,
      "grad_norm": 0.08099905401468277,
      "learning_rate": 0.00018838709677419354,
      "loss": 0.027,
      "step": 68
    },
    {
      "epoch": 0.12648945921173235,
      "grad_norm": 0.0924796611070633,
      "learning_rate": 0.0001882027649769585,
      "loss": 0.0289,
      "step": 69
    },
    {
      "epoch": 0.12832263978001834,
      "grad_norm": 0.09552083164453506,
      "learning_rate": 0.0001880184331797235,
      "loss": 0.0322,
      "step": 70
    },
    {
      "epoch": 0.1301558203483043,
      "grad_norm": 0.09000714868307114,
      "learning_rate": 0.00018783410138248848,
      "loss": 0.0304,
      "step": 71
    },
    {
      "epoch": 0.13198900091659027,
      "grad_norm": 0.08881785720586777,
      "learning_rate": 0.00018764976958525345,
      "loss": 0.0305,
      "step": 72
    },
    {
      "epoch": 0.13382218148487626,
      "grad_norm": 0.10535095632076263,
      "learning_rate": 0.00018746543778801845,
      "loss": 0.0435,
      "step": 73
    },
    {
      "epoch": 0.13565536205316223,
      "grad_norm": 0.097115658223629,
      "learning_rate": 0.00018728110599078342,
      "loss": 0.0501,
      "step": 74
    },
    {
      "epoch": 0.13748854262144822,
      "grad_norm": 0.10467157512903214,
      "learning_rate": 0.0001870967741935484,
      "loss": 0.0523,
      "step": 75
    },
    {
      "epoch": 0.13932172318973418,
      "grad_norm": 0.08117009699344635,
      "learning_rate": 0.0001869124423963134,
      "loss": 0.0272,
      "step": 76
    },
    {
      "epoch": 0.14115490375802017,
      "grad_norm": 0.09257414937019348,
      "learning_rate": 0.00018672811059907836,
      "loss": 0.0375,
      "step": 77
    },
    {
      "epoch": 0.14298808432630614,
      "grad_norm": 0.07603290677070618,
      "learning_rate": 0.00018654377880184333,
      "loss": 0.0245,
      "step": 78
    },
    {
      "epoch": 0.14482126489459213,
      "grad_norm": 0.08395121246576309,
      "learning_rate": 0.0001863594470046083,
      "loss": 0.0379,
      "step": 79
    },
    {
      "epoch": 0.1466544454628781,
      "grad_norm": 0.09991391003131866,
      "learning_rate": 0.00018617511520737328,
      "loss": 0.0326,
      "step": 80
    },
    {
      "epoch": 0.14848762603116408,
      "grad_norm": 0.0780223086476326,
      "learning_rate": 0.00018599078341013825,
      "loss": 0.0332,
      "step": 81
    },
    {
      "epoch": 0.15032080659945005,
      "grad_norm": 0.07637890428304672,
      "learning_rate": 0.00018580645161290325,
      "loss": 0.0271,
      "step": 82
    },
    {
      "epoch": 0.152153987167736,
      "grad_norm": 0.0815526694059372,
      "learning_rate": 0.00018562211981566822,
      "loss": 0.0289,
      "step": 83
    },
    {
      "epoch": 0.153987167736022,
      "grad_norm": 0.07798799872398376,
      "learning_rate": 0.0001854377880184332,
      "loss": 0.032,
      "step": 84
    },
    {
      "epoch": 0.15582034830430797,
      "grad_norm": 0.06180557608604431,
      "learning_rate": 0.00018525345622119816,
      "loss": 0.0207,
      "step": 85
    },
    {
      "epoch": 0.15765352887259396,
      "grad_norm": 0.07907485961914062,
      "learning_rate": 0.00018506912442396313,
      "loss": 0.0281,
      "step": 86
    },
    {
      "epoch": 0.15948670944087992,
      "grad_norm": 0.0909823551774025,
      "learning_rate": 0.0001848847926267281,
      "loss": 0.032,
      "step": 87
    },
    {
      "epoch": 0.1613198900091659,
      "grad_norm": 0.09836460649967194,
      "learning_rate": 0.0001847004608294931,
      "loss": 0.0409,
      "step": 88
    },
    {
      "epoch": 0.16315307057745188,
      "grad_norm": 0.05404837429523468,
      "learning_rate": 0.00018451612903225807,
      "loss": 0.02,
      "step": 89
    },
    {
      "epoch": 0.16498625114573787,
      "grad_norm": 0.09892542660236359,
      "learning_rate": 0.00018433179723502304,
      "loss": 0.0321,
      "step": 90
    },
    {
      "epoch": 0.16681943171402383,
      "grad_norm": 0.08493707329034805,
      "learning_rate": 0.000184147465437788,
      "loss": 0.0343,
      "step": 91
    },
    {
      "epoch": 0.1686526122823098,
      "grad_norm": 0.10508857667446136,
      "learning_rate": 0.000183963133640553,
      "loss": 0.0397,
      "step": 92
    },
    {
      "epoch": 0.17048579285059579,
      "grad_norm": 0.08235018700361252,
      "learning_rate": 0.00018377880184331798,
      "loss": 0.0262,
      "step": 93
    },
    {
      "epoch": 0.17231897341888175,
      "grad_norm": 0.06784114986658096,
      "learning_rate": 0.00018359447004608298,
      "loss": 0.0247,
      "step": 94
    },
    {
      "epoch": 0.17415215398716774,
      "grad_norm": 0.10621548444032669,
      "learning_rate": 0.00018341013824884795,
      "loss": 0.035,
      "step": 95
    },
    {
      "epoch": 0.1759853345554537,
      "grad_norm": 0.10951874405145645,
      "learning_rate": 0.00018322580645161292,
      "loss": 0.0409,
      "step": 96
    },
    {
      "epoch": 0.1778185151237397,
      "grad_norm": 0.08758855611085892,
      "learning_rate": 0.0001830414746543779,
      "loss": 0.0389,
      "step": 97
    },
    {
      "epoch": 0.17965169569202566,
      "grad_norm": 0.10536627471446991,
      "learning_rate": 0.00018285714285714286,
      "loss": 0.0372,
      "step": 98
    },
    {
      "epoch": 0.18148487626031165,
      "grad_norm": 0.10224272310733795,
      "learning_rate": 0.00018267281105990784,
      "loss": 0.0394,
      "step": 99
    },
    {
      "epoch": 0.18331805682859761,
      "grad_norm": 0.07795912027359009,
      "learning_rate": 0.00018248847926267283,
      "loss": 0.0271,
      "step": 100
    },
    {
      "epoch": 0.1851512373968836,
      "grad_norm": 0.0965060293674469,
      "learning_rate": 0.0001823041474654378,
      "loss": 0.0357,
      "step": 101
    },
    {
      "epoch": 0.18698441796516957,
      "grad_norm": 0.061280328780412674,
      "learning_rate": 0.00018211981566820278,
      "loss": 0.0263,
      "step": 102
    },
    {
      "epoch": 0.18881759853345553,
      "grad_norm": 0.08007590472698212,
      "learning_rate": 0.00018193548387096775,
      "loss": 0.0324,
      "step": 103
    },
    {
      "epoch": 0.19065077910174152,
      "grad_norm": 0.08586332201957703,
      "learning_rate": 0.00018175115207373272,
      "loss": 0.0362,
      "step": 104
    },
    {
      "epoch": 0.1924839596700275,
      "grad_norm": 0.08350996673107147,
      "learning_rate": 0.0001815668202764977,
      "loss": 0.029,
      "step": 105
    },
    {
      "epoch": 0.19431714023831348,
      "grad_norm": 0.07932569086551666,
      "learning_rate": 0.0001813824884792627,
      "loss": 0.0262,
      "step": 106
    },
    {
      "epoch": 0.19615032080659944,
      "grad_norm": 0.08850853145122528,
      "learning_rate": 0.00018119815668202766,
      "loss": 0.0376,
      "step": 107
    },
    {
      "epoch": 0.19798350137488543,
      "grad_norm": 0.10090523213148117,
      "learning_rate": 0.00018101382488479263,
      "loss": 0.0437,
      "step": 108
    },
    {
      "epoch": 0.1998166819431714,
      "grad_norm": 0.09374283999204636,
      "learning_rate": 0.0001808294930875576,
      "loss": 0.0348,
      "step": 109
    },
    {
      "epoch": 0.2016498625114574,
      "grad_norm": 0.09550321102142334,
      "learning_rate": 0.00018064516129032257,
      "loss": 0.0415,
      "step": 110
    },
    {
      "epoch": 0.20348304307974335,
      "grad_norm": 0.09793347865343094,
      "learning_rate": 0.00018046082949308757,
      "loss": 0.0363,
      "step": 111
    },
    {
      "epoch": 0.20531622364802934,
      "grad_norm": 0.05917959660291672,
      "learning_rate": 0.00018027649769585254,
      "loss": 0.0233,
      "step": 112
    },
    {
      "epoch": 0.2071494042163153,
      "grad_norm": 0.08095124363899231,
      "learning_rate": 0.00018009216589861754,
      "loss": 0.0273,
      "step": 113
    },
    {
      "epoch": 0.20898258478460127,
      "grad_norm": 0.09870299696922302,
      "learning_rate": 0.0001799078341013825,
      "loss": 0.0341,
      "step": 114
    },
    {
      "epoch": 0.21081576535288726,
      "grad_norm": 0.06982927024364471,
      "learning_rate": 0.00017972350230414748,
      "loss": 0.0263,
      "step": 115
    },
    {
      "epoch": 0.21264894592117323,
      "grad_norm": 0.10992158949375153,
      "learning_rate": 0.00017953917050691245,
      "loss": 0.0444,
      "step": 116
    },
    {
      "epoch": 0.21448212648945922,
      "grad_norm": 0.11806368082761765,
      "learning_rate": 0.00017935483870967742,
      "loss": 0.045,
      "step": 117
    },
    {
      "epoch": 0.21631530705774518,
      "grad_norm": 0.061528291553258896,
      "learning_rate": 0.00017917050691244242,
      "loss": 0.0194,
      "step": 118
    },
    {
      "epoch": 0.21814848762603117,
      "grad_norm": 0.08814897388219833,
      "learning_rate": 0.0001789861751152074,
      "loss": 0.0335,
      "step": 119
    },
    {
      "epoch": 0.21998166819431714,
      "grad_norm": 0.07923831045627594,
      "learning_rate": 0.00017880184331797236,
      "loss": 0.0304,
      "step": 120
    },
    {
      "epoch": 0.22181484876260313,
      "grad_norm": 0.06676612794399261,
      "learning_rate": 0.00017861751152073734,
      "loss": 0.0261,
      "step": 121
    },
    {
      "epoch": 0.2236480293308891,
      "grad_norm": 0.10044591873884201,
      "learning_rate": 0.0001784331797235023,
      "loss": 0.0352,
      "step": 122
    },
    {
      "epoch": 0.22548120989917506,
      "grad_norm": 0.09440556168556213,
      "learning_rate": 0.00017824884792626728,
      "loss": 0.0394,
      "step": 123
    },
    {
      "epoch": 0.22731439046746105,
      "grad_norm": 0.07944708317518234,
      "learning_rate": 0.00017806451612903228,
      "loss": 0.0292,
      "step": 124
    },
    {
      "epoch": 0.229147571035747,
      "grad_norm": 0.11380550265312195,
      "learning_rate": 0.00017788018433179725,
      "loss": 0.0479,
      "step": 125
    },
    {
      "epoch": 0.230980751604033,
      "grad_norm": 0.09983845055103302,
      "learning_rate": 0.00017769585253456222,
      "loss": 0.043,
      "step": 126
    },
    {
      "epoch": 0.23281393217231897,
      "grad_norm": 0.09731481224298477,
      "learning_rate": 0.0001775115207373272,
      "loss": 0.0346,
      "step": 127
    },
    {
      "epoch": 0.23464711274060496,
      "grad_norm": 0.08189846575260162,
      "learning_rate": 0.00017732718894009216,
      "loss": 0.0314,
      "step": 128
    },
    {
      "epoch": 0.23648029330889092,
      "grad_norm": 0.11007854342460632,
      "learning_rate": 0.00017714285714285713,
      "loss": 0.0395,
      "step": 129
    },
    {
      "epoch": 0.2383134738771769,
      "grad_norm": 0.06934443861246109,
      "learning_rate": 0.00017695852534562213,
      "loss": 0.0289,
      "step": 130
    },
    {
      "epoch": 0.24014665444546288,
      "grad_norm": 0.0965190976858139,
      "learning_rate": 0.0001767741935483871,
      "loss": 0.0341,
      "step": 131
    },
    {
      "epoch": 0.24197983501374887,
      "grad_norm": 0.11201060563325882,
      "learning_rate": 0.0001765898617511521,
      "loss": 0.0404,
      "step": 132
    },
    {
      "epoch": 0.24381301558203483,
      "grad_norm": 0.07356410473585129,
      "learning_rate": 0.00017640552995391707,
      "loss": 0.0225,
      "step": 133
    },
    {
      "epoch": 0.2456461961503208,
      "grad_norm": 0.09507370740175247,
      "learning_rate": 0.00017622119815668204,
      "loss": 0.0409,
      "step": 134
    },
    {
      "epoch": 0.24747937671860679,
      "grad_norm": 0.09385097771883011,
      "learning_rate": 0.000176036866359447,
      "loss": 0.0314,
      "step": 135
    },
    {
      "epoch": 0.24931255728689275,
      "grad_norm": 0.08933474868535995,
      "learning_rate": 0.00017585253456221198,
      "loss": 0.0308,
      "step": 136
    },
    {
      "epoch": 0.25114573785517874,
      "grad_norm": 0.07607486099004745,
      "learning_rate": 0.00017566820276497698,
      "loss": 0.0272,
      "step": 137
    },
    {
      "epoch": 0.2529789184234647,
      "grad_norm": 0.060726869851350784,
      "learning_rate": 0.00017548387096774195,
      "loss": 0.0254,
      "step": 138
    },
    {
      "epoch": 0.25481209899175067,
      "grad_norm": 0.08471496403217316,
      "learning_rate": 0.00017529953917050692,
      "loss": 0.0299,
      "step": 139
    },
    {
      "epoch": 0.2566452795600367,
      "grad_norm": 0.06967601180076599,
      "learning_rate": 0.0001751152073732719,
      "loss": 0.0234,
      "step": 140
    },
    {
      "epoch": 0.25847846012832265,
      "grad_norm": 0.08925054222345352,
      "learning_rate": 0.00017493087557603687,
      "loss": 0.0375,
      "step": 141
    },
    {
      "epoch": 0.2603116406966086,
      "grad_norm": 0.07857096195220947,
      "learning_rate": 0.00017474654377880184,
      "loss": 0.0294,
      "step": 142
    },
    {
      "epoch": 0.2621448212648946,
      "grad_norm": 0.10110893100500107,
      "learning_rate": 0.00017456221198156684,
      "loss": 0.0388,
      "step": 143
    },
    {
      "epoch": 0.26397800183318054,
      "grad_norm": 0.10537184774875641,
      "learning_rate": 0.0001743778801843318,
      "loss": 0.0426,
      "step": 144
    },
    {
      "epoch": 0.26581118240146656,
      "grad_norm": 0.10398052632808685,
      "learning_rate": 0.00017419354838709678,
      "loss": 0.0362,
      "step": 145
    },
    {
      "epoch": 0.2676443629697525,
      "grad_norm": 0.06848938763141632,
      "learning_rate": 0.00017400921658986175,
      "loss": 0.028,
      "step": 146
    },
    {
      "epoch": 0.2694775435380385,
      "grad_norm": 0.08947031199932098,
      "learning_rate": 0.00017382488479262672,
      "loss": 0.0272,
      "step": 147
    },
    {
      "epoch": 0.27131072410632445,
      "grad_norm": 0.09318867325782776,
      "learning_rate": 0.0001736405529953917,
      "loss": 0.0328,
      "step": 148
    },
    {
      "epoch": 0.27314390467461047,
      "grad_norm": 0.0872950330376625,
      "learning_rate": 0.0001734562211981567,
      "loss": 0.0304,
      "step": 149
    },
    {
      "epoch": 0.27497708524289644,
      "grad_norm": 0.12327979505062103,
      "learning_rate": 0.00017327188940092166,
      "loss": 0.0542,
      "step": 150
    },
    {
      "epoch": 0.2768102658111824,
      "grad_norm": 0.08264505118131638,
      "learning_rate": 0.00017308755760368666,
      "loss": 0.0262,
      "step": 151
    },
    {
      "epoch": 0.27864344637946836,
      "grad_norm": 0.09241585433483124,
      "learning_rate": 0.00017290322580645163,
      "loss": 0.0323,
      "step": 152
    },
    {
      "epoch": 0.2804766269477543,
      "grad_norm": 0.09120775014162064,
      "learning_rate": 0.0001727188940092166,
      "loss": 0.031,
      "step": 153
    },
    {
      "epoch": 0.28230980751604035,
      "grad_norm": 0.05967549607157707,
      "learning_rate": 0.00017253456221198157,
      "loss": 0.02,
      "step": 154
    },
    {
      "epoch": 0.2841429880843263,
      "grad_norm": 0.08845420181751251,
      "learning_rate": 0.00017235023041474657,
      "loss": 0.0359,
      "step": 155
    },
    {
      "epoch": 0.2859761686526123,
      "grad_norm": 0.10303748399019241,
      "learning_rate": 0.00017216589861751154,
      "loss": 0.0336,
      "step": 156
    },
    {
      "epoch": 0.28780934922089824,
      "grad_norm": 0.11286526173353195,
      "learning_rate": 0.0001719815668202765,
      "loss": 0.0433,
      "step": 157
    },
    {
      "epoch": 0.28964252978918426,
      "grad_norm": 0.05800803378224373,
      "learning_rate": 0.00017179723502304148,
      "loss": 0.0191,
      "step": 158
    },
    {
      "epoch": 0.2914757103574702,
      "grad_norm": 0.06295135617256165,
      "learning_rate": 0.00017161290322580645,
      "loss": 0.0211,
      "step": 159
    },
    {
      "epoch": 0.2933088909257562,
      "grad_norm": 0.061198145151138306,
      "learning_rate": 0.00017142857142857143,
      "loss": 0.0222,
      "step": 160
    },
    {
      "epoch": 0.29514207149404215,
      "grad_norm": 0.08423091471195221,
      "learning_rate": 0.00017124423963133642,
      "loss": 0.029,
      "step": 161
    },
    {
      "epoch": 0.29697525206232817,
      "grad_norm": 0.07596798241138458,
      "learning_rate": 0.0001710599078341014,
      "loss": 0.0257,
      "step": 162
    },
    {
      "epoch": 0.29880843263061413,
      "grad_norm": 0.12243133038282394,
      "learning_rate": 0.00017087557603686637,
      "loss": 0.0488,
      "step": 163
    },
    {
      "epoch": 0.3006416131989001,
      "grad_norm": 0.10156381130218506,
      "learning_rate": 0.00017069124423963134,
      "loss": 0.0309,
      "step": 164
    },
    {
      "epoch": 0.30247479376718606,
      "grad_norm": 0.05903761461377144,
      "learning_rate": 0.0001705069124423963,
      "loss": 0.0179,
      "step": 165
    },
    {
      "epoch": 0.304307974335472,
      "grad_norm": 0.08366727083921432,
      "learning_rate": 0.00017032258064516128,
      "loss": 0.0297,
      "step": 166
    },
    {
      "epoch": 0.30614115490375804,
      "grad_norm": 0.09768462926149368,
      "learning_rate": 0.00017013824884792628,
      "loss": 0.0388,
      "step": 167
    },
    {
      "epoch": 0.307974335472044,
      "grad_norm": 0.07999719679355621,
      "learning_rate": 0.00016995391705069125,
      "loss": 0.0285,
      "step": 168
    },
    {
      "epoch": 0.30980751604032997,
      "grad_norm": 0.10129693150520325,
      "learning_rate": 0.00016976958525345622,
      "loss": 0.0437,
      "step": 169
    },
    {
      "epoch": 0.31164069660861593,
      "grad_norm": 0.07942084223031998,
      "learning_rate": 0.00016958525345622122,
      "loss": 0.028,
      "step": 170
    },
    {
      "epoch": 0.31347387717690195,
      "grad_norm": 0.09509172290563583,
      "learning_rate": 0.0001694009216589862,
      "loss": 0.035,
      "step": 171
    },
    {
      "epoch": 0.3153070577451879,
      "grad_norm": 0.10119883716106415,
      "learning_rate": 0.00016921658986175116,
      "loss": 0.0407,
      "step": 172
    },
    {
      "epoch": 0.3171402383134739,
      "grad_norm": 0.10474774241447449,
      "learning_rate": 0.00016903225806451616,
      "loss": 0.0382,
      "step": 173
    },
    {
      "epoch": 0.31897341888175984,
      "grad_norm": 0.08802273869514465,
      "learning_rate": 0.00016884792626728113,
      "loss": 0.0311,
      "step": 174
    },
    {
      "epoch": 0.3208065994500458,
      "grad_norm": 0.06499020010232925,
      "learning_rate": 0.0001686635944700461,
      "loss": 0.0247,
      "step": 175
    },
    {
      "epoch": 0.3226397800183318,
      "grad_norm": 0.09561455249786377,
      "learning_rate": 0.00016847926267281107,
      "loss": 0.0376,
      "step": 176
    },
    {
      "epoch": 0.3244729605866178,
      "grad_norm": 0.051068369299173355,
      "learning_rate": 0.00016829493087557604,
      "loss": 0.0194,
      "step": 177
    },
    {
      "epoch": 0.32630614115490375,
      "grad_norm": 0.09048140794038773,
      "learning_rate": 0.000168110599078341,
      "loss": 0.0344,
      "step": 178
    },
    {
      "epoch": 0.3281393217231897,
      "grad_norm": 0.08035707473754883,
      "learning_rate": 0.000167926267281106,
      "loss": 0.0323,
      "step": 179
    },
    {
      "epoch": 0.32997250229147573,
      "grad_norm": 0.10091862827539444,
      "learning_rate": 0.00016774193548387098,
      "loss": 0.0406,
      "step": 180
    },
    {
      "epoch": 0.3318056828597617,
      "grad_norm": 0.09862423688173294,
      "learning_rate": 0.00016755760368663595,
      "loss": 0.0475,
      "step": 181
    },
    {
      "epoch": 0.33363886342804766,
      "grad_norm": 0.10344900190830231,
      "learning_rate": 0.00016737327188940092,
      "loss": 0.0389,
      "step": 182
    },
    {
      "epoch": 0.3354720439963336,
      "grad_norm": 0.10084162652492523,
      "learning_rate": 0.0001671889400921659,
      "loss": 0.0387,
      "step": 183
    },
    {
      "epoch": 0.3373052245646196,
      "grad_norm": 0.08419749140739441,
      "learning_rate": 0.00016700460829493087,
      "loss": 0.0298,
      "step": 184
    },
    {
      "epoch": 0.3391384051329056,
      "grad_norm": 0.09623179584741592,
      "learning_rate": 0.00016682027649769587,
      "loss": 0.0291,
      "step": 185
    },
    {
      "epoch": 0.34097158570119157,
      "grad_norm": 0.0974535197019577,
      "learning_rate": 0.00016663594470046084,
      "loss": 0.0337,
      "step": 186
    },
    {
      "epoch": 0.34280476626947753,
      "grad_norm": 0.10164261609315872,
      "learning_rate": 0.0001664516129032258,
      "loss": 0.0466,
      "step": 187
    },
    {
      "epoch": 0.3446379468377635,
      "grad_norm": 0.0877864882349968,
      "learning_rate": 0.00016626728110599078,
      "loss": 0.0244,
      "step": 188
    },
    {
      "epoch": 0.3464711274060495,
      "grad_norm": 0.08111972361803055,
      "learning_rate": 0.00016608294930875578,
      "loss": 0.0285,
      "step": 189
    },
    {
      "epoch": 0.3483043079743355,
      "grad_norm": 0.0656951442360878,
      "learning_rate": 0.00016589861751152075,
      "loss": 0.0235,
      "step": 190
    },
    {
      "epoch": 0.35013748854262144,
      "grad_norm": 0.08031731843948364,
      "learning_rate": 0.00016571428571428575,
      "loss": 0.0321,
      "step": 191
    },
    {
      "epoch": 0.3519706691109074,
      "grad_norm": 0.1022307500243187,
      "learning_rate": 0.00016552995391705072,
      "loss": 0.0437,
      "step": 192
    },
    {
      "epoch": 0.3538038496791934,
      "grad_norm": 0.07790978997945786,
      "learning_rate": 0.0001653456221198157,
      "loss": 0.0305,
      "step": 193
    },
    {
      "epoch": 0.3556370302474794,
      "grad_norm": 0.06656166166067123,
      "learning_rate": 0.00016516129032258066,
      "loss": 0.027,
      "step": 194
    },
    {
      "epoch": 0.35747021081576535,
      "grad_norm": 0.08255946636199951,
      "learning_rate": 0.00016497695852534563,
      "loss": 0.0294,
      "step": 195
    },
    {
      "epoch": 0.3593033913840513,
      "grad_norm": 0.0771537646651268,
      "learning_rate": 0.0001647926267281106,
      "loss": 0.027,
      "step": 196
    },
    {
      "epoch": 0.3611365719523373,
      "grad_norm": 0.08929789811372757,
      "learning_rate": 0.00016460829493087557,
      "loss": 0.0435,
      "step": 197
    },
    {
      "epoch": 0.3629697525206233,
      "grad_norm": 0.07332134991884232,
      "learning_rate": 0.00016442396313364057,
      "loss": 0.0254,
      "step": 198
    },
    {
      "epoch": 0.36480293308890926,
      "grad_norm": 0.09681219607591629,
      "learning_rate": 0.00016423963133640554,
      "loss": 0.0404,
      "step": 199
    },
    {
      "epoch": 0.36663611365719523,
      "grad_norm": 0.0849548950791359,
      "learning_rate": 0.0001640552995391705,
      "loss": 0.0337,
      "step": 200
    },
    {
      "epoch": 0.3684692942254812,
      "grad_norm": 0.10003779083490372,
      "learning_rate": 0.00016387096774193548,
      "loss": 0.034,
      "step": 201
    },
    {
      "epoch": 0.3703024747937672,
      "grad_norm": 0.09228444844484329,
      "learning_rate": 0.00016368663594470046,
      "loss": 0.0362,
      "step": 202
    },
    {
      "epoch": 0.3721356553620532,
      "grad_norm": 0.06095033884048462,
      "learning_rate": 0.00016350230414746543,
      "loss": 0.0197,
      "step": 203
    },
    {
      "epoch": 0.37396883593033914,
      "grad_norm": 0.09504009038209915,
      "learning_rate": 0.00016331797235023042,
      "loss": 0.0356,
      "step": 204
    },
    {
      "epoch": 0.3758020164986251,
      "grad_norm": 0.07681944966316223,
      "learning_rate": 0.0001631336405529954,
      "loss": 0.0329,
      "step": 205
    },
    {
      "epoch": 0.37763519706691107,
      "grad_norm": 0.0894550010561943,
      "learning_rate": 0.00016294930875576037,
      "loss": 0.0327,
      "step": 206
    },
    {
      "epoch": 0.3794683776351971,
      "grad_norm": 0.09541459381580353,
      "learning_rate": 0.00016276497695852534,
      "loss": 0.0368,
      "step": 207
    },
    {
      "epoch": 0.38130155820348305,
      "grad_norm": 0.07463543862104416,
      "learning_rate": 0.00016258064516129034,
      "loss": 0.0272,
      "step": 208
    },
    {
      "epoch": 0.383134738771769,
      "grad_norm": 0.08396443724632263,
      "learning_rate": 0.0001623963133640553,
      "loss": 0.0316,
      "step": 209
    },
    {
      "epoch": 0.384967919340055,
      "grad_norm": 0.07742145657539368,
      "learning_rate": 0.0001622119815668203,
      "loss": 0.0276,
      "step": 210
    },
    {
      "epoch": 0.386801099908341,
      "grad_norm": 0.07310166209936142,
      "learning_rate": 0.00016202764976958528,
      "loss": 0.0241,
      "step": 211
    },
    {
      "epoch": 0.38863428047662696,
      "grad_norm": 0.09384534507989883,
      "learning_rate": 0.00016184331797235025,
      "loss": 0.0386,
      "step": 212
    },
    {
      "epoch": 0.3904674610449129,
      "grad_norm": 0.09084580093622208,
      "learning_rate": 0.00016165898617511522,
      "loss": 0.0293,
      "step": 213
    },
    {
      "epoch": 0.3923006416131989,
      "grad_norm": 0.10701391100883484,
      "learning_rate": 0.0001614746543778802,
      "loss": 0.046,
      "step": 214
    },
    {
      "epoch": 0.39413382218148485,
      "grad_norm": 0.07608213275671005,
      "learning_rate": 0.00016129032258064516,
      "loss": 0.031,
      "step": 215
    },
    {
      "epoch": 0.39596700274977087,
      "grad_norm": 0.09281232208013535,
      "learning_rate": 0.00016110599078341016,
      "loss": 0.0311,
      "step": 216
    },
    {
      "epoch": 0.39780018331805683,
      "grad_norm": 0.07548707723617554,
      "learning_rate": 0.00016092165898617513,
      "loss": 0.0244,
      "step": 217
    },
    {
      "epoch": 0.3996333638863428,
      "grad_norm": 0.08597145974636078,
      "learning_rate": 0.0001607373271889401,
      "loss": 0.0296,
      "step": 218
    },
    {
      "epoch": 0.40146654445462876,
      "grad_norm": 0.090579092502594,
      "learning_rate": 0.00016055299539170507,
      "loss": 0.0298,
      "step": 219
    },
    {
      "epoch": 0.4032997250229148,
      "grad_norm": 0.07005604356527328,
      "learning_rate": 0.00016036866359447004,
      "loss": 0.0254,
      "step": 220
    },
    {
      "epoch": 0.40513290559120074,
      "grad_norm": 0.09800952672958374,
      "learning_rate": 0.00016018433179723501,
      "loss": 0.0353,
      "step": 221
    },
    {
      "epoch": 0.4069660861594867,
      "grad_norm": 0.09682459384202957,
      "learning_rate": 0.00016,
      "loss": 0.0342,
      "step": 222
    },
    {
      "epoch": 0.40879926672777267,
      "grad_norm": 0.08852767199277878,
      "learning_rate": 0.00015981566820276498,
      "loss": 0.0356,
      "step": 223
    },
    {
      "epoch": 0.4106324472960587,
      "grad_norm": 0.07314983755350113,
      "learning_rate": 0.00015963133640552996,
      "loss": 0.0266,
      "step": 224
    },
    {
      "epoch": 0.41246562786434465,
      "grad_norm": 0.08612879365682602,
      "learning_rate": 0.00015944700460829493,
      "loss": 0.0314,
      "step": 225
    },
    {
      "epoch": 0.4142988084326306,
      "grad_norm": 0.09286980330944061,
      "learning_rate": 0.0001592626728110599,
      "loss": 0.0359,
      "step": 226
    },
    {
      "epoch": 0.4161319890009166,
      "grad_norm": 0.0787709653377533,
      "learning_rate": 0.0001590783410138249,
      "loss": 0.0299,
      "step": 227
    },
    {
      "epoch": 0.41796516956920254,
      "grad_norm": 0.08612968027591705,
      "learning_rate": 0.00015889400921658987,
      "loss": 0.0299,
      "step": 228
    },
    {
      "epoch": 0.41979835013748856,
      "grad_norm": 0.06926131248474121,
      "learning_rate": 0.00015870967741935487,
      "loss": 0.0257,
      "step": 229
    },
    {
      "epoch": 0.4216315307057745,
      "grad_norm": 0.09236182272434235,
      "learning_rate": 0.00015852534562211984,
      "loss": 0.0284,
      "step": 230
    },
    {
      "epoch": 0.4234647112740605,
      "grad_norm": 0.07634787261486053,
      "learning_rate": 0.0001583410138248848,
      "loss": 0.0335,
      "step": 231
    },
    {
      "epoch": 0.42529789184234645,
      "grad_norm": 0.10165869444608688,
      "learning_rate": 0.00015815668202764978,
      "loss": 0.0407,
      "step": 232
    },
    {
      "epoch": 0.4271310724106325,
      "grad_norm": 0.09464540332555771,
      "learning_rate": 0.00015797235023041475,
      "loss": 0.0389,
      "step": 233
    },
    {
      "epoch": 0.42896425297891844,
      "grad_norm": 0.0773908719420433,
      "learning_rate": 0.00015778801843317975,
      "loss": 0.0329,
      "step": 234
    },
    {
      "epoch": 0.4307974335472044,
      "grad_norm": 0.08985087275505066,
      "learning_rate": 0.00015760368663594472,
      "loss": 0.0328,
      "step": 235
    },
    {
      "epoch": 0.43263061411549036,
      "grad_norm": 0.06766209751367569,
      "learning_rate": 0.0001574193548387097,
      "loss": 0.0227,
      "step": 236
    },
    {
      "epoch": 0.4344637946837763,
      "grad_norm": 0.07593253999948502,
      "learning_rate": 0.00015723502304147466,
      "loss": 0.0252,
      "step": 237
    },
    {
      "epoch": 0.43629697525206235,
      "grad_norm": 0.0670701190829277,
      "learning_rate": 0.00015705069124423963,
      "loss": 0.022,
      "step": 238
    },
    {
      "epoch": 0.4381301558203483,
      "grad_norm": 0.08073533326387405,
      "learning_rate": 0.0001568663594470046,
      "loss": 0.0314,
      "step": 239
    },
    {
      "epoch": 0.4399633363886343,
      "grad_norm": 0.09111002087593079,
      "learning_rate": 0.0001566820276497696,
      "loss": 0.0298,
      "step": 240
    },
    {
      "epoch": 0.44179651695692024,
      "grad_norm": 0.07969270646572113,
      "learning_rate": 0.00015649769585253457,
      "loss": 0.0295,
      "step": 241
    },
    {
      "epoch": 0.44362969752520626,
      "grad_norm": 0.05996888503432274,
      "learning_rate": 0.00015631336405529954,
      "loss": 0.0233,
      "step": 242
    },
    {
      "epoch": 0.4454628780934922,
      "grad_norm": 0.0899052619934082,
      "learning_rate": 0.00015612903225806451,
      "loss": 0.0328,
      "step": 243
    },
    {
      "epoch": 0.4472960586617782,
      "grad_norm": 0.11184020340442657,
      "learning_rate": 0.00015594470046082949,
      "loss": 0.0456,
      "step": 244
    },
    {
      "epoch": 0.44912923923006415,
      "grad_norm": 0.08015838265419006,
      "learning_rate": 0.00015576036866359446,
      "loss": 0.0244,
      "step": 245
    },
    {
      "epoch": 0.4509624197983501,
      "grad_norm": 0.10306031256914139,
      "learning_rate": 0.00015557603686635946,
      "loss": 0.0397,
      "step": 246
    },
    {
      "epoch": 0.45279560036663613,
      "grad_norm": 0.08525974303483963,
      "learning_rate": 0.00015539170506912443,
      "loss": 0.0317,
      "step": 247
    },
    {
      "epoch": 0.4546287809349221,
      "grad_norm": 0.10056892782449722,
      "learning_rate": 0.00015520737327188942,
      "loss": 0.0456,
      "step": 248
    },
    {
      "epoch": 0.45646196150320806,
      "grad_norm": 0.09121023863554001,
      "learning_rate": 0.0001550230414746544,
      "loss": 0.0308,
      "step": 249
    },
    {
      "epoch": 0.458295142071494,
      "grad_norm": 0.06088346242904663,
      "learning_rate": 0.00015483870967741937,
      "loss": 0.0235,
      "step": 250
    },
    {
      "epoch": 0.46012832263978004,
      "grad_norm": 0.07533244043588638,
      "learning_rate": 0.00015465437788018434,
      "loss": 0.0309,
      "step": 251
    },
    {
      "epoch": 0.461961503208066,
      "grad_norm": 0.08711180835962296,
      "learning_rate": 0.00015447004608294934,
      "loss": 0.038,
      "step": 252
    },
    {
      "epoch": 0.46379468377635197,
      "grad_norm": 0.08756791800260544,
      "learning_rate": 0.0001542857142857143,
      "loss": 0.0332,
      "step": 253
    },
    {
      "epoch": 0.46562786434463793,
      "grad_norm": 0.09424729645252228,
      "learning_rate": 0.00015410138248847928,
      "loss": 0.0282,
      "step": 254
    },
    {
      "epoch": 0.4674610449129239,
      "grad_norm": 0.10178559273481369,
      "learning_rate": 0.00015391705069124425,
      "loss": 0.0319,
      "step": 255
    },
    {
      "epoch": 0.4692942254812099,
      "grad_norm": 0.09317290782928467,
      "learning_rate": 0.00015373271889400922,
      "loss": 0.0294,
      "step": 256
    },
    {
      "epoch": 0.4711274060494959,
      "grad_norm": 0.11380328983068466,
      "learning_rate": 0.0001535483870967742,
      "loss": 0.0474,
      "step": 257
    },
    {
      "epoch": 0.47296058661778184,
      "grad_norm": 0.0751282274723053,
      "learning_rate": 0.0001533640552995392,
      "loss": 0.0328,
      "step": 258
    },
    {
      "epoch": 0.4747937671860678,
      "grad_norm": 0.07097076624631882,
      "learning_rate": 0.00015317972350230416,
      "loss": 0.0314,
      "step": 259
    },
    {
      "epoch": 0.4766269477543538,
      "grad_norm": 0.0817055031657219,
      "learning_rate": 0.00015299539170506913,
      "loss": 0.0348,
      "step": 260
    },
    {
      "epoch": 0.4784601283226398,
      "grad_norm": 0.08542328327894211,
      "learning_rate": 0.0001528110599078341,
      "loss": 0.0314,
      "step": 261
    },
    {
      "epoch": 0.48029330889092575,
      "grad_norm": 0.08346550911664963,
      "learning_rate": 0.00015262672811059907,
      "loss": 0.0342,
      "step": 262
    },
    {
      "epoch": 0.4821264894592117,
      "grad_norm": 0.06232753023505211,
      "learning_rate": 0.00015244239631336405,
      "loss": 0.024,
      "step": 263
    },
    {
      "epoch": 0.48395967002749773,
      "grad_norm": 0.08413577824831009,
      "learning_rate": 0.00015225806451612902,
      "loss": 0.0289,
      "step": 264
    },
    {
      "epoch": 0.4857928505957837,
      "grad_norm": 0.1072312667965889,
      "learning_rate": 0.00015207373271889401,
      "loss": 0.0436,
      "step": 265
    },
    {
      "epoch": 0.48762603116406966,
      "grad_norm": 0.09056718647480011,
      "learning_rate": 0.00015188940092165899,
      "loss": 0.0299,
      "step": 266
    },
    {
      "epoch": 0.4894592117323556,
      "grad_norm": 0.07195229828357697,
      "learning_rate": 0.00015170506912442398,
      "loss": 0.0243,
      "step": 267
    },
    {
      "epoch": 0.4912923923006416,
      "grad_norm": 0.09091556072235107,
      "learning_rate": 0.00015152073732718895,
      "loss": 0.0371,
      "step": 268
    },
    {
      "epoch": 0.4931255728689276,
      "grad_norm": 0.08215435594320297,
      "learning_rate": 0.00015133640552995393,
      "loss": 0.0269,
      "step": 269
    },
    {
      "epoch": 0.49495875343721357,
      "grad_norm": 0.07215309143066406,
      "learning_rate": 0.0001511520737327189,
      "loss": 0.0253,
      "step": 270
    },
    {
      "epoch": 0.49679193400549954,
      "grad_norm": 0.08347906917333603,
      "learning_rate": 0.0001509677419354839,
      "loss": 0.0273,
      "step": 271
    },
    {
      "epoch": 0.4986251145737855,
      "grad_norm": 0.061136987060308456,
      "learning_rate": 0.00015078341013824887,
      "loss": 0.0231,
      "step": 272
    },
    {
      "epoch": 0.5004582951420715,
      "grad_norm": 0.10640832036733627,
      "learning_rate": 0.00015059907834101384,
      "loss": 0.0455,
      "step": 273
    },
    {
      "epoch": 0.5022914757103575,
      "grad_norm": 0.08627432584762573,
      "learning_rate": 0.0001504147465437788,
      "loss": 0.0243,
      "step": 274
    },
    {
      "epoch": 0.5041246562786434,
      "grad_norm": 0.08695763349533081,
      "learning_rate": 0.00015023041474654378,
      "loss": 0.034,
      "step": 275
    },
    {
      "epoch": 0.5059578368469294,
      "grad_norm": 0.07717976719141006,
      "learning_rate": 0.00015004608294930875,
      "loss": 0.0275,
      "step": 276
    },
    {
      "epoch": 0.5077910174152154,
      "grad_norm": 0.0763443112373352,
      "learning_rate": 0.00014986175115207375,
      "loss": 0.0274,
      "step": 277
    },
    {
      "epoch": 0.5096241979835013,
      "grad_norm": 0.06165534630417824,
      "learning_rate": 0.00014967741935483872,
      "loss": 0.0234,
      "step": 278
    },
    {
      "epoch": 0.5114573785517873,
      "grad_norm": 0.08029788732528687,
      "learning_rate": 0.0001494930875576037,
      "loss": 0.0272,
      "step": 279
    },
    {
      "epoch": 0.5132905591200734,
      "grad_norm": 0.08326773345470428,
      "learning_rate": 0.00014930875576036866,
      "loss": 0.0262,
      "step": 280
    },
    {
      "epoch": 0.5151237396883593,
      "grad_norm": 0.08208340406417847,
      "learning_rate": 0.00014912442396313363,
      "loss": 0.0264,
      "step": 281
    },
    {
      "epoch": 0.5169569202566453,
      "grad_norm": 0.0779559388756752,
      "learning_rate": 0.0001489400921658986,
      "loss": 0.0211,
      "step": 282
    },
    {
      "epoch": 0.5187901008249313,
      "grad_norm": 0.09281262755393982,
      "learning_rate": 0.0001487557603686636,
      "loss": 0.0346,
      "step": 283
    },
    {
      "epoch": 0.5206232813932172,
      "grad_norm": 0.09531724452972412,
      "learning_rate": 0.00014857142857142857,
      "loss": 0.0374,
      "step": 284
    },
    {
      "epoch": 0.5224564619615032,
      "grad_norm": 0.10953988879919052,
      "learning_rate": 0.00014838709677419355,
      "loss": 0.038,
      "step": 285
    },
    {
      "epoch": 0.5242896425297892,
      "grad_norm": 0.09352127462625504,
      "learning_rate": 0.00014820276497695854,
      "loss": 0.0378,
      "step": 286
    },
    {
      "epoch": 0.5261228230980751,
      "grad_norm": 0.05571591481566429,
      "learning_rate": 0.00014801843317972351,
      "loss": 0.02,
      "step": 287
    },
    {
      "epoch": 0.5279560036663611,
      "grad_norm": 0.09128769487142563,
      "learning_rate": 0.00014783410138248849,
      "loss": 0.0327,
      "step": 288
    },
    {
      "epoch": 0.5297891842346472,
      "grad_norm": 0.09302745014429092,
      "learning_rate": 0.00014764976958525348,
      "loss": 0.029,
      "step": 289
    },
    {
      "epoch": 0.5316223648029331,
      "grad_norm": 0.05895543098449707,
      "learning_rate": 0.00014746543778801845,
      "loss": 0.0203,
      "step": 290
    },
    {
      "epoch": 0.5334555453712191,
      "grad_norm": 0.08418423682451248,
      "learning_rate": 0.00014728110599078343,
      "loss": 0.0329,
      "step": 291
    },
    {
      "epoch": 0.535288725939505,
      "grad_norm": 0.09295564144849777,
      "learning_rate": 0.0001470967741935484,
      "loss": 0.0401,
      "step": 292
    },
    {
      "epoch": 0.537121906507791,
      "grad_norm": 0.08649936318397522,
      "learning_rate": 0.00014691244239631337,
      "loss": 0.0343,
      "step": 293
    },
    {
      "epoch": 0.538955087076077,
      "grad_norm": 0.08355950564146042,
      "learning_rate": 0.00014672811059907834,
      "loss": 0.0315,
      "step": 294
    },
    {
      "epoch": 0.5407882676443629,
      "grad_norm": 0.06531458348035812,
      "learning_rate": 0.00014654377880184334,
      "loss": 0.0217,
      "step": 295
    },
    {
      "epoch": 0.5426214482126489,
      "grad_norm": 0.09049852937459946,
      "learning_rate": 0.0001463594470046083,
      "loss": 0.0298,
      "step": 296
    },
    {
      "epoch": 0.5444546287809349,
      "grad_norm": 0.08500348031520844,
      "learning_rate": 0.00014617511520737328,
      "loss": 0.0253,
      "step": 297
    },
    {
      "epoch": 0.5462878093492209,
      "grad_norm": 0.09162382036447525,
      "learning_rate": 0.00014599078341013825,
      "loss": 0.0299,
      "step": 298
    },
    {
      "epoch": 0.5481209899175069,
      "grad_norm": 0.07611165195703506,
      "learning_rate": 0.00014580645161290322,
      "loss": 0.0256,
      "step": 299
    },
    {
      "epoch": 0.5499541704857929,
      "grad_norm": 0.09361441433429718,
      "learning_rate": 0.0001456221198156682,
      "loss": 0.029,
      "step": 300
    },
    {
      "epoch": 0.5517873510540788,
      "grad_norm": 0.09738872200250626,
      "learning_rate": 0.0001454377880184332,
      "loss": 0.0288,
      "step": 301
    },
    {
      "epoch": 0.5536205316223648,
      "grad_norm": 0.08112243562936783,
      "learning_rate": 0.00014525345622119816,
      "loss": 0.0311,
      "step": 302
    },
    {
      "epoch": 0.5554537121906508,
      "grad_norm": 0.08341687172651291,
      "learning_rate": 0.00014506912442396313,
      "loss": 0.0316,
      "step": 303
    },
    {
      "epoch": 0.5572868927589367,
      "grad_norm": 0.07399066537618637,
      "learning_rate": 0.0001448847926267281,
      "loss": 0.0257,
      "step": 304
    },
    {
      "epoch": 0.5591200733272227,
      "grad_norm": 0.07534675300121307,
      "learning_rate": 0.0001447004608294931,
      "loss": 0.0259,
      "step": 305
    },
    {
      "epoch": 0.5609532538955087,
      "grad_norm": 0.08384109288454056,
      "learning_rate": 0.00014451612903225807,
      "loss": 0.0277,
      "step": 306
    },
    {
      "epoch": 0.5627864344637947,
      "grad_norm": 0.08456786721944809,
      "learning_rate": 0.00014433179723502307,
      "loss": 0.0375,
      "step": 307
    },
    {
      "epoch": 0.5646196150320807,
      "grad_norm": 0.07096688449382782,
      "learning_rate": 0.00014414746543778804,
      "loss": 0.0241,
      "step": 308
    },
    {
      "epoch": 0.5664527956003667,
      "grad_norm": 0.09720040112733841,
      "learning_rate": 0.00014396313364055301,
      "loss": 0.0333,
      "step": 309
    },
    {
      "epoch": 0.5682859761686526,
      "grad_norm": 0.11616980284452438,
      "learning_rate": 0.00014377880184331799,
      "loss": 0.0431,
      "step": 310
    },
    {
      "epoch": 0.5701191567369386,
      "grad_norm": 0.08847475051879883,
      "learning_rate": 0.00014359447004608296,
      "loss": 0.0366,
      "step": 311
    },
    {
      "epoch": 0.5719523373052245,
      "grad_norm": 0.09937264025211334,
      "learning_rate": 0.00014341013824884793,
      "loss": 0.0371,
      "step": 312
    },
    {
      "epoch": 0.5737855178735105,
      "grad_norm": 0.059645283967256546,
      "learning_rate": 0.00014322580645161293,
      "loss": 0.0232,
      "step": 313
    },
    {
      "epoch": 0.5756186984417965,
      "grad_norm": 0.06614042073488235,
      "learning_rate": 0.0001430414746543779,
      "loss": 0.0234,
      "step": 314
    },
    {
      "epoch": 0.5774518790100825,
      "grad_norm": 0.10562101751565933,
      "learning_rate": 0.00014285714285714287,
      "loss": 0.0387,
      "step": 315
    },
    {
      "epoch": 0.5792850595783685,
      "grad_norm": 0.10281278938055038,
      "learning_rate": 0.00014267281105990784,
      "loss": 0.0339,
      "step": 316
    },
    {
      "epoch": 0.5811182401466545,
      "grad_norm": 0.10205813497304916,
      "learning_rate": 0.0001424884792626728,
      "loss": 0.0426,
      "step": 317
    },
    {
      "epoch": 0.5829514207149404,
      "grad_norm": 0.07571630924940109,
      "learning_rate": 0.00014230414746543778,
      "loss": 0.0311,
      "step": 318
    },
    {
      "epoch": 0.5847846012832264,
      "grad_norm": 0.08494407683610916,
      "learning_rate": 0.00014211981566820278,
      "loss": 0.0301,
      "step": 319
    },
    {
      "epoch": 0.5866177818515124,
      "grad_norm": 0.05080355703830719,
      "learning_rate": 0.00014193548387096775,
      "loss": 0.017,
      "step": 320
    },
    {
      "epoch": 0.5884509624197983,
      "grad_norm": 0.08271925151348114,
      "learning_rate": 0.00014175115207373272,
      "loss": 0.0304,
      "step": 321
    },
    {
      "epoch": 0.5902841429880843,
      "grad_norm": 0.0785074308514595,
      "learning_rate": 0.0001415668202764977,
      "loss": 0.0315,
      "step": 322
    },
    {
      "epoch": 0.5921173235563703,
      "grad_norm": 0.08633995056152344,
      "learning_rate": 0.00014138248847926266,
      "loss": 0.0366,
      "step": 323
    },
    {
      "epoch": 0.5939505041246563,
      "grad_norm": 0.07227237522602081,
      "learning_rate": 0.00014119815668202766,
      "loss": 0.0256,
      "step": 324
    },
    {
      "epoch": 0.5957836846929423,
      "grad_norm": 0.07901857793331146,
      "learning_rate": 0.00014101382488479263,
      "loss": 0.0224,
      "step": 325
    },
    {
      "epoch": 0.5976168652612283,
      "grad_norm": 0.0696311965584755,
      "learning_rate": 0.00014082949308755763,
      "loss": 0.0245,
      "step": 326
    },
    {
      "epoch": 0.5994500458295142,
      "grad_norm": 0.0882924422621727,
      "learning_rate": 0.0001406451612903226,
      "loss": 0.0349,
      "step": 327
    },
    {
      "epoch": 0.6012832263978002,
      "grad_norm": 0.07235971093177795,
      "learning_rate": 0.00014046082949308757,
      "loss": 0.0224,
      "step": 328
    },
    {
      "epoch": 0.6031164069660861,
      "grad_norm": 0.059238139539957047,
      "learning_rate": 0.00014027649769585254,
      "loss": 0.0204,
      "step": 329
    },
    {
      "epoch": 0.6049495875343721,
      "grad_norm": 0.07901135087013245,
      "learning_rate": 0.00014009216589861752,
      "loss": 0.0273,
      "step": 330
    },
    {
      "epoch": 0.6067827681026581,
      "grad_norm": 0.07229477912187576,
      "learning_rate": 0.0001399078341013825,
      "loss": 0.029,
      "step": 331
    },
    {
      "epoch": 0.608615948670944,
      "grad_norm": 0.07699091732501984,
      "learning_rate": 0.00013972350230414749,
      "loss": 0.026,
      "step": 332
    },
    {
      "epoch": 0.6104491292392301,
      "grad_norm": 0.08664306253194809,
      "learning_rate": 0.00013953917050691246,
      "loss": 0.032,
      "step": 333
    },
    {
      "epoch": 0.6122823098075161,
      "grad_norm": 0.09364963322877884,
      "learning_rate": 0.00013935483870967743,
      "loss": 0.0395,
      "step": 334
    },
    {
      "epoch": 0.614115490375802,
      "grad_norm": 0.0945357009768486,
      "learning_rate": 0.0001391705069124424,
      "loss": 0.0326,
      "step": 335
    },
    {
      "epoch": 0.615948670944088,
      "grad_norm": 0.08954328298568726,
      "learning_rate": 0.00013898617511520737,
      "loss": 0.0326,
      "step": 336
    },
    {
      "epoch": 0.617781851512374,
      "grad_norm": 0.09588358551263809,
      "learning_rate": 0.00013880184331797234,
      "loss": 0.0335,
      "step": 337
    },
    {
      "epoch": 0.6196150320806599,
| "grad_norm": 0.0669722706079483, | |
| "learning_rate": 0.00013861751152073734, | |
| "loss": 0.0217, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.6214482126489459, | |
| "grad_norm": 0.06408808380365372, | |
| "learning_rate": 0.0001384331797235023, | |
| "loss": 0.0211, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.6232813932172319, | |
| "grad_norm": 0.07464352995157242, | |
| "learning_rate": 0.00013824884792626728, | |
| "loss": 0.0314, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.6251145737855178, | |
| "grad_norm": 0.08413645625114441, | |
| "learning_rate": 0.00013806451612903225, | |
| "loss": 0.024, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.6269477543538039, | |
| "grad_norm": 0.08873338252305984, | |
| "learning_rate": 0.00013788018433179722, | |
| "loss": 0.0281, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.6287809349220899, | |
| "grad_norm": 0.07131095975637436, | |
| "learning_rate": 0.00013769585253456222, | |
| "loss": 0.0288, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.6306141154903758, | |
| "grad_norm": 0.05225027725100517, | |
| "learning_rate": 0.0001375115207373272, | |
| "loss": 0.0178, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.6324472960586618, | |
| "grad_norm": 0.06744900345802307, | |
| "learning_rate": 0.0001373271889400922, | |
| "loss": 0.0217, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.6342804766269478, | |
| "grad_norm": 0.07560716569423676, | |
| "learning_rate": 0.00013714285714285716, | |
| "loss": 0.0272, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.6361136571952337, | |
| "grad_norm": 0.0856630727648735, | |
| "learning_rate": 0.00013695852534562213, | |
| "loss": 0.0296, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.6379468377635197, | |
| "grad_norm": 0.07206695526838303, | |
| "learning_rate": 0.0001367741935483871, | |
| "loss": 0.0212, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.6397800183318056, | |
| "grad_norm": 0.08413973450660706, | |
| "learning_rate": 0.00013658986175115208, | |
| "loss": 0.0309, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.6416131989000916, | |
| "grad_norm": 0.08946281671524048, | |
| "learning_rate": 0.00013640552995391707, | |
| "loss": 0.0341, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.6434463794683777, | |
| "grad_norm": 0.09534583985805511, | |
| "learning_rate": 0.00013622119815668204, | |
| "loss": 0.0301, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.6452795600366636, | |
| "grad_norm": 0.11210379004478455, | |
| "learning_rate": 0.00013603686635944702, | |
| "loss": 0.0546, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.6471127406049496, | |
| "grad_norm": 0.08674897998571396, | |
| "learning_rate": 0.000135852534562212, | |
| "loss": 0.034, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.6489459211732356, | |
| "grad_norm": 0.0715477392077446, | |
| "learning_rate": 0.00013566820276497696, | |
| "loss": 0.0294, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.6507791017415215, | |
| "grad_norm": 0.06651375442743301, | |
| "learning_rate": 0.00013548387096774193, | |
| "loss": 0.0246, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.6526122823098075, | |
| "grad_norm": 0.103563591837883, | |
| "learning_rate": 0.00013529953917050693, | |
| "loss": 0.048, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.6544454628780935, | |
| "grad_norm": 0.11457951366901398, | |
| "learning_rate": 0.0001351152073732719, | |
| "loss": 0.0287, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.6562786434463794, | |
| "grad_norm": 0.060861390084028244, | |
| "learning_rate": 0.00013493087557603687, | |
| "loss": 0.025, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.6581118240146654, | |
| "grad_norm": 0.058961618691682816, | |
| "learning_rate": 0.00013474654377880184, | |
| "loss": 0.0208, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.6599450045829515, | |
| "grad_norm": 0.10199262201786041, | |
| "learning_rate": 0.0001345622119815668, | |
| "loss": 0.0319, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.6617781851512374, | |
| "grad_norm": 0.0710933730006218, | |
| "learning_rate": 0.00013437788018433178, | |
| "loss": 0.0245, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.6636113657195234, | |
| "grad_norm": 0.06901897490024567, | |
| "learning_rate": 0.00013419354838709678, | |
| "loss": 0.0189, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.6654445462878094, | |
| "grad_norm": 0.08602811396121979, | |
| "learning_rate": 0.00013400921658986175, | |
| "loss": 0.0344, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.6672777268560953, | |
| "grad_norm": 0.052524056285619736, | |
| "learning_rate": 0.00013382488479262675, | |
| "loss": 0.0211, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.6691109074243813, | |
| "grad_norm": 0.07917725294828415, | |
| "learning_rate": 0.00013364055299539172, | |
| "loss": 0.0285, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.6709440879926672, | |
| "grad_norm": 0.0799289420247078, | |
| "learning_rate": 0.0001334562211981567, | |
| "loss": 0.0296, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.6727772685609532, | |
| "grad_norm": 0.08307263255119324, | |
| "learning_rate": 0.00013327188940092166, | |
| "loss": 0.0259, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.6746104491292392, | |
| "grad_norm": 0.08490724861621857, | |
| "learning_rate": 0.00013308755760368666, | |
| "loss": 0.0317, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.6764436296975253, | |
| "grad_norm": 0.07663150131702423, | |
| "learning_rate": 0.00013290322580645163, | |
| "loss": 0.0279, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.6782768102658112, | |
| "grad_norm": 0.08253347873687744, | |
| "learning_rate": 0.0001327188940092166, | |
| "loss": 0.0323, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.6801099908340972, | |
| "grad_norm": 0.06597882509231567, | |
| "learning_rate": 0.00013253456221198157, | |
| "loss": 0.0275, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.6819431714023831, | |
| "grad_norm": 0.08167731761932373, | |
| "learning_rate": 0.00013235023041474655, | |
| "loss": 0.0256, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.6837763519706691, | |
| "grad_norm": 0.1000673770904541, | |
| "learning_rate": 0.00013216589861751152, | |
| "loss": 0.0428, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.6856095325389551, | |
| "grad_norm": 0.08605007827281952, | |
| "learning_rate": 0.00013198156682027652, | |
| "loss": 0.0365, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.687442713107241, | |
| "grad_norm": 0.06620384752750397, | |
| "learning_rate": 0.0001317972350230415, | |
| "loss": 0.0212, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.689275893675527, | |
| "grad_norm": 0.08665503561496735, | |
| "learning_rate": 0.00013161290322580646, | |
| "loss": 0.0311, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.6911090742438131, | |
| "grad_norm": 0.06734751164913177, | |
| "learning_rate": 0.00013142857142857143, | |
| "loss": 0.0252, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.692942254812099, | |
| "grad_norm": 0.06160259246826172, | |
| "learning_rate": 0.0001312442396313364, | |
| "loss": 0.0238, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.694775435380385, | |
| "grad_norm": 0.0831260159611702, | |
| "learning_rate": 0.00013105990783410137, | |
| "loss": 0.0322, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.696608615948671, | |
| "grad_norm": 0.07255002856254578, | |
| "learning_rate": 0.00013087557603686637, | |
| "loss": 0.0265, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.6984417965169569, | |
| "grad_norm": 0.08250346034765244, | |
| "learning_rate": 0.00013069124423963134, | |
| "loss": 0.031, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.7002749770852429, | |
| "grad_norm": 0.06267958134412766, | |
| "learning_rate": 0.0001305069124423963, | |
| "loss": 0.023, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.7021081576535289, | |
| "grad_norm": 0.08702743053436279, | |
| "learning_rate": 0.0001303225806451613, | |
| "loss": 0.0294, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.7039413382218148, | |
| "grad_norm": 0.08447282761335373, | |
| "learning_rate": 0.00013013824884792628, | |
| "loss": 0.0282, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.7057745187901008, | |
| "grad_norm": 0.0854048877954483, | |
| "learning_rate": 0.00012995391705069125, | |
| "loss": 0.0301, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.7076076993583869, | |
| "grad_norm": 0.08276678621768951, | |
| "learning_rate": 0.00012976958525345625, | |
| "loss": 0.0273, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.7094408799266728, | |
| "grad_norm": 0.11089053750038147, | |
| "learning_rate": 0.00012958525345622122, | |
| "loss": 0.0385, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.7112740604949588, | |
| "grad_norm": 0.08636972308158875, | |
| "learning_rate": 0.0001294009216589862, | |
| "loss": 0.0272, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.7131072410632447, | |
| "grad_norm": 0.09062766283750534, | |
| "learning_rate": 0.00012921658986175116, | |
| "loss": 0.0327, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.7149404216315307, | |
| "grad_norm": 0.0708693340420723, | |
| "learning_rate": 0.00012903225806451613, | |
| "loss": 0.0249, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.7167736021998167, | |
| "grad_norm": 0.07402048259973526, | |
| "learning_rate": 0.0001288479262672811, | |
| "loss": 0.0184, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.7186067827681026, | |
| "grad_norm": 0.13229697942733765, | |
| "learning_rate": 0.00012866359447004608, | |
| "loss": 0.0489, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.7204399633363886, | |
| "grad_norm": 0.06817866861820221, | |
| "learning_rate": 0.00012847926267281107, | |
| "loss": 0.0233, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.7222731439046746, | |
| "grad_norm": 0.09490000456571579, | |
| "learning_rate": 0.00012829493087557605, | |
| "loss": 0.0303, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.7241063244729606, | |
| "grad_norm": 0.06846782565116882, | |
| "learning_rate": 0.00012811059907834102, | |
| "loss": 0.0238, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.7259395050412466, | |
| "grad_norm": 0.09812495112419128, | |
| "learning_rate": 0.000127926267281106, | |
| "loss": 0.0324, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.7277726856095326, | |
| "grad_norm": 0.08136089891195297, | |
| "learning_rate": 0.00012774193548387096, | |
| "loss": 0.0309, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.7296058661778185, | |
| "grad_norm": 0.07089602202177048, | |
| "learning_rate": 0.00012755760368663593, | |
| "loss": 0.0251, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.7314390467461045, | |
| "grad_norm": 0.10192608833312988, | |
| "learning_rate": 0.00012737327188940093, | |
| "loss": 0.0345, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.7332722273143905, | |
| "grad_norm": 0.07810863107442856, | |
| "learning_rate": 0.0001271889400921659, | |
| "loss": 0.0264, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.7351054078826764, | |
| "grad_norm": 0.0839025229215622, | |
| "learning_rate": 0.00012700460829493087, | |
| "loss": 0.0279, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.7369385884509624, | |
| "grad_norm": 0.08430635184049606, | |
| "learning_rate": 0.00012682027649769587, | |
| "loss": 0.0345, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.7387717690192483, | |
| "grad_norm": 0.05288069695234299, | |
| "learning_rate": 0.00012663594470046084, | |
| "loss": 0.0181, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.7406049495875344, | |
| "grad_norm": 0.06337739527225494, | |
| "learning_rate": 0.0001264516129032258, | |
| "loss": 0.024, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.7424381301558204, | |
| "grad_norm": 0.08434322476387024, | |
| "learning_rate": 0.0001262672811059908, | |
| "loss": 0.0277, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.7442713107241063, | |
| "grad_norm": 0.07922643423080444, | |
| "learning_rate": 0.00012608294930875578, | |
| "loss": 0.0277, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.7461044912923923, | |
| "grad_norm": 0.0888214185833931, | |
| "learning_rate": 0.00012589861751152075, | |
| "loss": 0.0405, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.7479376718606783, | |
| "grad_norm": 0.08394885063171387, | |
| "learning_rate": 0.00012571428571428572, | |
| "loss": 0.0268, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.7497708524289642, | |
| "grad_norm": 0.08741919696331024, | |
| "learning_rate": 0.0001255299539170507, | |
| "loss": 0.034, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.7516040329972502, | |
| "grad_norm": 0.08985461294651031, | |
| "learning_rate": 0.00012534562211981566, | |
| "loss": 0.0349, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.7534372135655362, | |
| "grad_norm": 0.055699512362480164, | |
| "learning_rate": 0.00012516129032258066, | |
| "loss": 0.0205, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.7552703941338221, | |
| "grad_norm": 0.09050854295492172, | |
| "learning_rate": 0.00012497695852534563, | |
| "loss": 0.0283, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.7571035747021082, | |
| "grad_norm": 0.11931566148996353, | |
| "learning_rate": 0.0001247926267281106, | |
| "loss": 0.0614, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.7589367552703942, | |
| "grad_norm": 0.06575947254896164, | |
| "learning_rate": 0.00012460829493087558, | |
| "loss": 0.0214, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.7607699358386801, | |
| "grad_norm": 0.0591006726026535, | |
| "learning_rate": 0.00012442396313364055, | |
| "loss": 0.0198, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.7626031164069661, | |
| "grad_norm": 0.12222932279109955, | |
| "learning_rate": 0.00012423963133640552, | |
| "loss": 0.0453, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.7644362969752521, | |
| "grad_norm": 0.07346849143505096, | |
| "learning_rate": 0.00012405529953917052, | |
| "loss": 0.027, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.766269477543538, | |
| "grad_norm": 0.05644283443689346, | |
| "learning_rate": 0.0001238709677419355, | |
| "loss": 0.0225, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.768102658111824, | |
| "grad_norm": 0.07721511274576187, | |
| "learning_rate": 0.00012368663594470046, | |
| "loss": 0.0279, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.76993583868011, | |
| "grad_norm": 0.08552254736423492, | |
| "learning_rate": 0.00012350230414746543, | |
| "loss": 0.0294, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.7717690192483959, | |
| "grad_norm": 0.06860620528459549, | |
| "learning_rate": 0.00012331797235023043, | |
| "loss": 0.0287, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.773602199816682, | |
| "grad_norm": 0.09986478835344315, | |
| "learning_rate": 0.0001231336405529954, | |
| "loss": 0.0396, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.775435380384968, | |
| "grad_norm": 0.08369968086481094, | |
| "learning_rate": 0.00012294930875576037, | |
| "loss": 0.0251, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.7772685609532539, | |
| "grad_norm": 0.06935148686170578, | |
| "learning_rate": 0.00012276497695852537, | |
| "loss": 0.0261, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.7791017415215399, | |
| "grad_norm": 0.08505766093730927, | |
| "learning_rate": 0.00012258064516129034, | |
| "loss": 0.0318, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.7809349220898258, | |
| "grad_norm": 0.07806456089019775, | |
| "learning_rate": 0.0001223963133640553, | |
| "loss": 0.0264, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.7827681026581118, | |
| "grad_norm": 0.10398893058300018, | |
| "learning_rate": 0.00012221198156682028, | |
| "loss": 0.0397, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.7846012832263978, | |
| "grad_norm": 0.07210515439510345, | |
| "learning_rate": 0.00012202764976958525, | |
| "loss": 0.0292, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.7864344637946837, | |
| "grad_norm": 0.06596438586711884, | |
| "learning_rate": 0.00012184331797235025, | |
| "loss": 0.0233, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.7882676443629697, | |
| "grad_norm": 0.11197281628847122, | |
| "learning_rate": 0.00012165898617511522, | |
| "loss": 0.0362, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.7901008249312558, | |
| "grad_norm": 0.10142118483781815, | |
| "learning_rate": 0.0001214746543778802, | |
| "loss": 0.0384, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.7919340054995417, | |
| "grad_norm": 0.08676854521036148, | |
| "learning_rate": 0.00012129032258064516, | |
| "loss": 0.0287, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.7937671860678277, | |
| "grad_norm": 0.08402638882398605, | |
| "learning_rate": 0.00012110599078341014, | |
| "loss": 0.03, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.7956003666361137, | |
| "grad_norm": 0.07576748728752136, | |
| "learning_rate": 0.00012092165898617511, | |
| "loss": 0.027, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.7974335472043996, | |
| "grad_norm": 0.08877792209386826, | |
| "learning_rate": 0.0001207373271889401, | |
| "loss": 0.0294, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.7992667277726856, | |
| "grad_norm": 0.09211436659097672, | |
| "learning_rate": 0.00012055299539170508, | |
| "loss": 0.0306, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.8010999083409716, | |
| "grad_norm": 0.0953056812286377, | |
| "learning_rate": 0.00012036866359447006, | |
| "loss": 0.0343, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.8029330889092575, | |
| "grad_norm": 0.08642080426216125, | |
| "learning_rate": 0.00012018433179723503, | |
| "loss": 0.0343, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.8047662694775435, | |
| "grad_norm": 0.09826899319887161, | |
| "learning_rate": 0.00012, | |
| "loss": 0.0301, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.8065994500458296, | |
| "grad_norm": 0.09221632778644562, | |
| "learning_rate": 0.00011981566820276497, | |
| "loss": 0.0342, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.8084326306141155, | |
| "grad_norm": 0.091212198138237, | |
| "learning_rate": 0.00011963133640552997, | |
| "loss": 0.0388, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.8102658111824015, | |
| "grad_norm": 0.07262887060642242, | |
| "learning_rate": 0.00011944700460829494, | |
| "loss": 0.0269, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.8120989917506874, | |
| "grad_norm": 0.09429402649402618, | |
| "learning_rate": 0.00011926267281105991, | |
| "loss": 0.0344, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.8139321723189734, | |
| "grad_norm": 0.07602915167808533, | |
| "learning_rate": 0.00011907834101382489, | |
| "loss": 0.0229, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.8157653528872594, | |
| "grad_norm": 0.08622591942548752, | |
| "learning_rate": 0.00011889400921658986, | |
| "loss": 0.0299, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.8175985334555453, | |
| "grad_norm": 0.07046458125114441, | |
| "learning_rate": 0.00011870967741935484, | |
| "loss": 0.0283, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.8194317140238313, | |
| "grad_norm": 0.06315992772579193, | |
| "learning_rate": 0.00011852534562211983, | |
| "loss": 0.0231, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.8212648945921174, | |
| "grad_norm": 0.11382705718278885, | |
| "learning_rate": 0.00011834101382488481, | |
| "loss": 0.0292, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.8230980751604033, | |
| "grad_norm": 0.0543043278157711, | |
| "learning_rate": 0.00011815668202764978, | |
| "loss": 0.021, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.8249312557286893, | |
| "grad_norm": 0.08020134270191193, | |
| "learning_rate": 0.00011797235023041475, | |
| "loss": 0.024, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.8267644362969753, | |
| "grad_norm": 0.08001308888196945, | |
| "learning_rate": 0.00011778801843317972, | |
| "loss": 0.026, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.8285976168652612, | |
| "grad_norm": 0.07345568388700485, | |
| "learning_rate": 0.0001176036866359447, | |
| "loss": 0.0252, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.8304307974335472, | |
| "grad_norm": 0.08047681301832199, | |
| "learning_rate": 0.00011741935483870967, | |
| "loss": 0.0382, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.8322639780018332, | |
| "grad_norm": 0.07006672024726868, | |
| "learning_rate": 0.00011723502304147466, | |
| "loss": 0.0282, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.8340971585701191, | |
| "grad_norm": 0.08989156782627106, | |
| "learning_rate": 0.00011705069124423964, | |
| "loss": 0.0358, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.8359303391384051, | |
| "grad_norm": 0.07250788062810898, | |
| "learning_rate": 0.00011686635944700462, | |
| "loss": 0.0282, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.8377635197066912, | |
| "grad_norm": 0.07556509971618652, | |
| "learning_rate": 0.00011668202764976959, | |
| "loss": 0.0285, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.8395967002749771, | |
| "grad_norm": 0.07886163890361786, | |
| "learning_rate": 0.00011649769585253456, | |
| "loss": 0.0245, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.8414298808432631, | |
| "grad_norm": 0.10710224509239197, | |
| "learning_rate": 0.00011631336405529953, | |
| "loss": 0.0448, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.843263061411549, | |
| "grad_norm": 0.092331163585186, | |
| "learning_rate": 0.00011612903225806453, | |
| "loss": 0.0319, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.845096241979835, | |
| "grad_norm": 0.07957134395837784, | |
| "learning_rate": 0.0001159447004608295, | |
| "loss": 0.0308, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.846929422548121, | |
| "grad_norm": 0.06576257944107056, | |
| "learning_rate": 0.00011576036866359447, | |
| "loss": 0.0244, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.8487626031164069, | |
| "grad_norm": 0.060584548860788345, | |
| "learning_rate": 0.00011557603686635945, | |
| "loss": 0.0211, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.8505957836846929, | |
| "grad_norm": 0.0770929604768753, | |
| "learning_rate": 0.00011539170506912442, | |
| "loss": 0.0283, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.8524289642529789, | |
| "grad_norm": 0.10374728590250015, | |
| "learning_rate": 0.0001152073732718894, | |
| "loss": 0.031, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.854262144821265, | |
| "grad_norm": 0.08729080855846405, | |
| "learning_rate": 0.00011502304147465439, | |
| "loss": 0.0292, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.8560953253895509, | |
| "grad_norm": 0.08986352384090424, | |
| "learning_rate": 0.00011483870967741937, | |
| "loss": 0.0416, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.8579285059578369, | |
| "grad_norm": 0.09187289327383041, | |
| "learning_rate": 0.00011465437788018434, | |
| "loss": 0.0379, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.8597616865261228, | |
| "grad_norm": 0.07304208725690842, | |
| "learning_rate": 0.00011447004608294931, | |
| "loss": 0.0233, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.8615948670944088, | |
| "grad_norm": 0.08240114897489548, | |
| "learning_rate": 0.00011428571428571428, | |
| "loss": 0.0231, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.8634280476626948, | |
| "grad_norm": 0.08332082629203796, | |
| "learning_rate": 0.00011410138248847925, | |
| "loss": 0.0256, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.8652612282309807, | |
| "grad_norm": 0.09715988487005234, | |
| "learning_rate": 0.00011391705069124425, | |
| "loss": 0.0389, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.8670944087992667, | |
| "grad_norm": 0.0702865719795227, | |
| "learning_rate": 0.00011373271889400922, | |
| "loss": 0.0241, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.8689275893675527, | |
| "grad_norm": 0.07741749286651611, | |
| "learning_rate": 0.0001135483870967742, | |
| "loss": 0.0294, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.8707607699358387, | |
| "grad_norm": 0.08457162976264954, | |
| "learning_rate": 0.00011336405529953918, | |
| "loss": 0.0273, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.8725939505041247, | |
| "grad_norm": 0.08684583753347397, | |
| "learning_rate": 0.00011317972350230415, | |
| "loss": 0.0301, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.8744271310724107, | |
| "grad_norm": 0.07238451391458511, | |
| "learning_rate": 0.00011299539170506912, | |
| "loss": 0.0299, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.8762603116406966, | |
| "grad_norm": 0.06936534494161606, | |
| "learning_rate": 0.00011281105990783412, | |
| "loss": 0.0255, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.8780934922089826, | |
| "grad_norm": 0.0781572014093399, | |
| "learning_rate": 0.00011262672811059909, | |
| "loss": 0.0272, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.8799266727772685, | |
| "grad_norm": 0.05346061289310455, | |
| "learning_rate": 0.00011244239631336406, | |
| "loss": 0.0186, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.8817598533455545, | |
| "grad_norm": 0.10047460347414017, | |
| "learning_rate": 0.00011225806451612903, | |
| "loss": 0.0261, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.8835930339138405, | |
| "grad_norm": 0.09505181759595871, | |
| "learning_rate": 0.000112073732718894, | |
| "loss": 0.0293, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.8854262144821264, | |
| "grad_norm": 0.08039534837007523, | |
| "learning_rate": 0.00011188940092165898, | |
| "loss": 0.0339, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.8872593950504125, | |
| "grad_norm": 0.0761064887046814, | |
| "learning_rate": 0.00011170506912442397, | |
| "loss": 0.032, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.8890925756186985, | |
| "grad_norm": 0.0830230712890625, | |
| "learning_rate": 0.00011152073732718894, | |
| "loss": 0.0282, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.8909257561869844, | |
| "grad_norm": 0.0726420059800148, | |
| "learning_rate": 0.00011133640552995393, | |
| "loss": 0.0293, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.8927589367552704, | |
| "grad_norm": 0.07475108653306961, | |
| "learning_rate": 0.0001111520737327189, | |
| "loss": 0.0311, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.8945921173235564, | |
| "grad_norm": 0.07357434183359146, | |
| "learning_rate": 0.00011096774193548387, | |
| "loss": 0.0255, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.8964252978918423, | |
| "grad_norm": 0.09986915439367294, | |
| "learning_rate": 0.00011078341013824884, | |
| "loss": 0.0312, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.8982584784601283, | |
| "grad_norm": 0.10107365250587463, | |
| "learning_rate": 0.00011059907834101384, | |
| "loss": 0.037, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.9000916590284143, | |
| "grad_norm": 0.08120604604482651, | |
| "learning_rate": 0.00011041474654377881, | |
| "loss": 0.035, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.9019248395967002, | |
| "grad_norm": 0.05490780994296074, | |
| "learning_rate": 0.00011023041474654378, | |
| "loss": 0.0202, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.9037580201649863, | |
| "grad_norm": 0.10805200785398483, | |
| "learning_rate": 0.00011004608294930875, | |
| "loss": 0.045, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.9055912007332723, | |
| "grad_norm": 0.05866795405745506, | |
| "learning_rate": 0.00010986175115207374, | |
| "loss": 0.0203, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.9074243813015582, | |
| "grad_norm": 0.09001267701387405, | |
| "learning_rate": 0.00010967741935483871, | |
| "loss": 0.0286, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.9092575618698442, | |
| "grad_norm": 0.0694073960185051, | |
| "learning_rate": 0.00010949308755760371, | |
| "loss": 0.0318, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.9110907424381302, | |
| "grad_norm": 0.06593529880046844, | |
| "learning_rate": 0.00010930875576036868, | |
| "loss": 0.0227, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.9129239230064161, | |
| "grad_norm": 0.06799976527690887, | |
| "learning_rate": 0.00010912442396313365, | |
| "loss": 0.024, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.9147571035747021, | |
| "grad_norm": 0.07797495275735855, | |
| "learning_rate": 0.00010894009216589862, | |
| "loss": 0.0268, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.916590284142988, | |
| "grad_norm": 0.07280347496271133, | |
| "learning_rate": 0.00010875576036866359, | |
| "loss": 0.0252, | |
| "step": 500 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1090, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.5010830413889536e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
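
The record above is the serialized trainer state at step 500: per-step entries in `log_history` (epoch, grad_norm, learning_rate, loss, step) followed by the run-level fields (`logging_steps`, `max_steps`, `num_train_epochs`, `save_steps`, `total_flos`, `train_batch_size`). Below is a minimal sketch of how such a state file could be loaded and summarized; it assumes the record is saved under the conventional name `trainer_state.json` (the filename/path is an assumption, not taken from the record itself) and relies only on the keys visible in the entries above.

```python
# Minimal sketch (assumption): the record above is saved as "trainer_state.json",
# the file name the Hugging Face Trainer normally uses inside a checkpoint folder.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

history = state["log_history"]            # one dict per logged step
steps   = [h["step"] for h in history]
losses  = [h["loss"] for h in history]
lrs     = [h["learning_rate"] for h in history]

print(f"logged steps : {steps[0]}..{steps[-1]} of {state['max_steps']}")
print(f"final epoch  : {state['epoch']:.4f}")
print(f"mean loss    : {sum(losses) / len(losses):.4f}")
print(f"lr range     : {max(lrs):.6g} -> {min(lrs):.6g}")
```

The same `log_history` list could equally feed a loss or learning-rate curve plot; nothing beyond the per-step keys shown above is assumed here.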