Spaces:
Running
on
A10G
Running
on
A10G
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 10.0, | |
| "eval_steps": 500, | |
| "global_step": 3140, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.003189792663476874, | |
| "grad_norm": 1.5252034664154053, | |
| "learning_rate": 0.0, | |
| "loss": 5.7289, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.03189792663476874, | |
| "grad_norm": 1.3174983263015747, | |
| "learning_rate": 3.6e-05, | |
| "loss": 5.6677, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.06379585326953748, | |
| "grad_norm": 0.8419873714447021, | |
| "learning_rate": 7.6e-05, | |
| "loss": 5.3345, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.09569377990430622, | |
| "grad_norm": 0.501588761806488, | |
| "learning_rate": 0.000116, | |
| "loss": 4.9165, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.12759170653907495, | |
| "grad_norm": 0.6858522891998291, | |
| "learning_rate": 0.00015600000000000002, | |
| "loss": 4.6209, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.1594896331738437, | |
| "grad_norm": 0.8215904235839844, | |
| "learning_rate": 0.000196, | |
| "loss": 4.4453, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.19138755980861244, | |
| "grad_norm": 1.1306647062301636, | |
| "learning_rate": 0.00019941747572815535, | |
| "loss": 4.3048, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.22328548644338117, | |
| "grad_norm": 0.7509264945983887, | |
| "learning_rate": 0.00019877022653721685, | |
| "loss": 4.2047, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.2551834130781499, | |
| "grad_norm": 0.7645938992500305, | |
| "learning_rate": 0.00019812297734627833, | |
| "loss": 4.1461, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.28708133971291866, | |
| "grad_norm": 0.7448317408561707, | |
| "learning_rate": 0.0001974757281553398, | |
| "loss": 4.1107, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.3189792663476874, | |
| "grad_norm": 1.1897181272506714, | |
| "learning_rate": 0.0001968284789644013, | |
| "loss": 4.0805, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.3508771929824561, | |
| "grad_norm": 0.9169389605522156, | |
| "learning_rate": 0.0001961812297734628, | |
| "loss": 4.0398, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.3827751196172249, | |
| "grad_norm": 1.102384090423584, | |
| "learning_rate": 0.0001955339805825243, | |
| "loss": 4.0247, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.41467304625199364, | |
| "grad_norm": 0.8408002257347107, | |
| "learning_rate": 0.00019488673139158577, | |
| "loss": 4.0037, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.44657097288676234, | |
| "grad_norm": 0.8749285936355591, | |
| "learning_rate": 0.00019423948220064724, | |
| "loss": 3.9804, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.4784688995215311, | |
| "grad_norm": 1.2045485973358154, | |
| "learning_rate": 0.00019359223300970875, | |
| "loss": 3.9795, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.5103668261562998, | |
| "grad_norm": 1.443690538406372, | |
| "learning_rate": 0.00019294498381877025, | |
| "loss": 3.9416, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.5422647527910686, | |
| "grad_norm": 1.188532829284668, | |
| "learning_rate": 0.0001922977346278317, | |
| "loss": 3.9073, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.5741626794258373, | |
| "grad_norm": 1.053863525390625, | |
| "learning_rate": 0.0001916504854368932, | |
| "loss": 3.888, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.6060606060606061, | |
| "grad_norm": 1.6133322715759277, | |
| "learning_rate": 0.0001910032362459547, | |
| "loss": 3.8654, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.6379585326953748, | |
| "grad_norm": 1.6843335628509521, | |
| "learning_rate": 0.0001903559870550162, | |
| "loss": 3.8268, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.6698564593301436, | |
| "grad_norm": 2.04089617729187, | |
| "learning_rate": 0.00018970873786407767, | |
| "loss": 3.8068, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.7017543859649122, | |
| "grad_norm": 1.8679882287979126, | |
| "learning_rate": 0.00018906148867313917, | |
| "loss": 3.7658, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.733652312599681, | |
| "grad_norm": 1.8345612287521362, | |
| "learning_rate": 0.00018841423948220065, | |
| "loss": 3.7555, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.7655502392344498, | |
| "grad_norm": 1.8040602207183838, | |
| "learning_rate": 0.00018776699029126215, | |
| "loss": 3.7112, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.7974481658692185, | |
| "grad_norm": 2.027642250061035, | |
| "learning_rate": 0.00018711974110032366, | |
| "loss": 3.7088, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.8293460925039873, | |
| "grad_norm": 2.0571959018707275, | |
| "learning_rate": 0.0001864724919093851, | |
| "loss": 3.6897, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.861244019138756, | |
| "grad_norm": 2.0399420261383057, | |
| "learning_rate": 0.0001858252427184466, | |
| "loss": 3.6013, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.8931419457735247, | |
| "grad_norm": 1.7937105894088745, | |
| "learning_rate": 0.0001851779935275081, | |
| "loss": 3.5909, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.9250398724082934, | |
| "grad_norm": 2.259948968887329, | |
| "learning_rate": 0.0001845307443365696, | |
| "loss": 3.5773, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.9569377990430622, | |
| "grad_norm": 3.999640941619873, | |
| "learning_rate": 0.00018388349514563107, | |
| "loss": 3.5398, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.988835725677831, | |
| "grad_norm": 2.760211706161499, | |
| "learning_rate": 0.00018323624595469257, | |
| "loss": 3.4636, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.0191387559808613, | |
| "grad_norm": 3.569462537765503, | |
| "learning_rate": 0.00018258899676375405, | |
| "loss": 3.208, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.0510366826156299, | |
| "grad_norm": 5.000574111938477, | |
| "learning_rate": 0.00018194174757281555, | |
| "loss": 3.3652, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.0829346092503986, | |
| "grad_norm": 3.697556734085083, | |
| "learning_rate": 0.00018129449838187703, | |
| "loss": 3.2778, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.1148325358851674, | |
| "grad_norm": 3.266451835632324, | |
| "learning_rate": 0.0001806472491909385, | |
| "loss": 3.1285, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.1467304625199362, | |
| "grad_norm": 3.4669878482818604, | |
| "learning_rate": 0.00018, | |
| "loss": 3.1121, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.178628389154705, | |
| "grad_norm": 3.9828436374664307, | |
| "learning_rate": 0.0001793527508090615, | |
| "loss": 3.0454, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.2105263157894737, | |
| "grad_norm": 4.253013610839844, | |
| "learning_rate": 0.000178705501618123, | |
| "loss": 2.9481, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.2424242424242424, | |
| "grad_norm": 3.906336784362793, | |
| "learning_rate": 0.00017805825242718447, | |
| "loss": 2.9171, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.2743221690590112, | |
| "grad_norm": 5.212265968322754, | |
| "learning_rate": 0.00017741100323624595, | |
| "loss": 2.7956, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.30622009569378, | |
| "grad_norm": 5.57565975189209, | |
| "learning_rate": 0.00017676375404530745, | |
| "loss": 2.7399, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.3381180223285487, | |
| "grad_norm": 7.32366418838501, | |
| "learning_rate": 0.00017611650485436896, | |
| "loss": 2.5755, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.3700159489633175, | |
| "grad_norm": 5.175110816955566, | |
| "learning_rate": 0.00017546925566343043, | |
| "loss": 2.44, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.401913875598086, | |
| "grad_norm": 6.860726356506348, | |
| "learning_rate": 0.0001748220064724919, | |
| "loss": 2.4152, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.4338118022328548, | |
| "grad_norm": 6.029066562652588, | |
| "learning_rate": 0.0001741747572815534, | |
| "loss": 2.3313, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.4657097288676235, | |
| "grad_norm": 5.50711727142334, | |
| "learning_rate": 0.0001735275080906149, | |
| "loss": 2.2367, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.4976076555023923, | |
| "grad_norm": 7.61848258972168, | |
| "learning_rate": 0.0001728802588996764, | |
| "loss": 2.1664, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.529505582137161, | |
| "grad_norm": 8.040018081665039, | |
| "learning_rate": 0.00017223300970873787, | |
| "loss": 1.9997, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.5614035087719298, | |
| "grad_norm": 7.164926528930664, | |
| "learning_rate": 0.00017158576051779935, | |
| "loss": 1.9375, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.5933014354066986, | |
| "grad_norm": 8.465059280395508, | |
| "learning_rate": 0.00017093851132686085, | |
| "loss": 1.7925, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.6251993620414673, | |
| "grad_norm": 9.419471740722656, | |
| "learning_rate": 0.00017029126213592236, | |
| "loss": 1.8445, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.657097288676236, | |
| "grad_norm": 8.507338523864746, | |
| "learning_rate": 0.00016964401294498383, | |
| "loss": 1.7681, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.6889952153110048, | |
| "grad_norm": 9.474655151367188, | |
| "learning_rate": 0.0001689967637540453, | |
| "loss": 1.5889, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.7208931419457736, | |
| "grad_norm": 9.287930488586426, | |
| "learning_rate": 0.00016834951456310682, | |
| "loss": 1.5392, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.7527910685805423, | |
| "grad_norm": 6.526854991912842, | |
| "learning_rate": 0.0001677022653721683, | |
| "loss": 1.4232, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.784688995215311, | |
| "grad_norm": 8.358418464660645, | |
| "learning_rate": 0.0001670550161812298, | |
| "loss": 1.2953, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.8165869218500799, | |
| "grad_norm": 7.086184978485107, | |
| "learning_rate": 0.00016640776699029127, | |
| "loss": 1.3161, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.8484848484848486, | |
| "grad_norm": 8.247760772705078, | |
| "learning_rate": 0.00016576051779935275, | |
| "loss": 1.0697, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.8803827751196174, | |
| "grad_norm": 7.4881720542907715, | |
| "learning_rate": 0.00016511326860841425, | |
| "loss": 1.0045, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.912280701754386, | |
| "grad_norm": 9.438504219055176, | |
| "learning_rate": 0.00016446601941747573, | |
| "loss": 0.9993, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.9441786283891547, | |
| "grad_norm": 7.289414882659912, | |
| "learning_rate": 0.0001638187702265372, | |
| "loss": 0.8662, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.9760765550239234, | |
| "grad_norm": 9.634711265563965, | |
| "learning_rate": 0.0001631715210355987, | |
| "loss": 0.9138, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.006379585326954, | |
| "grad_norm": 7.288631916046143, | |
| "learning_rate": 0.0001625242718446602, | |
| "loss": 0.8087, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.0382775119617227, | |
| "grad_norm": 11.611783027648926, | |
| "learning_rate": 0.0001618770226537217, | |
| "loss": 0.6835, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.0701754385964914, | |
| "grad_norm": 7.48546028137207, | |
| "learning_rate": 0.00016122977346278317, | |
| "loss": 0.5253, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.1020733652312598, | |
| "grad_norm": 8.046589851379395, | |
| "learning_rate": 0.00016058252427184465, | |
| "loss": 0.555, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.1339712918660285, | |
| "grad_norm": 5.977556228637695, | |
| "learning_rate": 0.00015993527508090615, | |
| "loss": 0.5774, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.1658692185007973, | |
| "grad_norm": 7.377207279205322, | |
| "learning_rate": 0.00015928802588996766, | |
| "loss": 0.5707, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.197767145135566, | |
| "grad_norm": 6.869225025177002, | |
| "learning_rate": 0.00015864077669902913, | |
| "loss": 0.5913, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.229665071770335, | |
| "grad_norm": 5.870683193206787, | |
| "learning_rate": 0.0001579935275080906, | |
| "loss": 0.4496, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.2615629984051036, | |
| "grad_norm": 7.628873348236084, | |
| "learning_rate": 0.00015734627831715212, | |
| "loss": 0.4418, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 2.2934609250398723, | |
| "grad_norm": 7.419798374176025, | |
| "learning_rate": 0.0001566990291262136, | |
| "loss": 0.3939, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 2.325358851674641, | |
| "grad_norm": 5.196732521057129, | |
| "learning_rate": 0.0001560517799352751, | |
| "loss": 0.3817, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 2.35725677830941, | |
| "grad_norm": 7.466589450836182, | |
| "learning_rate": 0.00015540453074433657, | |
| "loss": 0.3564, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 2.3891547049441786, | |
| "grad_norm": 4.85908317565918, | |
| "learning_rate": 0.00015475728155339805, | |
| "loss": 0.3566, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.4210526315789473, | |
| "grad_norm": 6.417631149291992, | |
| "learning_rate": 0.00015411003236245955, | |
| "loss": 0.3637, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 2.452950558213716, | |
| "grad_norm": 8.523224830627441, | |
| "learning_rate": 0.00015346278317152106, | |
| "loss": 0.453, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 2.484848484848485, | |
| "grad_norm": 5.428617477416992, | |
| "learning_rate": 0.00015281553398058254, | |
| "loss": 0.3353, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 2.5167464114832536, | |
| "grad_norm": 4.765276908874512, | |
| "learning_rate": 0.000152168284789644, | |
| "loss": 0.3291, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 2.5486443381180224, | |
| "grad_norm": 6.175685405731201, | |
| "learning_rate": 0.00015152103559870552, | |
| "loss": 0.2628, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.580542264752791, | |
| "grad_norm": 7.138719081878662, | |
| "learning_rate": 0.000150873786407767, | |
| "loss": 0.2652, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 2.61244019138756, | |
| "grad_norm": 5.668585777282715, | |
| "learning_rate": 0.0001502265372168285, | |
| "loss": 0.252, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 2.6443381180223287, | |
| "grad_norm": 5.076392650604248, | |
| "learning_rate": 0.00014957928802588998, | |
| "loss": 0.2438, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 2.6762360446570974, | |
| "grad_norm": 8.504268646240234, | |
| "learning_rate": 0.00014893203883495145, | |
| "loss": 0.3538, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 2.708133971291866, | |
| "grad_norm": 4.550518989562988, | |
| "learning_rate": 0.00014828478964401296, | |
| "loss": 0.2585, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 2.740031897926635, | |
| "grad_norm": 6.09181547164917, | |
| "learning_rate": 0.00014763754045307443, | |
| "loss": 0.2586, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 2.7719298245614032, | |
| "grad_norm": 7.383531093597412, | |
| "learning_rate": 0.00014699029126213594, | |
| "loss": 0.2497, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 2.803827751196172, | |
| "grad_norm": 5.605717182159424, | |
| "learning_rate": 0.00014634304207119741, | |
| "loss": 0.2285, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 2.8357256778309408, | |
| "grad_norm": 7.412971496582031, | |
| "learning_rate": 0.0001456957928802589, | |
| "loss": 0.257, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 2.8676236044657095, | |
| "grad_norm": 6.855090141296387, | |
| "learning_rate": 0.0001450485436893204, | |
| "loss": 0.2347, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.8995215311004783, | |
| "grad_norm": 4.776122570037842, | |
| "learning_rate": 0.0001444012944983819, | |
| "loss": 0.2527, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.931419457735247, | |
| "grad_norm": 3.841705560684204, | |
| "learning_rate": 0.00014375404530744335, | |
| "loss": 0.192, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.963317384370016, | |
| "grad_norm": 4.253203392028809, | |
| "learning_rate": 0.00014310679611650485, | |
| "loss": 0.2104, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.9952153110047846, | |
| "grad_norm": 5.285226345062256, | |
| "learning_rate": 0.00014245954692556636, | |
| "loss": 0.1655, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 3.025518341307815, | |
| "grad_norm": 3.939434051513672, | |
| "learning_rate": 0.00014181229773462784, | |
| "loss": 0.1744, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 3.0574162679425836, | |
| "grad_norm": 4.514401435852051, | |
| "learning_rate": 0.00014116504854368934, | |
| "loss": 0.179, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 3.0893141945773523, | |
| "grad_norm": 6.335837364196777, | |
| "learning_rate": 0.00014051779935275082, | |
| "loss": 0.2103, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 3.121212121212121, | |
| "grad_norm": 3.401519775390625, | |
| "learning_rate": 0.0001398705501618123, | |
| "loss": 0.1717, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 3.15311004784689, | |
| "grad_norm": 3.501477003097534, | |
| "learning_rate": 0.0001392233009708738, | |
| "loss": 0.149, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 3.1850079744816586, | |
| "grad_norm": 4.168037414550781, | |
| "learning_rate": 0.0001385760517799353, | |
| "loss": 0.1557, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 3.2169059011164274, | |
| "grad_norm": 5.530002117156982, | |
| "learning_rate": 0.00013792880258899675, | |
| "loss": 0.1878, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 3.248803827751196, | |
| "grad_norm": 6.048915863037109, | |
| "learning_rate": 0.00013728155339805826, | |
| "loss": 0.1915, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 3.280701754385965, | |
| "grad_norm": 3.8435139656066895, | |
| "learning_rate": 0.00013663430420711976, | |
| "loss": 0.1429, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 3.3125996810207337, | |
| "grad_norm": 5.1407952308654785, | |
| "learning_rate": 0.00013598705501618124, | |
| "loss": 0.1486, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 3.3444976076555024, | |
| "grad_norm": 3.7761664390563965, | |
| "learning_rate": 0.00013533980582524271, | |
| "loss": 0.1758, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 3.376395534290271, | |
| "grad_norm": 4.0396952629089355, | |
| "learning_rate": 0.00013469255663430422, | |
| "loss": 0.1692, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 3.40829346092504, | |
| "grad_norm": 6.123923301696777, | |
| "learning_rate": 0.0001340453074433657, | |
| "loss": 0.1761, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 3.4401913875598087, | |
| "grad_norm": 3.7022392749786377, | |
| "learning_rate": 0.0001333980582524272, | |
| "loss": 0.1338, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 3.4720893141945774, | |
| "grad_norm": 3.5536398887634277, | |
| "learning_rate": 0.00013275080906148868, | |
| "loss": 0.1375, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 3.503987240829346, | |
| "grad_norm": 5.543455123901367, | |
| "learning_rate": 0.00013210355987055015, | |
| "loss": 0.1477, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 3.535885167464115, | |
| "grad_norm": 3.780407667160034, | |
| "learning_rate": 0.00013145631067961166, | |
| "loss": 0.168, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 3.5677830940988837, | |
| "grad_norm": 3.6479198932647705, | |
| "learning_rate": 0.00013080906148867314, | |
| "loss": 0.115, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 3.5996810207336525, | |
| "grad_norm": 4.027154445648193, | |
| "learning_rate": 0.00013016181229773464, | |
| "loss": 0.1251, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 3.6315789473684212, | |
| "grad_norm": 3.59836745262146, | |
| "learning_rate": 0.00012951456310679612, | |
| "loss": 0.1294, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 3.66347687400319, | |
| "grad_norm": 3.4946069717407227, | |
| "learning_rate": 0.0001288673139158576, | |
| "loss": 0.1161, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 3.6953748006379588, | |
| "grad_norm": 3.2438859939575195, | |
| "learning_rate": 0.0001282200647249191, | |
| "loss": 0.1003, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 3.7272727272727275, | |
| "grad_norm": 3.518580675125122, | |
| "learning_rate": 0.0001275728155339806, | |
| "loss": 0.1088, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 3.7591706539074963, | |
| "grad_norm": 3.3931541442871094, | |
| "learning_rate": 0.00012692556634304208, | |
| "loss": 0.1162, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 3.7910685805422646, | |
| "grad_norm": 4.836375713348389, | |
| "learning_rate": 0.00012627831715210356, | |
| "loss": 0.1206, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 3.8229665071770333, | |
| "grad_norm": 3.4532904624938965, | |
| "learning_rate": 0.00012563106796116506, | |
| "loss": 0.0998, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 3.854864433811802, | |
| "grad_norm": 3.0089080333709717, | |
| "learning_rate": 0.00012498381877022654, | |
| "loss": 0.1325, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 3.886762360446571, | |
| "grad_norm": 2.9546520709991455, | |
| "learning_rate": 0.00012433656957928804, | |
| "loss": 0.1073, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 3.9186602870813396, | |
| "grad_norm": 4.0866851806640625, | |
| "learning_rate": 0.00012368932038834952, | |
| "loss": 0.1135, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 3.9505582137161084, | |
| "grad_norm": 2.4372901916503906, | |
| "learning_rate": 0.000123042071197411, | |
| "loss": 0.1078, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 3.982456140350877, | |
| "grad_norm": 3.0094428062438965, | |
| "learning_rate": 0.0001223948220064725, | |
| "loss": 0.0926, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 4.012759170653908, | |
| "grad_norm": 3.015343189239502, | |
| "learning_rate": 0.00012174757281553399, | |
| "loss": 0.0812, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 4.044657097288677, | |
| "grad_norm": 3.3010330200195312, | |
| "learning_rate": 0.00012110032362459547, | |
| "loss": 0.0912, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 4.076555023923445, | |
| "grad_norm": 3.2524020671844482, | |
| "learning_rate": 0.00012045307443365696, | |
| "loss": 0.0919, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 4.108452950558214, | |
| "grad_norm": 3.144233465194702, | |
| "learning_rate": 0.00011980582524271846, | |
| "loss": 0.0984, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 4.140350877192983, | |
| "grad_norm": 3.6197972297668457, | |
| "learning_rate": 0.00011915857605177994, | |
| "loss": 0.0888, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 4.172248803827751, | |
| "grad_norm": 3.464054584503174, | |
| "learning_rate": 0.00011851132686084143, | |
| "loss": 0.0948, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 4.2041467304625195, | |
| "grad_norm": 3.589921474456787, | |
| "learning_rate": 0.00011786407766990293, | |
| "loss": 0.1008, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 4.236044657097288, | |
| "grad_norm": 2.547912120819092, | |
| "learning_rate": 0.0001172168284789644, | |
| "loss": 0.0865, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 4.267942583732057, | |
| "grad_norm": 2.76481032371521, | |
| "learning_rate": 0.0001165695792880259, | |
| "loss": 0.0879, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 4.299840510366826, | |
| "grad_norm": 3.417572021484375, | |
| "learning_rate": 0.00011592233009708739, | |
| "loss": 0.0824, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 4.3317384370015946, | |
| "grad_norm": 2.9202117919921875, | |
| "learning_rate": 0.00011527508090614887, | |
| "loss": 0.0843, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 4.363636363636363, | |
| "grad_norm": 3.2221357822418213, | |
| "learning_rate": 0.00011462783171521036, | |
| "loss": 0.0935, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 4.395534290271132, | |
| "grad_norm": 3.54180645942688, | |
| "learning_rate": 0.00011398058252427184, | |
| "loss": 0.0932, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 4.427432216905901, | |
| "grad_norm": 2.438354253768921, | |
| "learning_rate": 0.00011333333333333334, | |
| "loss": 0.0787, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 4.45933014354067, | |
| "grad_norm": 1.9685388803482056, | |
| "learning_rate": 0.00011268608414239483, | |
| "loss": 0.0745, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 4.491228070175438, | |
| "grad_norm": 3.79945969581604, | |
| "learning_rate": 0.00011203883495145631, | |
| "loss": 0.0941, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 4.523125996810207, | |
| "grad_norm": 2.6421890258789062, | |
| "learning_rate": 0.0001113915857605178, | |
| "loss": 0.0703, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 4.555023923444976, | |
| "grad_norm": 2.041005849838257, | |
| "learning_rate": 0.0001107443365695793, | |
| "loss": 0.0739, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 4.586921850079745, | |
| "grad_norm": 3.8904316425323486, | |
| "learning_rate": 0.00011009708737864077, | |
| "loss": 0.0698, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 4.618819776714513, | |
| "grad_norm": 2.766702651977539, | |
| "learning_rate": 0.00010944983818770227, | |
| "loss": 0.0766, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 4.650717703349282, | |
| "grad_norm": 2.6301941871643066, | |
| "learning_rate": 0.00010880258899676376, | |
| "loss": 0.0722, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 4.682615629984051, | |
| "grad_norm": 3.7244162559509277, | |
| "learning_rate": 0.00010815533980582524, | |
| "loss": 0.0855, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 4.71451355661882, | |
| "grad_norm": 2.076242446899414, | |
| "learning_rate": 0.00010750809061488673, | |
| "loss": 0.0625, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 4.746411483253588, | |
| "grad_norm": 2.212062358856201, | |
| "learning_rate": 0.00010686084142394823, | |
| "loss": 0.0667, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 4.778309409888357, | |
| "grad_norm": 3.0947024822235107, | |
| "learning_rate": 0.00010621359223300971, | |
| "loss": 0.0639, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 4.810207336523126, | |
| "grad_norm": 3.310490369796753, | |
| "learning_rate": 0.0001055663430420712, | |
| "loss": 0.0821, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 4.842105263157895, | |
| "grad_norm": 2.8293209075927734, | |
| "learning_rate": 0.0001049190938511327, | |
| "loss": 0.0733, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 4.8740031897926634, | |
| "grad_norm": 2.975891590118408, | |
| "learning_rate": 0.00010427184466019417, | |
| "loss": 0.0686, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 4.905901116427432, | |
| "grad_norm": 2.633657693862915, | |
| "learning_rate": 0.00010362459546925567, | |
| "loss": 0.0711, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 4.937799043062201, | |
| "grad_norm": 2.947361469268799, | |
| "learning_rate": 0.00010297734627831716, | |
| "loss": 0.0686, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 4.96969696969697, | |
| "grad_norm": 2.373194456100464, | |
| "learning_rate": 0.00010233009708737864, | |
| "loss": 0.0652, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 1.3575212955474854, | |
| "learning_rate": 0.00010168284789644013, | |
| "loss": 0.0604, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 5.031897926634769, | |
| "grad_norm": 2.9779140949249268, | |
| "learning_rate": 0.00010103559870550164, | |
| "loss": 0.0636, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 5.0637958532695375, | |
| "grad_norm": 2.8484952449798584, | |
| "learning_rate": 0.00010038834951456311, | |
| "loss": 0.0558, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 5.095693779904306, | |
| "grad_norm": 2.9866819381713867, | |
| "learning_rate": 9.97411003236246e-05, | |
| "loss": 0.0596, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 5.127591706539075, | |
| "grad_norm": 2.2801356315612793, | |
| "learning_rate": 9.90938511326861e-05, | |
| "loss": 0.0557, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 5.159489633173844, | |
| "grad_norm": 3.125821828842163, | |
| "learning_rate": 9.844660194174757e-05, | |
| "loss": 0.0671, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 5.1913875598086126, | |
| "grad_norm": 2.4626238346099854, | |
| "learning_rate": 9.779935275080908e-05, | |
| "loss": 0.0615, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 5.223285486443381, | |
| "grad_norm": 2.107201099395752, | |
| "learning_rate": 9.715210355987055e-05, | |
| "loss": 0.0597, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 5.25518341307815, | |
| "grad_norm": 2.0662150382995605, | |
| "learning_rate": 9.650485436893204e-05, | |
| "loss": 0.0543, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 5.287081339712919, | |
| "grad_norm": 2.728776454925537, | |
| "learning_rate": 9.585760517799353e-05, | |
| "loss": 0.0615, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 5.318979266347688, | |
| "grad_norm": 2.2817022800445557, | |
| "learning_rate": 9.521035598705502e-05, | |
| "loss": 0.0513, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 5.350877192982456, | |
| "grad_norm": 3.146657943725586, | |
| "learning_rate": 9.45631067961165e-05, | |
| "loss": 0.0523, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 5.382775119617225, | |
| "grad_norm": 2.3867263793945312, | |
| "learning_rate": 9.391585760517799e-05, | |
| "loss": 0.0534, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 5.414673046251994, | |
| "grad_norm": 2.2965664863586426, | |
| "learning_rate": 9.326860841423948e-05, | |
| "loss": 0.0495, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 5.446570972886763, | |
| "grad_norm": 1.410491704940796, | |
| "learning_rate": 9.262135922330097e-05, | |
| "loss": 0.0523, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 5.478468899521531, | |
| "grad_norm": 3.351827621459961, | |
| "learning_rate": 9.197411003236246e-05, | |
| "loss": 0.0516, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 5.5103668261563, | |
| "grad_norm": 2.1586365699768066, | |
| "learning_rate": 9.132686084142395e-05, | |
| "loss": 0.0542, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 5.542264752791069, | |
| "grad_norm": 1.751285433769226, | |
| "learning_rate": 9.067961165048545e-05, | |
| "loss": 0.0508, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 5.574162679425838, | |
| "grad_norm": 1.4902950525283813, | |
| "learning_rate": 9.003236245954692e-05, | |
| "loss": 0.0547, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 5.606060606060606, | |
| "grad_norm": 2.029918670654297, | |
| "learning_rate": 8.938511326860843e-05, | |
| "loss": 0.0561, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 5.637958532695375, | |
| "grad_norm": 3.787614583969116, | |
| "learning_rate": 8.87378640776699e-05, | |
| "loss": 0.0528, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 5.669856459330144, | |
| "grad_norm": 1.8037543296813965, | |
| "learning_rate": 8.80906148867314e-05, | |
| "loss": 0.0507, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 5.701754385964913, | |
| "grad_norm": 2.3712103366851807, | |
| "learning_rate": 8.744336569579288e-05, | |
| "loss": 0.045, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 5.733652312599681, | |
| "grad_norm": 1.5658912658691406, | |
| "learning_rate": 8.679611650485438e-05, | |
| "loss": 0.0472, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 5.76555023923445, | |
| "grad_norm": 2.318931818008423, | |
| "learning_rate": 8.614886731391587e-05, | |
| "loss": 0.0484, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 5.797448165869218, | |
| "grad_norm": 2.4893152713775635, | |
| "learning_rate": 8.550161812297734e-05, | |
| "loss": 0.0467, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 5.829346092503988, | |
| "grad_norm": 1.9527428150177002, | |
| "learning_rate": 8.485436893203885e-05, | |
| "loss": 0.0498, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 5.861244019138756, | |
| "grad_norm": 2.137164831161499, | |
| "learning_rate": 8.420711974110032e-05, | |
| "loss": 0.0446, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 5.893141945773524, | |
| "grad_norm": 1.574310302734375, | |
| "learning_rate": 8.355987055016181e-05, | |
| "loss": 0.0471, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 5.925039872408293, | |
| "grad_norm": 2.1740546226501465, | |
| "learning_rate": 8.29126213592233e-05, | |
| "loss": 0.0509, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 5.956937799043062, | |
| "grad_norm": 2.2676613330841064, | |
| "learning_rate": 8.22653721682848e-05, | |
| "loss": 0.0473, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 5.988835725677831, | |
| "grad_norm": 1.7788255214691162, | |
| "learning_rate": 8.161812297734627e-05, | |
| "loss": 0.0458, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 6.019138755980861, | |
| "grad_norm": 2.5608322620391846, | |
| "learning_rate": 8.097087378640778e-05, | |
| "loss": 0.0412, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 6.05103668261563, | |
| "grad_norm": 2.0553760528564453, | |
| "learning_rate": 8.032362459546925e-05, | |
| "loss": 0.0508, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 6.082934609250398, | |
| "grad_norm": 1.3741275072097778, | |
| "learning_rate": 7.967637540453075e-05, | |
| "loss": 0.0382, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 6.114832535885167, | |
| "grad_norm": 1.6682943105697632, | |
| "learning_rate": 7.902912621359224e-05, | |
| "loss": 0.0392, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 6.146730462519936, | |
| "grad_norm": 1.4680795669555664, | |
| "learning_rate": 7.838187702265373e-05, | |
| "loss": 0.0431, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 6.178628389154705, | |
| "grad_norm": 1.8964109420776367, | |
| "learning_rate": 7.773462783171522e-05, | |
| "loss": 0.0388, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 6.2105263157894735, | |
| "grad_norm": 1.709599256515503, | |
| "learning_rate": 7.70873786407767e-05, | |
| "loss": 0.04, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 6.242424242424242, | |
| "grad_norm": 1.761167287826538, | |
| "learning_rate": 7.64401294498382e-05, | |
| "loss": 0.0398, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 6.274322169059011, | |
| "grad_norm": 2.034879207611084, | |
| "learning_rate": 7.579288025889968e-05, | |
| "loss": 0.0417, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 6.30622009569378, | |
| "grad_norm": 1.5820090770721436, | |
| "learning_rate": 7.514563106796117e-05, | |
| "loss": 0.0411, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 6.3381180223285485, | |
| "grad_norm": 1.5550072193145752, | |
| "learning_rate": 7.449838187702266e-05, | |
| "loss": 0.0365, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 6.370015948963317, | |
| "grad_norm": 1.6271281242370605, | |
| "learning_rate": 7.385113268608415e-05, | |
| "loss": 0.0361, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 6.401913875598086, | |
| "grad_norm": 1.7692902088165283, | |
| "learning_rate": 7.320388349514564e-05, | |
| "loss": 0.0364, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 6.433811802232855, | |
| "grad_norm": 1.5776255130767822, | |
| "learning_rate": 7.255663430420713e-05, | |
| "loss": 0.0409, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 6.4657097288676235, | |
| "grad_norm": 1.9021235704421997, | |
| "learning_rate": 7.190938511326862e-05, | |
| "loss": 0.0379, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 6.497607655502392, | |
| "grad_norm": 1.342091679573059, | |
| "learning_rate": 7.12621359223301e-05, | |
| "loss": 0.0398, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 6.529505582137161, | |
| "grad_norm": 2.0045480728149414, | |
| "learning_rate": 7.06148867313916e-05, | |
| "loss": 0.0403, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 6.56140350877193, | |
| "grad_norm": 1.3946913480758667, | |
| "learning_rate": 6.996763754045308e-05, | |
| "loss": 0.0387, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 6.5933014354066986, | |
| "grad_norm": 1.63865327835083, | |
| "learning_rate": 6.932038834951457e-05, | |
| "loss": 0.0377, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 6.625199362041467, | |
| "grad_norm": 1.439233422279358, | |
| "learning_rate": 6.867313915857604e-05, | |
| "loss": 0.0376, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 6.657097288676236, | |
| "grad_norm": 2.021103858947754, | |
| "learning_rate": 6.802588996763755e-05, | |
| "loss": 0.0381, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 6.688995215311005, | |
| "grad_norm": 2.1549739837646484, | |
| "learning_rate": 6.737864077669903e-05, | |
| "loss": 0.0368, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 6.720893141945774, | |
| "grad_norm": 1.404586672782898, | |
| "learning_rate": 6.673139158576052e-05, | |
| "loss": 0.0362, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 6.752791068580542, | |
| "grad_norm": 2.1748902797698975, | |
| "learning_rate": 6.608414239482201e-05, | |
| "loss": 0.0358, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 6.784688995215311, | |
| "grad_norm": 1.4457738399505615, | |
| "learning_rate": 6.54368932038835e-05, | |
| "loss": 0.0345, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 6.81658692185008, | |
| "grad_norm": 1.1100033521652222, | |
| "learning_rate": 6.478964401294499e-05, | |
| "loss": 0.0329, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 6.848484848484849, | |
| "grad_norm": 1.7228426933288574, | |
| "learning_rate": 6.414239482200648e-05, | |
| "loss": 0.0401, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 6.880382775119617, | |
| "grad_norm": 1.428963541984558, | |
| "learning_rate": 6.349514563106797e-05, | |
| "loss": 0.0334, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 6.912280701754386, | |
| "grad_norm": 0.96390700340271, | |
| "learning_rate": 6.284789644012945e-05, | |
| "loss": 0.0332, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 6.944178628389155, | |
| "grad_norm": 0.9826375246047974, | |
| "learning_rate": 6.220064724919095e-05, | |
| "loss": 0.0348, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 6.976076555023924, | |
| "grad_norm": 1.523210883140564, | |
| "learning_rate": 6.155339805825243e-05, | |
| "loss": 0.034, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 7.006379585326954, | |
| "grad_norm": 0.9604802131652832, | |
| "learning_rate": 6.090614886731392e-05, | |
| "loss": 0.0324, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 7.038277511961723, | |
| "grad_norm": 1.191280484199524, | |
| "learning_rate": 6.02588996763754e-05, | |
| "loss": 0.0296, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 7.0701754385964914, | |
| "grad_norm": 0.9107991456985474, | |
| "learning_rate": 5.96116504854369e-05, | |
| "loss": 0.0295, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 7.10207336523126, | |
| "grad_norm": 1.070676326751709, | |
| "learning_rate": 5.8964401294498384e-05, | |
| "loss": 0.0294, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 7.133971291866029, | |
| "grad_norm": 1.4254604578018188, | |
| "learning_rate": 5.831715210355987e-05, | |
| "loss": 0.0333, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 7.165869218500798, | |
| "grad_norm": 0.7672649025917053, | |
| "learning_rate": 5.7669902912621365e-05, | |
| "loss": 0.0283, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 7.1977671451355665, | |
| "grad_norm": 1.1972182989120483, | |
| "learning_rate": 5.702265372168285e-05, | |
| "loss": 0.0316, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 7.229665071770335, | |
| "grad_norm": 0.9965393543243408, | |
| "learning_rate": 5.637540453074433e-05, | |
| "loss": 0.0295, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 7.261562998405104, | |
| "grad_norm": 0.7158511281013489, | |
| "learning_rate": 5.572815533980583e-05, | |
| "loss": 0.0285, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 7.293460925039873, | |
| "grad_norm": 0.9511866569519043, | |
| "learning_rate": 5.5080906148867314e-05, | |
| "loss": 0.0313, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 7.3253588516746415, | |
| "grad_norm": 1.0649924278259277, | |
| "learning_rate": 5.4433656957928805e-05, | |
| "loss": 0.0291, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 7.35725677830941, | |
| "grad_norm": 0.8110047578811646, | |
| "learning_rate": 5.37864077669903e-05, | |
| "loss": 0.027, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 7.389154704944179, | |
| "grad_norm": 1.2479182481765747, | |
| "learning_rate": 5.3139158576051786e-05, | |
| "loss": 0.0284, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 7.421052631578947, | |
| "grad_norm": 1.4915539026260376, | |
| "learning_rate": 5.249190938511327e-05, | |
| "loss": 0.0283, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 7.4529505582137165, | |
| "grad_norm": 1.0583655834197998, | |
| "learning_rate": 5.1844660194174753e-05, | |
| "loss": 0.0274, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 7.484848484848484, | |
| "grad_norm": 0.9848374128341675, | |
| "learning_rate": 5.119741100323625e-05, | |
| "loss": 0.0287, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 7.516746411483254, | |
| "grad_norm": 0.8352569341659546, | |
| "learning_rate": 5.0550161812297735e-05, | |
| "loss": 0.0275, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 7.548644338118022, | |
| "grad_norm": 0.756790816783905, | |
| "learning_rate": 4.9902912621359225e-05, | |
| "loss": 0.026, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 7.580542264752791, | |
| "grad_norm": 0.7591552138328552, | |
| "learning_rate": 4.925566343042071e-05, | |
| "loss": 0.0271, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 7.6124401913875595, | |
| "grad_norm": 0.6143433451652527, | |
| "learning_rate": 4.86084142394822e-05, | |
| "loss": 0.026, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 7.644338118022328, | |
| "grad_norm": 1.1552224159240723, | |
| "learning_rate": 4.796116504854369e-05, | |
| "loss": 0.0258, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 7.676236044657097, | |
| "grad_norm": 0.9515504837036133, | |
| "learning_rate": 4.731391585760518e-05, | |
| "loss": 0.0248, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 7.708133971291866, | |
| "grad_norm": 0.4848612844944, | |
| "learning_rate": 4.666666666666667e-05, | |
| "loss": 0.0257, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 7.7400318979266345, | |
| "grad_norm": 0.7792959809303284, | |
| "learning_rate": 4.6019417475728155e-05, | |
| "loss": 0.0247, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 7.771929824561403, | |
| "grad_norm": 0.798723042011261, | |
| "learning_rate": 4.5372168284789646e-05, | |
| "loss": 0.0243, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 7.803827751196172, | |
| "grad_norm": 0.38535213470458984, | |
| "learning_rate": 4.472491909385114e-05, | |
| "loss": 0.024, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 7.835725677830941, | |
| "grad_norm": 0.7133737802505493, | |
| "learning_rate": 4.407766990291262e-05, | |
| "loss": 0.0239, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 7.8676236044657095, | |
| "grad_norm": 0.6840415596961975, | |
| "learning_rate": 4.343042071197411e-05, | |
| "loss": 0.0239, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 7.899521531100478, | |
| "grad_norm": 0.5434433817863464, | |
| "learning_rate": 4.27831715210356e-05, | |
| "loss": 0.0232, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 7.931419457735247, | |
| "grad_norm": 0.42021751403808594, | |
| "learning_rate": 4.2135922330097086e-05, | |
| "loss": 0.0231, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 7.963317384370016, | |
| "grad_norm": 0.24440300464630127, | |
| "learning_rate": 4.1488673139158576e-05, | |
| "loss": 0.0221, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 7.9952153110047846, | |
| "grad_norm": 0.32175081968307495, | |
| "learning_rate": 4.084142394822007e-05, | |
| "loss": 0.0215, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 8.025518341307816, | |
| "grad_norm": 0.26291289925575256, | |
| "learning_rate": 4.019417475728156e-05, | |
| "loss": 0.0204, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 8.057416267942584, | |
| "grad_norm": 0.27921006083488464, | |
| "learning_rate": 3.954692556634305e-05, | |
| "loss": 0.0202, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 8.089314194577353, | |
| "grad_norm": 0.4434060752391815, | |
| "learning_rate": 3.889967637540453e-05, | |
| "loss": 0.0204, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 8.121212121212121, | |
| "grad_norm": 0.3078613877296448, | |
| "learning_rate": 3.825242718446602e-05, | |
| "loss": 0.0208, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 8.15311004784689, | |
| "grad_norm": 0.23100249469280243, | |
| "learning_rate": 3.7605177993527506e-05, | |
| "loss": 0.0207, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 8.185007974481659, | |
| "grad_norm": 0.34067490696907043, | |
| "learning_rate": 3.6957928802589e-05, | |
| "loss": 0.0207, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 8.216905901116428, | |
| "grad_norm": 0.30456990003585815, | |
| "learning_rate": 3.631067961165049e-05, | |
| "loss": 0.0215, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 8.248803827751196, | |
| "grad_norm": 0.27407306432724, | |
| "learning_rate": 3.566343042071197e-05, | |
| "loss": 0.02, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 8.280701754385966, | |
| "grad_norm": 0.3087022304534912, | |
| "learning_rate": 3.501618122977346e-05, | |
| "loss": 0.0206, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 8.312599681020734, | |
| "grad_norm": 0.5118716359138489, | |
| "learning_rate": 3.436893203883495e-05, | |
| "loss": 0.0207, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 8.344497607655502, | |
| "grad_norm": 0.2015773355960846, | |
| "learning_rate": 3.372168284789644e-05, | |
| "loss": 0.02, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 8.376395534290271, | |
| "grad_norm": 0.2432379424571991, | |
| "learning_rate": 3.3074433656957934e-05, | |
| "loss": 0.0201, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 8.408293460925039, | |
| "grad_norm": 0.2935405373573303, | |
| "learning_rate": 3.242718446601942e-05, | |
| "loss": 0.0209, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 8.440191387559809, | |
| "grad_norm": 0.2740592658519745, | |
| "learning_rate": 3.177993527508091e-05, | |
| "loss": 0.0204, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 8.472089314194577, | |
| "grad_norm": 0.19506220519542694, | |
| "learning_rate": 3.11326860841424e-05, | |
| "loss": 0.0204, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 8.503987240829346, | |
| "grad_norm": 0.2191346436738968, | |
| "learning_rate": 3.0485436893203883e-05, | |
| "loss": 0.02, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 8.535885167464114, | |
| "grad_norm": 0.17195333540439606, | |
| "learning_rate": 2.9838187702265373e-05, | |
| "loss": 0.0203, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 8.567783094098884, | |
| "grad_norm": 0.1918991059064865, | |
| "learning_rate": 2.919093851132686e-05, | |
| "loss": 0.0199, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 8.599681020733652, | |
| "grad_norm": 0.32339102029800415, | |
| "learning_rate": 2.854368932038835e-05, | |
| "loss": 0.0207, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 8.631578947368421, | |
| "grad_norm": 0.21508780121803284, | |
| "learning_rate": 2.7896440129449842e-05, | |
| "loss": 0.0206, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 8.663476874003189, | |
| "grad_norm": 0.186102032661438, | |
| "learning_rate": 2.7249190938511326e-05, | |
| "loss": 0.0204, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 8.695374800637959, | |
| "grad_norm": 0.44521018862724304, | |
| "learning_rate": 2.6601941747572816e-05, | |
| "loss": 0.0195, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 8.727272727272727, | |
| "grad_norm": 0.2109213024377823, | |
| "learning_rate": 2.5954692556634307e-05, | |
| "loss": 0.0198, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 8.759170653907496, | |
| "grad_norm": 0.19141490757465363, | |
| "learning_rate": 2.5307443365695794e-05, | |
| "loss": 0.02, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 8.791068580542264, | |
| "grad_norm": 0.19844338297843933, | |
| "learning_rate": 2.4660194174757285e-05, | |
| "loss": 0.0203, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 8.822966507177034, | |
| "grad_norm": 0.22535575926303864, | |
| "learning_rate": 2.4012944983818772e-05, | |
| "loss": 0.0202, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 8.854864433811802, | |
| "grad_norm": 0.15982511639595032, | |
| "learning_rate": 2.336569579288026e-05, | |
| "loss": 0.0195, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 8.886762360446571, | |
| "grad_norm": 0.34473344683647156, | |
| "learning_rate": 2.2718446601941746e-05, | |
| "loss": 0.02, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 8.91866028708134, | |
| "grad_norm": 0.2416442632675171, | |
| "learning_rate": 2.2071197411003237e-05, | |
| "loss": 0.0199, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 8.950558213716109, | |
| "grad_norm": 0.32778188586235046, | |
| "learning_rate": 2.1423948220064728e-05, | |
| "loss": 0.0201, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 8.982456140350877, | |
| "grad_norm": 0.18563535809516907, | |
| "learning_rate": 2.0776699029126215e-05, | |
| "loss": 0.0201, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 9.012759170653908, | |
| "grad_norm": 0.2312159687280655, | |
| "learning_rate": 2.0129449838187702e-05, | |
| "loss": 0.019, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 9.044657097288676, | |
| "grad_norm": 0.22602292895317078, | |
| "learning_rate": 1.948220064724919e-05, | |
| "loss": 0.0197, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 9.076555023923445, | |
| "grad_norm": 0.2483113408088684, | |
| "learning_rate": 1.883495145631068e-05, | |
| "loss": 0.0191, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 9.108452950558213, | |
| "grad_norm": 0.19213229417800903, | |
| "learning_rate": 1.818770226537217e-05, | |
| "loss": 0.019, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 9.140350877192983, | |
| "grad_norm": 0.20552313327789307, | |
| "learning_rate": 1.7540453074433658e-05, | |
| "loss": 0.0196, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 9.17224880382775, | |
| "grad_norm": 0.1715112179517746, | |
| "learning_rate": 1.6893203883495145e-05, | |
| "loss": 0.0199, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 9.20414673046252, | |
| "grad_norm": 0.1956663876771927, | |
| "learning_rate": 1.6245954692556636e-05, | |
| "loss": 0.0192, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 9.236044657097288, | |
| "grad_norm": 0.20990578830242157, | |
| "learning_rate": 1.5598705501618123e-05, | |
| "loss": 0.0192, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 9.267942583732058, | |
| "grad_norm": 0.21641728281974792, | |
| "learning_rate": 1.4951456310679612e-05, | |
| "loss": 0.0194, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 9.299840510366826, | |
| "grad_norm": 0.148260235786438, | |
| "learning_rate": 1.43042071197411e-05, | |
| "loss": 0.0197, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 9.331738437001595, | |
| "grad_norm": 0.24010303616523743, | |
| "learning_rate": 1.3656957928802588e-05, | |
| "loss": 0.0194, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 9.363636363636363, | |
| "grad_norm": 0.18212716281414032, | |
| "learning_rate": 1.3009708737864079e-05, | |
| "loss": 0.0192, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 9.395534290271133, | |
| "grad_norm": 0.18527287244796753, | |
| "learning_rate": 1.2362459546925568e-05, | |
| "loss": 0.02, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 9.4274322169059, | |
| "grad_norm": 0.16474080085754395, | |
| "learning_rate": 1.1715210355987055e-05, | |
| "loss": 0.0193, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 9.45933014354067, | |
| "grad_norm": 0.16805745661258698, | |
| "learning_rate": 1.1067961165048545e-05, | |
| "loss": 0.0189, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 9.491228070175438, | |
| "grad_norm": 0.20816102623939514, | |
| "learning_rate": 1.0420711974110033e-05, | |
| "loss": 0.0193, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 9.523125996810208, | |
| "grad_norm": 0.1942005753517151, | |
| "learning_rate": 9.773462783171522e-06, | |
| "loss": 0.0191, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 9.555023923444976, | |
| "grad_norm": 0.17980408668518066, | |
| "learning_rate": 9.12621359223301e-06, | |
| "loss": 0.0187, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 9.586921850079746, | |
| "grad_norm": 0.2269669771194458, | |
| "learning_rate": 8.4789644012945e-06, | |
| "loss": 0.0193, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 9.618819776714513, | |
| "grad_norm": 0.2470860481262207, | |
| "learning_rate": 7.831715210355987e-06, | |
| "loss": 0.0197, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 9.650717703349283, | |
| "grad_norm": 0.18939535319805145, | |
| "learning_rate": 7.1844660194174755e-06, | |
| "loss": 0.0195, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 9.682615629984051, | |
| "grad_norm": 0.2230576127767563, | |
| "learning_rate": 6.5372168284789644e-06, | |
| "loss": 0.0196, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 9.71451355661882, | |
| "grad_norm": 0.22115571796894073, | |
| "learning_rate": 5.889967637540453e-06, | |
| "loss": 0.0193, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 9.746411483253588, | |
| "grad_norm": 0.22138074040412903, | |
| "learning_rate": 5.242718446601942e-06, | |
| "loss": 0.0188, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 9.778309409888358, | |
| "grad_norm": 0.17522794008255005, | |
| "learning_rate": 4.595469255663431e-06, | |
| "loss": 0.0192, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 9.810207336523126, | |
| "grad_norm": 0.22353076934814453, | |
| "learning_rate": 3.948220064724919e-06, | |
| "loss": 0.019, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 9.842105263157894, | |
| "grad_norm": 0.17822624742984772, | |
| "learning_rate": 3.300970873786408e-06, | |
| "loss": 0.0191, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 9.874003189792663, | |
| "grad_norm": 0.18609146773815155, | |
| "learning_rate": 2.6537216828478967e-06, | |
| "loss": 0.0194, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 9.905901116427433, | |
| "grad_norm": 0.16276702284812927, | |
| "learning_rate": 2.006472491909385e-06, | |
| "loss": 0.0193, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 9.937799043062201, | |
| "grad_norm": 0.2207743376493454, | |
| "learning_rate": 1.359223300970874e-06, | |
| "loss": 0.0192, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 9.969696969696969, | |
| "grad_norm": 0.1925983428955078, | |
| "learning_rate": 7.119741100323625e-07, | |
| "loss": 0.0193, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 0.26687198877334595, | |
| "learning_rate": 6.472491909385113e-08, | |
| "loss": 0.0182, | |
| "step": 3140 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 3140, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.344970104917197e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |