| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9237341954852492, | |
| "eval_steps": 2000, | |
| "global_step": 10000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0009237341954852491, | |
| "grad_norm": 10.377836227416992, | |
| "learning_rate": 0.0001, | |
| "loss": 0.7753, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0018474683909704983, | |
| "grad_norm": 3.7491939067840576, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4055, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.002771202586455747, | |
| "grad_norm": 4.031465530395508, | |
| "learning_rate": 0.0001, | |
| "loss": 0.3717, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0036949367819409966, | |
| "grad_norm": 2.0838451385498047, | |
| "learning_rate": 0.0001, | |
| "loss": 0.3579, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.004618670977426246, | |
| "grad_norm": 1.9277153015136719, | |
| "learning_rate": 0.0001, | |
| "loss": 0.3472, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.005542405172911494, | |
| "grad_norm": 2.6250083446502686, | |
| "learning_rate": 0.0001, | |
| "loss": 0.3329, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.006466139368396744, | |
| "grad_norm": 2.675837278366089, | |
| "learning_rate": 0.0001, | |
| "loss": 0.3285, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.007389873563881993, | |
| "grad_norm": 1.3599060773849487, | |
| "learning_rate": 0.0001, | |
| "loss": 0.323, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.008313607759367242, | |
| "grad_norm": 2.4787392616271973, | |
| "learning_rate": 0.0001, | |
| "loss": 0.3184, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.009237341954852492, | |
| "grad_norm": 1.6417860984802246, | |
| "learning_rate": 0.0001, | |
| "loss": 0.3176, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.01016107615033774, | |
| "grad_norm": 1.5067483186721802, | |
| "learning_rate": 0.0001, | |
| "loss": 0.3076, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.011084810345822989, | |
| "grad_norm": 1.5528444051742554, | |
| "learning_rate": 0.0001, | |
| "loss": 0.3126, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.012008544541308239, | |
| "grad_norm": 1.6968024969100952, | |
| "learning_rate": 0.0001, | |
| "loss": 0.3105, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.012932278736793488, | |
| "grad_norm": 2.3010127544403076, | |
| "learning_rate": 0.0001, | |
| "loss": 0.3115, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.013856012932278736, | |
| "grad_norm": 1.9934582710266113, | |
| "learning_rate": 0.0001, | |
| "loss": 0.3053, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.014779747127763986, | |
| "grad_norm": 1.4010335206985474, | |
| "learning_rate": 0.0001, | |
| "loss": 0.3021, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.015703481323249235, | |
| "grad_norm": 1.210365891456604, | |
| "learning_rate": 0.0001, | |
| "loss": 0.3021, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.016627215518734483, | |
| "grad_norm": 0.8452786803245544, | |
| "learning_rate": 0.0001, | |
| "loss": 0.3021, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.01755094971421973, | |
| "grad_norm": 0.8893113136291504, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2993, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.018474683909704984, | |
| "grad_norm": 1.0572125911712646, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2995, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.019398418105190232, | |
| "grad_norm": 1.1769524812698364, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2991, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.02032215230067548, | |
| "grad_norm": 1.1253703832626343, | |
| "learning_rate": 0.0001, | |
| "loss": 0.297, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.02124588649616073, | |
| "grad_norm": 0.9401121735572815, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2967, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.022169620691645978, | |
| "grad_norm": 0.8937715291976929, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2914, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.02309335488713123, | |
| "grad_norm": 1.2040687799453735, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2956, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.024017089082616478, | |
| "grad_norm": 0.8526865243911743, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2893, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.024940823278101727, | |
| "grad_norm": 0.9076613783836365, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2879, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.025864557473586975, | |
| "grad_norm": 0.8890297412872314, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2837, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.026788291669072224, | |
| "grad_norm": 0.8901604413986206, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2783, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.027712025864557472, | |
| "grad_norm": 1.1589536666870117, | |
| "learning_rate": 0.0001, | |
| "loss": 1.1256, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.028635760060042724, | |
| "grad_norm": 0.6688721776008606, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2779, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.029559494255527972, | |
| "grad_norm": 0.7300515174865723, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2769, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.03048322845101322, | |
| "grad_norm": 0.8623325228691101, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2862, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.03140696264649847, | |
| "grad_norm": 0.8834196925163269, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2815, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.03233069684198372, | |
| "grad_norm": 0.622388482093811, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2776, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.033254431037468966, | |
| "grad_norm": 0.7375735640525818, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2805, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.034178165232954215, | |
| "grad_norm": 0.7450881600379944, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2821, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.03510189942843946, | |
| "grad_norm": 0.9458677768707275, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2756, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.03602563362392472, | |
| "grad_norm": 0.9151942133903503, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2795, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.03694936781940997, | |
| "grad_norm": 0.7280314564704895, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6648, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.037873102014895216, | |
| "grad_norm": 0.7620455026626587, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2716, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.038796836210380464, | |
| "grad_norm": 0.5944289565086365, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2704, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.03972057040586571, | |
| "grad_norm": 1.0736445188522339, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2728, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.04064430460135096, | |
| "grad_norm": 0.5833008885383606, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2778, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.04156803879683621, | |
| "grad_norm": 0.6026699542999268, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2726, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.04249177299232146, | |
| "grad_norm": 0.6159306764602661, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2698, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.04341550718780671, | |
| "grad_norm": 0.5370445251464844, | |
| "learning_rate": 0.0001, | |
| "loss": 0.273, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.044339241383291955, | |
| "grad_norm": 0.6999521255493164, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2738, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.045262975578777204, | |
| "grad_norm": 0.7118927836418152, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2695, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.04618670977426246, | |
| "grad_norm": 0.5837290287017822, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6541, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.04711044396974771, | |
| "grad_norm": 0.5503808259963989, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2675, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.048034178165232956, | |
| "grad_norm": 0.7749144434928894, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2651, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.048957912360718205, | |
| "grad_norm": 0.657133162021637, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2663, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.04988164655620345, | |
| "grad_norm": 0.6993033289909363, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2667, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.0508053807516887, | |
| "grad_norm": 0.6134759187698364, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6345, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.05172911494717395, | |
| "grad_norm": 0.8342564702033997, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2667, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.0526528491426592, | |
| "grad_norm": 0.6970536112785339, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2743, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.05357658333814445, | |
| "grad_norm": 0.5340526103973389, | |
| "learning_rate": 0.0001, | |
| "loss": 0.266, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.054500317533629695, | |
| "grad_norm": 0.5798579454421997, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2645, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.055424051729114944, | |
| "grad_norm": 0.5978800654411316, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2658, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.0563477859246002, | |
| "grad_norm": 0.6169262528419495, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2648, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.05727152012008545, | |
| "grad_norm": 0.49252912402153015, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2687, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.058195254315570696, | |
| "grad_norm": 0.5621038675308228, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2646, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.059118988511055945, | |
| "grad_norm": 0.5030767321586609, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2677, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.06004272270654119, | |
| "grad_norm": 0.44811710715293884, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2636, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.06096645690202644, | |
| "grad_norm": 0.645104706287384, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2698, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.06189019109751169, | |
| "grad_norm": 0.5724523663520813, | |
| "learning_rate": 0.0001, | |
| "loss": 0.264, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.06281392529299694, | |
| "grad_norm": 0.49830982089042664, | |
| "learning_rate": 0.0001, | |
| "loss": 0.261, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.06373765948848219, | |
| "grad_norm": 0.4987528622150421, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6213, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.06466139368396744, | |
| "grad_norm": 0.4653536081314087, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2657, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.06558512787945268, | |
| "grad_norm": 0.6882783770561218, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2585, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.06650886207493793, | |
| "grad_norm": 0.6810200214385986, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2606, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.06743259627042318, | |
| "grad_norm": 0.7022980451583862, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2623, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.06835633046590843, | |
| "grad_norm": 0.6302197575569153, | |
| "learning_rate": 0.0001, | |
| "loss": 0.262, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.06928006466139368, | |
| "grad_norm": 0.5974870324134827, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2622, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.07020379885687893, | |
| "grad_norm": 0.7654616236686707, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2613, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.07112753305236419, | |
| "grad_norm": 0.6381949186325073, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2618, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.07205126724784944, | |
| "grad_norm": 0.5837942957878113, | |
| "learning_rate": 0.0001, | |
| "loss": 0.256, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.07297500144333469, | |
| "grad_norm": 0.5880801677703857, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2551, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.07389873563881993, | |
| "grad_norm": 0.5989728569984436, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2584, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.07482246983430518, | |
| "grad_norm": 0.5575709342956543, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2556, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.07574620402979043, | |
| "grad_norm": 0.46623289585113525, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2517, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.07666993822527568, | |
| "grad_norm": 0.668117105960846, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2539, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.07759367242076093, | |
| "grad_norm": 0.5178112387657166, | |
| "learning_rate": 0.0001, | |
| "loss": 0.254, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.07851740661624618, | |
| "grad_norm": 0.48676905035972595, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2554, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.07944114081173143, | |
| "grad_norm": 0.6930618286132812, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2659, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.08036487500721667, | |
| "grad_norm": 0.6086645722389221, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2544, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.08128860920270192, | |
| "grad_norm": 0.5384548306465149, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2517, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.08221234339818717, | |
| "grad_norm": 0.43174371123313904, | |
| "learning_rate": 0.0001, | |
| "loss": 0.254, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.08313607759367242, | |
| "grad_norm": 0.6086358428001404, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2541, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.08405981178915767, | |
| "grad_norm": 0.7949267029762268, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6148, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.08498354598464292, | |
| "grad_norm": 0.5551185607910156, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2555, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.08590728018012816, | |
| "grad_norm": 0.5146967172622681, | |
| "learning_rate": 0.0001, | |
| "loss": 0.261, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.08683101437561341, | |
| "grad_norm": 0.653384268283844, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2576, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.08775474857109866, | |
| "grad_norm": 0.444113552570343, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2538, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.08867848276658391, | |
| "grad_norm": 0.4909529983997345, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2518, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.08960221696206916, | |
| "grad_norm": 0.4676177203655243, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2495, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.09052595115755441, | |
| "grad_norm": 0.576374888420105, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2491, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.09144968535303967, | |
| "grad_norm": 0.6059390306472778, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2482, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.09237341954852492, | |
| "grad_norm": 0.6289136409759521, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2523, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.09329715374401017, | |
| "grad_norm": 0.6940639019012451, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2508, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.09422088793949542, | |
| "grad_norm": 0.49572697281837463, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2526, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.09514462213498066, | |
| "grad_norm": 0.9866797924041748, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6079, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.09606835633046591, | |
| "grad_norm": 0.3926860988140106, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2502, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.09699209052595116, | |
| "grad_norm": 0.5195950865745544, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2505, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.09791582472143641, | |
| "grad_norm": 0.6011677980422974, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2531, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.09883955891692166, | |
| "grad_norm": 0.45717713236808777, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2539, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.0997632931124069, | |
| "grad_norm": 0.45463839173316956, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2573, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.10068702730789215, | |
| "grad_norm": 0.5337753295898438, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2466, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.1016107615033774, | |
| "grad_norm": 0.6251022815704346, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2446, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.10253449569886265, | |
| "grad_norm": 0.5094243884086609, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2478, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.1034582298943479, | |
| "grad_norm": 0.4294573664665222, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2447, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.10438196408983315, | |
| "grad_norm": 0.619016706943512, | |
| "learning_rate": 0.0001, | |
| "loss": 0.6017, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.1053056982853184, | |
| "grad_norm": 0.7880054116249084, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2521, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.10622943248080365, | |
| "grad_norm": 0.49156785011291504, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2527, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.1071531666762889, | |
| "grad_norm": 0.5413324236869812, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2558, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.10807690087177414, | |
| "grad_norm": 0.4010981619358063, | |
| "learning_rate": 0.0001, | |
| "loss": 0.245, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.10900063506725939, | |
| "grad_norm": 0.41714775562286377, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2462, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.10992436926274464, | |
| "grad_norm": 0.5162696838378906, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2494, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.11084810345822989, | |
| "grad_norm": 0.546750545501709, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2504, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.11177183765371515, | |
| "grad_norm": 0.4263443648815155, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5993, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.1126955718492004, | |
| "grad_norm": 0.42017892003059387, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5952, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.11361930604468565, | |
| "grad_norm": 0.49449294805526733, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2493, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.1145430402401709, | |
| "grad_norm": 0.5061096549034119, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2441, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.11546677443565614, | |
| "grad_norm": 0.7036425471305847, | |
| "learning_rate": 0.0001, | |
| "loss": 0.241, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.11639050863114139, | |
| "grad_norm": 0.46319207549095154, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2506, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.11731424282662664, | |
| "grad_norm": 0.5542079210281372, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2405, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.11823797702211189, | |
| "grad_norm": 0.5105220675468445, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2445, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.11916171121759714, | |
| "grad_norm": 0.5735233426094055, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2478, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.12008544541308239, | |
| "grad_norm": 0.4917200207710266, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2469, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.12100917960856764, | |
| "grad_norm": 0.4899098873138428, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2405, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.12193291380405288, | |
| "grad_norm": 0.42172908782958984, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2442, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.12285664799953813, | |
| "grad_norm": 0.5698174834251404, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2448, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.12378038219502338, | |
| "grad_norm": 0.43075141310691833, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5928, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.12470411639050863, | |
| "grad_norm": 0.4610329270362854, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5987, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.12562785058599388, | |
| "grad_norm": 0.4741334021091461, | |
| "learning_rate": 0.0001, | |
| "loss": 0.246, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.12655158478147913, | |
| "grad_norm": 0.8104658126831055, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2426, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.12747531897696437, | |
| "grad_norm": 0.46889594197273254, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2478, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.12839905317244962, | |
| "grad_norm": 0.5051830410957336, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2453, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.12932278736793487, | |
| "grad_norm": 0.6313543319702148, | |
| "learning_rate": 0.0001, | |
| "loss": 0.247, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.13024652156342012, | |
| "grad_norm": 0.4182421863079071, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2441, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.13117025575890537, | |
| "grad_norm": 0.5988187789916992, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2449, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.13209398995439062, | |
| "grad_norm": 0.5029876828193665, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2391, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.13301772414987587, | |
| "grad_norm": 0.3747965395450592, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2396, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.13394145834536111, | |
| "grad_norm": 0.4476030170917511, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2484, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.13486519254084636, | |
| "grad_norm": 0.4146966338157654, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2439, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.1357889267363316, | |
| "grad_norm": 0.439266562461853, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2358, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.13671266093181686, | |
| "grad_norm": 0.45384058356285095, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2465, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.1376363951273021, | |
| "grad_norm": 0.41790732741355896, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2443, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.13856012932278736, | |
| "grad_norm": 0.4739314317703247, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2412, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.1394838635182726, | |
| "grad_norm": 0.40792784094810486, | |
| "learning_rate": 0.0001, | |
| "loss": 0.239, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.14040759771375785, | |
| "grad_norm": 0.38325121998786926, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2397, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.1413313319092431, | |
| "grad_norm": 0.5073789358139038, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2407, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.14225506610472838, | |
| "grad_norm": 0.42200055718421936, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2452, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.14317880030021363, | |
| "grad_norm": 0.42271173000335693, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2435, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.14410253449569888, | |
| "grad_norm": 0.4510470926761627, | |
| "learning_rate": 0.0001, | |
| "loss": 0.586, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.14502626869118412, | |
| "grad_norm": 0.47332921624183655, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2404, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.14595000288666937, | |
| "grad_norm": 0.44405099749565125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2339, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.14687373708215462, | |
| "grad_norm": 0.4454667270183563, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2431, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.14779747127763987, | |
| "grad_norm": 0.5730986595153809, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2383, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.14872120547312512, | |
| "grad_norm": 0.42024850845336914, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2409, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.14964493966861037, | |
| "grad_norm": 0.47302043437957764, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5866, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.15056867386409561, | |
| "grad_norm": 0.5812174677848816, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2406, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.15149240805958086, | |
| "grad_norm": 0.4626239240169525, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2399, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.1524161422550661, | |
| "grad_norm": 0.43332046270370483, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2372, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.15333987645055136, | |
| "grad_norm": 0.503600537776947, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2353, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.1542636106460366, | |
| "grad_norm": 0.47614091634750366, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2374, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.15518734484152186, | |
| "grad_norm": 0.584269106388092, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2409, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.1561110790370071, | |
| "grad_norm": 0.4429151117801666, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2434, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.15703481323249235, | |
| "grad_norm": 0.38515257835388184, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2378, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.1579585474279776, | |
| "grad_norm": 0.4715421795845032, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2375, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.15888228162346285, | |
| "grad_norm": 0.3542056679725647, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2326, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.1598060158189481, | |
| "grad_norm": 0.5648353695869446, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2418, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.16072975001443335, | |
| "grad_norm": 0.3755480647087097, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2386, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.1616534842099186, | |
| "grad_norm": 0.718487024307251, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2384, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.16257721840540384, | |
| "grad_norm": 0.48689281940460205, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2366, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.1635009526008891, | |
| "grad_norm": 0.44187742471694946, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2397, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.16442468679637434, | |
| "grad_norm": 0.5262730121612549, | |
| "learning_rate": 0.0001, | |
| "loss": 0.241, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.1653484209918596, | |
| "grad_norm": 0.402015745639801, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2357, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.16627215518734484, | |
| "grad_norm": 0.3373963236808777, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2305, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.1671958893828301, | |
| "grad_norm": 0.4744202792644501, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2385, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.16811962357831534, | |
| "grad_norm": 0.5223276019096375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2349, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.16904335777380058, | |
| "grad_norm": 0.4173218607902527, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2352, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.16996709196928583, | |
| "grad_norm": 0.4088299870491028, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2391, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.17089082616477108, | |
| "grad_norm": 0.6284809708595276, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2299, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.17181456036025633, | |
| "grad_norm": 0.30865636467933655, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2354, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.17273829455574158, | |
| "grad_norm": 0.44444739818573, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2317, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.17366202875122683, | |
| "grad_norm": 0.3890416622161865, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2302, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.17458576294671208, | |
| "grad_norm": 0.38967961072921753, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2407, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.17550949714219732, | |
| "grad_norm": 0.4953692555427551, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2347, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.17643323133768257, | |
| "grad_norm": 0.38823607563972473, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5787, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.17735696553316782, | |
| "grad_norm": 0.4435032606124878, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2344, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.17828069972865307, | |
| "grad_norm": 0.3668000400066376, | |
| "learning_rate": 0.0001, | |
| "loss": 0.24, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.17920443392413832, | |
| "grad_norm": 0.3940606713294983, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2363, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.18012816811962357, | |
| "grad_norm": 0.44965189695358276, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2379, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.18105190231510881, | |
| "grad_norm": 0.4830586612224579, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2368, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.18197563651059406, | |
| "grad_norm": 0.4827195107936859, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2347, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.18289937070607934, | |
| "grad_norm": 0.4296880066394806, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2333, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.1838231049015646, | |
| "grad_norm": 0.35778701305389404, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2373, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.18474683909704984, | |
| "grad_norm": 0.3922187089920044, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2369, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.18567057329253508, | |
| "grad_norm": 0.4088379740715027, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2357, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.18659430748802033, | |
| "grad_norm": 0.5719146728515625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2296, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.18751804168350558, | |
| "grad_norm": 0.6169770956039429, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2384, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.18844177587899083, | |
| "grad_norm": 0.36770936846733093, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5772, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.18936551007447608, | |
| "grad_norm": 0.43697449564933777, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2385, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.19028924426996133, | |
| "grad_norm": 0.4228357672691345, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2353, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.19121297846544658, | |
| "grad_norm": 0.42827463150024414, | |
| "learning_rate": 0.0001, | |
| "loss": 0.232, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.19213671266093182, | |
| "grad_norm": 0.4047262966632843, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2294, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.19306044685641707, | |
| "grad_norm": 0.5330992937088013, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2332, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.19398418105190232, | |
| "grad_norm": 0.47420746088027954, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2331, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.19490791524738757, | |
| "grad_norm": 0.3812981843948364, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2362, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.19583164944287282, | |
| "grad_norm": 0.3819383978843689, | |
| "learning_rate": 0.0001, | |
| "loss": 0.231, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.19675538363835807, | |
| "grad_norm": 0.30570483207702637, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2268, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.19767911783384332, | |
| "grad_norm": 0.44649067521095276, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5768, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.19860285202932856, | |
| "grad_norm": 0.3471992015838623, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2314, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.1995265862248138, | |
| "grad_norm": 0.38094842433929443, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2293, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.20045032042029906, | |
| "grad_norm": 0.3608592450618744, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2284, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.2013740546157843, | |
| "grad_norm": 0.37288790941238403, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2745, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.20229778881126956, | |
| "grad_norm": 0.3077997863292694, | |
| "learning_rate": 0.0001, | |
| "loss": 0.233, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.2032215230067548, | |
| "grad_norm": 0.4390782415866852, | |
| "learning_rate": 0.0001, | |
| "loss": 0.233, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.20414525720224005, | |
| "grad_norm": 0.450605183839798, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2307, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.2050689913977253, | |
| "grad_norm": 0.43789491057395935, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2288, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.20599272559321055, | |
| "grad_norm": 0.4309540092945099, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2289, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.2069164597886958, | |
| "grad_norm": 0.973432183265686, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2342, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.20784019398418105, | |
| "grad_norm": 0.6154965162277222, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2324, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.2087639281796663, | |
| "grad_norm": 0.5133722424507141, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2317, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.20968766237515155, | |
| "grad_norm": 0.5500303506851196, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2325, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.2106113965706368, | |
| "grad_norm": 0.4004790484905243, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2348, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.21153513076612204, | |
| "grad_norm": 0.4432072341442108, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2298, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.2124588649616073, | |
| "grad_norm": 0.32310307025909424, | |
| "learning_rate": 0.0001, | |
| "loss": 0.227, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.21338259915709254, | |
| "grad_norm": 0.4177485406398773, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2262, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.2143063333525778, | |
| "grad_norm": 0.46155500411987305, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2293, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.21523006754806304, | |
| "grad_norm": 0.5199536085128784, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2318, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.21615380174354829, | |
| "grad_norm": 0.45795243978500366, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2322, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.21707753593903353, | |
| "grad_norm": 0.36111390590667725, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2308, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.21800127013451878, | |
| "grad_norm": 0.3496291935443878, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2304, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.21892500433000403, | |
| "grad_norm": 0.38132452964782715, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2353, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.21984873852548928, | |
| "grad_norm": 0.3942013084888458, | |
| "learning_rate": 0.0001, | |
| "loss": 0.231, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.22077247272097453, | |
| "grad_norm": 0.45268431305885315, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2311, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.22169620691645978, | |
| "grad_norm": 0.5048559308052063, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2339, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.22261994111194502, | |
| "grad_norm": 0.4176543354988098, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2324, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.2235436753074303, | |
| "grad_norm": 0.49830180406570435, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2255, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.22446740950291555, | |
| "grad_norm": 0.45104148983955383, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2305, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.2253911436984008, | |
| "grad_norm": 0.5595024824142456, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2303, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.22631487789388605, | |
| "grad_norm": 0.3488145172595978, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2298, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.2272386120893713, | |
| "grad_norm": 0.4085540473461151, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2274, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.22816234628485654, | |
| "grad_norm": 0.3607054054737091, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2298, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.2290860804803418, | |
| "grad_norm": 0.46505624055862427, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2287, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.23000981467582704, | |
| "grad_norm": 0.3342452645301819, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2284, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.2309335488713123, | |
| "grad_norm": 0.383774071931839, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2323, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.23185728306679754, | |
| "grad_norm": 0.48557889461517334, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2374, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.23278101726228279, | |
| "grad_norm": 0.478712260723114, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2357, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.23370475145776803, | |
| "grad_norm": 0.45071640610694885, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2325, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.23462848565325328, | |
| "grad_norm": 0.3682229816913605, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2281, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.23555221984873853, | |
| "grad_norm": 0.4806053042411804, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2289, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.23647595404422378, | |
| "grad_norm": 0.44345441460609436, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2339, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.23739968823970903, | |
| "grad_norm": 0.38094815611839294, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2303, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.23832342243519428, | |
| "grad_norm": 0.4847588539123535, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2267, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.23924715663067952, | |
| "grad_norm": 0.4299144148826599, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2285, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.24017089082616477, | |
| "grad_norm": 0.3692493736743927, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2292, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.24109462502165002, | |
| "grad_norm": 0.4436064660549164, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2259, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.24201835921713527, | |
| "grad_norm": 0.5403839349746704, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5703, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.24294209341262052, | |
| "grad_norm": 0.39412447810173035, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2295, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.24386582760810577, | |
| "grad_norm": 0.45101356506347656, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2322, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.24478956180359102, | |
| "grad_norm": 0.32571062445640564, | |
| "learning_rate": 0.0001, | |
| "loss": 0.228, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.24571329599907626, | |
| "grad_norm": 0.39001643657684326, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2283, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.2466370301945615, | |
| "grad_norm": 0.396857351064682, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2305, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.24756076439004676, | |
| "grad_norm": 0.40750470757484436, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2287, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.248484498585532, | |
| "grad_norm": 0.330258846282959, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2237, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.24940823278101726, | |
| "grad_norm": 0.4315931499004364, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2278, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.2503319669765025, | |
| "grad_norm": 0.32770416140556335, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2297, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.25125570117198776, | |
| "grad_norm": 0.6947807669639587, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2345, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.252179435367473, | |
| "grad_norm": 1.181774377822876, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5722, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.25310316956295825, | |
| "grad_norm": 0.4993976056575775, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2307, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.2540269037584435, | |
| "grad_norm": 0.3862954080104828, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2303, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.25495063795392875, | |
| "grad_norm": 0.4661349356174469, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5788, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.255874372149414, | |
| "grad_norm": 0.4150824248790741, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2263, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.25679810634489925, | |
| "grad_norm": 0.33502984046936035, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2256, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.2577218405403845, | |
| "grad_norm": 0.5108284950256348, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2303, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.25864557473586974, | |
| "grad_norm": 0.3284357488155365, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2291, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.259569308931355, | |
| "grad_norm": 0.40002018213272095, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2252, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.26049304312684024, | |
| "grad_norm": 0.4224354922771454, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2278, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.2614167773223255, | |
| "grad_norm": 0.4732987582683563, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2276, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.26234051151781074, | |
| "grad_norm": 0.3268199563026428, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2296, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.263264245713296, | |
| "grad_norm": 0.4282330274581909, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2299, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.26418797990878123, | |
| "grad_norm": 0.3902899920940399, | |
| "learning_rate": 0.0001, | |
| "loss": 0.228, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.2651117141042665, | |
| "grad_norm": 0.2985314428806305, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2234, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.26603544829975173, | |
| "grad_norm": 0.6039664149284363, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2293, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.266959182495237, | |
| "grad_norm": 0.5956476926803589, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2246, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.26788291669072223, | |
| "grad_norm": 0.48536789417266846, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2287, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.2688066508862075, | |
| "grad_norm": 0.5488937497138977, | |
| "learning_rate": 0.0001, | |
| "loss": 0.228, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.2697303850816927, | |
| "grad_norm": 0.34997400641441345, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2263, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.270654119277178, | |
| "grad_norm": 0.44494467973709106, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2248, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.2715778534726632, | |
| "grad_norm": 0.4488919973373413, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2205, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.27250158766814847, | |
| "grad_norm": 0.4541712701320648, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2259, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.2734253218636337, | |
| "grad_norm": 0.5734704732894897, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5651, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.27434905605911897, | |
| "grad_norm": 0.5139559507369995, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2234, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.2752727902546042, | |
| "grad_norm": 0.3888668119907379, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2268, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.27619652445008946, | |
| "grad_norm": 0.4781576991081238, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2252, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.2771202586455747, | |
| "grad_norm": 0.507675051689148, | |
| "learning_rate": 0.0001, | |
| "loss": 0.228, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.27804399284105996, | |
| "grad_norm": 0.5105670690536499, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2342, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.2789677270365452, | |
| "grad_norm": 0.39887920022010803, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2296, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.27989146123203046, | |
| "grad_norm": 0.33971166610717773, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2302, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.2808151954275157, | |
| "grad_norm": 0.34017518162727356, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2245, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.28173892962300096, | |
| "grad_norm": 0.3974183201789856, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2254, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.2826626638184862, | |
| "grad_norm": 0.4833984673023224, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2276, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.28358639801397145, | |
| "grad_norm": 0.5275608897209167, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2261, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.28451013220945676, | |
| "grad_norm": 0.384132981300354, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2273, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.285433866404942, | |
| "grad_norm": 0.506726086139679, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2257, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.28635760060042725, | |
| "grad_norm": 0.3069717288017273, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2245, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.2872813347959125, | |
| "grad_norm": 0.3742898404598236, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2239, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.28820506899139775, | |
| "grad_norm": 0.4463520348072052, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2247, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.289128803186883, | |
| "grad_norm": 0.4115908443927765, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2235, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.29005253738236825, | |
| "grad_norm": 0.3671577274799347, | |
| "learning_rate": 0.0001, | |
| "loss": 0.222, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.2909762715778535, | |
| "grad_norm": 0.39772334694862366, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2268, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.29190000577333874, | |
| "grad_norm": 0.33106935024261475, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2234, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.292823739968824, | |
| "grad_norm": 0.45472344756126404, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5588, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.29374747416430924, | |
| "grad_norm": 0.4509011507034302, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2254, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.2946712083597945, | |
| "grad_norm": 0.35373446345329285, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2257, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.29559494255527974, | |
| "grad_norm": 0.3876577615737915, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2211, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.296518676750765, | |
| "grad_norm": 0.37119758129119873, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2192, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.29744241094625024, | |
| "grad_norm": 0.3517870008945465, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2245, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.2983661451417355, | |
| "grad_norm": 0.33061233162879944, | |
| "learning_rate": 0.0001, | |
| "loss": 0.219, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.29928987933722073, | |
| "grad_norm": 0.4985819458961487, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2272, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.300213613532706, | |
| "grad_norm": 0.33760136365890503, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2226, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.30113734772819123, | |
| "grad_norm": 0.30659353733062744, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2203, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.3020610819236765, | |
| "grad_norm": 0.37383535504341125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2216, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.3029848161191617, | |
| "grad_norm": 0.3123575747013092, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2212, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.303908550314647, | |
| "grad_norm": 0.41256213188171387, | |
| "learning_rate": 0.0001, | |
| "loss": 0.218, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.3048322845101322, | |
| "grad_norm": 0.41308340430259705, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2234, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.30575601870561747, | |
| "grad_norm": 0.42367058992385864, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2268, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.3066797529011027, | |
| "grad_norm": 0.4047527611255646, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2224, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.30760348709658797, | |
| "grad_norm": 0.4354739487171173, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2221, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.3085272212920732, | |
| "grad_norm": 0.4134977161884308, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2238, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.30945095548755847, | |
| "grad_norm": 0.3472785949707031, | |
| "learning_rate": 0.0001, | |
| "loss": 0.221, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.3103746896830437, | |
| "grad_norm": 0.5074013471603394, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2246, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.31129842387852896, | |
| "grad_norm": 0.4216979146003723, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2172, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.3122221580740142, | |
| "grad_norm": 0.5406137108802795, | |
| "learning_rate": 0.0001, | |
| "loss": 0.222, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.31314589226949946, | |
| "grad_norm": 0.39189383387565613, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2202, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.3140696264649847, | |
| "grad_norm": 0.32048338651657104, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2193, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.31499336066046996, | |
| "grad_norm": 0.39686036109924316, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2233, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.3159170948559552, | |
| "grad_norm": 0.3520762622356415, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2158, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.31684082905144045, | |
| "grad_norm": 0.3739902675151825, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2247, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.3177645632469257, | |
| "grad_norm": 0.42624348402023315, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5581, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.31868829744241095, | |
| "grad_norm": 0.4178403615951538, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2222, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.3196120316378962, | |
| "grad_norm": 0.4947364330291748, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2223, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.32053576583338145, | |
| "grad_norm": 0.3784151077270508, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2164, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.3214595000288667, | |
| "grad_norm": 0.46106189489364624, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2169, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.32238323422435194, | |
| "grad_norm": 0.29418033361434937, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2252, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.3233069684198372, | |
| "grad_norm": 0.3958749771118164, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2205, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.32423070261532244, | |
| "grad_norm": 0.3951239287853241, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2218, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.3251544368108077, | |
| "grad_norm": 0.32840287685394287, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2204, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.32607817100629294, | |
| "grad_norm": 0.3020164370536804, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2235, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.3270019052017782, | |
| "grad_norm": 0.4775121510028839, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2182, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.32792563939726344, | |
| "grad_norm": 0.3426738381385803, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2239, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.3288493735927487, | |
| "grad_norm": 0.37116217613220215, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2226, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.32977310778823393, | |
| "grad_norm": 0.5717690587043762, | |
| "learning_rate": 0.0001, | |
| "loss": 0.219, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.3306968419837192, | |
| "grad_norm": 0.4740220010280609, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2201, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.33162057617920443, | |
| "grad_norm": 0.3198816776275635, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2214, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.3325443103746897, | |
| "grad_norm": 0.4375385046005249, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2253, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.3334680445701749, | |
| "grad_norm": 0.3628276586532593, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2181, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.3343917787656602, | |
| "grad_norm": 0.6822785139083862, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2222, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.3353155129611454, | |
| "grad_norm": 0.45200440287590027, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2233, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.33623924715663067, | |
| "grad_norm": 0.42117175459861755, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2223, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.3371629813521159, | |
| "grad_norm": 0.42544686794281006, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2242, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.33808671554760117, | |
| "grad_norm": 0.44248509407043457, | |
| "learning_rate": 0.0001, | |
| "loss": 0.219, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.3390104497430864, | |
| "grad_norm": 0.32826924324035645, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2185, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.33993418393857167, | |
| "grad_norm": 0.4132959842681885, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2235, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.3408579181340569, | |
| "grad_norm": 0.4707147181034088, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2227, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.34178165232954216, | |
| "grad_norm": 0.55455482006073, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2207, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.3427053865250274, | |
| "grad_norm": 0.41418227553367615, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5481, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.34362912072051266, | |
| "grad_norm": 0.40281161665916443, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2239, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.3445528549159979, | |
| "grad_norm": 0.41627874970436096, | |
| "learning_rate": 0.0001, | |
| "loss": 0.3852, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.34547658911148316, | |
| "grad_norm": 0.5185238122940063, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2265, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.3464003233069684, | |
| "grad_norm": 0.3757447302341461, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2225, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.34732405750245365, | |
| "grad_norm": 0.4384530484676361, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2177, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.3482477916979389, | |
| "grad_norm": 0.48497986793518066, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2138, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.34917152589342415, | |
| "grad_norm": 0.3871825933456421, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5491, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.3500952600889094, | |
| "grad_norm": 0.38259807229042053, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2219, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.35101899428439465, | |
| "grad_norm": 0.39300450682640076, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2206, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.3519427284798799, | |
| "grad_norm": 0.40899527072906494, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2176, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.35286646267536514, | |
| "grad_norm": 0.3768922686576843, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2241, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.3537901968708504, | |
| "grad_norm": 0.3415662944316864, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2201, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.35471393106633564, | |
| "grad_norm": 0.37897399067878723, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2166, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.3556376652618209, | |
| "grad_norm": 0.30442744493484497, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2182, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.35656139945730614, | |
| "grad_norm": 0.31671950221061707, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2149, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.3574851336527914, | |
| "grad_norm": 0.35188165307044983, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2172, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.35840886784827664, | |
| "grad_norm": 0.3252941071987152, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2139, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.3593326020437619, | |
| "grad_norm": 0.3823052644729614, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2195, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.36025633623924713, | |
| "grad_norm": 0.3905535340309143, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5448, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.3611800704347324, | |
| "grad_norm": 0.3888482451438904, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5429, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.36210380463021763, | |
| "grad_norm": 0.41537418961524963, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2155, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.3630275388257029, | |
| "grad_norm": 0.43847179412841797, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2152, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.3639512730211881, | |
| "grad_norm": 0.3908667266368866, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2209, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.3648750072166734, | |
| "grad_norm": 0.3972265124320984, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2201, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.3657987414121587, | |
| "grad_norm": 0.3925420343875885, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2146, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.3667224756076439, | |
| "grad_norm": 0.36612439155578613, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2159, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.3676462098031292, | |
| "grad_norm": 0.3378424346446991, | |
| "learning_rate": 0.0001, | |
| "loss": 0.214, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.3685699439986144, | |
| "grad_norm": 0.38814422488212585, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2161, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.3694936781940997, | |
| "grad_norm": 0.4463529884815216, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2145, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.3704174123895849, | |
| "grad_norm": 0.359531432390213, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2148, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.37134114658507017, | |
| "grad_norm": 0.43108657002449036, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2127, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.3722648807805554, | |
| "grad_norm": 0.355692982673645, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2189, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.37318861497604067, | |
| "grad_norm": 0.4168572425842285, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2219, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.3741123491715259, | |
| "grad_norm": 0.4103034734725952, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2246, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.37503608336701116, | |
| "grad_norm": 0.42007574439048767, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2151, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.3759598175624964, | |
| "grad_norm": 0.563727617263794, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2127, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.37688355175798166, | |
| "grad_norm": 0.3448803424835205, | |
| "learning_rate": 0.0001, | |
| "loss": 0.215, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.3778072859534669, | |
| "grad_norm": 0.35881146788597107, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2153, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.37873102014895216, | |
| "grad_norm": 0.42269548773765564, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2166, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.3796547543444374, | |
| "grad_norm": 0.3665271997451782, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2129, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.38057848853992265, | |
| "grad_norm": 0.3933054208755493, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2154, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.3815022227354079, | |
| "grad_norm": 0.348173588514328, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2143, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.38242595693089315, | |
| "grad_norm": 0.321692556142807, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2121, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.3833496911263784, | |
| "grad_norm": 0.37629473209381104, | |
| "learning_rate": 0.0001, | |
| "loss": 0.546, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.38427342532186365, | |
| "grad_norm": 0.5757617950439453, | |
| "learning_rate": 0.0001, | |
| "loss": 0.221, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.3851971595173489, | |
| "grad_norm": 0.43266335129737854, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2153, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.38612089371283415, | |
| "grad_norm": 0.4368360936641693, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2228, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.3870446279083194, | |
| "grad_norm": 0.29642513394355774, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2138, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.38796836210380464, | |
| "grad_norm": 0.3685188889503479, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2155, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.3888920962992899, | |
| "grad_norm": 0.41517990827560425, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5366, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.38981583049477514, | |
| "grad_norm": 0.5697425603866577, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2142, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.3907395646902604, | |
| "grad_norm": 0.29118719696998596, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2112, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.39166329888574564, | |
| "grad_norm": 0.39508700370788574, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2182, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.3925870330812309, | |
| "grad_norm": 0.374363511800766, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2132, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.39351076727671613, | |
| "grad_norm": 0.42270106077194214, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2158, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.3944345014722014, | |
| "grad_norm": 0.44743838906288147, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2196, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.39535823566768663, | |
| "grad_norm": 0.39016926288604736, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2142, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.3962819698631719, | |
| "grad_norm": 0.4054979681968689, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2097, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.3972057040586571, | |
| "grad_norm": 0.39118272066116333, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2151, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.3981294382541424, | |
| "grad_norm": 0.47753405570983887, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2162, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.3990531724496276, | |
| "grad_norm": 0.4376755356788635, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2127, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.3999769066451129, | |
| "grad_norm": 0.35959693789482117, | |
| "learning_rate": 0.0001, | |
| "loss": 0.213, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.4009006408405981, | |
| "grad_norm": 0.36669230461120605, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5367, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.40182437503608337, | |
| "grad_norm": 0.37655845284461975, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5361, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.4027481092315686, | |
| "grad_norm": 0.33536455035209656, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2155, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.40367184342705387, | |
| "grad_norm": 0.49370628595352173, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2119, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.4045955776225391, | |
| "grad_norm": 0.38602909445762634, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2163, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.40551931181802436, | |
| "grad_norm": 0.3964555859565735, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2142, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.4064430460135096, | |
| "grad_norm": 0.5216159820556641, | |
| "learning_rate": 0.0001, | |
| "loss": 0.215, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.40736678020899486, | |
| "grad_norm": 0.5131031274795532, | |
| "learning_rate": 0.0001, | |
| "loss": 0.215, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.4082905144044801, | |
| "grad_norm": 0.3819705545902252, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2218, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.40921424859996536, | |
| "grad_norm": 0.41002157330513, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2134, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.4101379827954506, | |
| "grad_norm": 0.3606174886226654, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2112, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.41106171699093585, | |
| "grad_norm": 0.4673618674278259, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2102, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.4119854511864211, | |
| "grad_norm": 0.3812722861766815, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2119, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.41290918538190635, | |
| "grad_norm": 0.3641897439956665, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2136, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.4138329195773916, | |
| "grad_norm": 0.5161442756652832, | |
| "learning_rate": 0.0001, | |
| "loss": 0.217, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.41475665377287685, | |
| "grad_norm": 0.4585149884223938, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2099, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.4156803879683621, | |
| "grad_norm": 0.468485563993454, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2145, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.41660412216384735, | |
| "grad_norm": 0.3875979781150818, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2147, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.4175278563593326, | |
| "grad_norm": 0.5004295110702515, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2159, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.41845159055481784, | |
| "grad_norm": 0.45981934666633606, | |
| "learning_rate": 0.0001, | |
| "loss": 0.216, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.4193753247503031, | |
| "grad_norm": 0.40167751908302307, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2152, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.42029905894578834, | |
| "grad_norm": 0.41078290343284607, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2169, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.4212227931412736, | |
| "grad_norm": 0.4501461386680603, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5322, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.42214652733675884, | |
| "grad_norm": 0.3375072777271271, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2088, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.4230702615322441, | |
| "grad_norm": 0.43444013595581055, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2103, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.42399399572772933, | |
| "grad_norm": 0.6476168632507324, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2138, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.4249177299232146, | |
| "grad_norm": 0.4376696050167084, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2094, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.42584146411869983, | |
| "grad_norm": 0.37766605615615845, | |
| "learning_rate": 0.0001, | |
| "loss": 0.21, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.4267651983141851, | |
| "grad_norm": 0.551823079586029, | |
| "learning_rate": 0.0001, | |
| "loss": 0.211, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.4276889325096703, | |
| "grad_norm": 0.37839171290397644, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2121, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.4286126667051556, | |
| "grad_norm": 0.4079412817955017, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2092, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.4295364009006408, | |
| "grad_norm": 0.4214341342449188, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2123, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.4304601350961261, | |
| "grad_norm": 0.37837374210357666, | |
| "learning_rate": 0.0001, | |
| "loss": 0.213, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.4313838692916113, | |
| "grad_norm": 0.4620187282562256, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2123, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.43230760348709657, | |
| "grad_norm": 0.3782137334346771, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2108, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.4332313376825818, | |
| "grad_norm": 0.43671566247940063, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2137, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.43415507187806707, | |
| "grad_norm": 0.4650036096572876, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2123, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.4350788060735523, | |
| "grad_norm": 0.3813154697418213, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2112, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.43600254026903756, | |
| "grad_norm": 0.40941861271858215, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5279, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.4369262744645228, | |
| "grad_norm": 0.5895905494689941, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2112, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.43785000866000806, | |
| "grad_norm": 0.4762829542160034, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2084, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.4387737428554933, | |
| "grad_norm": 0.4353892207145691, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2056, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.43969747705097856, | |
| "grad_norm": 0.3636105954647064, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2092, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.4406212112464638, | |
| "grad_norm": 0.3632349371910095, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2128, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.44154494544194905, | |
| "grad_norm": 0.3997797966003418, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2109, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.4424686796374343, | |
| "grad_norm": 0.3572961688041687, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2095, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.44339241383291955, | |
| "grad_norm": 0.5888026356697083, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2071, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.4443161480284048, | |
| "grad_norm": 0.5110175013542175, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2123, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.44523988222389005, | |
| "grad_norm": 0.5052554607391357, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2074, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.4461636164193753, | |
| "grad_norm": 0.3634883463382721, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2127, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.4470873506148606, | |
| "grad_norm": 0.5420510172843933, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2067, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.44801108481034585, | |
| "grad_norm": 0.41634082794189453, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2066, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.4489348190058311, | |
| "grad_norm": 0.42388445138931274, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2105, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.44985855320131635, | |
| "grad_norm": 0.3504672944545746, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2138, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.4507822873968016, | |
| "grad_norm": 0.4212481677532196, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2121, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.45170602159228684, | |
| "grad_norm": 0.404214471578598, | |
| "learning_rate": 0.0001, | |
| "loss": 0.211, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.4526297557877721, | |
| "grad_norm": 0.33001455664634705, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2117, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.45355348998325734, | |
| "grad_norm": 0.44681602716445923, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2065, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.4544772241787426, | |
| "grad_norm": 0.38129082322120667, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2127, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.45540095837422784, | |
| "grad_norm": 0.6335559487342834, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2096, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.4563246925697131, | |
| "grad_norm": 0.34582656621932983, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2081, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.45724842676519833, | |
| "grad_norm": 0.5229650139808655, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2115, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.4581721609606836, | |
| "grad_norm": 0.34041497111320496, | |
| "learning_rate": 0.0001, | |
| "loss": 0.525, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.45909589515616883, | |
| "grad_norm": 0.4637944996356964, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2093, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.4600196293516541, | |
| "grad_norm": 0.42019012570381165, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2041, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.46094336354713933, | |
| "grad_norm": 0.49644148349761963, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2105, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.4618670977426246, | |
| "grad_norm": 0.44564756751060486, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2069, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.4627908319381098, | |
| "grad_norm": 0.48275625705718994, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2101, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.4637145661335951, | |
| "grad_norm": 0.37899744510650635, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2057, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.4646383003290803, | |
| "grad_norm": 0.3303026556968689, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2036, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.46556203452456557, | |
| "grad_norm": 0.369429349899292, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2051, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.4664857687200508, | |
| "grad_norm": 0.5387573838233948, | |
| "learning_rate": 0.0001, | |
| "loss": 0.207, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.46740950291553607, | |
| "grad_norm": 0.4687996804714203, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2093, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.4683332371110213, | |
| "grad_norm": 0.4017786383628845, | |
| "learning_rate": 0.0001, | |
| "loss": 0.208, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.46925697130650657, | |
| "grad_norm": 0.41899704933166504, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2074, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.4701807055019918, | |
| "grad_norm": 0.3573046326637268, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2097, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.47110443969747706, | |
| "grad_norm": 0.7022538185119629, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2051, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.4720281738929623, | |
| "grad_norm": 0.403980016708374, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2084, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.47295190808844756, | |
| "grad_norm": 0.3694177567958832, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2061, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.4738756422839328, | |
| "grad_norm": 0.32916390895843506, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2038, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.47479937647941806, | |
| "grad_norm": 0.36460039019584656, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2038, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.4757231106749033, | |
| "grad_norm": 0.33318039774894714, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2057, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.47664684487038855, | |
| "grad_norm": 0.33933624625205994, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2043, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.4775705790658738, | |
| "grad_norm": 0.4552598297595978, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2079, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 0.47849431326135905, | |
| "grad_norm": 0.3418557047843933, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2051, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.4794180474568443, | |
| "grad_norm": 0.4285419285297394, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2095, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.48034178165232955, | |
| "grad_norm": 0.48558488488197327, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2038, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.4812655158478148, | |
| "grad_norm": 0.3706708550453186, | |
| "learning_rate": 0.0001, | |
| "loss": 0.209, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 0.48218925004330004, | |
| "grad_norm": 0.39956068992614746, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2074, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.4831129842387853, | |
| "grad_norm": 0.5333150625228882, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2039, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 0.48403671843427054, | |
| "grad_norm": 0.6540710926055908, | |
| "learning_rate": 0.0001, | |
| "loss": 0.207, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.4849604526297558, | |
| "grad_norm": 0.47686120867729187, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2116, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.48588418682524104, | |
| "grad_norm": 0.3650057911872864, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2099, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.4868079210207263, | |
| "grad_norm": 0.37506401538848877, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2096, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 0.48773165521621153, | |
| "grad_norm": 0.4392995536327362, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2047, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.4886553894116968, | |
| "grad_norm": 0.3806658089160919, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2098, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 0.48957912360718203, | |
| "grad_norm": 0.3960641324520111, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2061, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.4905028578026673, | |
| "grad_norm": 0.3831325173377991, | |
| "learning_rate": 0.0001, | |
| "loss": 0.203, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.49142659199815253, | |
| "grad_norm": 0.46465015411376953, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2048, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.4923503261936378, | |
| "grad_norm": 0.426073282957077, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1992, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 0.493274060389123, | |
| "grad_norm": 0.3883862793445587, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2063, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.4941977945846083, | |
| "grad_norm": 0.5800668597221375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2054, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.4951215287800935, | |
| "grad_norm": 0.4094981551170349, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2052, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 0.49604526297557877, | |
| "grad_norm": 0.4575849771499634, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2033, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.496968997171064, | |
| "grad_norm": 0.44456157088279724, | |
| "learning_rate": 0.0001, | |
| "loss": 0.208, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 0.49789273136654927, | |
| "grad_norm": 0.47655409574508667, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2085, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 0.4988164655620345, | |
| "grad_norm": 0.5304370522499084, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2029, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.49974019975751977, | |
| "grad_norm": 0.44756844639778137, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2039, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 0.500663933953005, | |
| "grad_norm": 0.6823918223381042, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2025, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 0.5015876681484903, | |
| "grad_norm": 0.3913891613483429, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2057, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 0.5025114023439755, | |
| "grad_norm": 0.40689653158187866, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2062, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 0.5034351365394608, | |
| "grad_norm": 0.6231473684310913, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2039, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.504358870734946, | |
| "grad_norm": 0.3745615780353546, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2033, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 0.5052826049304313, | |
| "grad_norm": 0.42019689083099365, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2066, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 0.5062063391259165, | |
| "grad_norm": 0.37668848037719727, | |
| "learning_rate": 0.0001, | |
| "loss": 0.206, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 0.5071300733214018, | |
| "grad_norm": 0.4221283197402954, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2068, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 0.508053807516887, | |
| "grad_norm": 0.3790681064128876, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2015, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.5089775417123723, | |
| "grad_norm": 0.3551619052886963, | |
| "learning_rate": 0.0001, | |
| "loss": 0.203, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 0.5099012759078575, | |
| "grad_norm": 0.45014235377311707, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5225, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 0.5108250101033428, | |
| "grad_norm": 0.6005759835243225, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2062, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 0.511748744298828, | |
| "grad_norm": 0.43494895100593567, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2062, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 0.5126724784943133, | |
| "grad_norm": 0.4632158577442169, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2018, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.5135962126897985, | |
| "grad_norm": 0.4338931739330292, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2045, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 0.5145199468852838, | |
| "grad_norm": 0.39191317558288574, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5165, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 0.515443681080769, | |
| "grad_norm": 0.37209969758987427, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2035, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 0.5163674152762543, | |
| "grad_norm": 0.4409072697162628, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2026, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 0.5172911494717395, | |
| "grad_norm": 0.4884057641029358, | |
| "learning_rate": 0.0001, | |
| "loss": 0.208, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.5182148836672248, | |
| "grad_norm": 0.6604700684547424, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2083, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 0.51913861786271, | |
| "grad_norm": 0.45961496233940125, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2062, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 0.5200623520581953, | |
| "grad_norm": 0.5905351638793945, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2033, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 0.5209860862536805, | |
| "grad_norm": 0.4417837858200073, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2014, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 0.5219098204491658, | |
| "grad_norm": 0.4360920488834381, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2066, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.522833554644651, | |
| "grad_norm": 0.39159780740737915, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2047, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 0.5237572888401363, | |
| "grad_norm": 0.49577730894088745, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2026, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 0.5246810230356215, | |
| "grad_norm": 0.4545651376247406, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1994, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 0.5256047572311068, | |
| "grad_norm": 0.44074729084968567, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2049, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 0.526528491426592, | |
| "grad_norm": 0.3732854425907135, | |
| "learning_rate": 0.0001, | |
| "loss": 0.207, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.5274522256220773, | |
| "grad_norm": 0.3655250072479248, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2026, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 0.5283759598175625, | |
| "grad_norm": 1.0543566942214966, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2007, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 0.5292996940130478, | |
| "grad_norm": 0.63393634557724, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2004, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 0.530223428208533, | |
| "grad_norm": 0.5240088701248169, | |
| "learning_rate": 0.0001, | |
| "loss": 0.206, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 0.5311471624040183, | |
| "grad_norm": 0.3218691349029541, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2025, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.5320708965995035, | |
| "grad_norm": 0.4333510398864746, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2009, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.5329946307949888, | |
| "grad_norm": 0.3151180148124695, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2003, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 0.533918364990474, | |
| "grad_norm": 0.5382466912269592, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2021, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 0.5348420991859593, | |
| "grad_norm": 0.44026586413383484, | |
| "learning_rate": 0.0001, | |
| "loss": 0.204, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 0.5357658333814445, | |
| "grad_norm": 0.3506523072719574, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2034, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.5366895675769298, | |
| "grad_norm": 0.5124208331108093, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2043, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 0.537613301772415, | |
| "grad_norm": 0.428034245967865, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1989, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 0.5385370359679003, | |
| "grad_norm": 0.38046014308929443, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1976, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 0.5394607701633855, | |
| "grad_norm": 0.37654662132263184, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2018, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 0.5403845043588708, | |
| "grad_norm": 0.38294240832328796, | |
| "learning_rate": 0.0001, | |
| "loss": 0.205, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.541308238554356, | |
| "grad_norm": 0.5114345550537109, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2013, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 0.5422319727498413, | |
| "grad_norm": 0.39832645654678345, | |
| "learning_rate": 0.0001, | |
| "loss": 0.199, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 0.5431557069453264, | |
| "grad_norm": 0.4142138957977295, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1982, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 0.5440794411408117, | |
| "grad_norm": 0.5544411540031433, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2012, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 0.5450031753362969, | |
| "grad_norm": 0.4589371383190155, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1977, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.5459269095317822, | |
| "grad_norm": 0.5622234344482422, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1984, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 0.5468506437272674, | |
| "grad_norm": 0.4233631193637848, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1997, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 0.5477743779227527, | |
| "grad_norm": 0.7320846915245056, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1991, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 0.5486981121182379, | |
| "grad_norm": 0.38207074999809265, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1989, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 0.5496218463137232, | |
| "grad_norm": 0.35408392548561096, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1984, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.5505455805092084, | |
| "grad_norm": 0.5252432227134705, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2022, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 0.5514693147046937, | |
| "grad_norm": 0.536348819732666, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1986, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 0.5523930489001789, | |
| "grad_norm": 0.45002058148384094, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1945, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 0.5533167830956642, | |
| "grad_norm": 0.8043459057807922, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2021, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 0.5542405172911494, | |
| "grad_norm": 0.3983522355556488, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1972, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.5551642514866347, | |
| "grad_norm": 0.7973201274871826, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1969, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 0.5560879856821199, | |
| "grad_norm": 0.4090383052825928, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1988, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 0.5570117198776052, | |
| "grad_norm": 0.4011801779270172, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1974, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 0.5579354540730904, | |
| "grad_norm": 0.34157446026802063, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1958, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 0.5588591882685757, | |
| "grad_norm": 0.4531498849391937, | |
| "learning_rate": 0.0001, | |
| "loss": 0.194, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.5597829224640609, | |
| "grad_norm": 0.3886425197124481, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1962, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 0.5607066566595462, | |
| "grad_norm": 0.6903582811355591, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1998, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 0.5616303908550314, | |
| "grad_norm": 0.4828319251537323, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1989, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 0.5625541250505167, | |
| "grad_norm": 0.3676108717918396, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1962, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 0.5634778592460019, | |
| "grad_norm": 0.36137014627456665, | |
| "learning_rate": 0.0001, | |
| "loss": 0.198, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.5644015934414872, | |
| "grad_norm": 0.523659884929657, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1945, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 0.5653253276369724, | |
| "grad_norm": 0.3308408260345459, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1973, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 0.5662490618324577, | |
| "grad_norm": 0.39561885595321655, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2012, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 0.5671727960279429, | |
| "grad_norm": 0.4854455590248108, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2025, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 0.5680965302234282, | |
| "grad_norm": 0.4626583456993103, | |
| "learning_rate": 0.0001, | |
| "loss": 0.203, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.5690202644189135, | |
| "grad_norm": 0.36548173427581787, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2001, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 0.5699439986143987, | |
| "grad_norm": 0.4229353070259094, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1982, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 0.570867732809884, | |
| "grad_norm": 0.3363434076309204, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1956, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 0.5717914670053692, | |
| "grad_norm": 0.6675479412078857, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1968, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 0.5727152012008545, | |
| "grad_norm": 0.4045729339122772, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1976, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.5736389353963397, | |
| "grad_norm": 0.6667457222938538, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1973, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 0.574562669591825, | |
| "grad_norm": 0.3881010413169861, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5135, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 0.5754864037873102, | |
| "grad_norm": 0.4003308415412903, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1966, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 0.5764101379827955, | |
| "grad_norm": 0.3514181971549988, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1946, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 0.5773338721782807, | |
| "grad_norm": 0.46444565057754517, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1967, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.578257606373766, | |
| "grad_norm": 0.4301440715789795, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1975, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 0.5791813405692512, | |
| "grad_norm": 0.4304162561893463, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1946, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 0.5801050747647365, | |
| "grad_norm": 0.3856479525566101, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1944, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 0.5810288089602217, | |
| "grad_norm": 0.4279865324497223, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1926, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 0.581952543155707, | |
| "grad_norm": 0.44592881202697754, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1936, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.5828762773511922, | |
| "grad_norm": 0.43160223960876465, | |
| "learning_rate": 0.0001, | |
| "loss": 0.193, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 0.5838000115466775, | |
| "grad_norm": 0.4333481192588806, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1957, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 0.5847237457421627, | |
| "grad_norm": 0.45316243171691895, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1967, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 0.585647479937648, | |
| "grad_norm": 0.2942434251308441, | |
| "learning_rate": 0.0001, | |
| "loss": 0.197, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 0.5865712141331332, | |
| "grad_norm": 0.49979010224342346, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1925, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.5874949483286185, | |
| "grad_norm": 0.4264412522315979, | |
| "learning_rate": 0.0001, | |
| "loss": 0.196, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 0.5884186825241037, | |
| "grad_norm": 0.38610175251960754, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1928, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 0.589342416719589, | |
| "grad_norm": 0.6601687669754028, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1967, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 0.5902661509150742, | |
| "grad_norm": 0.34420180320739746, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1993, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 0.5911898851105595, | |
| "grad_norm": 0.4261429011821747, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1932, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.5921136193060447, | |
| "grad_norm": 0.3757234513759613, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1971, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 0.59303735350153, | |
| "grad_norm": 0.3245389461517334, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1957, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 0.5939610876970152, | |
| "grad_norm": 0.4480787515640259, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1968, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 0.5948848218925005, | |
| "grad_norm": 0.3903449475765228, | |
| "learning_rate": 0.0001, | |
| "loss": 0.197, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 0.5958085560879857, | |
| "grad_norm": 0.38698723912239075, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4993, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.596732290283471, | |
| "grad_norm": 0.37320080399513245, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1903, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 0.5976560244789562, | |
| "grad_norm": 0.40918853878974915, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1931, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 0.5985797586744415, | |
| "grad_norm": 0.5983522534370422, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1925, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 0.5995034928699267, | |
| "grad_norm": 0.422124981880188, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1957, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 0.600427227065412, | |
| "grad_norm": 0.549231767654419, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1961, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.6013509612608972, | |
| "grad_norm": 0.47191357612609863, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1945, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 0.6022746954563825, | |
| "grad_norm": 0.40886831283569336, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1931, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 0.6031984296518677, | |
| "grad_norm": 0.5115428566932678, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1972, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 0.604122163847353, | |
| "grad_norm": 0.44815415143966675, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1937, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 0.6050458980428381, | |
| "grad_norm": 0.46659350395202637, | |
| "learning_rate": 0.0001, | |
| "loss": 0.195, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 0.6059696322383235, | |
| "grad_norm": 0.38357239961624146, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1963, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 0.6068933664338086, | |
| "grad_norm": 0.5536012649536133, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1927, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 0.607817100629294, | |
| "grad_norm": 0.4727837145328522, | |
| "learning_rate": 0.0001, | |
| "loss": 0.193, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 0.6087408348247791, | |
| "grad_norm": 0.4362563490867615, | |
| "learning_rate": 0.0001, | |
| "loss": 0.194, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 0.6096645690202644, | |
| "grad_norm": 0.4345818758010864, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1954, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.6105883032157496, | |
| "grad_norm": 0.38879844546318054, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1923, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 0.6115120374112349, | |
| "grad_norm": 0.401516318321228, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1909, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 0.6124357716067201, | |
| "grad_norm": 0.36760014295578003, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1922, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 0.6133595058022054, | |
| "grad_norm": 0.37221571803092957, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1837, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 0.6142832399976906, | |
| "grad_norm": 0.3736136555671692, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1886, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 0.6152069741931759, | |
| "grad_norm": 0.4183785617351532, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1943, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 0.6161307083886611, | |
| "grad_norm": 0.5676806569099426, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1957, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 0.6170544425841464, | |
| "grad_norm": 0.4059545695781708, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1929, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 0.6179781767796316, | |
| "grad_norm": 0.3749110698699951, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1916, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 0.6189019109751169, | |
| "grad_norm": 0.34088799357414246, | |
| "learning_rate": 0.0001, | |
| "loss": 0.191, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.6198256451706021, | |
| "grad_norm": 0.3927398920059204, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1946, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 0.6207493793660874, | |
| "grad_norm": 0.4718213975429535, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1948, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 0.6216731135615726, | |
| "grad_norm": 0.4251835346221924, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5033, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 0.6225968477570579, | |
| "grad_norm": 0.39704465866088867, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1924, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 0.6235205819525431, | |
| "grad_norm": 0.4783962368965149, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1922, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.6244443161480284, | |
| "grad_norm": 0.4048413038253784, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1962, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 0.6253680503435136, | |
| "grad_norm": 0.3915632665157318, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1906, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 0.6262917845389989, | |
| "grad_norm": 0.39853039383888245, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5015, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 0.6272155187344841, | |
| "grad_norm": 0.40785282850265503, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1896, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 0.6281392529299694, | |
| "grad_norm": 0.6974129676818848, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4933, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.6290629871254546, | |
| "grad_norm": 0.3204994797706604, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1887, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 0.6299867213209399, | |
| "grad_norm": 0.8155962228775024, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1903, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 0.6309104555164251, | |
| "grad_norm": 0.8928115963935852, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1893, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 0.6318341897119104, | |
| "grad_norm": 0.5847592353820801, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1906, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 0.6327579239073956, | |
| "grad_norm": 0.4507416784763336, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1902, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 0.6336816581028809, | |
| "grad_norm": 0.6219226121902466, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1928, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 0.6346053922983661, | |
| "grad_norm": 0.4082391560077667, | |
| "learning_rate": 0.0001, | |
| "loss": 0.19, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 0.6355291264938514, | |
| "grad_norm": 0.48560988903045654, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2649, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 0.6364528606893366, | |
| "grad_norm": 0.8206206560134888, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1883, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 0.6373765948848219, | |
| "grad_norm": 0.4298798143863678, | |
| "learning_rate": 0.0001, | |
| "loss": 0.3829, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.6383003290803071, | |
| "grad_norm": 0.4346946179866791, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1901, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 0.6392240632757924, | |
| "grad_norm": 0.4600197672843933, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1916, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 0.6401477974712776, | |
| "grad_norm": 0.3899524211883545, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1898, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 0.6410715316667629, | |
| "grad_norm": 0.4084889888763428, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4899, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 0.6419952658622481, | |
| "grad_norm": 0.4361218512058258, | |
| "learning_rate": 0.0001, | |
| "loss": 0.188, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 0.6429190000577334, | |
| "grad_norm": 0.504452109336853, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1876, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 0.6438427342532186, | |
| "grad_norm": 0.4924841821193695, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1928, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 0.6447664684487039, | |
| "grad_norm": 0.47029873728752136, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1906, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 0.6456902026441891, | |
| "grad_norm": 0.3729427456855774, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1826, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 0.6466139368396744, | |
| "grad_norm": 0.3899003267288208, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1912, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.6475376710351596, | |
| "grad_norm": 0.41368311643600464, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1891, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 0.6484614052306449, | |
| "grad_norm": 0.5094907283782959, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1862, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 0.6493851394261301, | |
| "grad_norm": 0.442466676235199, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1896, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 0.6503088736216154, | |
| "grad_norm": 0.37364810705184937, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1873, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 0.6512326078171007, | |
| "grad_norm": 0.5380529761314392, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1879, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 0.6521563420125859, | |
| "grad_norm": 0.6053389310836792, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1859, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 0.6530800762080712, | |
| "grad_norm": 0.5834764838218689, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1878, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 0.6540038104035564, | |
| "grad_norm": 0.4722538888454437, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4849, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 0.6549275445990417, | |
| "grad_norm": 0.5411726832389832, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1826, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 0.6558512787945269, | |
| "grad_norm": 0.7647090554237366, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1883, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.6567750129900122, | |
| "grad_norm": 0.5438753962516785, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4819, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 0.6576987471854974, | |
| "grad_norm": 0.3922753632068634, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1842, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 0.6586224813809827, | |
| "grad_norm": 0.6545330882072449, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1864, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 0.6595462155764679, | |
| "grad_norm": 0.4712873697280884, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1871, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 0.6604699497719532, | |
| "grad_norm": 0.4064500033855438, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1836, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 0.6613936839674384, | |
| "grad_norm": 0.4518815577030182, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1889, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 0.6623174181629237, | |
| "grad_norm": 0.7918335795402527, | |
| "learning_rate": 0.0001, | |
| "loss": 0.187, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 0.6632411523584089, | |
| "grad_norm": 0.48166733980178833, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1845, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 0.6641648865538942, | |
| "grad_norm": 0.6200546026229858, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1855, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 0.6650886207493794, | |
| "grad_norm": 0.6837434768676758, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4852, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.6660123549448647, | |
| "grad_norm": 0.6038565039634705, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1889, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 0.6669360891403499, | |
| "grad_norm": 0.7944003939628601, | |
| "learning_rate": 0.0001, | |
| "loss": 0.186, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 0.6678598233358352, | |
| "grad_norm": 0.47415363788604736, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1862, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 0.6687835575313203, | |
| "grad_norm": 0.5343001484870911, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1867, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 0.6697072917268057, | |
| "grad_norm": 0.4093886613845825, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1855, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 0.6706310259222908, | |
| "grad_norm": 0.47353625297546387, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1875, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 0.6715547601177762, | |
| "grad_norm": 0.6467652916908264, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1834, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 0.6724784943132613, | |
| "grad_norm": 0.561173677444458, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1872, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 0.6734022285087466, | |
| "grad_norm": 0.44280222058296204, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1855, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 0.6743259627042318, | |
| "grad_norm": 0.45622822642326355, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1817, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.6752496968997171, | |
| "grad_norm": 0.43211063742637634, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4778, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 0.6761734310952023, | |
| "grad_norm": 0.4531904458999634, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1853, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 0.6770971652906876, | |
| "grad_norm": 0.4295077621936798, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1838, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 0.6780208994861728, | |
| "grad_norm": 0.4116787612438202, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1843, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 0.6789446336816581, | |
| "grad_norm": 0.4715608060359955, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1843, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 0.6798683678771433, | |
| "grad_norm": 0.49722760915756226, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1852, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 0.6807921020726286, | |
| "grad_norm": 0.6285380721092224, | |
| "learning_rate": 0.0001, | |
| "loss": 0.183, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 0.6817158362681138, | |
| "grad_norm": 0.6626774668693542, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1848, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 0.6826395704635991, | |
| "grad_norm": 0.3493516445159912, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1816, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 0.6835633046590843, | |
| "grad_norm": 0.39721986651420593, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1849, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.6844870388545696, | |
| "grad_norm": 0.36072245240211487, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1834, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 0.6854107730500548, | |
| "grad_norm": 0.3699972629547119, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1838, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 0.6863345072455401, | |
| "grad_norm": 0.353380024433136, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1839, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 0.6872582414410253, | |
| "grad_norm": 0.43993157148361206, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1808, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 0.6881819756365106, | |
| "grad_norm": 0.3863504230976105, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1841, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 0.6891057098319958, | |
| "grad_norm": 0.6462234854698181, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1823, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 0.6900294440274811, | |
| "grad_norm": 0.45021629333496094, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1878, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 0.6909531782229663, | |
| "grad_norm": 0.5298671722412109, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1836, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 0.6918769124184516, | |
| "grad_norm": 0.544666051864624, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1843, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 0.6928006466139368, | |
| "grad_norm": 0.4883028566837311, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1831, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.6937243808094221, | |
| "grad_norm": 0.5272738933563232, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1887, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 0.6946481150049073, | |
| "grad_norm": 0.4605458378791809, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1834, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 0.6955718492003926, | |
| "grad_norm": 0.4206326901912689, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1803, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 0.6964955833958778, | |
| "grad_norm": 0.4763469398021698, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1839, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 0.6974193175913631, | |
| "grad_norm": 0.9533921480178833, | |
| "learning_rate": 0.0001, | |
| "loss": 0.184, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 0.6983430517868483, | |
| "grad_norm": 0.4474931061267853, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1845, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 0.6992667859823336, | |
| "grad_norm": 0.7657787799835205, | |
| "learning_rate": 0.0001, | |
| "loss": 0.179, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 0.7001905201778188, | |
| "grad_norm": 0.4875025451183319, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1783, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 0.7011142543733041, | |
| "grad_norm": 0.42080581188201904, | |
| "learning_rate": 0.0001, | |
| "loss": 0.178, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 0.7020379885687893, | |
| "grad_norm": 0.36648714542388916, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1849, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.7029617227642746, | |
| "grad_norm": 0.4213377833366394, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1807, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 0.7038854569597598, | |
| "grad_norm": 0.35371777415275574, | |
| "learning_rate": 0.0001, | |
| "loss": 0.185, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 0.7048091911552451, | |
| "grad_norm": 0.4143765866756439, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1833, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 0.7057329253507303, | |
| "grad_norm": 0.34513357281684875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1816, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 0.7066566595462156, | |
| "grad_norm": 0.56122225522995, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4756, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 0.7075803937417008, | |
| "grad_norm": 0.3687942624092102, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1794, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 0.7085041279371861, | |
| "grad_norm": 0.6322677731513977, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4732, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 0.7094278621326713, | |
| "grad_norm": 0.781364381313324, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1824, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 0.7103515963281566, | |
| "grad_norm": 0.7070577144622803, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1819, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 0.7112753305236418, | |
| "grad_norm": 0.6206178069114685, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1807, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.7121990647191271, | |
| "grad_norm": 0.6880868077278137, | |
| "learning_rate": 0.0001, | |
| "loss": 0.182, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 0.7131227989146123, | |
| "grad_norm": 0.5743620991706848, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1749, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 0.7140465331100976, | |
| "grad_norm": 0.6810530424118042, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1859, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 0.7149702673055828, | |
| "grad_norm": 0.6227778792381287, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1761, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 0.7158940015010681, | |
| "grad_norm": 0.84186851978302, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1827, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 0.7168177356965533, | |
| "grad_norm": 0.5849947929382324, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1816, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 0.7177414698920386, | |
| "grad_norm": 0.42177727818489075, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1792, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 0.7186652040875238, | |
| "grad_norm": 0.7109232544898987, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1795, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 0.7195889382830091, | |
| "grad_norm": 0.6489834785461426, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1768, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 0.7205126724784943, | |
| "grad_norm": 0.5706067085266113, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1795, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.7214364066739796, | |
| "grad_norm": 0.6116433143615723, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1796, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 0.7223601408694648, | |
| "grad_norm": 0.40196338295936584, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1761, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 0.7232838750649501, | |
| "grad_norm": 0.48718318343162537, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1775, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 0.7242076092604353, | |
| "grad_norm": 0.4317290186882019, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1798, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 0.7251313434559206, | |
| "grad_norm": 0.4202496111392975, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1764, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 0.7260550776514058, | |
| "grad_norm": 0.6695144772529602, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4703, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 0.7269788118468911, | |
| "grad_norm": 0.596879780292511, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1793, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 0.7279025460423763, | |
| "grad_norm": 0.4169802665710449, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1806, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 0.7288262802378616, | |
| "grad_norm": 0.3892427980899811, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1793, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 0.7297500144333467, | |
| "grad_norm": 0.4820833206176758, | |
| "learning_rate": 0.0001, | |
| "loss": 0.179, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.730673748628832, | |
| "grad_norm": 0.6452758312225342, | |
| "learning_rate": 0.0001, | |
| "loss": 0.182, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 0.7315974828243174, | |
| "grad_norm": 0.4922819137573242, | |
| "learning_rate": 0.0001, | |
| "loss": 0.179, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 0.7325212170198026, | |
| "grad_norm": 0.5007845759391785, | |
| "learning_rate": 0.0001, | |
| "loss": 0.172, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 0.7334449512152879, | |
| "grad_norm": 0.44310009479522705, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1748, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 0.734368685410773, | |
| "grad_norm": 0.46110036969184875, | |
| "learning_rate": 0.0001, | |
| "loss": 0.3969, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 0.7352924196062584, | |
| "grad_norm": 0.7349547743797302, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1795, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 0.7362161538017435, | |
| "grad_norm": 0.5728829503059387, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1774, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 0.7371398879972288, | |
| "grad_norm": 0.4830996096134186, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1798, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 0.738063622192714, | |
| "grad_norm": 0.8080053925514221, | |
| "learning_rate": 0.0001, | |
| "loss": 0.175, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 0.7389873563881993, | |
| "grad_norm": 0.9416603446006775, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1791, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.7399110905836845, | |
| "grad_norm": 0.602627694606781, | |
| "learning_rate": 0.0001, | |
| "loss": 0.175, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 0.7408348247791698, | |
| "grad_norm": 0.8102542757987976, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1762, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 0.741758558974655, | |
| "grad_norm": 0.5637885928153992, | |
| "learning_rate": 0.0001, | |
| "loss": 0.177, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 0.7426822931701403, | |
| "grad_norm": 0.34650176763534546, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1708, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 0.7436060273656255, | |
| "grad_norm": 0.4475821554660797, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1789, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 0.7445297615611108, | |
| "grad_norm": 0.4095354378223419, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1785, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 0.745453495756596, | |
| "grad_norm": 0.4277932941913605, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1769, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 0.7463772299520813, | |
| "grad_norm": 0.3410409986972809, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4647, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 0.7473009641475665, | |
| "grad_norm": 0.3772905468940735, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1743, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 0.7482246983430518, | |
| "grad_norm": 0.43052271008491516, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1762, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.749148432538537, | |
| "grad_norm": 0.44517043232917786, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1768, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 0.7500721667340223, | |
| "grad_norm": 0.3778810501098633, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1768, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 0.7509959009295075, | |
| "grad_norm": 0.7292577624320984, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1761, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 0.7519196351249928, | |
| "grad_norm": 0.5761885046958923, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1768, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 0.752843369320478, | |
| "grad_norm": 0.5460628271102905, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1789, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 0.7537671035159633, | |
| "grad_norm": 0.49501052498817444, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1783, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 0.7546908377114485, | |
| "grad_norm": 0.4425870180130005, | |
| "learning_rate": 0.0001, | |
| "loss": 0.173, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 0.7556145719069338, | |
| "grad_norm": 0.39895811676979065, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1714, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 0.756538306102419, | |
| "grad_norm": 0.4607703685760498, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1806, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 0.7574620402979043, | |
| "grad_norm": 0.38244667649269104, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1768, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.7583857744933895, | |
| "grad_norm": 0.5269780158996582, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1755, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 0.7593095086888748, | |
| "grad_norm": 0.42601966857910156, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1713, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 0.76023324288436, | |
| "grad_norm": 0.5692285895347595, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1751, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 0.7611569770798453, | |
| "grad_norm": 0.862949550151825, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1752, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 0.7620807112753305, | |
| "grad_norm": 0.5871086716651917, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1736, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 0.7630044454708158, | |
| "grad_norm": 0.36870133876800537, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1758, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 0.763928179666301, | |
| "grad_norm": 0.5182648301124573, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1712, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 0.7648519138617863, | |
| "grad_norm": 0.44941583275794983, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1761, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 0.7657756480572715, | |
| "grad_norm": 0.4947168529033661, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1687, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 0.7666993822527568, | |
| "grad_norm": 0.4370608627796173, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1757, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.767623116448242, | |
| "grad_norm": 0.4258075952529907, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1721, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 0.7685468506437273, | |
| "grad_norm": 0.4772101640701294, | |
| "learning_rate": 0.0001, | |
| "loss": 0.177, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 0.7694705848392125, | |
| "grad_norm": 0.45485544204711914, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1743, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 0.7703943190346978, | |
| "grad_norm": 0.6943185925483704, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1722, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 0.771318053230183, | |
| "grad_norm": 0.8926262855529785, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1749, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 0.7722417874256683, | |
| "grad_norm": 0.7720366716384888, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1721, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 0.7731655216211535, | |
| "grad_norm": 0.6796042919158936, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1743, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 0.7740892558166388, | |
| "grad_norm": 0.40225762128829956, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1728, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 0.775012990012124, | |
| "grad_norm": 0.5170344710350037, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1744, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 0.7759367242076093, | |
| "grad_norm": 0.43774446845054626, | |
| "learning_rate": 0.0001, | |
| "loss": 0.171, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.7768604584030945, | |
| "grad_norm": 0.35116827487945557, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1722, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 0.7777841925985798, | |
| "grad_norm": 0.42142215371131897, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1708, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 0.778707926794065, | |
| "grad_norm": 0.45540010929107666, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1708, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 0.7796316609895503, | |
| "grad_norm": 0.5589233040809631, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1746, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 0.7805553951850355, | |
| "grad_norm": 0.3629968464374542, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1748, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 0.7814791293805208, | |
| "grad_norm": 0.806050717830658, | |
| "learning_rate": 0.0001, | |
| "loss": 0.173, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 0.782402863576006, | |
| "grad_norm": 0.7409831285476685, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1771, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 0.7833265977714913, | |
| "grad_norm": 0.4119178354740143, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1733, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 0.7842503319669765, | |
| "grad_norm": 0.4585535526275635, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1732, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 0.7851740661624618, | |
| "grad_norm": 0.4914413094520569, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1742, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.786097800357947, | |
| "grad_norm": 0.451362282037735, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1713, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 0.7870215345534323, | |
| "grad_norm": 0.39437827467918396, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1733, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 0.7879452687489175, | |
| "grad_norm": 0.6410636305809021, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1737, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 0.7888690029444028, | |
| "grad_norm": 0.8889188170433044, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1751, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 0.789792737139888, | |
| "grad_norm": 0.5608518123626709, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1709, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 0.7907164713353733, | |
| "grad_norm": 0.4477889835834503, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1718, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 0.7916402055308585, | |
| "grad_norm": 0.543216347694397, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1679, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 0.7925639397263438, | |
| "grad_norm": 0.47616589069366455, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1747, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 0.793487673921829, | |
| "grad_norm": 0.5494805574417114, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1698, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 0.7944114081173143, | |
| "grad_norm": 0.3553082346916199, | |
| "learning_rate": 0.0001, | |
| "loss": 0.168, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.7953351423127994, | |
| "grad_norm": 0.9502201676368713, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1763, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 0.7962588765082848, | |
| "grad_norm": 0.47275465726852417, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1701, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 0.79718261070377, | |
| "grad_norm": 0.8205789923667908, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1742, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 0.7981063448992552, | |
| "grad_norm": 0.5286776423454285, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1734, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 0.7990300790947404, | |
| "grad_norm": 0.4189944565296173, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1689, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 0.7999538132902257, | |
| "grad_norm": 0.4479851722717285, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1694, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 0.8008775474857109, | |
| "grad_norm": 0.739837646484375, | |
| "learning_rate": 0.0001, | |
| "loss": 0.17, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 0.8018012816811962, | |
| "grad_norm": 0.34144723415374756, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1717, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 0.8027250158766814, | |
| "grad_norm": 0.556404173374176, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1721, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 0.8036487500721667, | |
| "grad_norm": 0.6465960144996643, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1698, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.8045724842676519, | |
| "grad_norm": 0.4784127473831177, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1726, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 0.8054962184631372, | |
| "grad_norm": 0.446210652589798, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1709, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 0.8064199526586224, | |
| "grad_norm": 0.6317527294158936, | |
| "learning_rate": 0.0001, | |
| "loss": 0.17, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 0.8073436868541077, | |
| "grad_norm": 0.9491158127784729, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1738, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 0.8082674210495929, | |
| "grad_norm": 0.485317587852478, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1679, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 0.8091911552450782, | |
| "grad_norm": 0.4314129054546356, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1694, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 0.8101148894405634, | |
| "grad_norm": 0.43169793486595154, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4557, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 0.8110386236360487, | |
| "grad_norm": 0.7022022008895874, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1692, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 0.8119623578315339, | |
| "grad_norm": 0.8071302771568298, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1643, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 0.8128860920270192, | |
| "grad_norm": 0.4125478267669678, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1695, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.8138098262225045, | |
| "grad_norm": 0.669414222240448, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1685, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 0.8147335604179897, | |
| "grad_norm": 0.5178114175796509, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1707, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 0.815657294613475, | |
| "grad_norm": 0.6118435263633728, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1655, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 0.8165810288089602, | |
| "grad_norm": 0.5061274766921997, | |
| "learning_rate": 0.0001, | |
| "loss": 0.168, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 0.8175047630044455, | |
| "grad_norm": 0.44611838459968567, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4568, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 0.8184284971999307, | |
| "grad_norm": 0.5419828295707703, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1687, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 0.819352231395416, | |
| "grad_norm": 0.47415947914123535, | |
| "learning_rate": 0.0001, | |
| "loss": 0.164, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 0.8202759655909012, | |
| "grad_norm": 0.3742285370826721, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4504, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 0.8211996997863865, | |
| "grad_norm": 0.4165765941143036, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1691, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 0.8221234339818717, | |
| "grad_norm": 0.4976545572280884, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1716, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.823047168177357, | |
| "grad_norm": 0.3930950164794922, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1669, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 0.8239709023728422, | |
| "grad_norm": 0.7222493290901184, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1678, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 0.8248946365683275, | |
| "grad_norm": 0.6513727903366089, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1651, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 0.8258183707638127, | |
| "grad_norm": 0.36553195118904114, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1685, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 0.826742104959298, | |
| "grad_norm": 0.4141073226928711, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1647, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 0.8276658391547832, | |
| "grad_norm": 0.9026088714599609, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1655, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 0.8285895733502685, | |
| "grad_norm": 0.4109916687011719, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1701, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 0.8295133075457537, | |
| "grad_norm": 0.4617246389389038, | |
| "learning_rate": 0.0001, | |
| "loss": 0.164, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 0.830437041741239, | |
| "grad_norm": 0.6527224779129028, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1638, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 0.8313607759367242, | |
| "grad_norm": 1.0628283023834229, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1663, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.8322845101322095, | |
| "grad_norm": 0.4224163293838501, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1664, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 0.8332082443276947, | |
| "grad_norm": 0.5163058638572693, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1656, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 0.83413197852318, | |
| "grad_norm": 0.5277922749519348, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1671, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 0.8350557127186652, | |
| "grad_norm": 1.1311513185501099, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1668, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 0.8359794469141505, | |
| "grad_norm": 0.7585214376449585, | |
| "learning_rate": 0.0001, | |
| "loss": 0.162, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 0.8369031811096357, | |
| "grad_norm": 0.8250924944877625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1631, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 0.837826915305121, | |
| "grad_norm": 0.664091944694519, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1643, | |
| "step": 9070 | |
| }, | |
| { | |
| "epoch": 0.8387506495006062, | |
| "grad_norm": 0.5413016676902771, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4144, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 0.8396743836960915, | |
| "grad_norm": 0.533757746219635, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1679, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 0.8405981178915767, | |
| "grad_norm": 0.5132860541343689, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1662, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.841521852087062, | |
| "grad_norm": 1.0943279266357422, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1674, | |
| "step": 9110 | |
| }, | |
| { | |
| "epoch": 0.8424455862825472, | |
| "grad_norm": 0.9697147011756897, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1701, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 0.8433693204780325, | |
| "grad_norm": 0.4370110332965851, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1664, | |
| "step": 9130 | |
| }, | |
| { | |
| "epoch": 0.8442930546735177, | |
| "grad_norm": 0.49583470821380615, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1675, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 0.845216788869003, | |
| "grad_norm": 0.3518935441970825, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1649, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 0.8461405230644882, | |
| "grad_norm": 0.4974721670150757, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1672, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 0.8470642572599735, | |
| "grad_norm": 0.5408349633216858, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1644, | |
| "step": 9170 | |
| }, | |
| { | |
| "epoch": 0.8479879914554587, | |
| "grad_norm": 0.4251846373081207, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1627, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 0.848911725650944, | |
| "grad_norm": 0.47709429264068604, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1672, | |
| "step": 9190 | |
| }, | |
| { | |
| "epoch": 0.8498354598464292, | |
| "grad_norm": 0.45655733346939087, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1664, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.8507591940419145, | |
| "grad_norm": 0.5739580988883972, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1628, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 0.8516829282373997, | |
| "grad_norm": 0.41699105501174927, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1659, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 0.852606662432885, | |
| "grad_norm": 0.5231941938400269, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1667, | |
| "step": 9230 | |
| }, | |
| { | |
| "epoch": 0.8535303966283702, | |
| "grad_norm": 0.3867189884185791, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1656, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 0.8544541308238555, | |
| "grad_norm": 0.42265090346336365, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1621, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 0.8553778650193407, | |
| "grad_norm": 0.7266954779624939, | |
| "learning_rate": 0.0001, | |
| "loss": 0.166, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 0.856301599214826, | |
| "grad_norm": 0.4829242527484894, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1705, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 0.8572253334103112, | |
| "grad_norm": 0.4225987493991852, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1716, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 0.8581490676057965, | |
| "grad_norm": 0.7197133898735046, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1674, | |
| "step": 9290 | |
| }, | |
| { | |
| "epoch": 0.8590728018012816, | |
| "grad_norm": 0.5460970997810364, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1641, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.859996535996767, | |
| "grad_norm": 0.3880206048488617, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1651, | |
| "step": 9310 | |
| }, | |
| { | |
| "epoch": 0.8609202701922521, | |
| "grad_norm": 0.5950866937637329, | |
| "learning_rate": 0.0001, | |
| "loss": 0.163, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 0.8618440043877375, | |
| "grad_norm": 0.4253402352333069, | |
| "learning_rate": 0.0001, | |
| "loss": 0.165, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 0.8627677385832226, | |
| "grad_norm": 0.3970343768596649, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1647, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 0.863691472778708, | |
| "grad_norm": 0.43371790647506714, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1671, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 0.8646152069741931, | |
| "grad_norm": 0.634937047958374, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1603, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 0.8655389411696784, | |
| "grad_norm": 0.7563658356666565, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1625, | |
| "step": 9370 | |
| }, | |
| { | |
| "epoch": 0.8664626753651636, | |
| "grad_norm": 0.498696506023407, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4461, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 0.8673864095606489, | |
| "grad_norm": 0.39479953050613403, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1595, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 0.8683101437561341, | |
| "grad_norm": 0.5282758474349976, | |
| "learning_rate": 0.0001, | |
| "loss": 0.165, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.8692338779516194, | |
| "grad_norm": 0.502001941204071, | |
| "learning_rate": 0.0001, | |
| "loss": 0.161, | |
| "step": 9410 | |
| }, | |
| { | |
| "epoch": 0.8701576121471046, | |
| "grad_norm": 0.44498735666275024, | |
| "learning_rate": 0.0001, | |
| "loss": 0.166, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 0.8710813463425899, | |
| "grad_norm": 0.4128197729587555, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4407, | |
| "step": 9430 | |
| }, | |
| { | |
| "epoch": 0.8720050805380751, | |
| "grad_norm": 0.4395778775215149, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1637, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 0.8729288147335604, | |
| "grad_norm": 0.42124509811401367, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1619, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 0.8738525489290456, | |
| "grad_norm": 0.6406058073043823, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1609, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 0.8747762831245309, | |
| "grad_norm": 0.48441171646118164, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1589, | |
| "step": 9470 | |
| }, | |
| { | |
| "epoch": 0.8757000173200161, | |
| "grad_norm": 0.41274169087409973, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1599, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 0.8766237515155014, | |
| "grad_norm": 0.7884708046913147, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1577, | |
| "step": 9490 | |
| }, | |
| { | |
| "epoch": 0.8775474857109866, | |
| "grad_norm": 0.3887585997581482, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1597, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.8784712199064719, | |
| "grad_norm": 0.5127732157707214, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1598, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 0.8793949541019571, | |
| "grad_norm": 0.6500501036643982, | |
| "learning_rate": 0.0001, | |
| "loss": 0.16, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 0.8803186882974424, | |
| "grad_norm": 0.9996128082275391, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1616, | |
| "step": 9530 | |
| }, | |
| { | |
| "epoch": 0.8812424224929276, | |
| "grad_norm": 0.6707596182823181, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1639, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 0.8821661566884129, | |
| "grad_norm": 0.47952958941459656, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1551, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 0.8830898908838981, | |
| "grad_norm": 0.5806353688240051, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1574, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 0.8840136250793834, | |
| "grad_norm": 0.5127092003822327, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1594, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 0.8849373592748686, | |
| "grad_norm": 0.7458198070526123, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1598, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 0.8858610934703539, | |
| "grad_norm": 0.4138084650039673, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1588, | |
| "step": 9590 | |
| }, | |
| { | |
| "epoch": 0.8867848276658391, | |
| "grad_norm": 0.6283295154571533, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1607, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.8877085618613244, | |
| "grad_norm": 0.8556647300720215, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1603, | |
| "step": 9610 | |
| }, | |
| { | |
| "epoch": 0.8886322960568096, | |
| "grad_norm": 0.962799608707428, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1596, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 0.8895560302522949, | |
| "grad_norm": 0.4691963493824005, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1613, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 0.8904797644477801, | |
| "grad_norm": 0.5351691246032715, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1594, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 0.8914034986432654, | |
| "grad_norm": 0.43255048990249634, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1617, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 0.8923272328387506, | |
| "grad_norm": 0.4275433123111725, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1602, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 0.8932509670342359, | |
| "grad_norm": 0.41553664207458496, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1548, | |
| "step": 9670 | |
| }, | |
| { | |
| "epoch": 0.8941747012297212, | |
| "grad_norm": 0.5009292960166931, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1587, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 0.8950984354252064, | |
| "grad_norm": 0.47097280621528625, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1593, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 0.8960221696206917, | |
| "grad_norm": 0.4848572313785553, | |
| "learning_rate": 0.0001, | |
| "loss": 0.16, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.8969459038161769, | |
| "grad_norm": 0.41388949751853943, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1569, | |
| "step": 9710 | |
| }, | |
| { | |
| "epoch": 0.8978696380116622, | |
| "grad_norm": 1.0132074356079102, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1601, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 0.8987933722071474, | |
| "grad_norm": 0.5202128887176514, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1641, | |
| "step": 9730 | |
| }, | |
| { | |
| "epoch": 0.8997171064026327, | |
| "grad_norm": 0.5012717843055725, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1602, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 0.9006408405981179, | |
| "grad_norm": 0.6582213640213013, | |
| "learning_rate": 0.0001, | |
| "loss": 0.162, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 0.9015645747936032, | |
| "grad_norm": 0.5900190472602844, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1582, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 0.9024883089890884, | |
| "grad_norm": 0.8734009861946106, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1577, | |
| "step": 9770 | |
| }, | |
| { | |
| "epoch": 0.9034120431845737, | |
| "grad_norm": 0.5617889165878296, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1566, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 0.9043357773800589, | |
| "grad_norm": 0.4171944260597229, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1611, | |
| "step": 9790 | |
| }, | |
| { | |
| "epoch": 0.9052595115755442, | |
| "grad_norm": 0.6544126272201538, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1562, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.9061832457710294, | |
| "grad_norm": 0.48378440737724304, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1586, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 0.9071069799665147, | |
| "grad_norm": 0.7782066464424133, | |
| "learning_rate": 0.0001, | |
| "loss": 0.4362, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 0.9080307141619999, | |
| "grad_norm": 0.4387872517108917, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1584, | |
| "step": 9830 | |
| }, | |
| { | |
| "epoch": 0.9089544483574852, | |
| "grad_norm": 0.5127896070480347, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1554, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 0.9098781825529704, | |
| "grad_norm": 1.0956830978393555, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1641, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 0.9108019167484557, | |
| "grad_norm": 0.4374365210533142, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1602, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 0.9117256509439409, | |
| "grad_norm": 0.3823205828666687, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1583, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 0.9126493851394262, | |
| "grad_norm": 0.47566449642181396, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1606, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 0.9135731193349114, | |
| "grad_norm": 0.5634418725967407, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1555, | |
| "step": 9890 | |
| }, | |
| { | |
| "epoch": 0.9144968535303967, | |
| "grad_norm": 0.543655276298523, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1574, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.9154205877258819, | |
| "grad_norm": 0.7198746204376221, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1589, | |
| "step": 9910 | |
| }, | |
| { | |
| "epoch": 0.9163443219213672, | |
| "grad_norm": 0.4656532108783722, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1588, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 0.9172680561168524, | |
| "grad_norm": 0.4030615985393524, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1599, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 0.9181917903123377, | |
| "grad_norm": 0.5512037873268127, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1579, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 0.9191155245078229, | |
| "grad_norm": 1.127402901649475, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1591, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 0.9200392587033082, | |
| "grad_norm": 1.4228541851043701, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1572, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 0.9209629928987934, | |
| "grad_norm": 1.1439874172210693, | |
| "learning_rate": 0.0001, | |
| "loss": 0.5687, | |
| "step": 9970 | |
| }, | |
| { | |
| "epoch": 0.9218867270942787, | |
| "grad_norm": 1.3781626224517822, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1619, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 0.9228104612897639, | |
| "grad_norm": 0.9378222823143005, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1613, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 0.9237341954852492, | |
| "grad_norm": 0.42424276471138, | |
| "learning_rate": 0.0001, | |
| "loss": 0.1576, | |
| "step": 10000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 100000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 10000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |