MilaWang commited on
Commit
b7bb931
·
verified ·
1 Parent(s): b941ade

Upload folder using huggingface_hub

Browse files
gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.3-num-38392-sd-42/training_log.jsonl ADDED
@@ -0,0 +1 @@
 
 
1
+ {"epoch": 1.0, "step": 4559, "epoch_duration": 32039.956733465195, "total_accumulated_duration": 32039.956733465195, "gpu_info": {"GPU_0": "NVIDIA A100-SXM4-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7751.47119140625}, "peak_memory_usage": {"GPU_0": 11696.9921875}, "avg_memory_reserved": {"GPU_0": 12786.0}, "peak_memory_reserved": {"GPU_0": 12786.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "N/A", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.6056, "grad_norm": 1.028513789176941, "learning_rate": 0.0002, "epoch": 0.0021934634788330775, "step": 10}, {"loss": 1.7103, "grad_norm": 1.3106452226638794, "learning_rate": 0.0002, "epoch": 0.004386926957666155, "step": 20}, {"loss": 1.2672, "grad_norm": 0.4737258553504944, "learning_rate": 0.0002, "epoch": 0.006580390436499232, "step": 30}, {"loss": 1.2031, "grad_norm": 0.40276429057121277, "learning_rate": 0.0002, "epoch": 0.00877385391533231, "step": 40}, {"loss": 1.0971, "grad_norm": 0.34621521830558777, "learning_rate": 0.0002, "epoch": 0.010967317394165387, "step": 50}, {"loss": 1.0034, "grad_norm": 0.29575487971305847, "learning_rate": 0.0002, "epoch": 0.013160780872998464, "step": 60}, {"loss": 1.0082, "grad_norm": 0.3011494576931, "learning_rate": 0.0002, "epoch": 0.015354244351831543, "step": 70}, {"loss": 1.0362, "grad_norm": 0.34532850980758667, "learning_rate": 0.0002, "epoch": 0.01754770783066462, "step": 80}, {"loss": 1.0557, "grad_norm": 0.2690030336380005, "learning_rate": 0.0002, "epoch": 0.019741171309497697, "step": 90}, {"loss": 1.0447, "grad_norm": 0.2821115255355835, "learning_rate": 0.0002, "epoch": 0.021934634788330774, "step": 100}, {"loss": 1.0631, "grad_norm": 0.2538447678089142, "learning_rate": 0.0002, "epoch": 0.02412809826716385, "step": 110}, {"loss": 1.0549, "grad_norm": 0.2965952157974243, "learning_rate": 0.0002, "epoch": 0.026321561745996928, "step": 120}, {"loss": 1.0388, "grad_norm": 0.30606284737586975, "learning_rate": 0.0002, "epoch": 0.028515025224830008, "step": 130}, {"loss": 1.0647, "grad_norm": 0.253121554851532, "learning_rate": 0.0002, "epoch": 0.030708488703663085, "step": 140}, {"loss": 1.051, "grad_norm": 0.2890211343765259, "learning_rate": 0.0002, "epoch": 0.03290195218249616, "step": 150}, {"loss": 1.0477, "grad_norm": 0.3440452218055725, "learning_rate": 0.0002, "epoch": 0.03509541566132924, "step": 160}, {"loss": 1.0277, "grad_norm": 0.3027452826499939, "learning_rate": 0.0002, "epoch": 0.037288879140162316, "step": 170}, {"loss": 1.0227, "grad_norm": 0.2768750786781311, "learning_rate": 0.0002, "epoch": 0.03948234261899539, "step": 180}, {"loss": 0.9755, "grad_norm": 0.23852699995040894, "learning_rate": 0.0002, "epoch": 0.04167580609782847, "step": 190}, {"loss": 1.0043, "grad_norm": 0.29259586334228516, "learning_rate": 0.0002, "epoch": 0.04386926957666155, "step": 200}, {"loss": 0.9818, "grad_norm": 0.22666217386722565, "learning_rate": 0.0002, "epoch": 0.046062733055494624, "step": 210}, {"loss": 1.0162, "grad_norm": 0.3581731915473938, "learning_rate": 0.0002, "epoch": 0.0482561965343277, "step": 220}, {"loss": 1.0259, "grad_norm": 0.24345263838768005, "learning_rate": 0.0002, "epoch": 0.05044966001316078, "step": 230}, {"loss": 0.9386, "grad_norm": 0.22751791775226593, "learning_rate": 0.0002, "epoch": 0.052643123491993855, "step": 240}, {"loss": 0.9844, "grad_norm": 0.29614853858947754, "learning_rate": 0.0002, "epoch": 0.05483658697082693, "step": 250}, {"loss": 1.0033, "grad_norm": 0.2492554932832718, "learning_rate": 0.0002, "epoch": 0.057030050449660016, "step": 260}, {"loss": 1.0013, "grad_norm": 0.35212525725364685, "learning_rate": 0.0002, "epoch": 0.05922351392849309, "step": 270}, {"loss": 0.9978, "grad_norm": 0.28620097041130066, "learning_rate": 0.0002, "epoch": 0.06141697740732617, "step": 280}, {"loss": 0.9568, "grad_norm": 0.26882052421569824, "learning_rate": 0.0002, "epoch": 0.06361044088615925, "step": 290}, {"loss": 0.9864, "grad_norm": 0.24778933823108673, "learning_rate": 0.0002, "epoch": 0.06580390436499232, "step": 300}, {"loss": 0.9713, "grad_norm": 0.319064736366272, "learning_rate": 0.0002, "epoch": 0.0679973678438254, "step": 310}, {"loss": 0.996, "grad_norm": 0.24345211684703827, "learning_rate": 0.0002, "epoch": 0.07019083132265848, "step": 320}, {"loss": 0.9727, "grad_norm": 0.2269463986158371, "learning_rate": 0.0002, "epoch": 0.07238429480149156, "step": 330}, {"loss": 1.0276, "grad_norm": 0.24835708737373352, "learning_rate": 0.0002, "epoch": 0.07457775828032463, "step": 340}, {"loss": 0.9979, "grad_norm": 0.24580131471157074, "learning_rate": 0.0002, "epoch": 0.07677122175915771, "step": 350}, {"loss": 0.9954, "grad_norm": 0.2741334140300751, "learning_rate": 0.0002, "epoch": 0.07896468523799079, "step": 360}, {"loss": 0.9519, "grad_norm": 0.2848239541053772, "learning_rate": 0.0002, "epoch": 0.08115814871682386, "step": 370}, {"loss": 0.9733, "grad_norm": 0.24520659446716309, "learning_rate": 0.0002, "epoch": 0.08335161219565694, "step": 380}, {"loss": 0.9881, "grad_norm": 0.2989078164100647, "learning_rate": 0.0002, "epoch": 0.08554507567449002, "step": 390}, {"loss": 1.0186, "grad_norm": 0.2695613503456116, "learning_rate": 0.0002, "epoch": 0.0877385391533231, "step": 400}, {"loss": 1.0594, "grad_norm": 0.3037568926811218, "learning_rate": 0.0002, "epoch": 0.08993200263215617, "step": 410}, {"loss": 0.9658, "grad_norm": 0.3047358989715576, "learning_rate": 0.0002, "epoch": 0.09212546611098925, "step": 420}, {"loss": 0.9966, "grad_norm": 0.24212510883808136, "learning_rate": 0.0002, "epoch": 0.09431892958982233, "step": 430}, {"loss": 1.0071, "grad_norm": 0.27018389105796814, "learning_rate": 0.0002, "epoch": 0.0965123930686554, "step": 440}, {"loss": 0.9664, "grad_norm": 0.2781650125980377, "learning_rate": 0.0002, "epoch": 0.09870585654748848, "step": 450}, {"loss": 0.9201, "grad_norm": 0.2679078280925751, "learning_rate": 0.0002, "epoch": 0.10089932002632156, "step": 460}, {"loss": 1.0225, "grad_norm": 0.22544418275356293, "learning_rate": 0.0002, "epoch": 0.10309278350515463, "step": 470}, {"loss": 0.9631, "grad_norm": 0.2237607091665268, "learning_rate": 0.0002, "epoch": 0.10528624698398771, "step": 480}, {"loss": 0.9407, "grad_norm": 0.29013291001319885, "learning_rate": 0.0002, "epoch": 0.10747971046282079, "step": 490}, {"loss": 0.9834, "grad_norm": 0.25578540563583374, "learning_rate": 0.0002, "epoch": 0.10967317394165386, "step": 500}, {"loss": 0.9573, "grad_norm": 0.27631187438964844, "learning_rate": 0.0002, "epoch": 0.11186663742048696, "step": 510}, {"loss": 0.9478, "grad_norm": 0.23903188109397888, "learning_rate": 0.0002, "epoch": 0.11406010089932003, "step": 520}, {"loss": 1.0132, "grad_norm": 0.28117722272872925, "learning_rate": 0.0002, "epoch": 0.11625356437815311, "step": 530}, {"loss": 1.0018, "grad_norm": 0.2566194236278534, "learning_rate": 0.0002, "epoch": 0.11844702785698619, "step": 540}, {"loss": 1.0046, "grad_norm": 0.2582397758960724, "learning_rate": 0.0002, "epoch": 0.12064049133581926, "step": 550}, {"loss": 0.9684, "grad_norm": 0.24175693094730377, "learning_rate": 0.0002, "epoch": 0.12283395481465234, "step": 560}, {"loss": 1.0289, "grad_norm": 0.23307913541793823, "learning_rate": 0.0002, "epoch": 0.1250274182934854, "step": 570}, {"loss": 0.9326, "grad_norm": 0.2421293705701828, "learning_rate": 0.0002, "epoch": 0.1272208817723185, "step": 580}, {"loss": 0.925, "grad_norm": 0.30303820967674255, "learning_rate": 0.0002, "epoch": 0.12941434525115156, "step": 590}, {"loss": 0.9871, "grad_norm": 0.28316158056259155, "learning_rate": 0.0002, "epoch": 0.13160780872998465, "step": 600}, {"loss": 0.9418, "grad_norm": 0.25508493185043335, "learning_rate": 0.0002, "epoch": 0.1338012722088177, "step": 610}, {"loss": 1.0086, "grad_norm": 0.2379150092601776, "learning_rate": 0.0002, "epoch": 0.1359947356876508, "step": 620}, {"loss": 0.9527, "grad_norm": 0.2509845495223999, "learning_rate": 0.0002, "epoch": 0.13818819916648387, "step": 630}, {"loss": 0.9829, "grad_norm": 0.2685025930404663, "learning_rate": 0.0002, "epoch": 0.14038166264531696, "step": 640}, {"loss": 0.9651, "grad_norm": 0.28748831152915955, "learning_rate": 0.0002, "epoch": 0.14257512612415002, "step": 650}, {"loss": 1.0087, "grad_norm": 0.25447049736976624, "learning_rate": 0.0002, "epoch": 0.1447685896029831, "step": 660}, {"loss": 1.0104, "grad_norm": 0.2594515085220337, "learning_rate": 0.0002, "epoch": 0.14696205308181617, "step": 670}, {"loss": 0.9632, "grad_norm": 0.2659056484699249, "learning_rate": 0.0002, "epoch": 0.14915551656064927, "step": 680}, {"loss": 0.9666, "grad_norm": 0.2819564938545227, "learning_rate": 0.0002, "epoch": 0.15134898003948236, "step": 690}, {"loss": 0.9641, "grad_norm": 0.27030453085899353, "learning_rate": 0.0002, "epoch": 0.15354244351831542, "step": 700}, {"loss": 1.0004, "grad_norm": 0.280082106590271, "learning_rate": 0.0002, "epoch": 0.1557359069971485, "step": 710}, {"loss": 0.9396, "grad_norm": 0.27448907494544983, "learning_rate": 0.0002, "epoch": 0.15792937047598157, "step": 720}, {"loss": 0.9347, "grad_norm": 0.2674694061279297, "learning_rate": 0.0002, "epoch": 0.16012283395481466, "step": 730}, {"loss": 0.9758, "grad_norm": 0.2971559762954712, "learning_rate": 0.0002, "epoch": 0.16231629743364773, "step": 740}, {"loss": 0.9693, "grad_norm": 0.26260021328926086, "learning_rate": 0.0002, "epoch": 0.16450976091248082, "step": 750}, {"loss": 0.9582, "grad_norm": 0.22301781177520752, "learning_rate": 0.0002, "epoch": 0.16670322439131388, "step": 760}, {"loss": 0.9487, "grad_norm": 0.2671736776828766, "learning_rate": 0.0002, "epoch": 0.16889668787014697, "step": 770}, {"loss": 0.9379, "grad_norm": 0.21052002906799316, "learning_rate": 0.0002, "epoch": 0.17109015134898004, "step": 780}, {"loss": 0.9339, "grad_norm": 0.27540385723114014, "learning_rate": 0.0002, "epoch": 0.17328361482781313, "step": 790}, {"loss": 1.0058, "grad_norm": 0.27856168150901794, "learning_rate": 0.0002, "epoch": 0.1754770783066462, "step": 800}, {"loss": 0.9941, "grad_norm": 0.27243366837501526, "learning_rate": 0.0002, "epoch": 0.17767054178547928, "step": 810}, {"loss": 1.0613, "grad_norm": 0.32812198996543884, "learning_rate": 0.0002, "epoch": 0.17986400526431234, "step": 820}, {"loss": 0.9109, "grad_norm": 0.254926472902298, "learning_rate": 0.0002, "epoch": 0.18205746874314543, "step": 830}, {"loss": 0.9985, "grad_norm": 0.23510406911373138, "learning_rate": 0.0002, "epoch": 0.1842509322219785, "step": 840}, {"loss": 0.9859, "grad_norm": 0.22493895888328552, "learning_rate": 0.0002, "epoch": 0.1864443957008116, "step": 850}, {"loss": 0.9063, "grad_norm": 0.2509733736515045, "learning_rate": 0.0002, "epoch": 0.18863785917964465, "step": 860}, {"loss": 0.9543, "grad_norm": 0.28649914264678955, "learning_rate": 0.0002, "epoch": 0.19083132265847774, "step": 870}, {"loss": 0.9991, "grad_norm": 0.3130183517932892, "learning_rate": 0.0002, "epoch": 0.1930247861373108, "step": 880}, {"loss": 0.9612, "grad_norm": 0.3050929605960846, "learning_rate": 0.0002, "epoch": 0.1952182496161439, "step": 890}, {"loss": 0.9625, "grad_norm": 0.2583090662956238, "learning_rate": 0.0002, "epoch": 0.19741171309497696, "step": 900}, {"loss": 0.9834, "grad_norm": 0.23116709291934967, "learning_rate": 0.0002, "epoch": 0.19960517657381005, "step": 910}, {"loss": 0.9381, "grad_norm": 0.27929458022117615, "learning_rate": 0.0002, "epoch": 0.2017986400526431, "step": 920}, {"loss": 1.0092, "grad_norm": 0.2899194359779358, "learning_rate": 0.0002, "epoch": 0.2039921035314762, "step": 930}, {"loss": 0.9598, "grad_norm": 0.29898619651794434, "learning_rate": 0.0002, "epoch": 0.20618556701030927, "step": 940}, {"loss": 0.8838, "grad_norm": 0.25518035888671875, "learning_rate": 0.0002, "epoch": 0.20837903048914236, "step": 950}, {"loss": 0.96, "grad_norm": 0.2870531976222992, "learning_rate": 0.0002, "epoch": 0.21057249396797542, "step": 960}, {"loss": 1.0059, "grad_norm": 0.31430622935295105, "learning_rate": 0.0002, "epoch": 0.2127659574468085, "step": 970}, {"loss": 1.0093, "grad_norm": 0.25647610425949097, "learning_rate": 0.0002, "epoch": 0.21495942092564158, "step": 980}, {"loss": 0.9919, "grad_norm": 0.266030877828598, "learning_rate": 0.0002, "epoch": 0.21715288440447467, "step": 990}, {"loss": 1.0047, "grad_norm": 0.2774191200733185, "learning_rate": 0.0002, "epoch": 0.21934634788330773, "step": 1000}, {"loss": 0.9743, "grad_norm": 0.24293410778045654, "learning_rate": 0.0002, "epoch": 0.22153981136214082, "step": 1010}, {"loss": 0.9186, "grad_norm": 0.8793322443962097, "learning_rate": 0.0002, "epoch": 0.2237332748409739, "step": 1020}, {"loss": 0.9011, "grad_norm": 0.26562735438346863, "learning_rate": 0.0002, "epoch": 0.22592673831980697, "step": 1030}, {"loss": 0.923, "grad_norm": 0.2593516409397125, "learning_rate": 0.0002, "epoch": 0.22812020179864007, "step": 1040}, {"loss": 1.0151, "grad_norm": 0.31918203830718994, "learning_rate": 0.0002, "epoch": 0.23031366527747313, "step": 1050}, {"loss": 0.9778, "grad_norm": 0.29118841886520386, "learning_rate": 0.0002, "epoch": 0.23250712875630622, "step": 1060}, {"loss": 0.9475, "grad_norm": 0.2521571218967438, "learning_rate": 0.0002, "epoch": 0.23470059223513928, "step": 1070}, {"loss": 0.9537, "grad_norm": 0.2908271849155426, "learning_rate": 0.0002, "epoch": 0.23689405571397237, "step": 1080}, {"loss": 0.9521, "grad_norm": 0.30912894010543823, "learning_rate": 0.0002, "epoch": 0.23908751919280544, "step": 1090}, {"loss": 0.8922, "grad_norm": 0.23892425000667572, "learning_rate": 0.0002, "epoch": 0.24128098267163853, "step": 1100}, {"loss": 1.0111, "grad_norm": 0.2697543203830719, "learning_rate": 0.0002, "epoch": 0.2434744461504716, "step": 1110}, {"loss": 0.9082, "grad_norm": 0.234395831823349, "learning_rate": 0.0002, "epoch": 0.24566790962930468, "step": 1120}, {"loss": 0.9434, "grad_norm": 0.3189583122730255, "learning_rate": 0.0002, "epoch": 0.24786137310813774, "step": 1130}, {"loss": 0.9921, "grad_norm": 0.25458860397338867, "learning_rate": 0.0002, "epoch": 0.2500548365869708, "step": 1140}, {"loss": 1.0101, "grad_norm": 0.30376094579696655, "learning_rate": 0.0002, "epoch": 0.2522483000658039, "step": 1150}, {"loss": 0.9901, "grad_norm": 0.3042148947715759, "learning_rate": 0.0002, "epoch": 0.254441763544637, "step": 1160}, {"loss": 1.0132, "grad_norm": 0.2782956659793854, "learning_rate": 0.0002, "epoch": 0.2566352270234701, "step": 1170}, {"loss": 0.9388, "grad_norm": 0.24752891063690186, "learning_rate": 0.0002, "epoch": 0.2588286905023031, "step": 1180}, {"loss": 0.9492, "grad_norm": 0.29337266087532043, "learning_rate": 0.0002, "epoch": 0.2610221539811362, "step": 1190}, {"loss": 0.9936, "grad_norm": 0.2475079596042633, "learning_rate": 0.0002, "epoch": 0.2632156174599693, "step": 1200}, {"loss": 0.9505, "grad_norm": 0.28889575600624084, "learning_rate": 0.0002, "epoch": 0.2654090809388024, "step": 1210}, {"loss": 0.9482, "grad_norm": 0.2810121178627014, "learning_rate": 0.0002, "epoch": 0.2676025444176354, "step": 1220}, {"loss": 0.9775, "grad_norm": 0.2809271514415741, "learning_rate": 0.0002, "epoch": 0.2697960078964685, "step": 1230}, {"loss": 0.9848, "grad_norm": 0.2559370994567871, "learning_rate": 0.0002, "epoch": 0.2719894713753016, "step": 1240}, {"loss": 0.9799, "grad_norm": 0.282394140958786, "learning_rate": 0.0002, "epoch": 0.2741829348541347, "step": 1250}, {"loss": 0.9862, "grad_norm": 0.3215254843235016, "learning_rate": 0.0002, "epoch": 0.27637639833296773, "step": 1260}, {"loss": 0.9974, "grad_norm": 0.3029433786869049, "learning_rate": 0.0002, "epoch": 0.2785698618118008, "step": 1270}, {"loss": 0.9396, "grad_norm": 0.24840985238552094, "learning_rate": 0.0002, "epoch": 0.2807633252906339, "step": 1280}, {"loss": 0.9763, "grad_norm": 0.2621304988861084, "learning_rate": 0.0002, "epoch": 0.282956788769467, "step": 1290}, {"loss": 0.9464, "grad_norm": 0.2726896107196808, "learning_rate": 0.0002, "epoch": 0.28515025224830004, "step": 1300}, {"loss": 0.955, "grad_norm": 0.25803691148757935, "learning_rate": 0.0002, "epoch": 0.28734371572713313, "step": 1310}, {"loss": 0.9432, "grad_norm": 0.29259294271469116, "learning_rate": 0.0002, "epoch": 0.2895371792059662, "step": 1320}, {"loss": 0.9327, "grad_norm": 0.28409960865974426, "learning_rate": 0.0002, "epoch": 0.2917306426847993, "step": 1330}, {"loss": 1.0276, "grad_norm": 0.2825821042060852, "learning_rate": 0.0002, "epoch": 0.29392410616363235, "step": 1340}, {"loss": 0.9741, "grad_norm": 0.2664753496646881, "learning_rate": 0.0002, "epoch": 0.29611756964246544, "step": 1350}, {"loss": 0.9367, "grad_norm": 0.2840157747268677, "learning_rate": 0.0002, "epoch": 0.29831103312129853, "step": 1360}, {"loss": 0.903, "grad_norm": 0.3289981484413147, "learning_rate": 0.0002, "epoch": 0.3005044966001316, "step": 1370}, {"loss": 0.8924, "grad_norm": 0.2600017189979553, "learning_rate": 0.0002, "epoch": 0.3026979600789647, "step": 1380}, {"loss": 1.0522, "grad_norm": 0.2717713415622711, "learning_rate": 0.0002, "epoch": 0.30489142355779775, "step": 1390}, {"loss": 0.9938, "grad_norm": 0.2909969687461853, "learning_rate": 0.0002, "epoch": 0.30708488703663084, "step": 1400}, {"loss": 0.9166, "grad_norm": 0.28429287672042847, "learning_rate": 0.0002, "epoch": 0.30927835051546393, "step": 1410}, {"loss": 0.956, "grad_norm": 0.2643485963344574, "learning_rate": 0.0002, "epoch": 0.311471813994297, "step": 1420}, {"loss": 1.0096, "grad_norm": 0.2772969901561737, "learning_rate": 0.0002, "epoch": 0.31366527747313006, "step": 1430}, {"loss": 1.0096, "grad_norm": 0.2956376075744629, "learning_rate": 0.0002, "epoch": 0.31585874095196315, "step": 1440}, {"loss": 0.9239, "grad_norm": 0.2682052254676819, "learning_rate": 0.0002, "epoch": 0.31805220443079624, "step": 1450}, {"loss": 0.9886, "grad_norm": 0.26227933168411255, "learning_rate": 0.0002, "epoch": 0.32024566790962933, "step": 1460}, {"loss": 1.0001, "grad_norm": 0.27500468492507935, "learning_rate": 0.0002, "epoch": 0.32243913138846236, "step": 1470}, {"loss": 0.9819, "grad_norm": 0.291887491941452, "learning_rate": 0.0002, "epoch": 0.32463259486729545, "step": 1480}, {"loss": 0.9659, "grad_norm": 0.28374260663986206, "learning_rate": 0.0002, "epoch": 0.32682605834612855, "step": 1490}, {"loss": 0.9659, "grad_norm": 0.25460076332092285, "learning_rate": 0.0002, "epoch": 0.32901952182496164, "step": 1500}, {"loss": 0.9497, "grad_norm": 0.29558590054512024, "learning_rate": 0.0002, "epoch": 0.33121298530379467, "step": 1510}, {"loss": 0.9606, "grad_norm": 0.2923066318035126, "learning_rate": 0.0002, "epoch": 0.33340644878262776, "step": 1520}, {"loss": 0.9737, "grad_norm": 0.27461138367652893, "learning_rate": 0.0002, "epoch": 0.33559991226146085, "step": 1530}, {"loss": 0.9253, "grad_norm": 0.2553742527961731, "learning_rate": 0.0002, "epoch": 0.33779337574029394, "step": 1540}, {"loss": 1.0249, "grad_norm": 0.2610684931278229, "learning_rate": 0.0002, "epoch": 0.339986839219127, "step": 1550}, {"loss": 0.9775, "grad_norm": 0.32386520504951477, "learning_rate": 0.0002, "epoch": 0.34218030269796007, "step": 1560}, {"loss": 0.939, "grad_norm": 0.25414299964904785, "learning_rate": 0.0002, "epoch": 0.34437376617679316, "step": 1570}, {"loss": 1.0143, "grad_norm": 0.2658778727054596, "learning_rate": 0.0002, "epoch": 0.34656722965562625, "step": 1580}, {"loss": 0.9668, "grad_norm": 0.261451780796051, "learning_rate": 0.0002, "epoch": 0.3487606931344593, "step": 1590}, {"loss": 0.9714, "grad_norm": 0.30385783314704895, "learning_rate": 0.0002, "epoch": 0.3509541566132924, "step": 1600}, {"loss": 0.9504, "grad_norm": 0.29142382740974426, "learning_rate": 0.0002, "epoch": 0.35314762009212547, "step": 1610}, {"loss": 0.9984, "grad_norm": 0.2508818507194519, "learning_rate": 0.0002, "epoch": 0.35534108357095856, "step": 1620}, {"loss": 0.9178, "grad_norm": 0.3103947043418884, "learning_rate": 0.0002, "epoch": 0.3575345470497916, "step": 1630}, {"loss": 0.9072, "grad_norm": 0.2687077522277832, "learning_rate": 0.0002, "epoch": 0.3597280105286247, "step": 1640}, {"loss": 0.9571, "grad_norm": 0.27417635917663574, "learning_rate": 0.0002, "epoch": 0.3619214740074578, "step": 1650}, {"loss": 0.9757, "grad_norm": 0.2913258969783783, "learning_rate": 0.0002, "epoch": 0.36411493748629087, "step": 1660}, {"loss": 0.9924, "grad_norm": 0.27138423919677734, "learning_rate": 0.0002, "epoch": 0.3663084009651239, "step": 1670}, {"loss": 1.0252, "grad_norm": 0.29340213537216187, "learning_rate": 0.0002, "epoch": 0.368501864443957, "step": 1680}, {"loss": 0.9482, "grad_norm": 0.2600564956665039, "learning_rate": 0.0002, "epoch": 0.3706953279227901, "step": 1690}, {"loss": 0.9643, "grad_norm": 0.2850398123264313, "learning_rate": 0.0002, "epoch": 0.3728887914016232, "step": 1700}, {"loss": 0.9603, "grad_norm": 0.27044641971588135, "learning_rate": 0.0002, "epoch": 0.37508225488045627, "step": 1710}, {"loss": 0.9231, "grad_norm": 0.279672235250473, "learning_rate": 0.0002, "epoch": 0.3772757183592893, "step": 1720}, {"loss": 0.9619, "grad_norm": 0.39690279960632324, "learning_rate": 0.0002, "epoch": 0.3794691818381224, "step": 1730}, {"loss": 1.029, "grad_norm": 0.2885724902153015, "learning_rate": 0.0002, "epoch": 0.3816626453169555, "step": 1740}, {"loss": 0.937, "grad_norm": 0.25408467650413513, "learning_rate": 0.0002, "epoch": 0.3838561087957886, "step": 1750}, {"loss": 0.9318, "grad_norm": 0.28737959265708923, "learning_rate": 0.0002, "epoch": 0.3860495722746216, "step": 1760}, {"loss": 0.9437, "grad_norm": 0.28748467564582825, "learning_rate": 0.0002, "epoch": 0.3882430357534547, "step": 1770}, {"loss": 0.9944, "grad_norm": 0.45409372448921204, "learning_rate": 0.0002, "epoch": 0.3904364992322878, "step": 1780}, {"loss": 0.9484, "grad_norm": 0.288301020860672, "learning_rate": 0.0002, "epoch": 0.3926299627111209, "step": 1790}, {"loss": 0.9695, "grad_norm": 0.296345591545105, "learning_rate": 0.0002, "epoch": 0.3948234261899539, "step": 1800}, {"loss": 1.0307, "grad_norm": 0.2767113745212555, "learning_rate": 0.0002, "epoch": 0.397016889668787, "step": 1810}, {"loss": 0.9054, "grad_norm": 0.31655144691467285, "learning_rate": 0.0002, "epoch": 0.3992103531476201, "step": 1820}, {"loss": 0.938, "grad_norm": 0.29370176792144775, "learning_rate": 0.0002, "epoch": 0.4014038166264532, "step": 1830}, {"loss": 0.9339, "grad_norm": 0.3059439957141876, "learning_rate": 0.0002, "epoch": 0.4035972801052862, "step": 1840}, {"loss": 1.0231, "grad_norm": 0.35507315397262573, "learning_rate": 0.0002, "epoch": 0.4057907435841193, "step": 1850}, {"loss": 0.9682, "grad_norm": 0.32321879267692566, "learning_rate": 0.0002, "epoch": 0.4079842070629524, "step": 1860}, {"loss": 0.9297, "grad_norm": 0.3280821144580841, "learning_rate": 0.0002, "epoch": 0.4101776705417855, "step": 1870}, {"loss": 0.9396, "grad_norm": 0.2943691909313202, "learning_rate": 0.0002, "epoch": 0.41237113402061853, "step": 1880}, {"loss": 0.9466, "grad_norm": 0.31097978353500366, "learning_rate": 0.0002, "epoch": 0.4145645974994516, "step": 1890}, {"loss": 0.9243, "grad_norm": 0.3235275447368622, "learning_rate": 0.0002, "epoch": 0.4167580609782847, "step": 1900}, {"loss": 0.9702, "grad_norm": 0.2567186653614044, "learning_rate": 0.0002, "epoch": 0.4189515244571178, "step": 1910}, {"loss": 0.9463, "grad_norm": 0.3001096248626709, "learning_rate": 0.0002, "epoch": 0.42114498793595084, "step": 1920}, {"loss": 0.9355, "grad_norm": 0.24351635575294495, "learning_rate": 0.0002, "epoch": 0.42333845141478393, "step": 1930}, {"loss": 0.9418, "grad_norm": 0.3179199993610382, "learning_rate": 0.0002, "epoch": 0.425531914893617, "step": 1940}, {"loss": 0.934, "grad_norm": 0.3179665207862854, "learning_rate": 0.0002, "epoch": 0.4277253783724501, "step": 1950}, {"loss": 0.9661, "grad_norm": 0.2748781740665436, "learning_rate": 0.0002, "epoch": 0.42991884185128315, "step": 1960}, {"loss": 0.9757, "grad_norm": 0.3291238248348236, "learning_rate": 0.0002, "epoch": 0.43211230533011624, "step": 1970}, {"loss": 0.9366, "grad_norm": 0.3690650761127472, "learning_rate": 0.0002, "epoch": 0.43430576880894933, "step": 1980}, {"loss": 0.9619, "grad_norm": 0.38407042622566223, "learning_rate": 0.0002, "epoch": 0.4364992322877824, "step": 1990}, {"loss": 0.9862, "grad_norm": 0.3754602074623108, "learning_rate": 0.0002, "epoch": 0.43869269576661546, "step": 2000}, {"loss": 1.0161, "grad_norm": 0.29660362005233765, "learning_rate": 0.0002, "epoch": 0.44088615924544855, "step": 2010}, {"loss": 0.9579, "grad_norm": 0.26131588220596313, "learning_rate": 0.0002, "epoch": 0.44307962272428164, "step": 2020}, {"loss": 0.9966, "grad_norm": 0.31432992219924927, "learning_rate": 0.0002, "epoch": 0.44527308620311473, "step": 2030}, {"loss": 0.9335, "grad_norm": 0.34775862097740173, "learning_rate": 0.0002, "epoch": 0.4474665496819478, "step": 2040}, {"loss": 0.945, "grad_norm": 0.2786644995212555, "learning_rate": 0.0002, "epoch": 0.44966001316078086, "step": 2050}, {"loss": 0.9672, "grad_norm": 0.2947084307670593, "learning_rate": 0.0002, "epoch": 0.45185347663961395, "step": 2060}, {"loss": 0.9788, "grad_norm": 0.25026124715805054, "learning_rate": 0.0002, "epoch": 0.45404694011844704, "step": 2070}, {"loss": 0.9432, "grad_norm": 0.25767192244529724, "learning_rate": 0.0002, "epoch": 0.45624040359728013, "step": 2080}, {"loss": 0.9041, "grad_norm": 0.26298925280570984, "learning_rate": 0.0002, "epoch": 0.45843386707611317, "step": 2090}, {"loss": 0.8898, "grad_norm": 0.2936318516731262, "learning_rate": 0.0002, "epoch": 0.46062733055494626, "step": 2100}, {"loss": 0.9411, "grad_norm": 0.32343974709510803, "learning_rate": 0.0002, "epoch": 0.46282079403377935, "step": 2110}, {"loss": 0.956, "grad_norm": 0.3265640139579773, "learning_rate": 0.0002, "epoch": 0.46501425751261244, "step": 2120}, {"loss": 0.9432, "grad_norm": 0.2969162166118622, "learning_rate": 0.0002, "epoch": 0.4672077209914455, "step": 2130}, {"loss": 0.9939, "grad_norm": 0.24851012229919434, "learning_rate": 0.0002, "epoch": 0.46940118447027857, "step": 2140}, {"loss": 0.959, "grad_norm": 0.2893223166465759, "learning_rate": 0.0002, "epoch": 0.47159464794911166, "step": 2150}, {"loss": 0.9642, "grad_norm": 0.28900429606437683, "learning_rate": 0.0002, "epoch": 0.47378811142794475, "step": 2160}, {"loss": 0.9509, "grad_norm": 0.35519737005233765, "learning_rate": 0.0002, "epoch": 0.4759815749067778, "step": 2170}, {"loss": 0.9567, "grad_norm": 0.24103783071041107, "learning_rate": 0.0002, "epoch": 0.4781750383856109, "step": 2180}, {"loss": 0.9745, "grad_norm": 0.2589971125125885, "learning_rate": 0.0002, "epoch": 0.48036850186444396, "step": 2190}, {"loss": 0.9091, "grad_norm": 0.28667405247688293, "learning_rate": 0.0002, "epoch": 0.48256196534327706, "step": 2200}, {"loss": 0.9455, "grad_norm": 0.2571360468864441, "learning_rate": 0.0002, "epoch": 0.4847554288221101, "step": 2210}, {"loss": 1.0029, "grad_norm": 0.318481981754303, "learning_rate": 0.0002, "epoch": 0.4869488923009432, "step": 2220}, {"loss": 0.9133, "grad_norm": 0.2386094331741333, "learning_rate": 0.0002, "epoch": 0.48914235577977627, "step": 2230}, {"loss": 0.9467, "grad_norm": 0.2541503310203552, "learning_rate": 0.0002, "epoch": 0.49133581925860936, "step": 2240}, {"loss": 0.9099, "grad_norm": 0.23434612154960632, "learning_rate": 0.0002, "epoch": 0.4935292827374424, "step": 2250}, {"loss": 0.9132, "grad_norm": 0.30263033509254456, "learning_rate": 0.0002, "epoch": 0.4957227462162755, "step": 2260}, {"loss": 0.9876, "grad_norm": 0.2867080271244049, "learning_rate": 0.0002, "epoch": 0.4979162096951086, "step": 2270}, {"loss": 0.9664, "grad_norm": 0.29734519124031067, "learning_rate": 0.0002, "epoch": 0.5001096731739416, "step": 2280}, {"loss": 0.9732, "grad_norm": 0.329632967710495, "learning_rate": 0.0002, "epoch": 0.5023031366527747, "step": 2290}, {"loss": 0.9295, "grad_norm": 0.27570590376853943, "learning_rate": 0.0002, "epoch": 0.5044966001316078, "step": 2300}, {"loss": 0.9877, "grad_norm": 0.2730073034763336, "learning_rate": 0.0002, "epoch": 0.5066900636104409, "step": 2310}, {"loss": 0.8998, "grad_norm": 0.26178479194641113, "learning_rate": 0.0002, "epoch": 0.508883527089274, "step": 2320}, {"loss": 0.9407, "grad_norm": 0.28730741143226624, "learning_rate": 0.0002, "epoch": 0.5110769905681071, "step": 2330}, {"loss": 0.9438, "grad_norm": 0.28783559799194336, "learning_rate": 0.0002, "epoch": 0.5132704540469402, "step": 2340}, {"loss": 0.9944, "grad_norm": 0.29126057028770447, "learning_rate": 0.0002, "epoch": 0.5154639175257731, "step": 2350}, {"loss": 0.9269, "grad_norm": 0.2820777893066406, "learning_rate": 0.0002, "epoch": 0.5176573810046062, "step": 2360}, {"loss": 0.991, "grad_norm": 0.2885262370109558, "learning_rate": 0.0002, "epoch": 0.5198508444834393, "step": 2370}, {"loss": 0.8854, "grad_norm": 0.285761296749115, "learning_rate": 0.0002, "epoch": 0.5220443079622724, "step": 2380}, {"loss": 0.9476, "grad_norm": 0.25369325280189514, "learning_rate": 0.0002, "epoch": 0.5242377714411055, "step": 2390}, {"loss": 0.9296, "grad_norm": 0.2796023488044739, "learning_rate": 0.0002, "epoch": 0.5264312349199386, "step": 2400}, {"loss": 0.9003, "grad_norm": 0.3219817578792572, "learning_rate": 0.0002, "epoch": 0.5286246983987717, "step": 2410}, {"loss": 0.904, "grad_norm": 0.2741348147392273, "learning_rate": 0.0002, "epoch": 0.5308181618776048, "step": 2420}, {"loss": 0.9016, "grad_norm": 0.276089072227478, "learning_rate": 0.0002, "epoch": 0.5330116253564379, "step": 2430}, {"loss": 0.9236, "grad_norm": 0.35273653268814087, "learning_rate": 0.0002, "epoch": 0.5352050888352708, "step": 2440}, {"loss": 0.9737, "grad_norm": 0.26745638251304626, "learning_rate": 0.0002, "epoch": 0.5373985523141039, "step": 2450}, {"loss": 0.9128, "grad_norm": 0.29637888073921204, "learning_rate": 0.0002, "epoch": 0.539592015792937, "step": 2460}, {"loss": 0.9253, "grad_norm": 0.47859564423561096, "learning_rate": 0.0002, "epoch": 0.5417854792717701, "step": 2470}, {"loss": 0.9573, "grad_norm": 0.26885104179382324, "learning_rate": 0.0002, "epoch": 0.5439789427506032, "step": 2480}, {"loss": 1.0216, "grad_norm": 0.3799450695514679, "learning_rate": 0.0002, "epoch": 0.5461724062294363, "step": 2490}, {"loss": 0.9765, "grad_norm": 0.29873204231262207, "learning_rate": 0.0002, "epoch": 0.5483658697082694, "step": 2500}, {"loss": 0.9336, "grad_norm": 0.2657175660133362, "learning_rate": 0.0002, "epoch": 0.5505593331871025, "step": 2510}, {"loss": 0.9713, "grad_norm": 0.2960939407348633, "learning_rate": 0.0002, "epoch": 0.5527527966659355, "step": 2520}, {"loss": 0.9405, "grad_norm": 0.29653313755989075, "learning_rate": 0.0002, "epoch": 0.5549462601447686, "step": 2530}, {"loss": 0.9765, "grad_norm": 0.33513569831848145, "learning_rate": 0.0002, "epoch": 0.5571397236236016, "step": 2540}, {"loss": 0.9772, "grad_norm": 0.27931782603263855, "learning_rate": 0.0002, "epoch": 0.5593331871024347, "step": 2550}, {"loss": 0.9185, "grad_norm": 0.2695784270763397, "learning_rate": 0.0002, "epoch": 0.5615266505812678, "step": 2560}, {"loss": 0.9187, "grad_norm": 0.36575549840927124, "learning_rate": 0.0002, "epoch": 0.5637201140601009, "step": 2570}, {"loss": 0.9136, "grad_norm": 0.27411699295043945, "learning_rate": 0.0002, "epoch": 0.565913577538934, "step": 2580}, {"loss": 0.9124, "grad_norm": 0.24721871316432953, "learning_rate": 0.0002, "epoch": 0.5681070410177671, "step": 2590}, {"loss": 0.92, "grad_norm": 0.28097397089004517, "learning_rate": 0.0002, "epoch": 0.5703005044966001, "step": 2600}, {"loss": 0.9449, "grad_norm": 0.2763797342777252, "learning_rate": 0.0002, "epoch": 0.5724939679754332, "step": 2610}, {"loss": 1.0024, "grad_norm": 0.29981425404548645, "learning_rate": 0.0002, "epoch": 0.5746874314542663, "step": 2620}, {"loss": 0.8959, "grad_norm": 0.3037952780723572, "learning_rate": 0.0002, "epoch": 0.5768808949330994, "step": 2630}, {"loss": 1.0221, "grad_norm": 0.43900713324546814, "learning_rate": 0.0002, "epoch": 0.5790743584119324, "step": 2640}, {"loss": 0.9712, "grad_norm": 0.29732951521873474, "learning_rate": 0.0002, "epoch": 0.5812678218907655, "step": 2650}, {"loss": 0.9127, "grad_norm": 0.3402515947818756, "learning_rate": 0.0002, "epoch": 0.5834612853695986, "step": 2660}, {"loss": 0.9734, "grad_norm": 0.3183230459690094, "learning_rate": 0.0002, "epoch": 0.5856547488484317, "step": 2670}, {"loss": 0.9943, "grad_norm": 0.32273998856544495, "learning_rate": 0.0002, "epoch": 0.5878482123272647, "step": 2680}, {"loss": 0.9285, "grad_norm": 0.2581384778022766, "learning_rate": 0.0002, "epoch": 0.5900416758060978, "step": 2690}, {"loss": 0.9352, "grad_norm": 0.29068681597709656, "learning_rate": 0.0002, "epoch": 0.5922351392849309, "step": 2700}, {"loss": 1.021, "grad_norm": 0.43102848529815674, "learning_rate": 0.0002, "epoch": 0.594428602763764, "step": 2710}, {"loss": 0.9532, "grad_norm": 0.28541913628578186, "learning_rate": 0.0002, "epoch": 0.5966220662425971, "step": 2720}, {"loss": 0.9051, "grad_norm": 0.26985034346580505, "learning_rate": 0.0002, "epoch": 0.5988155297214302, "step": 2730}, {"loss": 0.9623, "grad_norm": 0.29203641414642334, "learning_rate": 0.0002, "epoch": 0.6010089932002632, "step": 2740}, {"loss": 0.9104, "grad_norm": 0.2598368227481842, "learning_rate": 0.0002, "epoch": 0.6032024566790963, "step": 2750}, {"loss": 0.9014, "grad_norm": 0.26170074939727783, "learning_rate": 0.0002, "epoch": 0.6053959201579294, "step": 2760}, {"loss": 0.9487, "grad_norm": 0.29607558250427246, "learning_rate": 0.0002, "epoch": 0.6075893836367624, "step": 2770}, {"loss": 0.9095, "grad_norm": 0.3353744447231293, "learning_rate": 0.0002, "epoch": 0.6097828471155955, "step": 2780}, {"loss": 0.9172, "grad_norm": 0.3440144956111908, "learning_rate": 0.0002, "epoch": 0.6119763105944286, "step": 2790}, {"loss": 0.9383, "grad_norm": 0.30086150765419006, "learning_rate": 0.0002, "epoch": 0.6141697740732617, "step": 2800}, {"loss": 0.9254, "grad_norm": 0.2854063808917999, "learning_rate": 0.0002, "epoch": 0.6163632375520948, "step": 2810}, {"loss": 0.8981, "grad_norm": 0.3145923316478729, "learning_rate": 0.0002, "epoch": 0.6185567010309279, "step": 2820}, {"loss": 0.9108, "grad_norm": 0.3131923973560333, "learning_rate": 0.0002, "epoch": 0.620750164509761, "step": 2830}, {"loss": 0.9281, "grad_norm": 0.31920260190963745, "learning_rate": 0.0002, "epoch": 0.622943627988594, "step": 2840}, {"loss": 0.9563, "grad_norm": 0.31609806418418884, "learning_rate": 0.0002, "epoch": 0.625137091467427, "step": 2850}, {"loss": 0.9637, "grad_norm": 0.25338292121887207, "learning_rate": 0.0002, "epoch": 0.6273305549462601, "step": 2860}, {"loss": 0.9166, "grad_norm": 0.3332087993621826, "learning_rate": 0.0002, "epoch": 0.6295240184250932, "step": 2870}, {"loss": 0.9969, "grad_norm": 0.2748430371284485, "learning_rate": 0.0002, "epoch": 0.6317174819039263, "step": 2880}, {"loss": 0.9575, "grad_norm": 0.2676611542701721, "learning_rate": 0.0002, "epoch": 0.6339109453827594, "step": 2890}, {"loss": 0.9844, "grad_norm": 0.3148820102214813, "learning_rate": 0.0002, "epoch": 0.6361044088615925, "step": 2900}, {"loss": 0.9458, "grad_norm": 0.2662498652935028, "learning_rate": 0.0002, "epoch": 0.6382978723404256, "step": 2910}, {"loss": 0.9512, "grad_norm": 0.2376423329114914, "learning_rate": 0.0002, "epoch": 0.6404913358192587, "step": 2920}, {"loss": 0.9055, "grad_norm": 0.3229467272758484, "learning_rate": 0.0002, "epoch": 0.6426847992980916, "step": 2930}, {"loss": 0.9556, "grad_norm": 0.3393401503562927, "learning_rate": 0.0002, "epoch": 0.6448782627769247, "step": 2940}, {"loss": 0.8888, "grad_norm": 0.25922730565071106, "learning_rate": 0.0002, "epoch": 0.6470717262557578, "step": 2950}, {"loss": 0.9818, "grad_norm": 0.33279597759246826, "learning_rate": 0.0002, "epoch": 0.6492651897345909, "step": 2960}, {"loss": 0.9138, "grad_norm": 0.3159688711166382, "learning_rate": 0.0002, "epoch": 0.651458653213424, "step": 2970}, {"loss": 1.0306, "grad_norm": 0.2815823256969452, "learning_rate": 0.0002, "epoch": 0.6536521166922571, "step": 2980}, {"loss": 0.9672, "grad_norm": 0.33509743213653564, "learning_rate": 0.0002, "epoch": 0.6558455801710902, "step": 2990}, {"loss": 0.9282, "grad_norm": 0.30443593859672546, "learning_rate": 0.0002, "epoch": 0.6580390436499233, "step": 3000}, {"loss": 0.908, "grad_norm": 0.28104275465011597, "learning_rate": 0.0002, "epoch": 0.6602325071287563, "step": 3010}, {"loss": 0.9848, "grad_norm": 0.2987457513809204, "learning_rate": 0.0002, "epoch": 0.6624259706075893, "step": 3020}, {"loss": 0.9431, "grad_norm": 0.29156386852264404, "learning_rate": 0.0002, "epoch": 0.6646194340864224, "step": 3030}, {"loss": 0.9145, "grad_norm": 0.3095228672027588, "learning_rate": 0.0002, "epoch": 0.6668128975652555, "step": 3040}, {"loss": 0.9129, "grad_norm": 0.2687548100948334, "learning_rate": 0.0002, "epoch": 0.6690063610440886, "step": 3050}, {"loss": 0.9413, "grad_norm": 0.3118886947631836, "learning_rate": 0.0002, "epoch": 0.6711998245229217, "step": 3060}, {"loss": 0.9387, "grad_norm": 0.30067798495292664, "learning_rate": 0.0002, "epoch": 0.6733932880017548, "step": 3070}, {"loss": 0.9727, "grad_norm": 0.27757528424263, "learning_rate": 0.0002, "epoch": 0.6755867514805879, "step": 3080}, {"loss": 0.9275, "grad_norm": 0.2936228811740875, "learning_rate": 0.0002, "epoch": 0.677780214959421, "step": 3090}, {"loss": 1.0258, "grad_norm": 0.3333088457584381, "learning_rate": 0.0002, "epoch": 0.679973678438254, "step": 3100}, {"loss": 0.9692, "grad_norm": 0.28410646319389343, "learning_rate": 0.0002, "epoch": 0.682167141917087, "step": 3110}, {"loss": 0.905, "grad_norm": 0.276865690946579, "learning_rate": 0.0002, "epoch": 0.6843606053959201, "step": 3120}, {"loss": 0.9276, "grad_norm": 0.3552042245864868, "learning_rate": 0.0002, "epoch": 0.6865540688747532, "step": 3130}, {"loss": 0.9683, "grad_norm": 0.34020522236824036, "learning_rate": 0.0002, "epoch": 0.6887475323535863, "step": 3140}, {"loss": 0.9391, "grad_norm": 0.2758551239967346, "learning_rate": 0.0002, "epoch": 0.6909409958324194, "step": 3150}, {"loss": 0.9617, "grad_norm": 0.27048254013061523, "learning_rate": 0.0002, "epoch": 0.6931344593112525, "step": 3160}, {"loss": 1.0023, "grad_norm": 0.32675179839134216, "learning_rate": 0.0002, "epoch": 0.6953279227900856, "step": 3170}, {"loss": 0.9719, "grad_norm": 0.30552831292152405, "learning_rate": 0.0002, "epoch": 0.6975213862689186, "step": 3180}, {"loss": 0.9379, "grad_norm": 0.2896120846271515, "learning_rate": 0.0002, "epoch": 0.6997148497477517, "step": 3190}, {"loss": 0.9345, "grad_norm": 0.328437477350235, "learning_rate": 0.0002, "epoch": 0.7019083132265848, "step": 3200}, {"loss": 0.9031, "grad_norm": 0.2681572437286377, "learning_rate": 0.0002, "epoch": 0.7041017767054178, "step": 3210}, {"loss": 1.0194, "grad_norm": 0.31614506244659424, "learning_rate": 0.0002, "epoch": 0.7062952401842509, "step": 3220}, {"loss": 0.9144, "grad_norm": 0.3212965726852417, "learning_rate": 0.0002, "epoch": 0.708488703663084, "step": 3230}, {"loss": 0.9252, "grad_norm": 0.29909437894821167, "learning_rate": 0.0002, "epoch": 0.7106821671419171, "step": 3240}, {"loss": 0.9104, "grad_norm": 0.3564198613166809, "learning_rate": 0.0002, "epoch": 0.7128756306207502, "step": 3250}, {"loss": 0.8579, "grad_norm": 0.30233755707740784, "learning_rate": 0.0002, "epoch": 0.7150690940995832, "step": 3260}, {"loss": 0.9569, "grad_norm": 0.3637634515762329, "learning_rate": 0.0002, "epoch": 0.7172625575784163, "step": 3270}, {"loss": 0.8938, "grad_norm": 0.3526783287525177, "learning_rate": 0.0002, "epoch": 0.7194560210572494, "step": 3280}, {"loss": 0.9087, "grad_norm": 0.31693780422210693, "learning_rate": 0.0002, "epoch": 0.7216494845360825, "step": 3290}, {"loss": 0.8903, "grad_norm": 0.27407363057136536, "learning_rate": 0.0002, "epoch": 0.7238429480149156, "step": 3300}, {"loss": 0.9246, "grad_norm": 0.2853906750679016, "learning_rate": 0.0002, "epoch": 0.7260364114937486, "step": 3310}, {"loss": 0.9073, "grad_norm": 0.32970669865608215, "learning_rate": 0.0002, "epoch": 0.7282298749725817, "step": 3320}, {"loss": 0.9104, "grad_norm": 0.30202552676200867, "learning_rate": 0.0002, "epoch": 0.7304233384514148, "step": 3330}, {"loss": 0.9994, "grad_norm": 0.303541898727417, "learning_rate": 0.0002, "epoch": 0.7326168019302478, "step": 3340}, {"loss": 0.9077, "grad_norm": 0.30037763714790344, "learning_rate": 0.0002, "epoch": 0.7348102654090809, "step": 3350}, {"loss": 0.9647, "grad_norm": 0.3005385398864746, "learning_rate": 0.0002, "epoch": 0.737003728887914, "step": 3360}, {"loss": 0.9843, "grad_norm": 0.3014072775840759, "learning_rate": 0.0002, "epoch": 0.7391971923667471, "step": 3370}, {"loss": 0.9473, "grad_norm": 0.30344435572624207, "learning_rate": 0.0002, "epoch": 0.7413906558455802, "step": 3380}, {"loss": 0.9929, "grad_norm": 0.429573655128479, "learning_rate": 0.0002, "epoch": 0.7435841193244133, "step": 3390}, {"loss": 1.0071, "grad_norm": 0.34228846430778503, "learning_rate": 0.0002, "epoch": 0.7457775828032464, "step": 3400}, {"loss": 0.9588, "grad_norm": 0.3337685167789459, "learning_rate": 0.0002, "epoch": 0.7479710462820794, "step": 3410}, {"loss": 0.9052, "grad_norm": 0.28519952297210693, "learning_rate": 0.0002, "epoch": 0.7501645097609125, "step": 3420}, {"loss": 0.9609, "grad_norm": 0.2672232687473297, "learning_rate": 0.0002, "epoch": 0.7523579732397455, "step": 3430}, {"loss": 0.922, "grad_norm": 0.26975974440574646, "learning_rate": 0.0002, "epoch": 0.7545514367185786, "step": 3440}, {"loss": 0.9143, "grad_norm": 0.33559730648994446, "learning_rate": 0.0002, "epoch": 0.7567449001974117, "step": 3450}, {"loss": 0.927, "grad_norm": 0.28974875807762146, "learning_rate": 0.0002, "epoch": 0.7589383636762448, "step": 3460}, {"loss": 0.9004, "grad_norm": 0.27283650636672974, "learning_rate": 0.0002, "epoch": 0.7611318271550779, "step": 3470}, {"loss": 0.9196, "grad_norm": 0.2745708227157593, "learning_rate": 0.0002, "epoch": 0.763325290633911, "step": 3480}, {"loss": 0.9881, "grad_norm": 0.32927849888801575, "learning_rate": 0.0002, "epoch": 0.7655187541127441, "step": 3490}, {"loss": 0.954, "grad_norm": 0.3045092821121216, "learning_rate": 0.0002, "epoch": 0.7677122175915772, "step": 3500}, {"loss": 0.8954, "grad_norm": 0.28448644280433655, "learning_rate": 0.0002, "epoch": 0.7699056810704101, "step": 3510}, {"loss": 0.9258, "grad_norm": 0.3182753324508667, "learning_rate": 0.0002, "epoch": 0.7720991445492432, "step": 3520}, {"loss": 0.9779, "grad_norm": 0.3121042251586914, "learning_rate": 0.0002, "epoch": 0.7742926080280763, "step": 3530}, {"loss": 0.9326, "grad_norm": 0.32504507899284363, "learning_rate": 0.0002, "epoch": 0.7764860715069094, "step": 3540}, {"loss": 0.9947, "grad_norm": 0.34173160791397095, "learning_rate": 0.0002, "epoch": 0.7786795349857425, "step": 3550}, {"loss": 0.9296, "grad_norm": 0.32241320610046387, "learning_rate": 0.0002, "epoch": 0.7808729984645756, "step": 3560}, {"loss": 0.9526, "grad_norm": 0.2895474135875702, "learning_rate": 0.0002, "epoch": 0.7830664619434087, "step": 3570}, {"loss": 1.0204, "grad_norm": 0.3866957724094391, "learning_rate": 0.0002, "epoch": 0.7852599254222418, "step": 3580}, {"loss": 0.9496, "grad_norm": 0.32689714431762695, "learning_rate": 0.0002, "epoch": 0.7874533889010747, "step": 3590}, {"loss": 0.9068, "grad_norm": 0.3310331404209137, "learning_rate": 0.0002, "epoch": 0.7896468523799078, "step": 3600}, {"loss": 0.9144, "grad_norm": 0.34452763199806213, "learning_rate": 0.0002, "epoch": 0.7918403158587409, "step": 3610}, {"loss": 0.8867, "grad_norm": 0.30505695939064026, "learning_rate": 0.0002, "epoch": 0.794033779337574, "step": 3620}, {"loss": 0.9284, "grad_norm": 0.31703150272369385, "learning_rate": 0.0002, "epoch": 0.7962272428164071, "step": 3630}, {"loss": 0.8824, "grad_norm": 0.3235083818435669, "learning_rate": 0.0002, "epoch": 0.7984207062952402, "step": 3640}, {"loss": 0.9111, "grad_norm": 0.32634443044662476, "learning_rate": 0.0002, "epoch": 0.8006141697740733, "step": 3650}, {"loss": 0.9092, "grad_norm": 0.3178234398365021, "learning_rate": 0.0002, "epoch": 0.8028076332529064, "step": 3660}, {"loss": 0.9223, "grad_norm": 0.31957778334617615, "learning_rate": 0.0002, "epoch": 0.8050010967317394, "step": 3670}, {"loss": 0.951, "grad_norm": 0.43891236186027527, "learning_rate": 0.0002, "epoch": 0.8071945602105725, "step": 3680}, {"loss": 0.9115, "grad_norm": 0.3237752318382263, "learning_rate": 0.0002, "epoch": 0.8093880236894055, "step": 3690}, {"loss": 0.881, "grad_norm": 0.3394874036312103, "learning_rate": 0.0002, "epoch": 0.8115814871682386, "step": 3700}, {"loss": 0.9474, "grad_norm": 0.3321021497249603, "learning_rate": 0.0002, "epoch": 0.8137749506470717, "step": 3710}, {"loss": 0.9027, "grad_norm": 0.275594025850296, "learning_rate": 0.0002, "epoch": 0.8159684141259048, "step": 3720}, {"loss": 0.9628, "grad_norm": 0.3246499001979828, "learning_rate": 0.0002, "epoch": 0.8181618776047379, "step": 3730}, {"loss": 0.9168, "grad_norm": 0.30453723669052124, "learning_rate": 0.0002, "epoch": 0.820355341083571, "step": 3740}, {"loss": 0.9399, "grad_norm": 0.31527435779571533, "learning_rate": 0.0002, "epoch": 0.8225488045624041, "step": 3750}, {"loss": 0.9208, "grad_norm": 0.31260135769844055, "learning_rate": 0.0002, "epoch": 0.8247422680412371, "step": 3760}, {"loss": 0.9508, "grad_norm": 0.2887323498725891, "learning_rate": 0.0002, "epoch": 0.8269357315200702, "step": 3770}, {"loss": 0.9062, "grad_norm": 0.32923346757888794, "learning_rate": 0.0002, "epoch": 0.8291291949989033, "step": 3780}, {"loss": 0.9416, "grad_norm": 0.32532116770744324, "learning_rate": 0.0002, "epoch": 0.8313226584777363, "step": 3790}, {"loss": 0.912, "grad_norm": 0.32623663544654846, "learning_rate": 0.0002, "epoch": 0.8335161219565694, "step": 3800}, {"loss": 1.0021, "grad_norm": 0.30956289172172546, "learning_rate": 0.0002, "epoch": 0.8357095854354025, "step": 3810}, {"loss": 0.9488, "grad_norm": 0.3155129551887512, "learning_rate": 0.0002, "epoch": 0.8379030489142356, "step": 3820}, {"loss": 0.9587, "grad_norm": 0.25943952798843384, "learning_rate": 0.0002, "epoch": 0.8400965123930687, "step": 3830}, {"loss": 0.9214, "grad_norm": 0.23917023837566376, "learning_rate": 0.0002, "epoch": 0.8422899758719017, "step": 3840}, {"loss": 0.9141, "grad_norm": 0.2924705147743225, "learning_rate": 0.0002, "epoch": 0.8444834393507348, "step": 3850}, {"loss": 0.9027, "grad_norm": 0.4130593240261078, "learning_rate": 0.0002, "epoch": 0.8466769028295679, "step": 3860}, {"loss": 0.8976, "grad_norm": 0.3671727776527405, "learning_rate": 0.0002, "epoch": 0.848870366308401, "step": 3870}, {"loss": 0.9352, "grad_norm": 0.3440222442150116, "learning_rate": 0.0002, "epoch": 0.851063829787234, "step": 3880}, {"loss": 0.9345, "grad_norm": 0.30887120962142944, "learning_rate": 0.0002, "epoch": 0.8532572932660671, "step": 3890}, {"loss": 0.9445, "grad_norm": 0.3625484108924866, "learning_rate": 0.0002, "epoch": 0.8554507567449002, "step": 3900}, {"loss": 0.9481, "grad_norm": 0.28620463609695435, "learning_rate": 0.0002, "epoch": 0.8576442202237333, "step": 3910}, {"loss": 0.938, "grad_norm": 0.3712252974510193, "learning_rate": 0.0002, "epoch": 0.8598376837025663, "step": 3920}, {"loss": 0.9301, "grad_norm": 0.2609408497810364, "learning_rate": 0.0002, "epoch": 0.8620311471813994, "step": 3930}, {"loss": 0.9392, "grad_norm": 0.3209652006626129, "learning_rate": 0.0002, "epoch": 0.8642246106602325, "step": 3940}, {"loss": 0.9237, "grad_norm": 0.2881571650505066, "learning_rate": 0.0002, "epoch": 0.8664180741390656, "step": 3950}, {"loss": 0.9475, "grad_norm": 0.3390534222126007, "learning_rate": 0.0002, "epoch": 0.8686115376178987, "step": 3960}, {"loss": 0.9157, "grad_norm": 0.29375696182250977, "learning_rate": 0.0002, "epoch": 0.8708050010967318, "step": 3970}, {"loss": 0.915, "grad_norm": 0.28744739294052124, "learning_rate": 0.0002, "epoch": 0.8729984645755648, "step": 3980}, {"loss": 0.9182, "grad_norm": 0.2927035093307495, "learning_rate": 0.0002, "epoch": 0.8751919280543979, "step": 3990}, {"loss": 0.9157, "grad_norm": 0.2802438735961914, "learning_rate": 0.0002, "epoch": 0.8773853915332309, "step": 4000}, {"loss": 0.9248, "grad_norm": 0.28368017077445984, "learning_rate": 0.0002, "epoch": 0.879578855012064, "step": 4010}, {"loss": 0.896, "grad_norm": 0.29639744758605957, "learning_rate": 0.0002, "epoch": 0.8817723184908971, "step": 4020}, {"loss": 0.9285, "grad_norm": 0.32742634415626526, "learning_rate": 0.0002, "epoch": 0.8839657819697302, "step": 4030}, {"loss": 0.999, "grad_norm": 0.35471639037132263, "learning_rate": 0.0002, "epoch": 0.8861592454485633, "step": 4040}, {"loss": 0.9499, "grad_norm": 0.3137759268283844, "learning_rate": 0.0002, "epoch": 0.8883527089273964, "step": 4050}, {"loss": 0.8779, "grad_norm": 0.29136285185813904, "learning_rate": 0.0002, "epoch": 0.8905461724062295, "step": 4060}, {"loss": 0.9541, "grad_norm": 0.31348589062690735, "learning_rate": 0.0002, "epoch": 0.8927396358850626, "step": 4070}, {"loss": 0.9034, "grad_norm": 0.2923150062561035, "learning_rate": 0.0002, "epoch": 0.8949330993638956, "step": 4080}, {"loss": 0.944, "grad_norm": 0.347741037607193, "learning_rate": 0.0002, "epoch": 0.8971265628427286, "step": 4090}, {"loss": 0.929, "grad_norm": 0.2765970528125763, "learning_rate": 0.0002, "epoch": 0.8993200263215617, "step": 4100}, {"loss": 0.9289, "grad_norm": 0.34861597418785095, "learning_rate": 0.0002, "epoch": 0.9015134898003948, "step": 4110}, {"loss": 0.947, "grad_norm": 0.32990163564682007, "learning_rate": 0.0002, "epoch": 0.9037069532792279, "step": 4120}, {"loss": 0.9396, "grad_norm": 0.3003794550895691, "learning_rate": 0.0002, "epoch": 0.905900416758061, "step": 4130}, {"loss": 0.8809, "grad_norm": 0.30308797955513, "learning_rate": 0.0002, "epoch": 0.9080938802368941, "step": 4140}, {"loss": 0.9232, "grad_norm": 0.30399802327156067, "learning_rate": 0.0002, "epoch": 0.9102873437157272, "step": 4150}, {"loss": 0.9194, "grad_norm": 0.2956405282020569, "learning_rate": 0.0002, "epoch": 0.9124808071945603, "step": 4160}, {"loss": 0.9037, "grad_norm": 0.28979742527008057, "learning_rate": 0.0002, "epoch": 0.9146742706733932, "step": 4170}, {"loss": 0.9296, "grad_norm": 0.28005316853523254, "learning_rate": 0.0002, "epoch": 0.9168677341522263, "step": 4180}, {"loss": 0.9812, "grad_norm": 0.3533253073692322, "learning_rate": 0.0002, "epoch": 0.9190611976310594, "step": 4190}, {"loss": 0.9353, "grad_norm": 0.3128524422645569, "learning_rate": 0.0002, "epoch": 0.9212546611098925, "step": 4200}, {"loss": 0.9222, "grad_norm": 0.31551462411880493, "learning_rate": 0.0002, "epoch": 0.9234481245887256, "step": 4210}, {"loss": 0.9664, "grad_norm": 0.2988870143890381, "learning_rate": 0.0002, "epoch": 0.9256415880675587, "step": 4220}, {"loss": 0.9205, "grad_norm": 0.27368006110191345, "learning_rate": 0.0002, "epoch": 0.9278350515463918, "step": 4230}, {"loss": 0.9387, "grad_norm": 0.36728551983833313, "learning_rate": 0.0002, "epoch": 0.9300285150252249, "step": 4240}, {"loss": 0.8725, "grad_norm": 0.31942427158355713, "learning_rate": 0.0002, "epoch": 0.9322219785040579, "step": 4250}, {"loss": 0.9101, "grad_norm": 0.28932490944862366, "learning_rate": 0.0002, "epoch": 0.934415441982891, "step": 4260}, {"loss": 0.9102, "grad_norm": 0.34236326813697815, "learning_rate": 0.0002, "epoch": 0.936608905461724, "step": 4270}, {"loss": 0.9311, "grad_norm": 0.30271056294441223, "learning_rate": 0.0002, "epoch": 0.9388023689405571, "step": 4280}, {"loss": 0.9068, "grad_norm": 0.37781208753585815, "learning_rate": 0.0002, "epoch": 0.9409958324193902, "step": 4290}, {"loss": 0.9468, "grad_norm": 0.3334667980670929, "learning_rate": 0.0002, "epoch": 0.9431892958982233, "step": 4300}, {"loss": 1.0027, "grad_norm": 0.2996140420436859, "learning_rate": 0.0002, "epoch": 0.9453827593770564, "step": 4310}, {"loss": 0.9264, "grad_norm": 0.362100213766098, "learning_rate": 0.0002, "epoch": 0.9475762228558895, "step": 4320}, {"loss": 0.9281, "grad_norm": 0.3205488324165344, "learning_rate": 0.0002, "epoch": 0.9497696863347225, "step": 4330}, {"loss": 0.9433, "grad_norm": 0.35258468985557556, "learning_rate": 0.0002, "epoch": 0.9519631498135556, "step": 4340}, {"loss": 0.9234, "grad_norm": 0.2878371775150299, "learning_rate": 0.0002, "epoch": 0.9541566132923887, "step": 4350}, {"loss": 0.9272, "grad_norm": 0.35933104157447815, "learning_rate": 0.0002, "epoch": 0.9563500767712217, "step": 4360}, {"loss": 0.9556, "grad_norm": 0.30155718326568604, "learning_rate": 0.0002, "epoch": 0.9585435402500548, "step": 4370}, {"loss": 0.9382, "grad_norm": 0.3314479887485504, "learning_rate": 0.0002, "epoch": 0.9607370037288879, "step": 4380}, {"loss": 0.9148, "grad_norm": 0.3483031690120697, "learning_rate": 0.0002, "epoch": 0.962930467207721, "step": 4390}, {"loss": 0.8934, "grad_norm": 0.26347094774246216, "learning_rate": 0.0002, "epoch": 0.9651239306865541, "step": 4400}, {"loss": 0.9976, "grad_norm": 0.27101579308509827, "learning_rate": 0.0002, "epoch": 0.9673173941653872, "step": 4410}, {"loss": 0.9146, "grad_norm": 0.3251340687274933, "learning_rate": 0.0002, "epoch": 0.9695108576442202, "step": 4420}, {"loss": 0.9523, "grad_norm": 0.35335880517959595, "learning_rate": 0.0002, "epoch": 0.9717043211230533, "step": 4430}, {"loss": 0.9438, "grad_norm": 0.3828853666782379, "learning_rate": 0.0002, "epoch": 0.9738977846018864, "step": 4440}, {"loss": 0.9319, "grad_norm": 0.30478939414024353, "learning_rate": 0.0002, "epoch": 0.9760912480807195, "step": 4450}, {"loss": 0.9339, "grad_norm": 0.34777334332466125, "learning_rate": 0.0002, "epoch": 0.9782847115595525, "step": 4460}, {"loss": 0.9246, "grad_norm": 0.31834876537323, "learning_rate": 0.0002, "epoch": 0.9804781750383856, "step": 4470}, {"loss": 0.9716, "grad_norm": 0.36317092180252075, "learning_rate": 0.0002, "epoch": 0.9826716385172187, "step": 4480}, {"loss": 0.9435, "grad_norm": 0.31571221351623535, "learning_rate": 0.0002, "epoch": 0.9848651019960518, "step": 4490}, {"loss": 0.9371, "grad_norm": 0.3219566345214844, "learning_rate": 0.0002, "epoch": 0.9870585654748848, "step": 4500}, {"loss": 0.8871, "grad_norm": 0.2858487665653229, "learning_rate": 0.0002, "epoch": 0.9892520289537179, "step": 4510}, {"loss": 0.903, "grad_norm": 0.30729708075523376, "learning_rate": 0.0002, "epoch": 0.991445492432551, "step": 4520}, {"loss": 0.8953, "grad_norm": 0.34205910563468933, "learning_rate": 0.0002, "epoch": 0.9936389559113841, "step": 4530}, {"loss": 0.8955, "grad_norm": 0.3066234886646271, "learning_rate": 0.0002, "epoch": 0.9958324193902172, "step": 4540}, {"loss": 0.9363, "grad_norm": 0.3025010824203491, "learning_rate": 0.0002, "epoch": 0.9980258828690503, "step": 4550}]}