| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9977695167286247, |
| "eval_steps": 500, |
| "global_step": 504, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.005947955390334572, |
| "grad_norm": 1.824343204498291, |
| "learning_rate": 0.0002, |
| "loss": 3.1087, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.011895910780669145, |
| "grad_norm": 1.374577283859253, |
| "learning_rate": 0.0002, |
| "loss": 2.9898, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.017843866171003718, |
| "grad_norm": 1.5655758380889893, |
| "learning_rate": 0.0002, |
| "loss": 3.0999, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.02379182156133829, |
| "grad_norm": 5.276195049285889, |
| "learning_rate": 0.0002, |
| "loss": 2.7586, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.02973977695167286, |
| "grad_norm": 1.3304948806762695, |
| "learning_rate": 0.0002, |
| "loss": 2.7295, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.035687732342007436, |
| "grad_norm": 1.5444062948226929, |
| "learning_rate": 0.0002, |
| "loss": 2.5949, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.041635687732342004, |
| "grad_norm": 1.2866592407226562, |
| "learning_rate": 0.0002, |
| "loss": 2.4865, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.04758364312267658, |
| "grad_norm": 1.6175459623336792, |
| "learning_rate": 0.0002, |
| "loss": 2.3327, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.053531598513011154, |
| "grad_norm": 1.503796100616455, |
| "learning_rate": 0.0002, |
| "loss": 2.274, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.05947955390334572, |
| "grad_norm": 1.5973471403121948, |
| "learning_rate": 0.0002, |
| "loss": 2.3809, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0654275092936803, |
| "grad_norm": 1.4748364686965942, |
| "learning_rate": 0.0002, |
| "loss": 2.1347, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.07137546468401487, |
| "grad_norm": 1.7337145805358887, |
| "learning_rate": 0.0002, |
| "loss": 1.9428, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.07732342007434945, |
| "grad_norm": 2.0839593410491943, |
| "learning_rate": 0.0002, |
| "loss": 2.0188, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.08327137546468401, |
| "grad_norm": 2.5018622875213623, |
| "learning_rate": 0.0002, |
| "loss": 1.7943, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.08921933085501858, |
| "grad_norm": 3.416013479232788, |
| "learning_rate": 0.0002, |
| "loss": 1.6812, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.09516728624535316, |
| "grad_norm": 4.241348743438721, |
| "learning_rate": 0.0002, |
| "loss": 1.6478, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.10111524163568773, |
| "grad_norm": 2.6027214527130127, |
| "learning_rate": 0.0002, |
| "loss": 1.5114, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.10706319702602231, |
| "grad_norm": 2.1649773120880127, |
| "learning_rate": 0.0002, |
| "loss": 1.5063, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.11301115241635688, |
| "grad_norm": 1.9704638719558716, |
| "learning_rate": 0.0002, |
| "loss": 1.3781, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.11895910780669144, |
| "grad_norm": 1.7165110111236572, |
| "learning_rate": 0.0002, |
| "loss": 1.3058, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.12490706319702602, |
| "grad_norm": 1.3949488401412964, |
| "learning_rate": 0.0002, |
| "loss": 1.3533, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.1308550185873606, |
| "grad_norm": 1.5300015211105347, |
| "learning_rate": 0.0002, |
| "loss": 1.2578, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.13680297397769517, |
| "grad_norm": 1.9964842796325684, |
| "learning_rate": 0.0002, |
| "loss": 1.2485, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.14275092936802974, |
| "grad_norm": 1.322247862815857, |
| "learning_rate": 0.0002, |
| "loss": 1.1887, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.14869888475836432, |
| "grad_norm": 1.2447245121002197, |
| "learning_rate": 0.0002, |
| "loss": 1.0807, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.1546468401486989, |
| "grad_norm": 1.2943564653396606, |
| "learning_rate": 0.0002, |
| "loss": 1.1295, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.16059479553903347, |
| "grad_norm": 1.2561174631118774, |
| "learning_rate": 0.0002, |
| "loss": 1.1011, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.16654275092936802, |
| "grad_norm": 1.305808663368225, |
| "learning_rate": 0.0002, |
| "loss": 1.1217, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.1724907063197026, |
| "grad_norm": 1.1935081481933594, |
| "learning_rate": 0.0002, |
| "loss": 1.0971, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.17843866171003717, |
| "grad_norm": 0.9849146008491516, |
| "learning_rate": 0.0002, |
| "loss": 0.9949, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.18438661710037174, |
| "grad_norm": 1.236385703086853, |
| "learning_rate": 0.0002, |
| "loss": 1.0533, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.19033457249070632, |
| "grad_norm": 1.091674566268921, |
| "learning_rate": 0.0002, |
| "loss": 0.9963, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.1962825278810409, |
| "grad_norm": 1.4563655853271484, |
| "learning_rate": 0.0002, |
| "loss": 1.0137, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.20223048327137547, |
| "grad_norm": 1.1690599918365479, |
| "learning_rate": 0.0002, |
| "loss": 0.9163, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.20817843866171004, |
| "grad_norm": 1.2094273567199707, |
| "learning_rate": 0.0002, |
| "loss": 1.0236, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.21412639405204462, |
| "grad_norm": 1.388743281364441, |
| "learning_rate": 0.0002, |
| "loss": 0.9591, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.2200743494423792, |
| "grad_norm": 1.2390081882476807, |
| "learning_rate": 0.0002, |
| "loss": 0.8751, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.22602230483271377, |
| "grad_norm": 2.5453145503997803, |
| "learning_rate": 0.0002, |
| "loss": 0.8459, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.2319702602230483, |
| "grad_norm": 1.3461544513702393, |
| "learning_rate": 0.0002, |
| "loss": 0.9373, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.2379182156133829, |
| "grad_norm": 1.2979869842529297, |
| "learning_rate": 0.0002, |
| "loss": 0.9373, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.24386617100371746, |
| "grad_norm": 53.14031219482422, |
| "learning_rate": 0.0002, |
| "loss": 0.7923, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.24981412639405204, |
| "grad_norm": 3.772839307785034, |
| "learning_rate": 0.0002, |
| "loss": 1.0239, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.25576208178438664, |
| "grad_norm": 1.684868574142456, |
| "learning_rate": 0.0002, |
| "loss": 0.8878, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.2617100371747212, |
| "grad_norm": 1.1423863172531128, |
| "learning_rate": 0.0002, |
| "loss": 0.896, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.26765799256505574, |
| "grad_norm": 1.3496270179748535, |
| "learning_rate": 0.0002, |
| "loss": 1.0198, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.27360594795539034, |
| "grad_norm": 1.2799283266067505, |
| "learning_rate": 0.0002, |
| "loss": 0.88, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.2795539033457249, |
| "grad_norm": 1.1613731384277344, |
| "learning_rate": 0.0002, |
| "loss": 0.9708, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.2855018587360595, |
| "grad_norm": 1.1430435180664062, |
| "learning_rate": 0.0002, |
| "loss": 0.8913, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.29144981412639404, |
| "grad_norm": 0.9552589654922485, |
| "learning_rate": 0.0002, |
| "loss": 0.8091, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.29739776951672864, |
| "grad_norm": 1.1052002906799316, |
| "learning_rate": 0.0002, |
| "loss": 0.9737, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.3033457249070632, |
| "grad_norm": 1.3066654205322266, |
| "learning_rate": 0.0002, |
| "loss": 0.9281, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.3092936802973978, |
| "grad_norm": 1.0277180671691895, |
| "learning_rate": 0.0002, |
| "loss": 0.96, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.31524163568773234, |
| "grad_norm": 1.208615779876709, |
| "learning_rate": 0.0002, |
| "loss": 0.8485, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.32118959107806694, |
| "grad_norm": 1.0156666040420532, |
| "learning_rate": 0.0002, |
| "loss": 0.9602, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.3271375464684015, |
| "grad_norm": 1.0822789669036865, |
| "learning_rate": 0.0002, |
| "loss": 0.891, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.33308550185873603, |
| "grad_norm": 1.063072681427002, |
| "learning_rate": 0.0002, |
| "loss": 0.7898, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.33903345724907064, |
| "grad_norm": 1.061710238456726, |
| "learning_rate": 0.0002, |
| "loss": 0.7903, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.3449814126394052, |
| "grad_norm": 1.1998765468597412, |
| "learning_rate": 0.0002, |
| "loss": 0.8346, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.3509293680297398, |
| "grad_norm": 1.083093285560608, |
| "learning_rate": 0.0002, |
| "loss": 0.8103, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.35687732342007433, |
| "grad_norm": 1.0685770511627197, |
| "learning_rate": 0.0002, |
| "loss": 0.7284, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.36282527881040894, |
| "grad_norm": 1.3935203552246094, |
| "learning_rate": 0.0002, |
| "loss": 0.8349, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.3687732342007435, |
| "grad_norm": 1.005191445350647, |
| "learning_rate": 0.0002, |
| "loss": 0.8611, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.3747211895910781, |
| "grad_norm": 1.1198813915252686, |
| "learning_rate": 0.0002, |
| "loss": 0.7994, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.38066914498141263, |
| "grad_norm": 1.454626202583313, |
| "learning_rate": 0.0002, |
| "loss": 0.7984, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.38661710037174724, |
| "grad_norm": 1.1353782415390015, |
| "learning_rate": 0.0002, |
| "loss": 0.7505, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.3925650557620818, |
| "grad_norm": 1.1953253746032715, |
| "learning_rate": 0.0002, |
| "loss": 0.7754, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.39851301115241633, |
| "grad_norm": 1.0996239185333252, |
| "learning_rate": 0.0002, |
| "loss": 0.8747, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.40446096654275093, |
| "grad_norm": 1.5701665878295898, |
| "learning_rate": 0.0002, |
| "loss": 0.8047, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.4104089219330855, |
| "grad_norm": 1.29320228099823, |
| "learning_rate": 0.0002, |
| "loss": 0.7546, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.4163568773234201, |
| "grad_norm": 1.284342646598816, |
| "learning_rate": 0.0002, |
| "loss": 0.7324, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.42230483271375463, |
| "grad_norm": 1.0330944061279297, |
| "learning_rate": 0.0002, |
| "loss": 0.8614, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.42825278810408923, |
| "grad_norm": 1.0411959886550903, |
| "learning_rate": 0.0002, |
| "loss": 0.7431, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.4342007434944238, |
| "grad_norm": 1.2095258235931396, |
| "learning_rate": 0.0002, |
| "loss": 0.7909, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.4401486988847584, |
| "grad_norm": 1.3570586442947388, |
| "learning_rate": 0.0002, |
| "loss": 0.8113, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.44609665427509293, |
| "grad_norm": 1.0079586505889893, |
| "learning_rate": 0.0002, |
| "loss": 0.8457, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.45204460966542753, |
| "grad_norm": 0.9446130990982056, |
| "learning_rate": 0.0002, |
| "loss": 0.7934, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.4579925650557621, |
| "grad_norm": 1.0489394664764404, |
| "learning_rate": 0.0002, |
| "loss": 0.856, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.4639405204460966, |
| "grad_norm": 1.0112191438674927, |
| "learning_rate": 0.0002, |
| "loss": 0.7455, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.46988847583643123, |
| "grad_norm": 0.9976668953895569, |
| "learning_rate": 0.0002, |
| "loss": 0.8143, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.4758364312267658, |
| "grad_norm": 1.0991159677505493, |
| "learning_rate": 0.0002, |
| "loss": 0.6766, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.4817843866171004, |
| "grad_norm": 1.1794909238815308, |
| "learning_rate": 0.0002, |
| "loss": 0.6635, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.4877323420074349, |
| "grad_norm": 1.0414669513702393, |
| "learning_rate": 0.0002, |
| "loss": 0.8228, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.49368029739776953, |
| "grad_norm": 1.0767929553985596, |
| "learning_rate": 0.0002, |
| "loss": 0.8092, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.4996282527881041, |
| "grad_norm": 1.3375307321548462, |
| "learning_rate": 0.0002, |
| "loss": 0.7361, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.5055762081784386, |
| "grad_norm": 1.2492313385009766, |
| "learning_rate": 0.0002, |
| "loss": 0.8343, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.5115241635687733, |
| "grad_norm": 1.0948379039764404, |
| "learning_rate": 0.0002, |
| "loss": 0.7559, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.5174721189591078, |
| "grad_norm": 1.1456286907196045, |
| "learning_rate": 0.0002, |
| "loss": 0.7819, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.5234200743494424, |
| "grad_norm": 0.9729915857315063, |
| "learning_rate": 0.0002, |
| "loss": 0.7507, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.5293680297397769, |
| "grad_norm": 1.089845895767212, |
| "learning_rate": 0.0002, |
| "loss": 0.857, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.5353159851301115, |
| "grad_norm": 1.0552901029586792, |
| "learning_rate": 0.0002, |
| "loss": 0.8387, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.5412639405204461, |
| "grad_norm": 1.2134290933609009, |
| "learning_rate": 0.0002, |
| "loss": 0.845, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.5472118959107807, |
| "grad_norm": 1.2725780010223389, |
| "learning_rate": 0.0002, |
| "loss": 0.8203, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.5531598513011152, |
| "grad_norm": 1.3931224346160889, |
| "learning_rate": 0.0002, |
| "loss": 0.8428, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.5591078066914498, |
| "grad_norm": 1.0805130004882812, |
| "learning_rate": 0.0002, |
| "loss": 0.7855, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.5650557620817844, |
| "grad_norm": 1.018471598625183, |
| "learning_rate": 0.0002, |
| "loss": 0.6318, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.571003717472119, |
| "grad_norm": 3.2651963233947754, |
| "learning_rate": 0.0002, |
| "loss": 0.7039, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.5769516728624535, |
| "grad_norm": 0.9978213906288147, |
| "learning_rate": 0.0002, |
| "loss": 0.6734, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.5828996282527881, |
| "grad_norm": 0.8679234385490417, |
| "learning_rate": 0.0002, |
| "loss": 0.7277, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.5888475836431227, |
| "grad_norm": 1.1249589920043945, |
| "learning_rate": 0.0002, |
| "loss": 0.7392, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.5947955390334573, |
| "grad_norm": 0.9032052755355835, |
| "learning_rate": 0.0002, |
| "loss": 0.6876, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.6007434944237918, |
| "grad_norm": 0.9359114170074463, |
| "learning_rate": 0.0002, |
| "loss": 0.6448, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.6066914498141264, |
| "grad_norm": 1.076899528503418, |
| "learning_rate": 0.0002, |
| "loss": 0.7399, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.6126394052044609, |
| "grad_norm": 1.4630522727966309, |
| "learning_rate": 0.0002, |
| "loss": 0.6902, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.6185873605947956, |
| "grad_norm": 1.0862653255462646, |
| "learning_rate": 0.0002, |
| "loss": 0.854, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.6245353159851301, |
| "grad_norm": 1.3135863542556763, |
| "learning_rate": 0.0002, |
| "loss": 0.8188, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.6304832713754647, |
| "grad_norm": 0.9794917106628418, |
| "learning_rate": 0.0002, |
| "loss": 0.6275, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.6364312267657992, |
| "grad_norm": 1.02755868434906, |
| "learning_rate": 0.0002, |
| "loss": 0.7058, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.6423791821561339, |
| "grad_norm": 0.9642486572265625, |
| "learning_rate": 0.0002, |
| "loss": 0.8053, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.6483271375464684, |
| "grad_norm": 1.1192632913589478, |
| "learning_rate": 0.0002, |
| "loss": 0.7515, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.654275092936803, |
| "grad_norm": 1.1808356046676636, |
| "learning_rate": 0.0002, |
| "loss": 0.678, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.6602230483271375, |
| "grad_norm": 1.2461023330688477, |
| "learning_rate": 0.0002, |
| "loss": 0.6892, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.6661710037174721, |
| "grad_norm": 1.0632222890853882, |
| "learning_rate": 0.0002, |
| "loss": 0.698, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.6721189591078067, |
| "grad_norm": 1.0353591442108154, |
| "learning_rate": 0.0002, |
| "loss": 0.7453, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.6780669144981413, |
| "grad_norm": 1.124794602394104, |
| "learning_rate": 0.0002, |
| "loss": 0.815, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.6840148698884758, |
| "grad_norm": 1.0341081619262695, |
| "learning_rate": 0.0002, |
| "loss": 0.7816, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.6899628252788104, |
| "grad_norm": 1.082952857017517, |
| "learning_rate": 0.0002, |
| "loss": 0.6825, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.695910780669145, |
| "grad_norm": 0.9126180410385132, |
| "learning_rate": 0.0002, |
| "loss": 0.7042, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.7018587360594796, |
| "grad_norm": 1.2339016199111938, |
| "learning_rate": 0.0002, |
| "loss": 0.7759, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.7078066914498141, |
| "grad_norm": 1.5227537155151367, |
| "learning_rate": 0.0002, |
| "loss": 0.8574, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.7137546468401487, |
| "grad_norm": 1.0859841108322144, |
| "learning_rate": 0.0002, |
| "loss": 0.7241, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.7197026022304833, |
| "grad_norm": 1.0609203577041626, |
| "learning_rate": 0.0002, |
| "loss": 0.7391, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.7256505576208179, |
| "grad_norm": 0.9025185704231262, |
| "learning_rate": 0.0002, |
| "loss": 0.7538, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.7315985130111524, |
| "grad_norm": 0.9280850291252136, |
| "learning_rate": 0.0002, |
| "loss": 0.8023, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.737546468401487, |
| "grad_norm": 1.0120896100997925, |
| "learning_rate": 0.0002, |
| "loss": 0.797, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.7434944237918215, |
| "grad_norm": 0.9294270277023315, |
| "learning_rate": 0.0002, |
| "loss": 0.7939, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.7494423791821562, |
| "grad_norm": 1.001685380935669, |
| "learning_rate": 0.0002, |
| "loss": 0.7297, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.7553903345724907, |
| "grad_norm": 1.0650714635849, |
| "learning_rate": 0.0002, |
| "loss": 0.7955, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.7613382899628253, |
| "grad_norm": 0.9343367218971252, |
| "learning_rate": 0.0002, |
| "loss": 0.7008, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.7672862453531598, |
| "grad_norm": 1.0042743682861328, |
| "learning_rate": 0.0002, |
| "loss": 0.8086, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.7732342007434945, |
| "grad_norm": 0.9538952708244324, |
| "learning_rate": 0.0002, |
| "loss": 0.6839, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.779182156133829, |
| "grad_norm": 1.0010913610458374, |
| "learning_rate": 0.0002, |
| "loss": 0.6247, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.7851301115241636, |
| "grad_norm": 0.8673060536384583, |
| "learning_rate": 0.0002, |
| "loss": 0.7232, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.7910780669144981, |
| "grad_norm": 1.070591688156128, |
| "learning_rate": 0.0002, |
| "loss": 0.785, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.7970260223048327, |
| "grad_norm": 1.0302468538284302, |
| "learning_rate": 0.0002, |
| "loss": 0.8044, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.8029739776951673, |
| "grad_norm": 1.0886098146438599, |
| "learning_rate": 0.0002, |
| "loss": 0.6984, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.8089219330855019, |
| "grad_norm": 0.9349246025085449, |
| "learning_rate": 0.0002, |
| "loss": 0.6711, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.8148698884758364, |
| "grad_norm": 1.2482446432113647, |
| "learning_rate": 0.0002, |
| "loss": 0.6468, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.820817843866171, |
| "grad_norm": 1.184043049812317, |
| "learning_rate": 0.0002, |
| "loss": 0.6909, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.8267657992565056, |
| "grad_norm": 0.8721855878829956, |
| "learning_rate": 0.0002, |
| "loss": 0.6553, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.8327137546468402, |
| "grad_norm": 1.30323326587677, |
| "learning_rate": 0.0002, |
| "loss": 0.7109, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.8386617100371747, |
| "grad_norm": 1.0187689065933228, |
| "learning_rate": 0.0002, |
| "loss": 0.6682, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.8446096654275093, |
| "grad_norm": 2.3475165367126465, |
| "learning_rate": 0.0002, |
| "loss": 0.7615, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.8505576208178439, |
| "grad_norm": 0.9803043603897095, |
| "learning_rate": 0.0002, |
| "loss": 0.7179, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.8565055762081785, |
| "grad_norm": 1.2290213108062744, |
| "learning_rate": 0.0002, |
| "loss": 0.8696, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.862453531598513, |
| "grad_norm": 1.1041066646575928, |
| "learning_rate": 0.0002, |
| "loss": 0.8196, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.8684014869888476, |
| "grad_norm": 0.9638866186141968, |
| "learning_rate": 0.0002, |
| "loss": 0.655, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.8743494423791821, |
| "grad_norm": 0.9777591824531555, |
| "learning_rate": 0.0002, |
| "loss": 0.8053, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.8802973977695168, |
| "grad_norm": 0.8717353343963623, |
| "learning_rate": 0.0002, |
| "loss": 0.6726, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.8862453531598513, |
| "grad_norm": 1.1772398948669434, |
| "learning_rate": 0.0002, |
| "loss": 0.7188, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.8921933085501859, |
| "grad_norm": 1.1113988161087036, |
| "learning_rate": 0.0002, |
| "loss": 0.7177, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.8981412639405204, |
| "grad_norm": 1.6691763401031494, |
| "learning_rate": 0.0002, |
| "loss": 0.5822, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.9040892193308551, |
| "grad_norm": 1.0139896869659424, |
| "learning_rate": 0.0002, |
| "loss": 0.7416, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.9100371747211896, |
| "grad_norm": 1.2538039684295654, |
| "learning_rate": 0.0002, |
| "loss": 0.7822, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.9159851301115242, |
| "grad_norm": 0.833595335483551, |
| "learning_rate": 0.0002, |
| "loss": 0.6617, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.9219330855018587, |
| "grad_norm": 0.869482696056366, |
| "learning_rate": 0.0002, |
| "loss": 0.6146, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.9278810408921933, |
| "grad_norm": 0.973523736000061, |
| "learning_rate": 0.0002, |
| "loss": 0.6685, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.9338289962825279, |
| "grad_norm": 0.982566237449646, |
| "learning_rate": 0.0002, |
| "loss": 0.6685, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.9397769516728625, |
| "grad_norm": 1.0534875392913818, |
| "learning_rate": 0.0002, |
| "loss": 0.656, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.945724907063197, |
| "grad_norm": 1.11860990524292, |
| "learning_rate": 0.0002, |
| "loss": 0.7261, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.9516728624535316, |
| "grad_norm": 1.0286844968795776, |
| "learning_rate": 0.0002, |
| "loss": 0.7271, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.9576208178438662, |
| "grad_norm": 0.8426340818405151, |
| "learning_rate": 0.0002, |
| "loss": 0.718, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.9635687732342008, |
| "grad_norm": 0.990667998790741, |
| "learning_rate": 0.0002, |
| "loss": 0.7795, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.9695167286245353, |
| "grad_norm": 1.1110923290252686, |
| "learning_rate": 0.0002, |
| "loss": 0.6946, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.9754646840148699, |
| "grad_norm": 1.0378597974777222, |
| "learning_rate": 0.0002, |
| "loss": 0.8106, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.9814126394052045, |
| "grad_norm": 0.9507467746734619, |
| "learning_rate": 0.0002, |
| "loss": 0.7031, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.9873605947955391, |
| "grad_norm": 0.8636868596076965, |
| "learning_rate": 0.0002, |
| "loss": 0.6636, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.9933085501858736, |
| "grad_norm": 1.0482003688812256, |
| "learning_rate": 0.0002, |
| "loss": 0.7204, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.9992565055762082, |
| "grad_norm": 0.9490022659301758, |
| "learning_rate": 0.0002, |
| "loss": 0.5626, |
| "step": 168 |
| }, |
| { |
| "epoch": 1.0052044609665427, |
| "grad_norm": 0.8918917179107666, |
| "learning_rate": 0.0002, |
| "loss": 0.6822, |
| "step": 169 |
| }, |
| { |
| "epoch": 1.0111524163568772, |
| "grad_norm": 0.9100430011749268, |
| "learning_rate": 0.0002, |
| "loss": 0.7147, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.0171003717472118, |
| "grad_norm": 0.9007158279418945, |
| "learning_rate": 0.0002, |
| "loss": 0.6015, |
| "step": 171 |
| }, |
| { |
| "epoch": 1.0230483271375466, |
| "grad_norm": 0.9267099499702454, |
| "learning_rate": 0.0002, |
| "loss": 0.6562, |
| "step": 172 |
| }, |
| { |
| "epoch": 1.0289962825278811, |
| "grad_norm": 1.0618972778320312, |
| "learning_rate": 0.0002, |
| "loss": 0.6601, |
| "step": 173 |
| }, |
| { |
| "epoch": 1.0349442379182157, |
| "grad_norm": 1.1782737970352173, |
| "learning_rate": 0.0002, |
| "loss": 0.6172, |
| "step": 174 |
| }, |
| { |
| "epoch": 1.0408921933085502, |
| "grad_norm": 1.141661286354065, |
| "learning_rate": 0.0002, |
| "loss": 0.5854, |
| "step": 175 |
| }, |
| { |
| "epoch": 1.0468401486988848, |
| "grad_norm": 1.1038105487823486, |
| "learning_rate": 0.0002, |
| "loss": 0.6655, |
| "step": 176 |
| }, |
| { |
| "epoch": 1.0527881040892193, |
| "grad_norm": 1.1518810987472534, |
| "learning_rate": 0.0002, |
| "loss": 0.493, |
| "step": 177 |
| }, |
| { |
| "epoch": 1.0587360594795538, |
| "grad_norm": 1.0501494407653809, |
| "learning_rate": 0.0002, |
| "loss": 0.6256, |
| "step": 178 |
| }, |
| { |
| "epoch": 1.0646840148698884, |
| "grad_norm": 0.9064037799835205, |
| "learning_rate": 0.0002, |
| "loss": 0.5695, |
| "step": 179 |
| }, |
| { |
| "epoch": 1.070631970260223, |
| "grad_norm": 1.1978446245193481, |
| "learning_rate": 0.0002, |
| "loss": 0.617, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.0765799256505577, |
| "grad_norm": 0.9782500267028809, |
| "learning_rate": 0.0002, |
| "loss": 0.6542, |
| "step": 181 |
| }, |
| { |
| "epoch": 1.0825278810408923, |
| "grad_norm": 1.6090043783187866, |
| "learning_rate": 0.0002, |
| "loss": 0.6827, |
| "step": 182 |
| }, |
| { |
| "epoch": 1.0884758364312268, |
| "grad_norm": 1.2153990268707275, |
| "learning_rate": 0.0002, |
| "loss": 0.7469, |
| "step": 183 |
| }, |
| { |
| "epoch": 1.0944237918215614, |
| "grad_norm": 0.9915666580200195, |
| "learning_rate": 0.0002, |
| "loss": 0.6475, |
| "step": 184 |
| }, |
| { |
| "epoch": 1.100371747211896, |
| "grad_norm": 1.1319215297698975, |
| "learning_rate": 0.0002, |
| "loss": 0.7879, |
| "step": 185 |
| }, |
| { |
| "epoch": 1.1063197026022304, |
| "grad_norm": 1.0497454404830933, |
| "learning_rate": 0.0002, |
| "loss": 0.6386, |
| "step": 186 |
| }, |
| { |
| "epoch": 1.112267657992565, |
| "grad_norm": 1.1735246181488037, |
| "learning_rate": 0.0002, |
| "loss": 0.7276, |
| "step": 187 |
| }, |
| { |
| "epoch": 1.1182156133828995, |
| "grad_norm": 1.280543327331543, |
| "learning_rate": 0.0002, |
| "loss": 0.6403, |
| "step": 188 |
| }, |
| { |
| "epoch": 1.124163568773234, |
| "grad_norm": 1.0103743076324463, |
| "learning_rate": 0.0002, |
| "loss": 0.5913, |
| "step": 189 |
| }, |
| { |
| "epoch": 1.1301115241635689, |
| "grad_norm": 1.0629348754882812, |
| "learning_rate": 0.0002, |
| "loss": 0.708, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.1360594795539034, |
| "grad_norm": 0.9152292609214783, |
| "learning_rate": 0.0002, |
| "loss": 0.8295, |
| "step": 191 |
| }, |
| { |
| "epoch": 1.142007434944238, |
| "grad_norm": 0.9847885370254517, |
| "learning_rate": 0.0002, |
| "loss": 0.6569, |
| "step": 192 |
| }, |
| { |
| "epoch": 1.1479553903345725, |
| "grad_norm": 1.5211213827133179, |
| "learning_rate": 0.0002, |
| "loss": 0.676, |
| "step": 193 |
| }, |
| { |
| "epoch": 1.153903345724907, |
| "grad_norm": 1.0376240015029907, |
| "learning_rate": 0.0002, |
| "loss": 0.5558, |
| "step": 194 |
| }, |
| { |
| "epoch": 1.1598513011152416, |
| "grad_norm": 0.9746745824813843, |
| "learning_rate": 0.0002, |
| "loss": 0.6051, |
| "step": 195 |
| }, |
| { |
| "epoch": 1.1657992565055761, |
| "grad_norm": 1.0810937881469727, |
| "learning_rate": 0.0002, |
| "loss": 0.5947, |
| "step": 196 |
| }, |
| { |
| "epoch": 1.1717472118959107, |
| "grad_norm": 0.8687509894371033, |
| "learning_rate": 0.0002, |
| "loss": 0.5834, |
| "step": 197 |
| }, |
| { |
| "epoch": 1.1776951672862452, |
| "grad_norm": 1.3437882661819458, |
| "learning_rate": 0.0002, |
| "loss": 0.7133, |
| "step": 198 |
| }, |
| { |
| "epoch": 1.18364312267658, |
| "grad_norm": 0.9247745871543884, |
| "learning_rate": 0.0002, |
| "loss": 0.668, |
| "step": 199 |
| }, |
| { |
| "epoch": 1.1895910780669146, |
| "grad_norm": 1.116870403289795, |
| "learning_rate": 0.0002, |
| "loss": 0.5763, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.195539033457249, |
| "grad_norm": 1.0791046619415283, |
| "learning_rate": 0.0002, |
| "loss": 0.7535, |
| "step": 201 |
| }, |
| { |
| "epoch": 1.2014869888475836, |
| "grad_norm": 1.1156578063964844, |
| "learning_rate": 0.0002, |
| "loss": 0.623, |
| "step": 202 |
| }, |
| { |
| "epoch": 1.2074349442379182, |
| "grad_norm": 1.3306505680084229, |
| "learning_rate": 0.0002, |
| "loss": 0.6239, |
| "step": 203 |
| }, |
| { |
| "epoch": 1.2133828996282527, |
| "grad_norm": 1.1251856088638306, |
| "learning_rate": 0.0002, |
| "loss": 0.6122, |
| "step": 204 |
| }, |
| { |
| "epoch": 1.2193308550185873, |
| "grad_norm": 0.9659932255744934, |
| "learning_rate": 0.0002, |
| "loss": 0.7163, |
| "step": 205 |
| }, |
| { |
| "epoch": 1.2252788104089218, |
| "grad_norm": 1.1080381870269775, |
| "learning_rate": 0.0002, |
| "loss": 0.5558, |
| "step": 206 |
| }, |
| { |
| "epoch": 1.2312267657992564, |
| "grad_norm": 1.1085773706436157, |
| "learning_rate": 0.0002, |
| "loss": 0.6306, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.2371747211895912, |
| "grad_norm": 1.5555293560028076, |
| "learning_rate": 0.0002, |
| "loss": 0.7266, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.2431226765799257, |
| "grad_norm": 1.1494146585464478, |
| "learning_rate": 0.0002, |
| "loss": 0.6902, |
| "step": 209 |
| }, |
| { |
| "epoch": 1.2490706319702602, |
| "grad_norm": 1.0835429430007935, |
| "learning_rate": 0.0002, |
| "loss": 0.7219, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.2550185873605948, |
| "grad_norm": 1.1306850910186768, |
| "learning_rate": 0.0002, |
| "loss": 0.5931, |
| "step": 211 |
| }, |
| { |
| "epoch": 1.2609665427509293, |
| "grad_norm": 1.148278832435608, |
| "learning_rate": 0.0002, |
| "loss": 0.6452, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.266914498141264, |
| "grad_norm": 1.097596526145935, |
| "learning_rate": 0.0002, |
| "loss": 0.6486, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.2728624535315984, |
| "grad_norm": 1.3407150506973267, |
| "learning_rate": 0.0002, |
| "loss": 0.5874, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.2788104089219332, |
| "grad_norm": 0.8781871199607849, |
| "learning_rate": 0.0002, |
| "loss": 0.6079, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.2847583643122675, |
| "grad_norm": 0.998188853263855, |
| "learning_rate": 0.0002, |
| "loss": 0.7292, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.2907063197026023, |
| "grad_norm": 1.0128471851348877, |
| "learning_rate": 0.0002, |
| "loss": 0.6729, |
| "step": 217 |
| }, |
| { |
| "epoch": 1.2966542750929368, |
| "grad_norm": 1.2172327041625977, |
| "learning_rate": 0.0002, |
| "loss": 0.7529, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.3026022304832714, |
| "grad_norm": 0.9904006719589233, |
| "learning_rate": 0.0002, |
| "loss": 0.639, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.308550185873606, |
| "grad_norm": 0.8886059522628784, |
| "learning_rate": 0.0002, |
| "loss": 0.6128, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.3144981412639405, |
| "grad_norm": 1.0350927114486694, |
| "learning_rate": 0.0002, |
| "loss": 0.6668, |
| "step": 221 |
| }, |
| { |
| "epoch": 1.320446096654275, |
| "grad_norm": 1.0321650505065918, |
| "learning_rate": 0.0002, |
| "loss": 0.6493, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.3263940520446096, |
| "grad_norm": 0.8952768445014954, |
| "learning_rate": 0.0002, |
| "loss": 0.7005, |
| "step": 223 |
| }, |
| { |
| "epoch": 1.3323420074349444, |
| "grad_norm": 1.3372063636779785, |
| "learning_rate": 0.0002, |
| "loss": 0.6347, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.3382899628252787, |
| "grad_norm": 0.9312218427658081, |
| "learning_rate": 0.0002, |
| "loss": 0.5963, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.3442379182156134, |
| "grad_norm": 0.8845749497413635, |
| "learning_rate": 0.0002, |
| "loss": 0.5445, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.350185873605948, |
| "grad_norm": 1.292598843574524, |
| "learning_rate": 0.0002, |
| "loss": 0.6662, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.3561338289962825, |
| "grad_norm": 1.0537996292114258, |
| "learning_rate": 0.0002, |
| "loss": 0.7332, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.362081784386617, |
| "grad_norm": 0.9492632150650024, |
| "learning_rate": 0.0002, |
| "loss": 0.6157, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.3680297397769516, |
| "grad_norm": 1.0352752208709717, |
| "learning_rate": 0.0002, |
| "loss": 0.5248, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.3739776951672862, |
| "grad_norm": 1.085534930229187, |
| "learning_rate": 0.0002, |
| "loss": 0.6358, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.3799256505576207, |
| "grad_norm": 1.098999261856079, |
| "learning_rate": 0.0002, |
| "loss": 0.6181, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.3858736059479555, |
| "grad_norm": 1.114450454711914, |
| "learning_rate": 0.0002, |
| "loss": 0.7067, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.3918215613382898, |
| "grad_norm": 1.3746989965438843, |
| "learning_rate": 0.0002, |
| "loss": 0.6016, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.3977695167286246, |
| "grad_norm": 0.916519820690155, |
| "learning_rate": 0.0002, |
| "loss": 0.6579, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.4037174721189591, |
| "grad_norm": 1.0786117315292358, |
| "learning_rate": 0.0002, |
| "loss": 0.6516, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.4096654275092937, |
| "grad_norm": 0.9264970421791077, |
| "learning_rate": 0.0002, |
| "loss": 0.5927, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.4156133828996282, |
| "grad_norm": 1.0969526767730713, |
| "learning_rate": 0.0002, |
| "loss": 0.6747, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.4215613382899628, |
| "grad_norm": 0.9945991635322571, |
| "learning_rate": 0.0002, |
| "loss": 0.6719, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.4275092936802973, |
| "grad_norm": 1.0272929668426514, |
| "learning_rate": 0.0002, |
| "loss": 0.7015, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.4334572490706319, |
| "grad_norm": 1.2321354150772095, |
| "learning_rate": 0.0002, |
| "loss": 0.6904, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.4394052044609666, |
| "grad_norm": 1.1331416368484497, |
| "learning_rate": 0.0002, |
| "loss": 0.6444, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.4453531598513012, |
| "grad_norm": 1.0527664422988892, |
| "learning_rate": 0.0002, |
| "loss": 0.6135, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.4513011152416357, |
| "grad_norm": 1.0586967468261719, |
| "learning_rate": 0.0002, |
| "loss": 0.6705, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.4572490706319703, |
| "grad_norm": 1.0302836894989014, |
| "learning_rate": 0.0002, |
| "loss": 0.6757, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.4631970260223048, |
| "grad_norm": 0.9323686957359314, |
| "learning_rate": 0.0002, |
| "loss": 0.7458, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.4691449814126394, |
| "grad_norm": 1.103028416633606, |
| "learning_rate": 0.0002, |
| "loss": 0.5832, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.475092936802974, |
| "grad_norm": 1.1638356447219849, |
| "learning_rate": 0.0002, |
| "loss": 0.6286, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.4810408921933085, |
| "grad_norm": 1.0685887336730957, |
| "learning_rate": 0.0002, |
| "loss": 0.6255, |
| "step": 249 |
| }, |
| { |
| "epoch": 1.486988847583643, |
| "grad_norm": 0.9854826927185059, |
| "learning_rate": 0.0002, |
| "loss": 0.7764, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.4929368029739778, |
| "grad_norm": 1.1790441274642944, |
| "learning_rate": 0.0002, |
| "loss": 0.5791, |
| "step": 251 |
| }, |
| { |
| "epoch": 1.4988847583643123, |
| "grad_norm": 0.9097880721092224, |
| "learning_rate": 0.0002, |
| "loss": 0.5517, |
| "step": 252 |
| }, |
| { |
| "epoch": 1.504832713754647, |
| "grad_norm": 1.1351274251937866, |
| "learning_rate": 0.0002, |
| "loss": 0.6725, |
| "step": 253 |
| }, |
| { |
| "epoch": 1.5107806691449814, |
| "grad_norm": 1.2710050344467163, |
| "learning_rate": 0.0002, |
| "loss": 0.7106, |
| "step": 254 |
| }, |
| { |
| "epoch": 1.516728624535316, |
| "grad_norm": 1.2035406827926636, |
| "learning_rate": 0.0002, |
| "loss": 0.623, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.5226765799256505, |
| "grad_norm": 1.100200891494751, |
| "learning_rate": 0.0002, |
| "loss": 0.6418, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.528624535315985, |
| "grad_norm": 1.3976622819900513, |
| "learning_rate": 0.0002, |
| "loss": 0.7007, |
| "step": 257 |
| }, |
| { |
| "epoch": 1.5345724907063198, |
| "grad_norm": 1.2113823890686035, |
| "learning_rate": 0.0002, |
| "loss": 0.7271, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.5405204460966542, |
| "grad_norm": 1.1983304023742676, |
| "learning_rate": 0.0002, |
| "loss": 0.6937, |
| "step": 259 |
| }, |
| { |
| "epoch": 1.546468401486989, |
| "grad_norm": 1.2594386339187622, |
| "learning_rate": 0.0002, |
| "loss": 0.708, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.5524163568773233, |
| "grad_norm": 1.1495513916015625, |
| "learning_rate": 0.0002, |
| "loss": 0.621, |
| "step": 261 |
| }, |
| { |
| "epoch": 1.558364312267658, |
| "grad_norm": 1.0474885702133179, |
| "learning_rate": 0.0002, |
| "loss": 0.5433, |
| "step": 262 |
| }, |
| { |
| "epoch": 1.5643122676579926, |
| "grad_norm": 1.1138205528259277, |
| "learning_rate": 0.0002, |
| "loss": 0.6886, |
| "step": 263 |
| }, |
| { |
| "epoch": 1.5702602230483271, |
| "grad_norm": 0.9678700566291809, |
| "learning_rate": 0.0002, |
| "loss": 0.5945, |
| "step": 264 |
| }, |
| { |
| "epoch": 1.5762081784386617, |
| "grad_norm": 0.928419828414917, |
| "learning_rate": 0.0002, |
| "loss": 0.7483, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.5821561338289962, |
| "grad_norm": 0.8806396126747131, |
| "learning_rate": 0.0002, |
| "loss": 0.6022, |
| "step": 266 |
| }, |
| { |
| "epoch": 1.588104089219331, |
| "grad_norm": 0.9389284253120422, |
| "learning_rate": 0.0002, |
| "loss": 0.6638, |
| "step": 267 |
| }, |
| { |
| "epoch": 1.5940520446096653, |
| "grad_norm": 1.0797287225723267, |
| "learning_rate": 0.0002, |
| "loss": 0.6298, |
| "step": 268 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.9545785784721375, |
| "learning_rate": 0.0002, |
| "loss": 0.6335, |
| "step": 269 |
| }, |
| { |
| "epoch": 1.6059479553903344, |
| "grad_norm": 0.9800273776054382, |
| "learning_rate": 0.0002, |
| "loss": 0.5772, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.6118959107806692, |
| "grad_norm": 1.3683196306228638, |
| "learning_rate": 0.0002, |
| "loss": 0.6181, |
| "step": 271 |
| }, |
| { |
| "epoch": 1.6178438661710037, |
| "grad_norm": 1.1559855937957764, |
| "learning_rate": 0.0002, |
| "loss": 0.62, |
| "step": 272 |
| }, |
| { |
| "epoch": 1.6237918215613383, |
| "grad_norm": 1.1240603923797607, |
| "learning_rate": 0.0002, |
| "loss": 0.682, |
| "step": 273 |
| }, |
| { |
| "epoch": 1.6297397769516728, |
| "grad_norm": 0.9673051834106445, |
| "learning_rate": 0.0002, |
| "loss": 0.6835, |
| "step": 274 |
| }, |
| { |
| "epoch": 1.6356877323420074, |
| "grad_norm": 1.1218955516815186, |
| "learning_rate": 0.0002, |
| "loss": 0.6591, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.6416356877323421, |
| "grad_norm": 1.2360399961471558, |
| "learning_rate": 0.0002, |
| "loss": 0.7957, |
| "step": 276 |
| }, |
| { |
| "epoch": 1.6475836431226765, |
| "grad_norm": 1.2180172204971313, |
| "learning_rate": 0.0002, |
| "loss": 0.6951, |
| "step": 277 |
| }, |
| { |
| "epoch": 1.6535315985130112, |
| "grad_norm": 1.2104121446609497, |
| "learning_rate": 0.0002, |
| "loss": 0.6549, |
| "step": 278 |
| }, |
| { |
| "epoch": 1.6594795539033456, |
| "grad_norm": 0.9836241006851196, |
| "learning_rate": 0.0002, |
| "loss": 0.6077, |
| "step": 279 |
| }, |
| { |
| "epoch": 1.6654275092936803, |
| "grad_norm": 0.8980191349983215, |
| "learning_rate": 0.0002, |
| "loss": 0.7533, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.6713754646840149, |
| "grad_norm": 1.056117296218872, |
| "learning_rate": 0.0002, |
| "loss": 0.6283, |
| "step": 281 |
| }, |
| { |
| "epoch": 1.6773234200743494, |
| "grad_norm": 1.0315310955047607, |
| "learning_rate": 0.0002, |
| "loss": 0.7099, |
| "step": 282 |
| }, |
| { |
| "epoch": 1.683271375464684, |
| "grad_norm": 1.1293710470199585, |
| "learning_rate": 0.0002, |
| "loss": 0.7628, |
| "step": 283 |
| }, |
| { |
| "epoch": 1.6892193308550185, |
| "grad_norm": 0.8841990232467651, |
| "learning_rate": 0.0002, |
| "loss": 0.6076, |
| "step": 284 |
| }, |
| { |
| "epoch": 1.6951672862453533, |
| "grad_norm": 1.0221779346466064, |
| "learning_rate": 0.0002, |
| "loss": 0.6003, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.7011152416356876, |
| "grad_norm": 0.9923282861709595, |
| "learning_rate": 0.0002, |
| "loss": 0.5743, |
| "step": 286 |
| }, |
| { |
| "epoch": 1.7070631970260224, |
| "grad_norm": 1.1585432291030884, |
| "learning_rate": 0.0002, |
| "loss": 0.6121, |
| "step": 287 |
| }, |
| { |
| "epoch": 1.713011152416357, |
| "grad_norm": 0.9201356172561646, |
| "learning_rate": 0.0002, |
| "loss": 0.6343, |
| "step": 288 |
| }, |
| { |
| "epoch": 1.7189591078066915, |
| "grad_norm": 1.164581298828125, |
| "learning_rate": 0.0002, |
| "loss": 0.615, |
| "step": 289 |
| }, |
| { |
| "epoch": 1.724907063197026, |
| "grad_norm": 0.9991989135742188, |
| "learning_rate": 0.0002, |
| "loss": 0.62, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.7308550185873606, |
| "grad_norm": 1.0976234674453735, |
| "learning_rate": 0.0002, |
| "loss": 0.687, |
| "step": 291 |
| }, |
| { |
| "epoch": 1.7368029739776951, |
| "grad_norm": 1.1581001281738281, |
| "learning_rate": 0.0002, |
| "loss": 0.6227, |
| "step": 292 |
| }, |
| { |
| "epoch": 1.7427509293680297, |
| "grad_norm": 1.0079922676086426, |
| "learning_rate": 0.0002, |
| "loss": 0.675, |
| "step": 293 |
| }, |
| { |
| "epoch": 1.7486988847583644, |
| "grad_norm": 1.0962276458740234, |
| "learning_rate": 0.0002, |
| "loss": 0.6694, |
| "step": 294 |
| }, |
| { |
| "epoch": 1.7546468401486988, |
| "grad_norm": 1.0988850593566895, |
| "learning_rate": 0.0002, |
| "loss": 0.6114, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.7605947955390335, |
| "grad_norm": 1.4446635246276855, |
| "learning_rate": 0.0002, |
| "loss": 0.5885, |
| "step": 296 |
| }, |
| { |
| "epoch": 1.766542750929368, |
| "grad_norm": 1.2141138315200806, |
| "learning_rate": 0.0002, |
| "loss": 0.7856, |
| "step": 297 |
| }, |
| { |
| "epoch": 1.7724907063197026, |
| "grad_norm": 1.1908177137374878, |
| "learning_rate": 0.0002, |
| "loss": 0.7033, |
| "step": 298 |
| }, |
| { |
| "epoch": 1.7784386617100372, |
| "grad_norm": 1.019839882850647, |
| "learning_rate": 0.0002, |
| "loss": 0.6763, |
| "step": 299 |
| }, |
| { |
| "epoch": 1.7843866171003717, |
| "grad_norm": 1.039696216583252, |
| "learning_rate": 0.0002, |
| "loss": 0.6279, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.7903345724907063, |
| "grad_norm": 0.974805474281311, |
| "learning_rate": 0.0002, |
| "loss": 0.5777, |
| "step": 301 |
| }, |
| { |
| "epoch": 1.7962825278810408, |
| "grad_norm": 1.1052793264389038, |
| "learning_rate": 0.0002, |
| "loss": 0.7294, |
| "step": 302 |
| }, |
| { |
| "epoch": 1.8022304832713756, |
| "grad_norm": 1.4657918214797974, |
| "learning_rate": 0.0002, |
| "loss": 0.5142, |
| "step": 303 |
| }, |
| { |
| "epoch": 1.80817843866171, |
| "grad_norm": 1.0391294956207275, |
| "learning_rate": 0.0002, |
| "loss": 0.6388, |
| "step": 304 |
| }, |
| { |
| "epoch": 1.8141263940520447, |
| "grad_norm": 1.0521687269210815, |
| "learning_rate": 0.0002, |
| "loss": 0.6944, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.8200743494423792, |
| "grad_norm": 1.0755914449691772, |
| "learning_rate": 0.0002, |
| "loss": 0.6974, |
| "step": 306 |
| }, |
| { |
| "epoch": 1.8260223048327138, |
| "grad_norm": 1.128304123878479, |
| "learning_rate": 0.0002, |
| "loss": 0.7687, |
| "step": 307 |
| }, |
| { |
| "epoch": 1.8319702602230483, |
| "grad_norm": 1.0178970098495483, |
| "learning_rate": 0.0002, |
| "loss": 0.6074, |
| "step": 308 |
| }, |
| { |
| "epoch": 1.8379182156133829, |
| "grad_norm": 0.9115421772003174, |
| "learning_rate": 0.0002, |
| "loss": 0.725, |
| "step": 309 |
| }, |
| { |
| "epoch": 1.8438661710037176, |
| "grad_norm": 1.0200258493423462, |
| "learning_rate": 0.0002, |
| "loss": 0.6866, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.849814126394052, |
| "grad_norm": 1.286431908607483, |
| "learning_rate": 0.0002, |
| "loss": 0.6618, |
| "step": 311 |
| }, |
| { |
| "epoch": 1.8557620817843867, |
| "grad_norm": 1.0576943159103394, |
| "learning_rate": 0.0002, |
| "loss": 0.6217, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.861710037174721, |
| "grad_norm": 0.9450961351394653, |
| "learning_rate": 0.0002, |
| "loss": 0.7285, |
| "step": 313 |
| }, |
| { |
| "epoch": 1.8676579925650558, |
| "grad_norm": 1.2659786939620972, |
| "learning_rate": 0.0002, |
| "loss": 0.6004, |
| "step": 314 |
| }, |
| { |
| "epoch": 1.8736059479553904, |
| "grad_norm": 1.0950329303741455, |
| "learning_rate": 0.0002, |
| "loss": 0.6792, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.879553903345725, |
| "grad_norm": 1.0434305667877197, |
| "learning_rate": 0.0002, |
| "loss": 0.6651, |
| "step": 316 |
| }, |
| { |
| "epoch": 1.8855018587360595, |
| "grad_norm": 2.390085458755493, |
| "learning_rate": 0.0002, |
| "loss": 0.6394, |
| "step": 317 |
| }, |
| { |
| "epoch": 1.891449814126394, |
| "grad_norm": 1.1428786516189575, |
| "learning_rate": 0.0002, |
| "loss": 0.6519, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.8973977695167288, |
| "grad_norm": 1.1516354084014893, |
| "learning_rate": 0.0002, |
| "loss": 0.5967, |
| "step": 319 |
| }, |
| { |
| "epoch": 1.903345724907063, |
| "grad_norm": 0.9553952813148499, |
| "learning_rate": 0.0002, |
| "loss": 0.6626, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.9092936802973979, |
| "grad_norm": 1.1295243501663208, |
| "learning_rate": 0.0002, |
| "loss": 0.6151, |
| "step": 321 |
| }, |
| { |
| "epoch": 1.9152416356877322, |
| "grad_norm": 1.1593585014343262, |
| "learning_rate": 0.0002, |
| "loss": 0.6412, |
| "step": 322 |
| }, |
| { |
| "epoch": 1.921189591078067, |
| "grad_norm": 1.830063819885254, |
| "learning_rate": 0.0002, |
| "loss": 0.6746, |
| "step": 323 |
| }, |
| { |
| "epoch": 1.9271375464684015, |
| "grad_norm": 1.3906419277191162, |
| "learning_rate": 0.0002, |
| "loss": 0.6351, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.933085501858736, |
| "grad_norm": 1.4869827032089233, |
| "learning_rate": 0.0002, |
| "loss": 0.6806, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.9390334572490706, |
| "grad_norm": 1.110323429107666, |
| "learning_rate": 0.0002, |
| "loss": 0.5748, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.9449814126394052, |
| "grad_norm": 1.4225271940231323, |
| "learning_rate": 0.0002, |
| "loss": 0.6572, |
| "step": 327 |
| }, |
| { |
| "epoch": 1.95092936802974, |
| "grad_norm": 1.0343074798583984, |
| "learning_rate": 0.0002, |
| "loss": 0.5376, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.9568773234200743, |
| "grad_norm": 0.9949336647987366, |
| "learning_rate": 0.0002, |
| "loss": 0.69, |
| "step": 329 |
| }, |
| { |
| "epoch": 1.962825278810409, |
| "grad_norm": 1.0380656719207764, |
| "learning_rate": 0.0002, |
| "loss": 0.63, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.9687732342007433, |
| "grad_norm": 1.390371561050415, |
| "learning_rate": 0.0002, |
| "loss": 0.7011, |
| "step": 331 |
| }, |
| { |
| "epoch": 1.9747211895910781, |
| "grad_norm": 1.4939589500427246, |
| "learning_rate": 0.0002, |
| "loss": 0.5689, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.9806691449814127, |
| "grad_norm": 1.2601418495178223, |
| "learning_rate": 0.0002, |
| "loss": 0.5838, |
| "step": 333 |
| }, |
| { |
| "epoch": 1.9866171003717472, |
| "grad_norm": 2.679206132888794, |
| "learning_rate": 0.0002, |
| "loss": 0.7981, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.9925650557620818, |
| "grad_norm": 1.1042869091033936, |
| "learning_rate": 0.0002, |
| "loss": 0.6212, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.9985130111524163, |
| "grad_norm": 1.4491620063781738, |
| "learning_rate": 0.0002, |
| "loss": 0.6194, |
| "step": 336 |
| }, |
| { |
| "epoch": 2.004460966542751, |
| "grad_norm": 0.9622808694839478, |
| "learning_rate": 0.0002, |
| "loss": 0.6566, |
| "step": 337 |
| }, |
| { |
| "epoch": 2.0104089219330854, |
| "grad_norm": 1.5044182538986206, |
| "learning_rate": 0.0002, |
| "loss": 0.6093, |
| "step": 338 |
| }, |
| { |
| "epoch": 2.01635687732342, |
| "grad_norm": 1.699040174484253, |
| "learning_rate": 0.0002, |
| "loss": 0.5237, |
| "step": 339 |
| }, |
| { |
| "epoch": 2.0223048327137545, |
| "grad_norm": 1.1767878532409668, |
| "learning_rate": 0.0002, |
| "loss": 0.6145, |
| "step": 340 |
| }, |
| { |
| "epoch": 2.0282527881040893, |
| "grad_norm": 1.2151747941970825, |
| "learning_rate": 0.0002, |
| "loss": 0.6171, |
| "step": 341 |
| }, |
| { |
| "epoch": 2.0342007434944236, |
| "grad_norm": 1.2429864406585693, |
| "learning_rate": 0.0002, |
| "loss": 0.6489, |
| "step": 342 |
| }, |
| { |
| "epoch": 2.0401486988847584, |
| "grad_norm": 1.164552092552185, |
| "learning_rate": 0.0002, |
| "loss": 0.509, |
| "step": 343 |
| }, |
| { |
| "epoch": 2.046096654275093, |
| "grad_norm": 1.1822024583816528, |
| "learning_rate": 0.0002, |
| "loss": 0.6568, |
| "step": 344 |
| }, |
| { |
| "epoch": 2.0520446096654275, |
| "grad_norm": 7.130686283111572, |
| "learning_rate": 0.0002, |
| "loss": 0.5688, |
| "step": 345 |
| }, |
| { |
| "epoch": 2.0579925650557622, |
| "grad_norm": 1.1000553369522095, |
| "learning_rate": 0.0002, |
| "loss": 0.5202, |
| "step": 346 |
| }, |
| { |
| "epoch": 2.0639405204460965, |
| "grad_norm": 1.0652920007705688, |
| "learning_rate": 0.0002, |
| "loss": 0.5797, |
| "step": 347 |
| }, |
| { |
| "epoch": 2.0698884758364313, |
| "grad_norm": 1.3442667722702026, |
| "learning_rate": 0.0002, |
| "loss": 0.6164, |
| "step": 348 |
| }, |
| { |
| "epoch": 2.0758364312267656, |
| "grad_norm": 1.1383881568908691, |
| "learning_rate": 0.0002, |
| "loss": 0.5462, |
| "step": 349 |
| }, |
| { |
| "epoch": 2.0817843866171004, |
| "grad_norm": 0.9077207446098328, |
| "learning_rate": 0.0002, |
| "loss": 0.4876, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.0877323420074347, |
| "grad_norm": 1.0893263816833496, |
| "learning_rate": 0.0002, |
| "loss": 0.6335, |
| "step": 351 |
| }, |
| { |
| "epoch": 2.0936802973977695, |
| "grad_norm": 0.9917628169059753, |
| "learning_rate": 0.0002, |
| "loss": 0.5745, |
| "step": 352 |
| }, |
| { |
| "epoch": 2.0996282527881043, |
| "grad_norm": 1.0131194591522217, |
| "learning_rate": 0.0002, |
| "loss": 0.5427, |
| "step": 353 |
| }, |
| { |
| "epoch": 2.1055762081784386, |
| "grad_norm": 2.081542730331421, |
| "learning_rate": 0.0002, |
| "loss": 0.5692, |
| "step": 354 |
| }, |
| { |
| "epoch": 2.1115241635687734, |
| "grad_norm": 1.659941554069519, |
| "learning_rate": 0.0002, |
| "loss": 0.4985, |
| "step": 355 |
| }, |
| { |
| "epoch": 2.1174721189591077, |
| "grad_norm": 1.309465765953064, |
| "learning_rate": 0.0002, |
| "loss": 0.5439, |
| "step": 356 |
| }, |
| { |
| "epoch": 2.1234200743494425, |
| "grad_norm": 1.2212107181549072, |
| "learning_rate": 0.0002, |
| "loss": 0.5133, |
| "step": 357 |
| }, |
| { |
| "epoch": 2.129368029739777, |
| "grad_norm": 1.0576850175857544, |
| "learning_rate": 0.0002, |
| "loss": 0.6627, |
| "step": 358 |
| }, |
| { |
| "epoch": 2.1353159851301116, |
| "grad_norm": 1.2587406635284424, |
| "learning_rate": 0.0002, |
| "loss": 0.5235, |
| "step": 359 |
| }, |
| { |
| "epoch": 2.141263940520446, |
| "grad_norm": 1.05579674243927, |
| "learning_rate": 0.0002, |
| "loss": 0.5994, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.1472118959107807, |
| "grad_norm": 1.2232978343963623, |
| "learning_rate": 0.0002, |
| "loss": 0.586, |
| "step": 361 |
| }, |
| { |
| "epoch": 2.1531598513011154, |
| "grad_norm": 1.0725406408309937, |
| "learning_rate": 0.0002, |
| "loss": 0.4801, |
| "step": 362 |
| }, |
| { |
| "epoch": 2.1591078066914497, |
| "grad_norm": 1.0593106746673584, |
| "learning_rate": 0.0002, |
| "loss": 0.4877, |
| "step": 363 |
| }, |
| { |
| "epoch": 2.1650557620817845, |
| "grad_norm": 2.2175445556640625, |
| "learning_rate": 0.0002, |
| "loss": 0.57, |
| "step": 364 |
| }, |
| { |
| "epoch": 2.171003717472119, |
| "grad_norm": 1.1013628244400024, |
| "learning_rate": 0.0002, |
| "loss": 0.5797, |
| "step": 365 |
| }, |
| { |
| "epoch": 2.1769516728624536, |
| "grad_norm": 1.186463713645935, |
| "learning_rate": 0.0002, |
| "loss": 0.5938, |
| "step": 366 |
| }, |
| { |
| "epoch": 2.182899628252788, |
| "grad_norm": 1.1608301401138306, |
| "learning_rate": 0.0002, |
| "loss": 0.4446, |
| "step": 367 |
| }, |
| { |
| "epoch": 2.1888475836431227, |
| "grad_norm": 1.2304465770721436, |
| "learning_rate": 0.0002, |
| "loss": 0.5154, |
| "step": 368 |
| }, |
| { |
| "epoch": 2.194795539033457, |
| "grad_norm": 1.1233623027801514, |
| "learning_rate": 0.0002, |
| "loss": 0.6199, |
| "step": 369 |
| }, |
| { |
| "epoch": 2.200743494423792, |
| "grad_norm": 1.2339355945587158, |
| "learning_rate": 0.0002, |
| "loss": 0.584, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.2066914498141266, |
| "grad_norm": 1.6794264316558838, |
| "learning_rate": 0.0002, |
| "loss": 0.5609, |
| "step": 371 |
| }, |
| { |
| "epoch": 2.212639405204461, |
| "grad_norm": 1.1440285444259644, |
| "learning_rate": 0.0002, |
| "loss": 0.5306, |
| "step": 372 |
| }, |
| { |
| "epoch": 2.2185873605947957, |
| "grad_norm": 1.6425179243087769, |
| "learning_rate": 0.0002, |
| "loss": 0.6556, |
| "step": 373 |
| }, |
| { |
| "epoch": 2.22453531598513, |
| "grad_norm": 1.1068412065505981, |
| "learning_rate": 0.0002, |
| "loss": 0.6442, |
| "step": 374 |
| }, |
| { |
| "epoch": 2.2304832713754648, |
| "grad_norm": 1.1996163129806519, |
| "learning_rate": 0.0002, |
| "loss": 0.51, |
| "step": 375 |
| }, |
| { |
| "epoch": 2.236431226765799, |
| "grad_norm": 1.193741798400879, |
| "learning_rate": 0.0002, |
| "loss": 0.5323, |
| "step": 376 |
| }, |
| { |
| "epoch": 2.242379182156134, |
| "grad_norm": 1.3267923593521118, |
| "learning_rate": 0.0002, |
| "loss": 0.5576, |
| "step": 377 |
| }, |
| { |
| "epoch": 2.248327137546468, |
| "grad_norm": 1.1256170272827148, |
| "learning_rate": 0.0002, |
| "loss": 0.6218, |
| "step": 378 |
| }, |
| { |
| "epoch": 2.254275092936803, |
| "grad_norm": 1.1282093524932861, |
| "learning_rate": 0.0002, |
| "loss": 0.5095, |
| "step": 379 |
| }, |
| { |
| "epoch": 2.2602230483271377, |
| "grad_norm": 1.156480073928833, |
| "learning_rate": 0.0002, |
| "loss": 0.5423, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.266171003717472, |
| "grad_norm": 1.2083227634429932, |
| "learning_rate": 0.0002, |
| "loss": 0.5714, |
| "step": 381 |
| }, |
| { |
| "epoch": 2.272118959107807, |
| "grad_norm": 1.5389211177825928, |
| "learning_rate": 0.0002, |
| "loss": 0.4797, |
| "step": 382 |
| }, |
| { |
| "epoch": 2.278066914498141, |
| "grad_norm": 1.1418561935424805, |
| "learning_rate": 0.0002, |
| "loss": 0.585, |
| "step": 383 |
| }, |
| { |
| "epoch": 2.284014869888476, |
| "grad_norm": 1.0242007970809937, |
| "learning_rate": 0.0002, |
| "loss": 0.562, |
| "step": 384 |
| }, |
| { |
| "epoch": 2.2899628252788102, |
| "grad_norm": 1.4750384092330933, |
| "learning_rate": 0.0002, |
| "loss": 0.558, |
| "step": 385 |
| }, |
| { |
| "epoch": 2.295910780669145, |
| "grad_norm": 1.176080346107483, |
| "learning_rate": 0.0002, |
| "loss": 0.4977, |
| "step": 386 |
| }, |
| { |
| "epoch": 2.3018587360594793, |
| "grad_norm": 1.1733489036560059, |
| "learning_rate": 0.0002, |
| "loss": 0.5984, |
| "step": 387 |
| }, |
| { |
| "epoch": 2.307806691449814, |
| "grad_norm": 1.0431591272354126, |
| "learning_rate": 0.0002, |
| "loss": 0.5805, |
| "step": 388 |
| }, |
| { |
| "epoch": 2.313754646840149, |
| "grad_norm": 1.1595654487609863, |
| "learning_rate": 0.0002, |
| "loss": 0.5639, |
| "step": 389 |
| }, |
| { |
| "epoch": 2.319702602230483, |
| "grad_norm": 1.2077865600585938, |
| "learning_rate": 0.0002, |
| "loss": 0.5021, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.325650557620818, |
| "grad_norm": 1.4747991561889648, |
| "learning_rate": 0.0002, |
| "loss": 0.5809, |
| "step": 391 |
| }, |
| { |
| "epoch": 2.3315985130111523, |
| "grad_norm": 1.0486669540405273, |
| "learning_rate": 0.0002, |
| "loss": 0.6334, |
| "step": 392 |
| }, |
| { |
| "epoch": 2.337546468401487, |
| "grad_norm": 1.8145817518234253, |
| "learning_rate": 0.0002, |
| "loss": 0.5223, |
| "step": 393 |
| }, |
| { |
| "epoch": 2.3434944237918214, |
| "grad_norm": 1.393776535987854, |
| "learning_rate": 0.0002, |
| "loss": 0.4034, |
| "step": 394 |
| }, |
| { |
| "epoch": 2.349442379182156, |
| "grad_norm": 1.208957314491272, |
| "learning_rate": 0.0002, |
| "loss": 0.5929, |
| "step": 395 |
| }, |
| { |
| "epoch": 2.3553903345724905, |
| "grad_norm": 1.3021739721298218, |
| "learning_rate": 0.0002, |
| "loss": 0.6176, |
| "step": 396 |
| }, |
| { |
| "epoch": 2.3613382899628252, |
| "grad_norm": 1.4258112907409668, |
| "learning_rate": 0.0002, |
| "loss": 0.6765, |
| "step": 397 |
| }, |
| { |
| "epoch": 2.36728624535316, |
| "grad_norm": 1.2623789310455322, |
| "learning_rate": 0.0002, |
| "loss": 0.5973, |
| "step": 398 |
| }, |
| { |
| "epoch": 2.3732342007434943, |
| "grad_norm": 1.1510920524597168, |
| "learning_rate": 0.0002, |
| "loss": 0.6167, |
| "step": 399 |
| }, |
| { |
| "epoch": 2.379182156133829, |
| "grad_norm": 1.0992542505264282, |
| "learning_rate": 0.0002, |
| "loss": 0.4328, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.3851301115241634, |
| "grad_norm": 1.3745630979537964, |
| "learning_rate": 0.0002, |
| "loss": 0.6485, |
| "step": 401 |
| }, |
| { |
| "epoch": 2.391078066914498, |
| "grad_norm": 1.2044932842254639, |
| "learning_rate": 0.0002, |
| "loss": 0.6345, |
| "step": 402 |
| }, |
| { |
| "epoch": 2.3970260223048325, |
| "grad_norm": 1.4290401935577393, |
| "learning_rate": 0.0002, |
| "loss": 0.5706, |
| "step": 403 |
| }, |
| { |
| "epoch": 2.4029739776951673, |
| "grad_norm": 1.3580994606018066, |
| "learning_rate": 0.0002, |
| "loss": 0.5373, |
| "step": 404 |
| }, |
| { |
| "epoch": 2.4089219330855016, |
| "grad_norm": 1.0747463703155518, |
| "learning_rate": 0.0002, |
| "loss": 0.5597, |
| "step": 405 |
| }, |
| { |
| "epoch": 2.4148698884758364, |
| "grad_norm": 1.288228154182434, |
| "learning_rate": 0.0002, |
| "loss": 0.6366, |
| "step": 406 |
| }, |
| { |
| "epoch": 2.420817843866171, |
| "grad_norm": 1.2379798889160156, |
| "learning_rate": 0.0002, |
| "loss": 0.474, |
| "step": 407 |
| }, |
| { |
| "epoch": 2.4267657992565055, |
| "grad_norm": 1.234220266342163, |
| "learning_rate": 0.0002, |
| "loss": 0.5363, |
| "step": 408 |
| }, |
| { |
| "epoch": 2.4327137546468403, |
| "grad_norm": 1.2338114976882935, |
| "learning_rate": 0.0002, |
| "loss": 0.4992, |
| "step": 409 |
| }, |
| { |
| "epoch": 2.4386617100371746, |
| "grad_norm": 1.3846346139907837, |
| "learning_rate": 0.0002, |
| "loss": 0.6412, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.4446096654275093, |
| "grad_norm": 1.2423279285430908, |
| "learning_rate": 0.0002, |
| "loss": 0.528, |
| "step": 411 |
| }, |
| { |
| "epoch": 2.4505576208178437, |
| "grad_norm": 1.235088586807251, |
| "learning_rate": 0.0002, |
| "loss": 0.5763, |
| "step": 412 |
| }, |
| { |
| "epoch": 2.4565055762081784, |
| "grad_norm": 1.3832026720046997, |
| "learning_rate": 0.0002, |
| "loss": 0.6355, |
| "step": 413 |
| }, |
| { |
| "epoch": 2.4624535315985128, |
| "grad_norm": 1.214076280593872, |
| "learning_rate": 0.0002, |
| "loss": 0.6345, |
| "step": 414 |
| }, |
| { |
| "epoch": 2.4684014869888475, |
| "grad_norm": 1.463728904724121, |
| "learning_rate": 0.0002, |
| "loss": 0.6186, |
| "step": 415 |
| }, |
| { |
| "epoch": 2.4743494423791823, |
| "grad_norm": 1.0485203266143799, |
| "learning_rate": 0.0002, |
| "loss": 0.4723, |
| "step": 416 |
| }, |
| { |
| "epoch": 2.4802973977695166, |
| "grad_norm": 1.5590802431106567, |
| "learning_rate": 0.0002, |
| "loss": 0.5688, |
| "step": 417 |
| }, |
| { |
| "epoch": 2.4862453531598514, |
| "grad_norm": 1.1843955516815186, |
| "learning_rate": 0.0002, |
| "loss": 0.5043, |
| "step": 418 |
| }, |
| { |
| "epoch": 2.4921933085501857, |
| "grad_norm": 1.579487919807434, |
| "learning_rate": 0.0002, |
| "loss": 0.5257, |
| "step": 419 |
| }, |
| { |
| "epoch": 2.4981412639405205, |
| "grad_norm": 1.2086743116378784, |
| "learning_rate": 0.0002, |
| "loss": 0.6798, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.5040892193308553, |
| "grad_norm": 1.195058822631836, |
| "learning_rate": 0.0002, |
| "loss": 0.6485, |
| "step": 421 |
| }, |
| { |
| "epoch": 2.5100371747211896, |
| "grad_norm": 1.283530354499817, |
| "learning_rate": 0.0002, |
| "loss": 0.5873, |
| "step": 422 |
| }, |
| { |
| "epoch": 2.515985130111524, |
| "grad_norm": 1.2394181489944458, |
| "learning_rate": 0.0002, |
| "loss": 0.6391, |
| "step": 423 |
| }, |
| { |
| "epoch": 2.5219330855018587, |
| "grad_norm": 1.2939765453338623, |
| "learning_rate": 0.0002, |
| "loss": 0.6709, |
| "step": 424 |
| }, |
| { |
| "epoch": 2.5278810408921935, |
| "grad_norm": 1.1421490907669067, |
| "learning_rate": 0.0002, |
| "loss": 0.563, |
| "step": 425 |
| }, |
| { |
| "epoch": 2.533828996282528, |
| "grad_norm": 1.1861711740493774, |
| "learning_rate": 0.0002, |
| "loss": 0.5176, |
| "step": 426 |
| }, |
| { |
| "epoch": 2.5397769516728625, |
| "grad_norm": 1.3675614595413208, |
| "learning_rate": 0.0002, |
| "loss": 0.529, |
| "step": 427 |
| }, |
| { |
| "epoch": 2.545724907063197, |
| "grad_norm": 1.5121257305145264, |
| "learning_rate": 0.0002, |
| "loss": 0.7019, |
| "step": 428 |
| }, |
| { |
| "epoch": 2.5516728624535316, |
| "grad_norm": 1.169859766960144, |
| "learning_rate": 0.0002, |
| "loss": 0.5985, |
| "step": 429 |
| }, |
| { |
| "epoch": 2.5576208178438664, |
| "grad_norm": 1.3540085554122925, |
| "learning_rate": 0.0002, |
| "loss": 0.5499, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.5635687732342007, |
| "grad_norm": 1.0933575630187988, |
| "learning_rate": 0.0002, |
| "loss": 0.5208, |
| "step": 431 |
| }, |
| { |
| "epoch": 2.569516728624535, |
| "grad_norm": 1.1712511777877808, |
| "learning_rate": 0.0002, |
| "loss": 0.536, |
| "step": 432 |
| }, |
| { |
| "epoch": 2.57546468401487, |
| "grad_norm": 1.4346905946731567, |
| "learning_rate": 0.0002, |
| "loss": 0.6115, |
| "step": 433 |
| }, |
| { |
| "epoch": 2.5814126394052046, |
| "grad_norm": 2.1174967288970947, |
| "learning_rate": 0.0002, |
| "loss": 0.5341, |
| "step": 434 |
| }, |
| { |
| "epoch": 2.587360594795539, |
| "grad_norm": 1.3911654949188232, |
| "learning_rate": 0.0002, |
| "loss": 0.5707, |
| "step": 435 |
| }, |
| { |
| "epoch": 2.5933085501858737, |
| "grad_norm": 1.3274894952774048, |
| "learning_rate": 0.0002, |
| "loss": 0.6272, |
| "step": 436 |
| }, |
| { |
| "epoch": 2.599256505576208, |
| "grad_norm": 0.9820629358291626, |
| "learning_rate": 0.0002, |
| "loss": 0.4664, |
| "step": 437 |
| }, |
| { |
| "epoch": 2.605204460966543, |
| "grad_norm": 1.1450122594833374, |
| "learning_rate": 0.0002, |
| "loss": 0.4652, |
| "step": 438 |
| }, |
| { |
| "epoch": 2.6111524163568776, |
| "grad_norm": 2.7575511932373047, |
| "learning_rate": 0.0002, |
| "loss": 0.5652, |
| "step": 439 |
| }, |
| { |
| "epoch": 2.617100371747212, |
| "grad_norm": 1.4760148525238037, |
| "learning_rate": 0.0002, |
| "loss": 0.6331, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.623048327137546, |
| "grad_norm": 1.2463843822479248, |
| "learning_rate": 0.0002, |
| "loss": 0.5721, |
| "step": 441 |
| }, |
| { |
| "epoch": 2.628996282527881, |
| "grad_norm": 1.2081701755523682, |
| "learning_rate": 0.0002, |
| "loss": 0.752, |
| "step": 442 |
| }, |
| { |
| "epoch": 2.6349442379182157, |
| "grad_norm": 1.1693692207336426, |
| "learning_rate": 0.0002, |
| "loss": 0.5505, |
| "step": 443 |
| }, |
| { |
| "epoch": 2.64089219330855, |
| "grad_norm": 1.3918544054031372, |
| "learning_rate": 0.0002, |
| "loss": 0.5387, |
| "step": 444 |
| }, |
| { |
| "epoch": 2.646840148698885, |
| "grad_norm": 1.3081449270248413, |
| "learning_rate": 0.0002, |
| "loss": 0.4156, |
| "step": 445 |
| }, |
| { |
| "epoch": 2.652788104089219, |
| "grad_norm": 1.1178191900253296, |
| "learning_rate": 0.0002, |
| "loss": 0.6099, |
| "step": 446 |
| }, |
| { |
| "epoch": 2.658736059479554, |
| "grad_norm": 1.172034740447998, |
| "learning_rate": 0.0002, |
| "loss": 0.6112, |
| "step": 447 |
| }, |
| { |
| "epoch": 2.6646840148698887, |
| "grad_norm": 1.3142459392547607, |
| "learning_rate": 0.0002, |
| "loss": 0.6123, |
| "step": 448 |
| }, |
| { |
| "epoch": 2.670631970260223, |
| "grad_norm": 1.2363723516464233, |
| "learning_rate": 0.0002, |
| "loss": 0.5472, |
| "step": 449 |
| }, |
| { |
| "epoch": 2.6765799256505574, |
| "grad_norm": 1.2881202697753906, |
| "learning_rate": 0.0002, |
| "loss": 0.4572, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.682527881040892, |
| "grad_norm": 1.0761253833770752, |
| "learning_rate": 0.0002, |
| "loss": 0.5366, |
| "step": 451 |
| }, |
| { |
| "epoch": 2.688475836431227, |
| "grad_norm": 1.0405654907226562, |
| "learning_rate": 0.0002, |
| "loss": 0.6858, |
| "step": 452 |
| }, |
| { |
| "epoch": 2.694423791821561, |
| "grad_norm": 1.3384194374084473, |
| "learning_rate": 0.0002, |
| "loss": 0.616, |
| "step": 453 |
| }, |
| { |
| "epoch": 2.700371747211896, |
| "grad_norm": 8.933956146240234, |
| "learning_rate": 0.0002, |
| "loss": 0.6136, |
| "step": 454 |
| }, |
| { |
| "epoch": 2.7063197026022303, |
| "grad_norm": 1.1435190439224243, |
| "learning_rate": 0.0002, |
| "loss": 0.5598, |
| "step": 455 |
| }, |
| { |
| "epoch": 2.712267657992565, |
| "grad_norm": 1.2891956567764282, |
| "learning_rate": 0.0002, |
| "loss": 0.5945, |
| "step": 456 |
| }, |
| { |
| "epoch": 2.7182156133829, |
| "grad_norm": 1.3077706098556519, |
| "learning_rate": 0.0002, |
| "loss": 0.5793, |
| "step": 457 |
| }, |
| { |
| "epoch": 2.724163568773234, |
| "grad_norm": 1.1445353031158447, |
| "learning_rate": 0.0002, |
| "loss": 0.5644, |
| "step": 458 |
| }, |
| { |
| "epoch": 2.7301115241635685, |
| "grad_norm": 1.1466567516326904, |
| "learning_rate": 0.0002, |
| "loss": 0.5748, |
| "step": 459 |
| }, |
| { |
| "epoch": 2.7360594795539033, |
| "grad_norm": 1.0083645582199097, |
| "learning_rate": 0.0002, |
| "loss": 0.5676, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.742007434944238, |
| "grad_norm": 0.9980899691581726, |
| "learning_rate": 0.0002, |
| "loss": 0.5975, |
| "step": 461 |
| }, |
| { |
| "epoch": 2.7479553903345724, |
| "grad_norm": 1.0702303647994995, |
| "learning_rate": 0.0002, |
| "loss": 0.5235, |
| "step": 462 |
| }, |
| { |
| "epoch": 2.753903345724907, |
| "grad_norm": 1.3305853605270386, |
| "learning_rate": 0.0002, |
| "loss": 0.6493, |
| "step": 463 |
| }, |
| { |
| "epoch": 2.7598513011152415, |
| "grad_norm": 1.4583408832550049, |
| "learning_rate": 0.0002, |
| "loss": 0.5948, |
| "step": 464 |
| }, |
| { |
| "epoch": 2.7657992565055762, |
| "grad_norm": 1.1704531908035278, |
| "learning_rate": 0.0002, |
| "loss": 0.5631, |
| "step": 465 |
| }, |
| { |
| "epoch": 2.771747211895911, |
| "grad_norm": 1.1165651082992554, |
| "learning_rate": 0.0002, |
| "loss": 0.6674, |
| "step": 466 |
| }, |
| { |
| "epoch": 2.7776951672862453, |
| "grad_norm": 1.2043639421463013, |
| "learning_rate": 0.0002, |
| "loss": 0.649, |
| "step": 467 |
| }, |
| { |
| "epoch": 2.7836431226765797, |
| "grad_norm": 1.0930832624435425, |
| "learning_rate": 0.0002, |
| "loss": 0.541, |
| "step": 468 |
| }, |
| { |
| "epoch": 2.7895910780669144, |
| "grad_norm": 1.1153466701507568, |
| "learning_rate": 0.0002, |
| "loss": 0.4666, |
| "step": 469 |
| }, |
| { |
| "epoch": 2.795539033457249, |
| "grad_norm": 3.27708101272583, |
| "learning_rate": 0.0002, |
| "loss": 0.6981, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.8014869888475835, |
| "grad_norm": 1.200003981590271, |
| "learning_rate": 0.0002, |
| "loss": 0.5579, |
| "step": 471 |
| }, |
| { |
| "epoch": 2.8074349442379183, |
| "grad_norm": 1.2021151781082153, |
| "learning_rate": 0.0002, |
| "loss": 0.5383, |
| "step": 472 |
| }, |
| { |
| "epoch": 2.8133828996282526, |
| "grad_norm": 1.0844088792800903, |
| "learning_rate": 0.0002, |
| "loss": 0.6027, |
| "step": 473 |
| }, |
| { |
| "epoch": 2.8193308550185874, |
| "grad_norm": 1.1981035470962524, |
| "learning_rate": 0.0002, |
| "loss": 0.5589, |
| "step": 474 |
| }, |
| { |
| "epoch": 2.825278810408922, |
| "grad_norm": 1.23332679271698, |
| "learning_rate": 0.0002, |
| "loss": 0.6733, |
| "step": 475 |
| }, |
| { |
| "epoch": 2.8312267657992565, |
| "grad_norm": 1.2242364883422852, |
| "learning_rate": 0.0002, |
| "loss": 0.6392, |
| "step": 476 |
| }, |
| { |
| "epoch": 2.8371747211895912, |
| "grad_norm": 1.2482764720916748, |
| "learning_rate": 0.0002, |
| "loss": 0.6185, |
| "step": 477 |
| }, |
| { |
| "epoch": 2.8431226765799256, |
| "grad_norm": 1.3755487203598022, |
| "learning_rate": 0.0002, |
| "loss": 0.5761, |
| "step": 478 |
| }, |
| { |
| "epoch": 2.8490706319702603, |
| "grad_norm": 1.2065231800079346, |
| "learning_rate": 0.0002, |
| "loss": 0.5185, |
| "step": 479 |
| }, |
| { |
| "epoch": 2.8550185873605947, |
| "grad_norm": 1.1161603927612305, |
| "learning_rate": 0.0002, |
| "loss": 0.5697, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.8609665427509294, |
| "grad_norm": 1.7466390132904053, |
| "learning_rate": 0.0002, |
| "loss": 0.5835, |
| "step": 481 |
| }, |
| { |
| "epoch": 2.8669144981412638, |
| "grad_norm": 1.371319055557251, |
| "learning_rate": 0.0002, |
| "loss": 0.6031, |
| "step": 482 |
| }, |
| { |
| "epoch": 2.8728624535315985, |
| "grad_norm": 1.4363592863082886, |
| "learning_rate": 0.0002, |
| "loss": 0.6028, |
| "step": 483 |
| }, |
| { |
| "epoch": 2.8788104089219333, |
| "grad_norm": 1.1025314331054688, |
| "learning_rate": 0.0002, |
| "loss": 0.492, |
| "step": 484 |
| }, |
| { |
| "epoch": 2.8847583643122676, |
| "grad_norm": 1.04302978515625, |
| "learning_rate": 0.0002, |
| "loss": 0.6032, |
| "step": 485 |
| }, |
| { |
| "epoch": 2.8907063197026024, |
| "grad_norm": 1.5093481540679932, |
| "learning_rate": 0.0002, |
| "loss": 0.6832, |
| "step": 486 |
| }, |
| { |
| "epoch": 2.8966542750929367, |
| "grad_norm": 1.068484902381897, |
| "learning_rate": 0.0002, |
| "loss": 0.4942, |
| "step": 487 |
| }, |
| { |
| "epoch": 2.9026022304832715, |
| "grad_norm": 2.1092681884765625, |
| "learning_rate": 0.0002, |
| "loss": 0.4909, |
| "step": 488 |
| }, |
| { |
| "epoch": 2.908550185873606, |
| "grad_norm": 1.22842276096344, |
| "learning_rate": 0.0002, |
| "loss": 0.6226, |
| "step": 489 |
| }, |
| { |
| "epoch": 2.9144981412639406, |
| "grad_norm": 1.1664717197418213, |
| "learning_rate": 0.0002, |
| "loss": 0.6093, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.920446096654275, |
| "grad_norm": 1.2886883020401, |
| "learning_rate": 0.0002, |
| "loss": 0.5866, |
| "step": 491 |
| }, |
| { |
| "epoch": 2.9263940520446097, |
| "grad_norm": 1.1186504364013672, |
| "learning_rate": 0.0002, |
| "loss": 0.5942, |
| "step": 492 |
| }, |
| { |
| "epoch": 2.9323420074349444, |
| "grad_norm": 1.2734028100967407, |
| "learning_rate": 0.0002, |
| "loss": 0.5871, |
| "step": 493 |
| }, |
| { |
| "epoch": 2.9382899628252788, |
| "grad_norm": 1.1976778507232666, |
| "learning_rate": 0.0002, |
| "loss": 0.6766, |
| "step": 494 |
| }, |
| { |
| "epoch": 2.9442379182156135, |
| "grad_norm": 1.524681806564331, |
| "learning_rate": 0.0002, |
| "loss": 0.602, |
| "step": 495 |
| }, |
| { |
| "epoch": 2.950185873605948, |
| "grad_norm": 1.4174754619598389, |
| "learning_rate": 0.0002, |
| "loss": 0.6079, |
| "step": 496 |
| }, |
| { |
| "epoch": 2.9561338289962826, |
| "grad_norm": 1.1006587743759155, |
| "learning_rate": 0.0002, |
| "loss": 0.6393, |
| "step": 497 |
| }, |
| { |
| "epoch": 2.962081784386617, |
| "grad_norm": 1.3037843704223633, |
| "learning_rate": 0.0002, |
| "loss": 0.6065, |
| "step": 498 |
| }, |
| { |
| "epoch": 2.9680297397769517, |
| "grad_norm": 1.5767035484313965, |
| "learning_rate": 0.0002, |
| "loss": 0.6444, |
| "step": 499 |
| }, |
| { |
| "epoch": 2.973977695167286, |
| "grad_norm": 1.2918823957443237, |
| "learning_rate": 0.0002, |
| "loss": 0.5221, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.979925650557621, |
| "grad_norm": 1.0898538827896118, |
| "learning_rate": 0.0002, |
| "loss": 0.687, |
| "step": 501 |
| }, |
| { |
| "epoch": 2.9858736059479556, |
| "grad_norm": 1.6398361921310425, |
| "learning_rate": 0.0002, |
| "loss": 0.5781, |
| "step": 502 |
| }, |
| { |
| "epoch": 2.99182156133829, |
| "grad_norm": 1.2504217624664307, |
| "learning_rate": 0.0002, |
| "loss": 0.5916, |
| "step": 503 |
| }, |
| { |
| "epoch": 2.9977695167286247, |
| "grad_norm": 1.8298507928848267, |
| "learning_rate": 0.0002, |
| "loss": 0.5777, |
| "step": 504 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 504, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 5, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.641281849472852e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|