| { |
| "best_metric": 1.7092292308807373, |
| "best_model_checkpoint": "outputs/checkpoint-550", |
| "epoch": 0.823199251637044, |
| "eval_steps": 25, |
| "global_step": 550, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0014967259120673526, |
| "grad_norm": 2.434373378753662, |
| "learning_rate": 4e-05, |
| "loss": 2.0037, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.002993451824134705, |
| "grad_norm": 2.795464038848877, |
| "learning_rate": 8e-05, |
| "loss": 1.9814, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.004490177736202058, |
| "grad_norm": 2.0998575687408447, |
| "learning_rate": 0.00012, |
| "loss": 2.0044, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.00598690364826941, |
| "grad_norm": 4.470895290374756, |
| "learning_rate": 0.00016, |
| "loss": 1.8696, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.007483629560336763, |
| "grad_norm": 1.6047176122665405, |
| "learning_rate": 0.0002, |
| "loss": 1.8021, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.008980355472404116, |
| "grad_norm": 1.9450230598449707, |
| "learning_rate": 0.0001996638655462185, |
| "loss": 1.8558, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.01047708138447147, |
| "grad_norm": 1.4420437812805176, |
| "learning_rate": 0.00019932773109243698, |
| "loss": 1.9079, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.01197380729653882, |
| "grad_norm": 2.428009033203125, |
| "learning_rate": 0.00019899159663865548, |
| "loss": 1.7173, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.013470533208606174, |
| "grad_norm": 1.6478683948516846, |
| "learning_rate": 0.00019865546218487395, |
| "loss": 1.7631, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.014967259120673527, |
| "grad_norm": 1.6283013820648193, |
| "learning_rate": 0.00019831932773109245, |
| "loss": 1.9375, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.01646398503274088, |
| "grad_norm": 1.7736356258392334, |
| "learning_rate": 0.00019798319327731095, |
| "loss": 1.7921, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.017960710944808233, |
| "grad_norm": 1.3353335857391357, |
| "learning_rate": 0.00019764705882352942, |
| "loss": 1.8699, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.019457436856875586, |
| "grad_norm": 1.5582761764526367, |
| "learning_rate": 0.00019731092436974792, |
| "loss": 1.8015, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.02095416276894294, |
| "grad_norm": 1.3032814264297485, |
| "learning_rate": 0.00019697478991596642, |
| "loss": 1.8513, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.02245088868101029, |
| "grad_norm": 1.3724833726882935, |
| "learning_rate": 0.00019663865546218486, |
| "loss": 1.8694, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.02394761459307764, |
| "grad_norm": 1.715156078338623, |
| "learning_rate": 0.00019630252100840336, |
| "loss": 1.7997, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.025444340505144995, |
| "grad_norm": 1.9989070892333984, |
| "learning_rate": 0.00019596638655462186, |
| "loss": 1.7037, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.026941066417212348, |
| "grad_norm": 1.6255011558532715, |
| "learning_rate": 0.00019563025210084033, |
| "loss": 1.8382, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0284377923292797, |
| "grad_norm": 1.305870532989502, |
| "learning_rate": 0.00019529411764705883, |
| "loss": 1.8095, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.029934518241347054, |
| "grad_norm": 3.40390944480896, |
| "learning_rate": 0.0001949579831932773, |
| "loss": 1.6112, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0314312441534144, |
| "grad_norm": 1.1514052152633667, |
| "learning_rate": 0.0001946218487394958, |
| "loss": 1.9725, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.03292797006548176, |
| "grad_norm": 1.9419797658920288, |
| "learning_rate": 0.0001942857142857143, |
| "loss": 1.8492, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.03442469597754911, |
| "grad_norm": 1.830913782119751, |
| "learning_rate": 0.00019394957983193278, |
| "loss": 1.7852, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.035921421889616466, |
| "grad_norm": 1.345436453819275, |
| "learning_rate": 0.00019361344537815127, |
| "loss": 1.8929, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.037418147801683815, |
| "grad_norm": 1.1833657026290894, |
| "learning_rate": 0.00019327731092436975, |
| "loss": 1.8822, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.037418147801683815, |
| "eval_loss": 1.7638475894927979, |
| "eval_runtime": 13.3387, |
| "eval_samples_per_second": 4.798, |
| "eval_steps_per_second": 2.399, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.03891487371375117, |
| "grad_norm": 1.4092592000961304, |
| "learning_rate": 0.00019294117647058825, |
| "loss": 1.954, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.04041159962581852, |
| "grad_norm": 1.201281189918518, |
| "learning_rate": 0.00019260504201680674, |
| "loss": 1.9364, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.04190832553788588, |
| "grad_norm": 1.407148838043213, |
| "learning_rate": 0.00019226890756302522, |
| "loss": 1.7673, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.04340505144995323, |
| "grad_norm": 1.3781392574310303, |
| "learning_rate": 0.00019193277310924372, |
| "loss": 1.81, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.04490177736202058, |
| "grad_norm": 1.4952391386032104, |
| "learning_rate": 0.00019159663865546221, |
| "loss": 1.8216, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.046398503274087934, |
| "grad_norm": 1.5127140283584595, |
| "learning_rate": 0.0001912605042016807, |
| "loss": 1.8316, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.04789522918615528, |
| "grad_norm": 1.3208520412445068, |
| "learning_rate": 0.00019092436974789919, |
| "loss": 1.7302, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.04939195509822264, |
| "grad_norm": 1.3473477363586426, |
| "learning_rate": 0.00019058823529411766, |
| "loss": 1.9661, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.05088868101028999, |
| "grad_norm": 1.201379418373108, |
| "learning_rate": 0.00019025210084033613, |
| "loss": 1.8531, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.052385406922357346, |
| "grad_norm": 1.32240891456604, |
| "learning_rate": 0.00018991596638655463, |
| "loss": 1.7485, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.053882132834424695, |
| "grad_norm": 1.3222694396972656, |
| "learning_rate": 0.0001895798319327731, |
| "loss": 1.7939, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.05537885874649205, |
| "grad_norm": 1.1342493295669556, |
| "learning_rate": 0.0001892436974789916, |
| "loss": 1.855, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.0568755846585594, |
| "grad_norm": 1.4912521839141846, |
| "learning_rate": 0.0001889075630252101, |
| "loss": 1.8394, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.05837231057062675, |
| "grad_norm": 1.4635943174362183, |
| "learning_rate": 0.00018857142857142857, |
| "loss": 1.7669, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.05986903648269411, |
| "grad_norm": 1.4757208824157715, |
| "learning_rate": 0.00018823529411764707, |
| "loss": 1.7842, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.06136576239476146, |
| "grad_norm": 1.5162277221679688, |
| "learning_rate": 0.00018789915966386554, |
| "loss": 1.8873, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.0628624883068288, |
| "grad_norm": 1.3085792064666748, |
| "learning_rate": 0.00018756302521008404, |
| "loss": 1.9716, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.06435921421889616, |
| "grad_norm": 1.1214959621429443, |
| "learning_rate": 0.00018722689075630254, |
| "loss": 1.8991, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.06585594013096352, |
| "grad_norm": 1.1944588422775269, |
| "learning_rate": 0.000186890756302521, |
| "loss": 1.8894, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.06735266604303088, |
| "grad_norm": 1.509717345237732, |
| "learning_rate": 0.0001865546218487395, |
| "loss": 1.8035, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.06884939195509822, |
| "grad_norm": 1.3220465183258057, |
| "learning_rate": 0.000186218487394958, |
| "loss": 1.8673, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.07034611786716558, |
| "grad_norm": 1.3592686653137207, |
| "learning_rate": 0.00018588235294117648, |
| "loss": 1.8396, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.07184284377923293, |
| "grad_norm": 1.3568888902664185, |
| "learning_rate": 0.00018554621848739498, |
| "loss": 1.6562, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.07333956969130027, |
| "grad_norm": 1.1371209621429443, |
| "learning_rate": 0.00018521008403361345, |
| "loss": 1.9563, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.07483629560336763, |
| "grad_norm": 1.234221339225769, |
| "learning_rate": 0.00018487394957983195, |
| "loss": 1.7702, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.07483629560336763, |
| "eval_loss": 1.7585577964782715, |
| "eval_runtime": 9.906, |
| "eval_samples_per_second": 6.461, |
| "eval_steps_per_second": 3.23, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.07633302151543499, |
| "grad_norm": 1.4679025411605835, |
| "learning_rate": 0.00018453781512605045, |
| "loss": 1.7903, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.07782974742750234, |
| "grad_norm": 1.6469783782958984, |
| "learning_rate": 0.0001842016806722689, |
| "loss": 1.9121, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.07932647333956969, |
| "grad_norm": 1.0950040817260742, |
| "learning_rate": 0.0001838655462184874, |
| "loss": 1.8015, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.08082319925163704, |
| "grad_norm": 1.4614354372024536, |
| "learning_rate": 0.0001835294117647059, |
| "loss": 1.8627, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.0823199251637044, |
| "grad_norm": 1.0772849321365356, |
| "learning_rate": 0.00018319327731092437, |
| "loss": 1.8474, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.08381665107577176, |
| "grad_norm": 0.8980317115783691, |
| "learning_rate": 0.00018285714285714286, |
| "loss": 1.9381, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.0853133769878391, |
| "grad_norm": 1.028698205947876, |
| "learning_rate": 0.00018252100840336134, |
| "loss": 1.8726, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.08681010289990646, |
| "grad_norm": 1.2643156051635742, |
| "learning_rate": 0.00018218487394957984, |
| "loss": 1.937, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.08830682881197381, |
| "grad_norm": 1.0845692157745361, |
| "learning_rate": 0.00018184873949579833, |
| "loss": 1.9708, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.08980355472404115, |
| "grad_norm": 1.2025495767593384, |
| "learning_rate": 0.0001815126050420168, |
| "loss": 1.8674, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.09130028063610851, |
| "grad_norm": 1.2060717344284058, |
| "learning_rate": 0.0001811764705882353, |
| "loss": 1.8268, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.09279700654817587, |
| "grad_norm": 1.3296293020248413, |
| "learning_rate": 0.0001808403361344538, |
| "loss": 1.6956, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.09429373246024322, |
| "grad_norm": 1.2353034019470215, |
| "learning_rate": 0.00018050420168067228, |
| "loss": 1.9816, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.09579045837231057, |
| "grad_norm": 1.5975768566131592, |
| "learning_rate": 0.00018016806722689078, |
| "loss": 1.7846, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.09728718428437792, |
| "grad_norm": 1.2220622301101685, |
| "learning_rate": 0.00017983193277310925, |
| "loss": 1.7895, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.09878391019644528, |
| "grad_norm": 1.2025718688964844, |
| "learning_rate": 0.00017949579831932775, |
| "loss": 1.9242, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.10028063610851262, |
| "grad_norm": 3.2830123901367188, |
| "learning_rate": 0.00017915966386554625, |
| "loss": 1.7076, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.10177736202057998, |
| "grad_norm": 1.5499017238616943, |
| "learning_rate": 0.00017882352941176472, |
| "loss": 1.7964, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.10327408793264733, |
| "grad_norm": 1.4630420207977295, |
| "learning_rate": 0.00017848739495798322, |
| "loss": 1.8724, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.10477081384471469, |
| "grad_norm": 1.4005722999572754, |
| "learning_rate": 0.0001781512605042017, |
| "loss": 1.6889, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.10626753975678203, |
| "grad_norm": 1.114207148551941, |
| "learning_rate": 0.00017781512605042016, |
| "loss": 1.8272, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.10776426566884939, |
| "grad_norm": 1.4557619094848633, |
| "learning_rate": 0.00017747899159663866, |
| "loss": 1.6877, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.10926099158091675, |
| "grad_norm": 1.4767951965332031, |
| "learning_rate": 0.00017714285714285713, |
| "loss": 1.8667, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.1107577174929841, |
| "grad_norm": 1.3078974485397339, |
| "learning_rate": 0.00017680672268907563, |
| "loss": 1.9319, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.11225444340505145, |
| "grad_norm": 1.1861608028411865, |
| "learning_rate": 0.00017647058823529413, |
| "loss": 1.9233, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.11225444340505145, |
| "eval_loss": 1.7611756324768066, |
| "eval_runtime": 9.9015, |
| "eval_samples_per_second": 6.464, |
| "eval_steps_per_second": 3.232, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.1137511693171188, |
| "grad_norm": 1.1504981517791748, |
| "learning_rate": 0.0001761344537815126, |
| "loss": 1.8044, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.11524789522918616, |
| "grad_norm": 1.3776837587356567, |
| "learning_rate": 0.0001757983193277311, |
| "loss": 1.725, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.1167446211412535, |
| "grad_norm": 1.3975869417190552, |
| "learning_rate": 0.0001754621848739496, |
| "loss": 1.7935, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.11824134705332086, |
| "grad_norm": 1.3506461381912231, |
| "learning_rate": 0.00017512605042016807, |
| "loss": 1.7342, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.11973807296538821, |
| "grad_norm": 1.1317209005355835, |
| "learning_rate": 0.00017478991596638657, |
| "loss": 1.8149, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.12123479887745557, |
| "grad_norm": 1.2540264129638672, |
| "learning_rate": 0.00017445378151260504, |
| "loss": 1.84, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.12273152478952291, |
| "grad_norm": 1.23360276222229, |
| "learning_rate": 0.00017411764705882354, |
| "loss": 1.7623, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.12422825070159027, |
| "grad_norm": 1.0347758531570435, |
| "learning_rate": 0.00017378151260504204, |
| "loss": 1.7381, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.1257249766136576, |
| "grad_norm": 1.4501961469650269, |
| "learning_rate": 0.0001734453781512605, |
| "loss": 1.7075, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.12722170252572498, |
| "grad_norm": 1.0509997606277466, |
| "learning_rate": 0.000173109243697479, |
| "loss": 1.7295, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.12871842843779233, |
| "grad_norm": 1.2986621856689453, |
| "learning_rate": 0.00017277310924369748, |
| "loss": 1.7988, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.13021515434985967, |
| "grad_norm": 1.1701687574386597, |
| "learning_rate": 0.00017243697478991598, |
| "loss": 1.6763, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.13171188026192704, |
| "grad_norm": 1.2512173652648926, |
| "learning_rate": 0.00017210084033613448, |
| "loss": 1.6641, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.13320860617399438, |
| "grad_norm": 1.658525824546814, |
| "learning_rate": 0.00017176470588235293, |
| "loss": 1.6849, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.13470533208606175, |
| "grad_norm": 1.5465582609176636, |
| "learning_rate": 0.00017142857142857143, |
| "loss": 1.6439, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.1362020579981291, |
| "grad_norm": 1.3289684057235718, |
| "learning_rate": 0.00017109243697478992, |
| "loss": 1.7429, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.13769878391019644, |
| "grad_norm": 1.3123184442520142, |
| "learning_rate": 0.0001707563025210084, |
| "loss": 1.6429, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.1391955098222638, |
| "grad_norm": 1.385330319404602, |
| "learning_rate": 0.0001704201680672269, |
| "loss": 1.8257, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.14069223573433115, |
| "grad_norm": 1.3719394207000732, |
| "learning_rate": 0.0001700840336134454, |
| "loss": 1.7493, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.1421889616463985, |
| "grad_norm": 1.468948245048523, |
| "learning_rate": 0.00016974789915966387, |
| "loss": 1.8626, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.14368568755846586, |
| "grad_norm": 1.2705055475234985, |
| "learning_rate": 0.00016941176470588237, |
| "loss": 1.79, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.1451824134705332, |
| "grad_norm": 1.0876643657684326, |
| "learning_rate": 0.00016907563025210084, |
| "loss": 1.9631, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.14667913938260055, |
| "grad_norm": 1.1760327816009521, |
| "learning_rate": 0.00016873949579831934, |
| "loss": 1.91, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.14817586529466792, |
| "grad_norm": 1.0915436744689941, |
| "learning_rate": 0.00016840336134453784, |
| "loss": 1.8369, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.14967259120673526, |
| "grad_norm": 1.4619494676589966, |
| "learning_rate": 0.0001680672268907563, |
| "loss": 1.7074, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.14967259120673526, |
| "eval_loss": 1.7622296810150146, |
| "eval_runtime": 9.9165, |
| "eval_samples_per_second": 6.454, |
| "eval_steps_per_second": 3.227, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.15116931711880263, |
| "grad_norm": 1.147395372390747, |
| "learning_rate": 0.0001677310924369748, |
| "loss": 1.7815, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.15266604303086997, |
| "grad_norm": 1.1551228761672974, |
| "learning_rate": 0.00016739495798319328, |
| "loss": 1.9444, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.15416276894293732, |
| "grad_norm": 1.2220309972763062, |
| "learning_rate": 0.00016705882352941178, |
| "loss": 1.8735, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.1556594948550047, |
| "grad_norm": 1.315051555633545, |
| "learning_rate": 0.00016672268907563028, |
| "loss": 1.7284, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.15715622076707203, |
| "grad_norm": 1.2493054866790771, |
| "learning_rate": 0.00016638655462184875, |
| "loss": 1.7598, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.15865294667913937, |
| "grad_norm": 1.0625994205474854, |
| "learning_rate": 0.00016605042016806725, |
| "loss": 1.8334, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.16014967259120674, |
| "grad_norm": 1.284947395324707, |
| "learning_rate": 0.00016571428571428575, |
| "loss": 1.7762, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.16164639850327409, |
| "grad_norm": 1.27797269821167, |
| "learning_rate": 0.0001653781512605042, |
| "loss": 1.7898, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.16314312441534143, |
| "grad_norm": 1.4802685976028442, |
| "learning_rate": 0.0001650420168067227, |
| "loss": 1.7004, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.1646398503274088, |
| "grad_norm": 0.973327100276947, |
| "learning_rate": 0.0001647058823529412, |
| "loss": 1.9218, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.16613657623947614, |
| "grad_norm": 1.3942281007766724, |
| "learning_rate": 0.00016436974789915966, |
| "loss": 1.9183, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.1676333021515435, |
| "grad_norm": 1.2495373487472534, |
| "learning_rate": 0.00016403361344537816, |
| "loss": 1.8296, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.16913002806361085, |
| "grad_norm": 1.2634400129318237, |
| "learning_rate": 0.00016369747899159663, |
| "loss": 1.9474, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.1706267539756782, |
| "grad_norm": 1.2135545015335083, |
| "learning_rate": 0.00016336134453781513, |
| "loss": 1.787, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.17212347988774557, |
| "grad_norm": 1.0599427223205566, |
| "learning_rate": 0.00016302521008403363, |
| "loss": 1.9359, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.1736202057998129, |
| "grad_norm": 1.1955755949020386, |
| "learning_rate": 0.0001626890756302521, |
| "loss": 1.7344, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.17511693171188025, |
| "grad_norm": 1.3276002407073975, |
| "learning_rate": 0.0001623529411764706, |
| "loss": 1.8934, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.17661365762394762, |
| "grad_norm": 1.4872647523880005, |
| "learning_rate": 0.00016201680672268907, |
| "loss": 1.8229, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.17811038353601497, |
| "grad_norm": 1.377747893333435, |
| "learning_rate": 0.00016168067226890757, |
| "loss": 1.7041, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.1796071094480823, |
| "grad_norm": 1.087159276008606, |
| "learning_rate": 0.00016134453781512607, |
| "loss": 1.8932, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.18110383536014968, |
| "grad_norm": 1.299407720565796, |
| "learning_rate": 0.00016100840336134454, |
| "loss": 1.8673, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.18260056127221702, |
| "grad_norm": 1.172582983970642, |
| "learning_rate": 0.00016067226890756304, |
| "loss": 1.7616, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.1840972871842844, |
| "grad_norm": 1.4097166061401367, |
| "learning_rate": 0.00016033613445378154, |
| "loss": 1.6969, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.18559401309635173, |
| "grad_norm": 1.0662322044372559, |
| "learning_rate": 0.00016, |
| "loss": 1.8128, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.18709073900841908, |
| "grad_norm": 1.2918486595153809, |
| "learning_rate": 0.0001596638655462185, |
| "loss": 1.8538, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.18709073900841908, |
| "eval_loss": 1.762688159942627, |
| "eval_runtime": 9.9256, |
| "eval_samples_per_second": 6.448, |
| "eval_steps_per_second": 3.224, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.18858746492048645, |
| "grad_norm": 1.4962085485458374, |
| "learning_rate": 0.00015932773109243698, |
| "loss": 1.732, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.1900841908325538, |
| "grad_norm": 1.1726781129837036, |
| "learning_rate": 0.00015899159663865546, |
| "loss": 1.8208, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.19158091674462113, |
| "grad_norm": 1.1145118474960327, |
| "learning_rate": 0.00015865546218487396, |
| "loss": 1.8918, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.1930776426566885, |
| "grad_norm": 1.2448960542678833, |
| "learning_rate": 0.00015831932773109243, |
| "loss": 1.7441, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.19457436856875585, |
| "grad_norm": 1.1530712842941284, |
| "learning_rate": 0.00015798319327731093, |
| "loss": 1.8817, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.1960710944808232, |
| "grad_norm": 0.9994822144508362, |
| "learning_rate": 0.00015764705882352943, |
| "loss": 1.8655, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.19756782039289056, |
| "grad_norm": 1.475071668624878, |
| "learning_rate": 0.0001573109243697479, |
| "loss": 1.5336, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.1990645463049579, |
| "grad_norm": 1.0761913061141968, |
| "learning_rate": 0.0001569747899159664, |
| "loss": 1.9806, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.20056127221702524, |
| "grad_norm": 1.1356831789016724, |
| "learning_rate": 0.00015663865546218487, |
| "loss": 1.7876, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.20205799812909261, |
| "grad_norm": 1.2572017908096313, |
| "learning_rate": 0.00015630252100840337, |
| "loss": 1.7748, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.20355472404115996, |
| "grad_norm": 1.2248603105545044, |
| "learning_rate": 0.00015596638655462187, |
| "loss": 1.8696, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.20505144995322733, |
| "grad_norm": 1.1236392259597778, |
| "learning_rate": 0.00015563025210084034, |
| "loss": 1.8774, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.20654817586529467, |
| "grad_norm": 1.3141965866088867, |
| "learning_rate": 0.00015529411764705884, |
| "loss": 1.8864, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.208044901777362, |
| "grad_norm": 1.364126443862915, |
| "learning_rate": 0.00015495798319327734, |
| "loss": 1.6608, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.20954162768942938, |
| "grad_norm": 1.0413249731063843, |
| "learning_rate": 0.0001546218487394958, |
| "loss": 1.8559, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.21103835360149673, |
| "grad_norm": 1.0397676229476929, |
| "learning_rate": 0.0001542857142857143, |
| "loss": 1.8886, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.21253507951356407, |
| "grad_norm": 1.072859525680542, |
| "learning_rate": 0.00015394957983193278, |
| "loss": 1.8294, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.21403180542563144, |
| "grad_norm": 1.3657593727111816, |
| "learning_rate": 0.00015361344537815128, |
| "loss": 1.6844, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.21552853133769878, |
| "grad_norm": 1.4864728450775146, |
| "learning_rate": 0.00015327731092436978, |
| "loss": 1.7364, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.21702525724976612, |
| "grad_norm": 1.2342066764831543, |
| "learning_rate": 0.00015294117647058822, |
| "loss": 1.8441, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.2185219831618335, |
| "grad_norm": 1.788312315940857, |
| "learning_rate": 0.00015260504201680672, |
| "loss": 1.7228, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.22001870907390084, |
| "grad_norm": 1.1817758083343506, |
| "learning_rate": 0.00015226890756302522, |
| "loss": 1.7602, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.2215154349859682, |
| "grad_norm": 1.0265668630599976, |
| "learning_rate": 0.0001519327731092437, |
| "loss": 1.8208, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.22301216089803555, |
| "grad_norm": 1.2950278520584106, |
| "learning_rate": 0.0001515966386554622, |
| "loss": 1.7958, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.2245088868101029, |
| "grad_norm": 1.21755850315094, |
| "learning_rate": 0.00015126050420168066, |
| "loss": 1.9051, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.2245088868101029, |
| "eval_loss": 1.760790467262268, |
| "eval_runtime": 9.9578, |
| "eval_samples_per_second": 6.427, |
| "eval_steps_per_second": 3.214, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.22600561272217026, |
| "grad_norm": 1.0923712253570557, |
| "learning_rate": 0.00015092436974789916, |
| "loss": 1.7108, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.2275023386342376, |
| "grad_norm": 1.222659945487976, |
| "learning_rate": 0.00015058823529411766, |
| "loss": 1.8221, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.22899906454630495, |
| "grad_norm": 1.2633992433547974, |
| "learning_rate": 0.00015025210084033613, |
| "loss": 1.7576, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.23049579045837232, |
| "grad_norm": 1.3663132190704346, |
| "learning_rate": 0.00014991596638655463, |
| "loss": 1.7411, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.23199251637043966, |
| "grad_norm": 1.0865604877471924, |
| "learning_rate": 0.00014957983193277313, |
| "loss": 1.7677, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.233489242282507, |
| "grad_norm": 1.0313267707824707, |
| "learning_rate": 0.0001492436974789916, |
| "loss": 1.8802, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.23498596819457437, |
| "grad_norm": 1.300527811050415, |
| "learning_rate": 0.0001489075630252101, |
| "loss": 1.7062, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.23648269410664172, |
| "grad_norm": 1.1446460485458374, |
| "learning_rate": 0.00014857142857142857, |
| "loss": 1.821, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.2379794200187091, |
| "grad_norm": 1.1422364711761475, |
| "learning_rate": 0.00014823529411764707, |
| "loss": 1.9409, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.23947614593077643, |
| "grad_norm": 1.2949453592300415, |
| "learning_rate": 0.00014789915966386557, |
| "loss": 1.9585, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.24097287184284377, |
| "grad_norm": 1.0998990535736084, |
| "learning_rate": 0.00014756302521008404, |
| "loss": 1.823, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.24246959775491114, |
| "grad_norm": 1.3279107809066772, |
| "learning_rate": 0.00014722689075630254, |
| "loss": 1.7261, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.24396632366697849, |
| "grad_norm": 1.1471811532974243, |
| "learning_rate": 0.00014689075630252101, |
| "loss": 1.8507, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.24546304957904583, |
| "grad_norm": 1.0583767890930176, |
| "learning_rate": 0.0001465546218487395, |
| "loss": 1.9237, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.2469597754911132, |
| "grad_norm": 1.3685698509216309, |
| "learning_rate": 0.00014621848739495799, |
| "loss": 1.8113, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.24845650140318054, |
| "grad_norm": 1.5403902530670166, |
| "learning_rate": 0.00014588235294117646, |
| "loss": 1.797, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.24995322731524788, |
| "grad_norm": 1.0701065063476562, |
| "learning_rate": 0.00014554621848739496, |
| "loss": 1.8527, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.2514499532273152, |
| "grad_norm": 1.0490641593933105, |
| "learning_rate": 0.00014521008403361346, |
| "loss": 1.9737, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.2529466791393826, |
| "grad_norm": 1.1226781606674194, |
| "learning_rate": 0.00014487394957983193, |
| "loss": 1.8817, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.25444340505144997, |
| "grad_norm": 1.2346230745315552, |
| "learning_rate": 0.00014453781512605043, |
| "loss": 1.873, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.2559401309635173, |
| "grad_norm": 0.9842538237571716, |
| "learning_rate": 0.00014420168067226893, |
| "loss": 1.8042, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.25743685687558465, |
| "grad_norm": 1.217533826828003, |
| "learning_rate": 0.0001438655462184874, |
| "loss": 1.7716, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.258933582787652, |
| "grad_norm": 1.1946215629577637, |
| "learning_rate": 0.0001435294117647059, |
| "loss": 1.8272, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.26043030869971934, |
| "grad_norm": 1.1696230173110962, |
| "learning_rate": 0.00014319327731092437, |
| "loss": 1.801, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.26192703461178674, |
| "grad_norm": 1.2546944618225098, |
| "learning_rate": 0.00014285714285714287, |
| "loss": 1.7625, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.26192703461178674, |
| "eval_loss": 1.7579066753387451, |
| "eval_runtime": 9.9267, |
| "eval_samples_per_second": 6.447, |
| "eval_steps_per_second": 3.224, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.2634237605238541, |
| "grad_norm": 1.136839509010315, |
| "learning_rate": 0.00014252100840336137, |
| "loss": 1.8288, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.2649204864359214, |
| "grad_norm": 1.2978945970535278, |
| "learning_rate": 0.00014218487394957984, |
| "loss": 1.6907, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.26641721234798876, |
| "grad_norm": 1.340311050415039, |
| "learning_rate": 0.00014184873949579834, |
| "loss": 1.8468, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.2679139382600561, |
| "grad_norm": 1.236281156539917, |
| "learning_rate": 0.0001415126050420168, |
| "loss": 1.834, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.2694106641721235, |
| "grad_norm": 1.2765839099884033, |
| "learning_rate": 0.0001411764705882353, |
| "loss": 1.8231, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.27090739008419085, |
| "grad_norm": 1.9402660131454468, |
| "learning_rate": 0.0001408403361344538, |
| "loss": 1.8436, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.2724041159962582, |
| "grad_norm": 1.6397343873977661, |
| "learning_rate": 0.00014050420168067225, |
| "loss": 1.8738, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.27390084190832553, |
| "grad_norm": 1.2269023656845093, |
| "learning_rate": 0.00014016806722689075, |
| "loss": 1.724, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.2753975678203929, |
| "grad_norm": 1.0990972518920898, |
| "learning_rate": 0.00013983193277310925, |
| "loss": 1.805, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.2768942937324602, |
| "grad_norm": 1.2719955444335938, |
| "learning_rate": 0.00013949579831932772, |
| "loss": 1.8694, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.2783910196445276, |
| "grad_norm": 1.6118435859680176, |
| "learning_rate": 0.00013915966386554622, |
| "loss": 1.7453, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.27988774555659496, |
| "grad_norm": 1.2474150657653809, |
| "learning_rate": 0.00013882352941176472, |
| "loss": 1.7035, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.2813844714686623, |
| "grad_norm": 0.929045557975769, |
| "learning_rate": 0.0001384873949579832, |
| "loss": 1.4875, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.28288119738072964, |
| "grad_norm": 1.1837550401687622, |
| "learning_rate": 0.0001381512605042017, |
| "loss": 1.8196, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.284377923292797, |
| "grad_norm": 1.171769142150879, |
| "learning_rate": 0.00013781512605042016, |
| "loss": 1.8209, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.2858746492048644, |
| "grad_norm": 1.0087103843688965, |
| "learning_rate": 0.00013747899159663866, |
| "loss": 1.8493, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.2873713751169317, |
| "grad_norm": 1.0094703435897827, |
| "learning_rate": 0.00013714285714285716, |
| "loss": 1.8036, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.28886810102899907, |
| "grad_norm": 1.2591369152069092, |
| "learning_rate": 0.00013680672268907563, |
| "loss": 1.6686, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.2903648269410664, |
| "grad_norm": 1.1550267934799194, |
| "learning_rate": 0.00013647058823529413, |
| "loss": 1.7808, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.29186155285313375, |
| "grad_norm": 1.575493335723877, |
| "learning_rate": 0.0001361344537815126, |
| "loss": 1.6981, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.2933582787652011, |
| "grad_norm": 1.0852630138397217, |
| "learning_rate": 0.0001357983193277311, |
| "loss": 1.913, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.2948550046772685, |
| "grad_norm": 1.4476265907287598, |
| "learning_rate": 0.0001354621848739496, |
| "loss": 1.8064, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.29635173058933584, |
| "grad_norm": 1.1281249523162842, |
| "learning_rate": 0.00013512605042016807, |
| "loss": 1.9073, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.2978484565014032, |
| "grad_norm": 1.011376142501831, |
| "learning_rate": 0.00013478991596638657, |
| "loss": 1.9038, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.2993451824134705, |
| "grad_norm": 1.017004370689392, |
| "learning_rate": 0.00013445378151260507, |
| "loss": 1.842, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.2993451824134705, |
| "eval_loss": 1.751793384552002, |
| "eval_runtime": 9.9274, |
| "eval_samples_per_second": 6.447, |
| "eval_steps_per_second": 3.223, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.30084190832553787, |
| "grad_norm": 1.280287504196167, |
| "learning_rate": 0.00013411764705882352, |
| "loss": 1.8463, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.30233863423760526, |
| "grad_norm": 1.071548581123352, |
| "learning_rate": 0.00013378151260504202, |
| "loss": 1.936, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.3038353601496726, |
| "grad_norm": 1.0402483940124512, |
| "learning_rate": 0.00013344537815126052, |
| "loss": 1.8919, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.30533208606173995, |
| "grad_norm": 1.404093623161316, |
| "learning_rate": 0.000133109243697479, |
| "loss": 1.7126, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.3068288119738073, |
| "grad_norm": 1.2647768259048462, |
| "learning_rate": 0.0001327731092436975, |
| "loss": 1.8001, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.30832553788587463, |
| "grad_norm": 1.1771318912506104, |
| "learning_rate": 0.00013243697478991596, |
| "loss": 1.8045, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.309822263797942, |
| "grad_norm": 1.1957385540008545, |
| "learning_rate": 0.00013210084033613446, |
| "loss": 1.7873, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.3113189897100094, |
| "grad_norm": 1.4250292778015137, |
| "learning_rate": 0.00013176470588235296, |
| "loss": 1.5988, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.3128157156220767, |
| "grad_norm": 1.1845389604568481, |
| "learning_rate": 0.00013142857142857143, |
| "loss": 1.7846, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.31431244153414406, |
| "grad_norm": 1.080325722694397, |
| "learning_rate": 0.00013109243697478993, |
| "loss": 1.9488, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.3158091674462114, |
| "grad_norm": 1.2887210845947266, |
| "learning_rate": 0.0001307563025210084, |
| "loss": 1.8574, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.31730589335827875, |
| "grad_norm": 1.1449840068817139, |
| "learning_rate": 0.0001304201680672269, |
| "loss": 1.8628, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.31880261927034614, |
| "grad_norm": 1.0899405479431152, |
| "learning_rate": 0.0001300840336134454, |
| "loss": 1.7998, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.3202993451824135, |
| "grad_norm": 1.1760225296020508, |
| "learning_rate": 0.00012974789915966387, |
| "loss": 1.7833, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.32179607109448083, |
| "grad_norm": 1.2030748128890991, |
| "learning_rate": 0.00012941176470588237, |
| "loss": 1.8504, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.32329279700654817, |
| "grad_norm": 1.0713863372802734, |
| "learning_rate": 0.00012907563025210087, |
| "loss": 1.9432, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.3247895229186155, |
| "grad_norm": 1.1058694124221802, |
| "learning_rate": 0.00012873949579831934, |
| "loss": 1.727, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.32628624883068286, |
| "grad_norm": 1.1129230260849, |
| "learning_rate": 0.00012840336134453784, |
| "loss": 1.9422, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.32778297474275025, |
| "grad_norm": 0.9841461181640625, |
| "learning_rate": 0.0001280672268907563, |
| "loss": 1.8916, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.3292797006548176, |
| "grad_norm": 1.3188430070877075, |
| "learning_rate": 0.00012773109243697478, |
| "loss": 1.7102, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.33077642656688494, |
| "grad_norm": 1.1673728227615356, |
| "learning_rate": 0.00012739495798319328, |
| "loss": 1.8466, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.3322731524789523, |
| "grad_norm": 1.170258641242981, |
| "learning_rate": 0.00012705882352941175, |
| "loss": 1.8602, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.3337698783910196, |
| "grad_norm": 1.1097817420959473, |
| "learning_rate": 0.00012672268907563025, |
| "loss": 1.912, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.335266604303087, |
| "grad_norm": 1.0673434734344482, |
| "learning_rate": 0.00012638655462184875, |
| "loss": 1.808, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.33676333021515437, |
| "grad_norm": 1.311902642250061, |
| "learning_rate": 0.00012605042016806722, |
| "loss": 1.8527, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.33676333021515437, |
| "eval_loss": 1.7518868446350098, |
| "eval_runtime": 9.8911, |
| "eval_samples_per_second": 6.47, |
| "eval_steps_per_second": 3.235, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.3382600561272217, |
| "grad_norm": 1.0087488889694214, |
| "learning_rate": 0.00012571428571428572, |
| "loss": 1.9122, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.33975678203928905, |
| "grad_norm": 1.2666288614273071, |
| "learning_rate": 0.0001253781512605042, |
| "loss": 1.7594, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.3412535079513564, |
| "grad_norm": 1.1287747621536255, |
| "learning_rate": 0.0001250420168067227, |
| "loss": 1.8053, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.34275023386342374, |
| "grad_norm": 1.206766128540039, |
| "learning_rate": 0.0001247058823529412, |
| "loss": 1.9256, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.34424695977549113, |
| "grad_norm": 1.4732266664505005, |
| "learning_rate": 0.00012436974789915966, |
| "loss": 1.794, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.3457436856875585, |
| "grad_norm": 1.1438567638397217, |
| "learning_rate": 0.00012403361344537816, |
| "loss": 1.8223, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.3472404115996258, |
| "grad_norm": 1.1886340379714966, |
| "learning_rate": 0.00012369747899159666, |
| "loss": 1.7702, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.34873713751169316, |
| "grad_norm": 1.4150899648666382, |
| "learning_rate": 0.00012336134453781513, |
| "loss": 1.7094, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.3502338634237605, |
| "grad_norm": 1.446444034576416, |
| "learning_rate": 0.00012302521008403363, |
| "loss": 1.7575, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.3517305893358279, |
| "grad_norm": 1.254396677017212, |
| "learning_rate": 0.0001226890756302521, |
| "loss": 1.6884, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.35322731524789525, |
| "grad_norm": 1.2610015869140625, |
| "learning_rate": 0.0001223529411764706, |
| "loss": 1.8701, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.3547240411599626, |
| "grad_norm": 0.932133138179779, |
| "learning_rate": 0.00012201680672268909, |
| "loss": 1.8865, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.35622076707202993, |
| "grad_norm": 1.4056602716445923, |
| "learning_rate": 0.00012168067226890756, |
| "loss": 1.788, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.3577174929840973, |
| "grad_norm": 1.2466380596160889, |
| "learning_rate": 0.00012134453781512605, |
| "loss": 1.8152, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.3592142188961646, |
| "grad_norm": 1.1610888242721558, |
| "learning_rate": 0.00012100840336134453, |
| "loss": 1.8782, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.360710944808232, |
| "grad_norm": 1.1852293014526367, |
| "learning_rate": 0.00012067226890756302, |
| "loss": 1.7942, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.36220767072029936, |
| "grad_norm": 1.1470458507537842, |
| "learning_rate": 0.00012033613445378152, |
| "loss": 1.9395, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.3637043966323667, |
| "grad_norm": 1.1640129089355469, |
| "learning_rate": 0.00012, |
| "loss": 1.8221, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.36520112254443404, |
| "grad_norm": 1.7140247821807861, |
| "learning_rate": 0.00011966386554621849, |
| "loss": 1.8398, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.3666978484565014, |
| "grad_norm": 1.1535826921463013, |
| "learning_rate": 0.00011932773109243697, |
| "loss": 1.7175, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.3681945743685688, |
| "grad_norm": 1.1240558624267578, |
| "learning_rate": 0.00011899159663865547, |
| "loss": 1.8434, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.3696913002806361, |
| "grad_norm": 1.2826379537582397, |
| "learning_rate": 0.00011865546218487396, |
| "loss": 1.7932, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.37118802619270347, |
| "grad_norm": 1.423509955406189, |
| "learning_rate": 0.00011831932773109244, |
| "loss": 1.851, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.3726847521047708, |
| "grad_norm": 1.0698450803756714, |
| "learning_rate": 0.00011798319327731093, |
| "loss": 1.8628, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.37418147801683815, |
| "grad_norm": 1.2364152669906616, |
| "learning_rate": 0.00011764705882352942, |
| "loss": 1.8172, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.37418147801683815, |
| "eval_loss": 1.7512738704681396, |
| "eval_runtime": 9.9241, |
| "eval_samples_per_second": 6.449, |
| "eval_steps_per_second": 3.224, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.3756782039289055, |
| "grad_norm": 1.073832392692566, |
| "learning_rate": 0.00011731092436974791, |
| "loss": 1.7081, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.3771749298409729, |
| "grad_norm": 1.0931236743927002, |
| "learning_rate": 0.0001169747899159664, |
| "loss": 1.8104, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.37867165575304024, |
| "grad_norm": 1.0120686292648315, |
| "learning_rate": 0.00011663865546218489, |
| "loss": 1.9482, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.3801683816651076, |
| "grad_norm": 1.493310570716858, |
| "learning_rate": 0.00011630252100840337, |
| "loss": 1.7734, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.3816651075771749, |
| "grad_norm": 1.1117216348648071, |
| "learning_rate": 0.00011596638655462187, |
| "loss": 1.8592, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.38316183348924227, |
| "grad_norm": 1.1169261932373047, |
| "learning_rate": 0.00011563025210084036, |
| "loss": 1.7698, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.3846585594013096, |
| "grad_norm": 1.1613709926605225, |
| "learning_rate": 0.00011529411764705881, |
| "loss": 1.8207, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.386155285313377, |
| "grad_norm": 1.2825706005096436, |
| "learning_rate": 0.00011495798319327731, |
| "loss": 1.822, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.38765201122544435, |
| "grad_norm": 0.9785429835319519, |
| "learning_rate": 0.0001146218487394958, |
| "loss": 1.7244, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.3891487371375117, |
| "grad_norm": 1.177465558052063, |
| "learning_rate": 0.00011428571428571428, |
| "loss": 1.8048, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.39064546304957903, |
| "grad_norm": 1.3732517957687378, |
| "learning_rate": 0.00011394957983193277, |
| "loss": 1.752, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.3921421889616464, |
| "grad_norm": 1.217416763305664, |
| "learning_rate": 0.00011361344537815127, |
| "loss": 1.8383, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.3936389148737138, |
| "grad_norm": 1.1581103801727295, |
| "learning_rate": 0.00011327731092436975, |
| "loss": 1.7979, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.3951356407857811, |
| "grad_norm": 1.0889670848846436, |
| "learning_rate": 0.00011294117647058824, |
| "loss": 1.8704, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.39663236669784846, |
| "grad_norm": 1.3366667032241821, |
| "learning_rate": 0.00011260504201680672, |
| "loss": 1.8054, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.3981290926099158, |
| "grad_norm": 1.2124860286712646, |
| "learning_rate": 0.00011226890756302521, |
| "loss": 1.8366, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.39962581852198314, |
| "grad_norm": 1.3980365991592407, |
| "learning_rate": 0.00011193277310924371, |
| "loss": 1.7174, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.4011225444340505, |
| "grad_norm": 1.1408543586730957, |
| "learning_rate": 0.0001115966386554622, |
| "loss": 1.7571, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.4026192703461179, |
| "grad_norm": 1.1843181848526, |
| "learning_rate": 0.00011126050420168068, |
| "loss": 1.8332, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.40411599625818523, |
| "grad_norm": 1.4148800373077393, |
| "learning_rate": 0.00011092436974789917, |
| "loss": 1.9739, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.40561272217025257, |
| "grad_norm": 1.0253487825393677, |
| "learning_rate": 0.00011058823529411766, |
| "loss": 1.8726, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.4071094480823199, |
| "grad_norm": 1.3067514896392822, |
| "learning_rate": 0.00011025210084033615, |
| "loss": 1.6941, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.40860617399438726, |
| "grad_norm": 1.0671433210372925, |
| "learning_rate": 0.00010991596638655464, |
| "loss": 1.7697, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.41010289990645465, |
| "grad_norm": 1.3519949913024902, |
| "learning_rate": 0.00010957983193277312, |
| "loss": 1.7855, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.411599625818522, |
| "grad_norm": 1.076204538345337, |
| "learning_rate": 0.00010924369747899159, |
| "loss": 1.9365, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.411599625818522, |
| "eval_loss": 1.7478337287902832, |
| "eval_runtime": 9.905, |
| "eval_samples_per_second": 6.461, |
| "eval_steps_per_second": 3.231, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.41309635173058934, |
| "grad_norm": 1.2591997385025024, |
| "learning_rate": 0.00010890756302521008, |
| "loss": 1.7309, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.4145930776426567, |
| "grad_norm": 1.340928554534912, |
| "learning_rate": 0.00010857142857142856, |
| "loss": 1.7348, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.416089803554724, |
| "grad_norm": 0.9326046109199524, |
| "learning_rate": 0.00010823529411764706, |
| "loss": 1.8101, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.41758652946679137, |
| "grad_norm": 1.3383642435073853, |
| "learning_rate": 0.00010789915966386555, |
| "loss": 1.7476, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.41908325537885877, |
| "grad_norm": 0.9598699808120728, |
| "learning_rate": 0.00010756302521008403, |
| "loss": 1.6919, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.4205799812909261, |
| "grad_norm": 1.019420862197876, |
| "learning_rate": 0.00010722689075630252, |
| "loss": 1.8148, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.42207670720299345, |
| "grad_norm": 1.142913579940796, |
| "learning_rate": 0.000106890756302521, |
| "loss": 1.8095, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.4235734331150608, |
| "grad_norm": 1.3397822380065918, |
| "learning_rate": 0.0001065546218487395, |
| "loss": 1.7827, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.42507015902712814, |
| "grad_norm": 1.3161386251449585, |
| "learning_rate": 0.00010621848739495799, |
| "loss": 1.8575, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.42656688493919553, |
| "grad_norm": 1.1929869651794434, |
| "learning_rate": 0.00010588235294117647, |
| "loss": 1.79, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.4280636108512629, |
| "grad_norm": 1.2960401773452759, |
| "learning_rate": 0.00010554621848739496, |
| "loss": 1.7203, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.4295603367633302, |
| "grad_norm": 1.117655873298645, |
| "learning_rate": 0.00010521008403361346, |
| "loss": 1.8556, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.43105706267539756, |
| "grad_norm": 1.117436408996582, |
| "learning_rate": 0.00010487394957983194, |
| "loss": 1.7695, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.4325537885874649, |
| "grad_norm": 1.0667588710784912, |
| "learning_rate": 0.00010453781512605043, |
| "loss": 1.8246, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.43405051449953225, |
| "grad_norm": 1.0113589763641357, |
| "learning_rate": 0.00010420168067226892, |
| "loss": 1.9029, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.43554724041159965, |
| "grad_norm": 1.0438803434371948, |
| "learning_rate": 0.00010386554621848741, |
| "loss": 1.8053, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.437043966323667, |
| "grad_norm": 2.1361262798309326, |
| "learning_rate": 0.0001035294117647059, |
| "loss": 1.7788, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.43854069223573433, |
| "grad_norm": 1.2499916553497314, |
| "learning_rate": 0.00010319327731092439, |
| "loss": 1.745, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.4400374181478017, |
| "grad_norm": 1.10703444480896, |
| "learning_rate": 0.00010285714285714286, |
| "loss": 1.8723, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.441534144059869, |
| "grad_norm": 0.9382535815238953, |
| "learning_rate": 0.00010252100840336134, |
| "loss": 1.8814, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.4430308699719364, |
| "grad_norm": 1.1860700845718384, |
| "learning_rate": 0.00010218487394957983, |
| "loss": 1.7975, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.44452759588400376, |
| "grad_norm": 1.0663989782333374, |
| "learning_rate": 0.00010184873949579831, |
| "loss": 1.8333, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.4460243217960711, |
| "grad_norm": 1.0662665367126465, |
| "learning_rate": 0.0001015126050420168, |
| "loss": 1.7244, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.44752104770813844, |
| "grad_norm": 1.1393229961395264, |
| "learning_rate": 0.0001011764705882353, |
| "loss": 1.7832, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.4490177736202058, |
| "grad_norm": 1.1080511808395386, |
| "learning_rate": 0.00010084033613445378, |
| "loss": 1.9043, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.4490177736202058, |
| "eval_loss": 1.7416434288024902, |
| "eval_runtime": 9.9207, |
| "eval_samples_per_second": 6.451, |
| "eval_steps_per_second": 3.226, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.4505144995322731, |
| "grad_norm": 1.2179614305496216, |
| "learning_rate": 0.00010050420168067227, |
| "loss": 1.7529, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.4520112254443405, |
| "grad_norm": 1.0161402225494385, |
| "learning_rate": 0.00010016806722689076, |
| "loss": 1.9195, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.45350795135640787, |
| "grad_norm": 0.995812714099884, |
| "learning_rate": 9.983193277310925e-05, |
| "loss": 1.8102, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.4550046772684752, |
| "grad_norm": 1.1934641599655151, |
| "learning_rate": 9.949579831932774e-05, |
| "loss": 1.7814, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.45650140318054255, |
| "grad_norm": 0.8843271732330322, |
| "learning_rate": 9.915966386554623e-05, |
| "loss": 1.6012, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.4579981290926099, |
| "grad_norm": 1.0673537254333496, |
| "learning_rate": 9.882352941176471e-05, |
| "loss": 1.8338, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.4594948550046773, |
| "grad_norm": 1.007251262664795, |
| "learning_rate": 9.848739495798321e-05, |
| "loss": 1.9598, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.46099158091674464, |
| "grad_norm": 1.111372470855713, |
| "learning_rate": 9.815126050420168e-05, |
| "loss": 1.8086, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.462488306828812, |
| "grad_norm": 1.2300423383712769, |
| "learning_rate": 9.781512605042017e-05, |
| "loss": 1.8517, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.4639850327408793, |
| "grad_norm": 1.034952998161316, |
| "learning_rate": 9.747899159663865e-05, |
| "loss": 1.8743, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.46548175865294666, |
| "grad_norm": 1.1956812143325806, |
| "learning_rate": 9.714285714285715e-05, |
| "loss": 1.8484, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.466978484565014, |
| "grad_norm": 1.0862494707107544, |
| "learning_rate": 9.680672268907564e-05, |
| "loss": 1.8167, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.4684752104770814, |
| "grad_norm": 1.0881495475769043, |
| "learning_rate": 9.647058823529412e-05, |
| "loss": 1.7105, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.46997193638914875, |
| "grad_norm": 1.1325311660766602, |
| "learning_rate": 9.613445378151261e-05, |
| "loss": 1.7793, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.4714686623012161, |
| "grad_norm": 1.0453370809555054, |
| "learning_rate": 9.579831932773111e-05, |
| "loss": 1.8046, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.47296538821328343, |
| "grad_norm": 1.127502679824829, |
| "learning_rate": 9.546218487394959e-05, |
| "loss": 1.8062, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.4744621141253508, |
| "grad_norm": 1.0815576314926147, |
| "learning_rate": 9.512605042016806e-05, |
| "loss": 1.8572, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.4759588400374182, |
| "grad_norm": 1.1076608896255493, |
| "learning_rate": 9.478991596638655e-05, |
| "loss": 1.7315, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.4774555659494855, |
| "grad_norm": 1.1549115180969238, |
| "learning_rate": 9.445378151260505e-05, |
| "loss": 1.749, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.47895229186155286, |
| "grad_norm": 1.0027329921722412, |
| "learning_rate": 9.411764705882353e-05, |
| "loss": 1.9418, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.4804490177736202, |
| "grad_norm": 1.1883653402328491, |
| "learning_rate": 9.378151260504202e-05, |
| "loss": 1.7007, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.48194574368568754, |
| "grad_norm": 1.0235621929168701, |
| "learning_rate": 9.34453781512605e-05, |
| "loss": 1.7951, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.4834424695977549, |
| "grad_norm": 1.3429903984069824, |
| "learning_rate": 9.3109243697479e-05, |
| "loss": 1.8396, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.4849391955098223, |
| "grad_norm": 1.2389410734176636, |
| "learning_rate": 9.277310924369749e-05, |
| "loss": 1.8577, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.4864359214218896, |
| "grad_norm": 1.1658669710159302, |
| "learning_rate": 9.243697478991598e-05, |
| "loss": 1.7733, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.4864359214218896, |
| "eval_loss": 1.7379932403564453, |
| "eval_runtime": 9.9225, |
| "eval_samples_per_second": 6.45, |
| "eval_steps_per_second": 3.225, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.48793264733395697, |
| "grad_norm": 1.2423300743103027, |
| "learning_rate": 9.210084033613445e-05, |
| "loss": 1.7292, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.4894293732460243, |
| "grad_norm": 1.0733031034469604, |
| "learning_rate": 9.176470588235295e-05, |
| "loss": 1.8459, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.49092609915809166, |
| "grad_norm": 1.0904580354690552, |
| "learning_rate": 9.142857142857143e-05, |
| "loss": 1.7226, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.49242282507015905, |
| "grad_norm": 1.271660327911377, |
| "learning_rate": 9.109243697478992e-05, |
| "loss": 1.7913, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.4939195509822264, |
| "grad_norm": 1.0605442523956299, |
| "learning_rate": 9.07563025210084e-05, |
| "loss": 1.8367, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.49541627689429374, |
| "grad_norm": 1.0855785608291626, |
| "learning_rate": 9.04201680672269e-05, |
| "loss": 1.7726, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.4969130028063611, |
| "grad_norm": 1.0943794250488281, |
| "learning_rate": 9.008403361344539e-05, |
| "loss": 1.8692, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.4984097287184284, |
| "grad_norm": 1.1238775253295898, |
| "learning_rate": 8.974789915966387e-05, |
| "loss": 1.6684, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.49990645463049577, |
| "grad_norm": 1.1262332201004028, |
| "learning_rate": 8.941176470588236e-05, |
| "loss": 2.0474, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.5014031805425632, |
| "grad_norm": 1.1114274263381958, |
| "learning_rate": 8.907563025210084e-05, |
| "loss": 1.8302, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.5028999064546305, |
| "grad_norm": 1.2173712253570557, |
| "learning_rate": 8.873949579831933e-05, |
| "loss": 1.7415, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.5043966323666979, |
| "grad_norm": 1.0768870115280151, |
| "learning_rate": 8.840336134453782e-05, |
| "loss": 1.8232, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.5058933582787652, |
| "grad_norm": 1.3007467985153198, |
| "learning_rate": 8.80672268907563e-05, |
| "loss": 1.8447, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.5073900841908325, |
| "grad_norm": 1.1609522104263306, |
| "learning_rate": 8.77310924369748e-05, |
| "loss": 1.7427, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.5088868101028999, |
| "grad_norm": 1.3535274267196655, |
| "learning_rate": 8.739495798319329e-05, |
| "loss": 1.748, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.5103835360149672, |
| "grad_norm": 1.132091999053955, |
| "learning_rate": 8.705882352941177e-05, |
| "loss": 1.8139, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.5118802619270346, |
| "grad_norm": 1.1243830919265747, |
| "learning_rate": 8.672268907563026e-05, |
| "loss": 1.7954, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.513376987839102, |
| "grad_norm": 1.2013453245162964, |
| "learning_rate": 8.638655462184874e-05, |
| "loss": 1.7759, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.5148737137511693, |
| "grad_norm": 1.1899113655090332, |
| "learning_rate": 8.605042016806724e-05, |
| "loss": 1.9394, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.5163704396632367, |
| "grad_norm": 1.1117327213287354, |
| "learning_rate": 8.571428571428571e-05, |
| "loss": 1.7475, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.517867165575304, |
| "grad_norm": 0.9757189154624939, |
| "learning_rate": 8.53781512605042e-05, |
| "loss": 1.8084, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.5193638914873714, |
| "grad_norm": 1.1269667148590088, |
| "learning_rate": 8.50420168067227e-05, |
| "loss": 1.872, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.5208606173994387, |
| "grad_norm": 1.0271408557891846, |
| "learning_rate": 8.470588235294118e-05, |
| "loss": 1.8508, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.5223573433115061, |
| "grad_norm": 1.1958681344985962, |
| "learning_rate": 8.436974789915967e-05, |
| "loss": 1.7609, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.5238540692235735, |
| "grad_norm": 1.1345899105072021, |
| "learning_rate": 8.403361344537815e-05, |
| "loss": 1.7925, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.5238540692235735, |
| "eval_loss": 1.7380330562591553, |
| "eval_runtime": 9.922, |
| "eval_samples_per_second": 6.45, |
| "eval_steps_per_second": 3.225, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.5253507951356408, |
| "grad_norm": 1.1461538076400757, |
| "learning_rate": 8.369747899159664e-05, |
| "loss": 1.7498, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.5268475210477082, |
| "grad_norm": 1.1772956848144531, |
| "learning_rate": 8.336134453781514e-05, |
| "loss": 1.8021, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.5283442469597754, |
| "grad_norm": 1.058077335357666, |
| "learning_rate": 8.302521008403362e-05, |
| "loss": 1.8782, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.5298409728718428, |
| "grad_norm": 1.0576335191726685, |
| "learning_rate": 8.26890756302521e-05, |
| "loss": 1.917, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.5313376987839102, |
| "grad_norm": 1.3313714265823364, |
| "learning_rate": 8.23529411764706e-05, |
| "loss": 1.5589, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.5328344246959775, |
| "grad_norm": 1.1257191896438599, |
| "learning_rate": 8.201680672268908e-05, |
| "loss": 1.8366, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.5343311506080449, |
| "grad_norm": 1.1489942073822021, |
| "learning_rate": 8.168067226890757e-05, |
| "loss": 1.7436, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.5358278765201122, |
| "grad_norm": 1.3241493701934814, |
| "learning_rate": 8.134453781512605e-05, |
| "loss": 1.6859, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.5373246024321796, |
| "grad_norm": 1.0549739599227905, |
| "learning_rate": 8.100840336134454e-05, |
| "loss": 1.7418, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.538821328344247, |
| "grad_norm": 1.207139015197754, |
| "learning_rate": 8.067226890756304e-05, |
| "loss": 1.8467, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.5403180542563143, |
| "grad_norm": 0.9392403364181519, |
| "learning_rate": 8.033613445378152e-05, |
| "loss": 1.8414, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.5418147801683817, |
| "grad_norm": 1.0758482217788696, |
| "learning_rate": 8e-05, |
| "loss": 1.8437, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.543311506080449, |
| "grad_norm": 1.3484997749328613, |
| "learning_rate": 7.966386554621849e-05, |
| "loss": 1.7959, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.5448082319925164, |
| "grad_norm": 1.1805089712142944, |
| "learning_rate": 7.932773109243698e-05, |
| "loss": 1.9371, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.5463049579045838, |
| "grad_norm": 1.1756744384765625, |
| "learning_rate": 7.899159663865546e-05, |
| "loss": 1.7701, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.5478016838166511, |
| "grad_norm": 1.1375999450683594, |
| "learning_rate": 7.865546218487395e-05, |
| "loss": 1.6643, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.5492984097287185, |
| "grad_norm": 1.069983720779419, |
| "learning_rate": 7.831932773109243e-05, |
| "loss": 1.8322, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.5507951356407857, |
| "grad_norm": 1.0515434741973877, |
| "learning_rate": 7.798319327731093e-05, |
| "loss": 1.7744, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.5522918615528531, |
| "grad_norm": 1.0633738040924072, |
| "learning_rate": 7.764705882352942e-05, |
| "loss": 1.8109, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.5537885874649204, |
| "grad_norm": 1.359668254852295, |
| "learning_rate": 7.73109243697479e-05, |
| "loss": 1.7944, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.5552853133769878, |
| "grad_norm": 1.2288023233413696, |
| "learning_rate": 7.697478991596639e-05, |
| "loss": 1.7918, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.5567820392890552, |
| "grad_norm": 1.1025428771972656, |
| "learning_rate": 7.663865546218489e-05, |
| "loss": 1.823, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.5582787652011225, |
| "grad_norm": 1.005429744720459, |
| "learning_rate": 7.630252100840336e-05, |
| "loss": 1.8222, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.5597754911131899, |
| "grad_norm": 1.2293034791946411, |
| "learning_rate": 7.596638655462185e-05, |
| "loss": 1.8342, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.5612722170252572, |
| "grad_norm": 1.0827324390411377, |
| "learning_rate": 7.563025210084033e-05, |
| "loss": 1.643, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.5612722170252572, |
| "eval_loss": 1.7346194982528687, |
| "eval_runtime": 9.9111, |
| "eval_samples_per_second": 6.457, |
| "eval_steps_per_second": 3.229, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.5627689429373246, |
| "grad_norm": 1.0979185104370117, |
| "learning_rate": 7.529411764705883e-05, |
| "loss": 1.792, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.564265668849392, |
| "grad_norm": 1.0999720096588135, |
| "learning_rate": 7.495798319327732e-05, |
| "loss": 1.75, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.5657623947614593, |
| "grad_norm": 1.1695318222045898, |
| "learning_rate": 7.46218487394958e-05, |
| "loss": 1.7434, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.5672591206735267, |
| "grad_norm": 1.452539324760437, |
| "learning_rate": 7.428571428571429e-05, |
| "loss": 1.8034, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.568755846585594, |
| "grad_norm": 0.971097469329834, |
| "learning_rate": 7.394957983193279e-05, |
| "loss": 1.9281, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.5702525724976614, |
| "grad_norm": 1.190000057220459, |
| "learning_rate": 7.361344537815127e-05, |
| "loss": 1.8709, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.5717492984097288, |
| "grad_norm": 1.2394530773162842, |
| "learning_rate": 7.327731092436974e-05, |
| "loss": 1.8645, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.573246024321796, |
| "grad_norm": 1.0058298110961914, |
| "learning_rate": 7.294117647058823e-05, |
| "loss": 1.7103, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.5747427502338635, |
| "grad_norm": 0.9850262999534607, |
| "learning_rate": 7.260504201680673e-05, |
| "loss": 1.8576, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.5762394761459307, |
| "grad_norm": 0.9836248159408569, |
| "learning_rate": 7.226890756302521e-05, |
| "loss": 1.8989, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.5777362020579981, |
| "grad_norm": 0.9555202126502991, |
| "learning_rate": 7.19327731092437e-05, |
| "loss": 1.9098, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.5792329279700655, |
| "grad_norm": 1.2133311033248901, |
| "learning_rate": 7.159663865546218e-05, |
| "loss": 1.719, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.5807296538821328, |
| "grad_norm": 1.0879029035568237, |
| "learning_rate": 7.126050420168068e-05, |
| "loss": 1.7404, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.5822263797942002, |
| "grad_norm": 1.1183611154556274, |
| "learning_rate": 7.092436974789917e-05, |
| "loss": 1.7065, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.5837231057062675, |
| "grad_norm": 1.1441535949707031, |
| "learning_rate": 7.058823529411765e-05, |
| "loss": 1.876, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.5852198316183349, |
| "grad_norm": 1.0589473247528076, |
| "learning_rate": 7.025210084033613e-05, |
| "loss": 1.7505, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.5867165575304022, |
| "grad_norm": 1.149380087852478, |
| "learning_rate": 6.991596638655463e-05, |
| "loss": 1.7483, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.5882132834424696, |
| "grad_norm": 1.18876314163208, |
| "learning_rate": 6.957983193277311e-05, |
| "loss": 1.6457, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.589710009354537, |
| "grad_norm": 1.4560132026672363, |
| "learning_rate": 6.92436974789916e-05, |
| "loss": 1.747, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.5912067352666043, |
| "grad_norm": 1.0865813493728638, |
| "learning_rate": 6.890756302521008e-05, |
| "loss": 1.7372, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.5927034611786717, |
| "grad_norm": 1.1931579113006592, |
| "learning_rate": 6.857142857142858e-05, |
| "loss": 1.9051, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.594200187090739, |
| "grad_norm": 1.0811692476272583, |
| "learning_rate": 6.823529411764707e-05, |
| "loss": 1.8224, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.5956969130028064, |
| "grad_norm": 1.2077957391738892, |
| "learning_rate": 6.789915966386555e-05, |
| "loss": 1.8336, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.5971936389148738, |
| "grad_norm": 1.0455660820007324, |
| "learning_rate": 6.756302521008404e-05, |
| "loss": 1.797, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.598690364826941, |
| "grad_norm": 1.0006000995635986, |
| "learning_rate": 6.722689075630254e-05, |
| "loss": 1.7179, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.598690364826941, |
| "eval_loss": 1.7308931350708008, |
| "eval_runtime": 9.9241, |
| "eval_samples_per_second": 6.449, |
| "eval_steps_per_second": 3.224, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.6001870907390084, |
| "grad_norm": 1.143531322479248, |
| "learning_rate": 6.689075630252101e-05, |
| "loss": 1.8215, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.6016838166510757, |
| "grad_norm": 0.9073050618171692, |
| "learning_rate": 6.65546218487395e-05, |
| "loss": 1.9348, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.6031805425631431, |
| "grad_norm": 0.994017481803894, |
| "learning_rate": 6.621848739495798e-05, |
| "loss": 1.5425, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.6046772684752105, |
| "grad_norm": 1.1705002784729004, |
| "learning_rate": 6.588235294117648e-05, |
| "loss": 1.8536, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.6061739943872778, |
| "grad_norm": 0.9837265610694885, |
| "learning_rate": 6.554621848739496e-05, |
| "loss": 1.7408, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.6076707202993452, |
| "grad_norm": 0.9924471378326416, |
| "learning_rate": 6.521008403361345e-05, |
| "loss": 1.7051, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.6091674462114125, |
| "grad_norm": 1.0559359788894653, |
| "learning_rate": 6.487394957983193e-05, |
| "loss": 1.9459, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.6106641721234799, |
| "grad_norm": 1.2436074018478394, |
| "learning_rate": 6.453781512605043e-05, |
| "loss": 1.7981, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.6121608980355472, |
| "grad_norm": 1.278940200805664, |
| "learning_rate": 6.420168067226892e-05, |
| "loss": 1.7406, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.6136576239476146, |
| "grad_norm": 1.1248103380203247, |
| "learning_rate": 6.386554621848739e-05, |
| "loss": 1.7909, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.615154349859682, |
| "grad_norm": 1.1026619672775269, |
| "learning_rate": 6.352941176470588e-05, |
| "loss": 1.8588, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.6166510757717493, |
| "grad_norm": 1.1886570453643799, |
| "learning_rate": 6.319327731092438e-05, |
| "loss": 1.7935, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.6181478016838167, |
| "grad_norm": 1.0327427387237549, |
| "learning_rate": 6.285714285714286e-05, |
| "loss": 1.8984, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.619644527595884, |
| "grad_norm": 0.9914230108261108, |
| "learning_rate": 6.252100840336135e-05, |
| "loss": 1.9701, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.6211412535079514, |
| "grad_norm": 0.9550872445106506, |
| "learning_rate": 6.218487394957983e-05, |
| "loss": 1.7874, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.6226379794200188, |
| "grad_norm": 1.10657799243927, |
| "learning_rate": 6.184873949579833e-05, |
| "loss": 1.7691, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.624134705332086, |
| "grad_norm": 0.956917405128479, |
| "learning_rate": 6.151260504201682e-05, |
| "loss": 1.7304, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.6256314312441534, |
| "grad_norm": 1.0174245834350586, |
| "learning_rate": 6.11764705882353e-05, |
| "loss": 1.8446, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.6271281571562207, |
| "grad_norm": 1.321598768234253, |
| "learning_rate": 6.084033613445378e-05, |
| "loss": 1.5281, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.6286248830682881, |
| "grad_norm": 0.977022111415863, |
| "learning_rate": 6.0504201680672267e-05, |
| "loss": 1.8618, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.6301216089803555, |
| "grad_norm": 1.2729384899139404, |
| "learning_rate": 6.016806722689076e-05, |
| "loss": 1.6538, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.6316183348924228, |
| "grad_norm": 0.982009768486023, |
| "learning_rate": 5.9831932773109244e-05, |
| "loss": 1.8753, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.6331150608044902, |
| "grad_norm": 1.1528300046920776, |
| "learning_rate": 5.9495798319327737e-05, |
| "loss": 1.8639, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.6346117867165575, |
| "grad_norm": 1.096552848815918, |
| "learning_rate": 5.915966386554622e-05, |
| "loss": 1.7805, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.6361085126286249, |
| "grad_norm": 0.9799396991729736, |
| "learning_rate": 5.882352941176471e-05, |
| "loss": 1.9097, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.6361085126286249, |
| "eval_loss": 1.7265229225158691, |
| "eval_runtime": 9.9192, |
| "eval_samples_per_second": 6.452, |
| "eval_steps_per_second": 3.226, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.6376052385406923, |
| "grad_norm": 0.9917983412742615, |
| "learning_rate": 5.84873949579832e-05, |
| "loss": 1.8196, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.6391019644527596, |
| "grad_norm": 0.9843238592147827, |
| "learning_rate": 5.8151260504201685e-05, |
| "loss": 1.8505, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.640598690364827, |
| "grad_norm": 1.1680494546890259, |
| "learning_rate": 5.781512605042018e-05, |
| "loss": 1.7219, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.6420954162768943, |
| "grad_norm": 1.100326657295227, |
| "learning_rate": 5.7478991596638656e-05, |
| "loss": 1.8383, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.6435921421889617, |
| "grad_norm": 1.1734800338745117, |
| "learning_rate": 5.714285714285714e-05, |
| "loss": 1.7812, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.645088868101029, |
| "grad_norm": 1.129654049873352, |
| "learning_rate": 5.6806722689075634e-05, |
| "loss": 1.6585, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.6465855940130963, |
| "grad_norm": 0.925308108329773, |
| "learning_rate": 5.647058823529412e-05, |
| "loss": 2.008, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.6480823199251637, |
| "grad_norm": 1.2162028551101685, |
| "learning_rate": 5.6134453781512605e-05, |
| "loss": 1.7252, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.649579045837231, |
| "grad_norm": 1.0221952199935913, |
| "learning_rate": 5.57983193277311e-05, |
| "loss": 1.8121, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.6510757717492984, |
| "grad_norm": 1.125623106956482, |
| "learning_rate": 5.546218487394958e-05, |
| "loss": 1.7751, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.6525724976613657, |
| "grad_norm": 1.0839701890945435, |
| "learning_rate": 5.5126050420168075e-05, |
| "loss": 1.7789, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.6540692235734331, |
| "grad_norm": 1.1495144367218018, |
| "learning_rate": 5.478991596638656e-05, |
| "loss": 1.7065, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.6555659494855005, |
| "grad_norm": 1.1935160160064697, |
| "learning_rate": 5.445378151260504e-05, |
| "loss": 1.6623, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.6570626753975678, |
| "grad_norm": 1.0761985778808594, |
| "learning_rate": 5.411764705882353e-05, |
| "loss": 1.7685, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.6585594013096352, |
| "grad_norm": 1.2685760259628296, |
| "learning_rate": 5.378151260504202e-05, |
| "loss": 1.6965, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.6600561272217025, |
| "grad_norm": 1.3311073780059814, |
| "learning_rate": 5.34453781512605e-05, |
| "loss": 1.7738, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.6615528531337699, |
| "grad_norm": 1.2063039541244507, |
| "learning_rate": 5.3109243697478995e-05, |
| "loss": 1.8287, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.6630495790458373, |
| "grad_norm": 0.8949533104896545, |
| "learning_rate": 5.277310924369748e-05, |
| "loss": 1.7777, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.6645463049579046, |
| "grad_norm": 0.9890621900558472, |
| "learning_rate": 5.243697478991597e-05, |
| "loss": 1.8908, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.666043030869972, |
| "grad_norm": 1.141076683998108, |
| "learning_rate": 5.210084033613446e-05, |
| "loss": 1.7678, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.6675397567820393, |
| "grad_norm": 1.4433619976043701, |
| "learning_rate": 5.176470588235295e-05, |
| "loss": 1.7579, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.6690364826941066, |
| "grad_norm": 1.088409423828125, |
| "learning_rate": 5.142857142857143e-05, |
| "loss": 1.7632, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.670533208606174, |
| "grad_norm": 0.9711755514144897, |
| "learning_rate": 5.1092436974789914e-05, |
| "loss": 1.9189, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.6720299345182413, |
| "grad_norm": 1.2208515405654907, |
| "learning_rate": 5.07563025210084e-05, |
| "loss": 1.8619, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.6735266604303087, |
| "grad_norm": 1.202310562133789, |
| "learning_rate": 5.042016806722689e-05, |
| "loss": 1.7852, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.6735266604303087, |
| "eval_loss": 1.7230315208435059, |
| "eval_runtime": 9.9156, |
| "eval_samples_per_second": 6.454, |
| "eval_steps_per_second": 3.227, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.675023386342376, |
| "grad_norm": 1.0285881757736206, |
| "learning_rate": 5.008403361344538e-05, |
| "loss": 1.8857, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.6765201122544434, |
| "grad_norm": 1.0473498106002808, |
| "learning_rate": 4.974789915966387e-05, |
| "loss": 1.8324, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.6780168381665107, |
| "grad_norm": 1.129944920539856, |
| "learning_rate": 4.9411764705882355e-05, |
| "loss": 1.9199, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.6795135640785781, |
| "grad_norm": 0.9636753797531128, |
| "learning_rate": 4.907563025210084e-05, |
| "loss": 1.8929, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.6810102899906455, |
| "grad_norm": 1.014259934425354, |
| "learning_rate": 4.8739495798319326e-05, |
| "loss": 1.9402, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.6825070159027128, |
| "grad_norm": 1.033557415008545, |
| "learning_rate": 4.840336134453782e-05, |
| "loss": 1.7401, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.6840037418147802, |
| "grad_norm": 1.017338752746582, |
| "learning_rate": 4.8067226890756304e-05, |
| "loss": 1.7473, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.6855004677268475, |
| "grad_norm": 1.0434340238571167, |
| "learning_rate": 4.7731092436974796e-05, |
| "loss": 1.8009, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.6869971936389149, |
| "grad_norm": 1.1409871578216553, |
| "learning_rate": 4.7394957983193275e-05, |
| "loss": 1.7357, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.6884939195509823, |
| "grad_norm": 1.2012221813201904, |
| "learning_rate": 4.705882352941177e-05, |
| "loss": 1.6896, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.6899906454630496, |
| "grad_norm": 1.079777479171753, |
| "learning_rate": 4.672268907563025e-05, |
| "loss": 1.8386, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.691487371375117, |
| "grad_norm": 1.0131338834762573, |
| "learning_rate": 4.6386554621848745e-05, |
| "loss": 1.8507, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.6929840972871842, |
| "grad_norm": 1.123460292816162, |
| "learning_rate": 4.6050420168067224e-05, |
| "loss": 1.7637, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.6944808231992516, |
| "grad_norm": 1.4412729740142822, |
| "learning_rate": 4.5714285714285716e-05, |
| "loss": 1.6948, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.695977549111319, |
| "grad_norm": 1.220767617225647, |
| "learning_rate": 4.53781512605042e-05, |
| "loss": 1.8524, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.6974742750233863, |
| "grad_norm": 1.0192736387252808, |
| "learning_rate": 4.5042016806722694e-05, |
| "loss": 1.8267, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.6989710009354537, |
| "grad_norm": 1.0492209196090698, |
| "learning_rate": 4.470588235294118e-05, |
| "loss": 1.6835, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.700467726847521, |
| "grad_norm": 0.9897292256355286, |
| "learning_rate": 4.4369747899159665e-05, |
| "loss": 1.6976, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.7019644527595884, |
| "grad_norm": 1.3974395990371704, |
| "learning_rate": 4.403361344537815e-05, |
| "loss": 1.7081, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.7034611786716558, |
| "grad_norm": 1.0798640251159668, |
| "learning_rate": 4.369747899159664e-05, |
| "loss": 1.5884, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.7049579045837231, |
| "grad_norm": 1.010471224784851, |
| "learning_rate": 4.336134453781513e-05, |
| "loss": 1.8187, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.7064546304957905, |
| "grad_norm": 1.2067906856536865, |
| "learning_rate": 4.302521008403362e-05, |
| "loss": 1.7909, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.7079513564078578, |
| "grad_norm": 0.9305792450904846, |
| "learning_rate": 4.26890756302521e-05, |
| "loss": 1.8915, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.7094480823199252, |
| "grad_norm": 0.9608831405639648, |
| "learning_rate": 4.235294117647059e-05, |
| "loss": 1.8287, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.7109448082319925, |
| "grad_norm": 1.055216908454895, |
| "learning_rate": 4.201680672268908e-05, |
| "loss": 1.8816, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.7109448082319925, |
| "eval_loss": 1.7186332941055298, |
| "eval_runtime": 9.9278, |
| "eval_samples_per_second": 6.447, |
| "eval_steps_per_second": 3.223, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.7124415341440599, |
| "grad_norm": 1.1744225025177002, |
| "learning_rate": 4.168067226890757e-05, |
| "loss": 1.8259, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.7139382600561273, |
| "grad_norm": 1.0045477151870728, |
| "learning_rate": 4.134453781512605e-05, |
| "loss": 1.9401, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.7154349859681945, |
| "grad_norm": 1.0020571947097778, |
| "learning_rate": 4.100840336134454e-05, |
| "loss": 1.801, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.716931711880262, |
| "grad_norm": 1.0695722103118896, |
| "learning_rate": 4.0672268907563026e-05, |
| "loss": 1.7671, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.7184284377923292, |
| "grad_norm": 1.0022177696228027, |
| "learning_rate": 4.033613445378152e-05, |
| "loss": 1.9396, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.7199251637043966, |
| "grad_norm": 1.308337688446045, |
| "learning_rate": 4e-05, |
| "loss": 1.8176, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.721421889616464, |
| "grad_norm": 1.133042573928833, |
| "learning_rate": 3.966386554621849e-05, |
| "loss": 1.726, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.7229186155285313, |
| "grad_norm": 0.9932194352149963, |
| "learning_rate": 3.9327731092436974e-05, |
| "loss": 1.8504, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.7244153414405987, |
| "grad_norm": 1.0440658330917358, |
| "learning_rate": 3.8991596638655467e-05, |
| "loss": 1.7443, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.725912067352666, |
| "grad_norm": 1.0093355178833008, |
| "learning_rate": 3.865546218487395e-05, |
| "loss": 1.9789, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.7274087932647334, |
| "grad_norm": 1.0096389055252075, |
| "learning_rate": 3.8319327731092444e-05, |
| "loss": 1.7372, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.7289055191768008, |
| "grad_norm": 0.9682419300079346, |
| "learning_rate": 3.798319327731092e-05, |
| "loss": 1.8613, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.7304022450888681, |
| "grad_norm": 1.174164891242981, |
| "learning_rate": 3.7647058823529415e-05, |
| "loss": 1.6717, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.7318989710009355, |
| "grad_norm": 1.0377211570739746, |
| "learning_rate": 3.73109243697479e-05, |
| "loss": 1.8994, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.7333956969130028, |
| "grad_norm": 1.137771725654602, |
| "learning_rate": 3.697478991596639e-05, |
| "loss": 1.6331, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.7348924228250702, |
| "grad_norm": 1.159590482711792, |
| "learning_rate": 3.663865546218487e-05, |
| "loss": 1.7866, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.7363891487371376, |
| "grad_norm": 1.0992891788482666, |
| "learning_rate": 3.6302521008403364e-05, |
| "loss": 1.6787, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.7378858746492049, |
| "grad_norm": 1.1458029747009277, |
| "learning_rate": 3.596638655462185e-05, |
| "loss": 1.8032, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.7393826005612723, |
| "grad_norm": 1.0241297483444214, |
| "learning_rate": 3.563025210084034e-05, |
| "loss": 1.8382, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.7408793264733395, |
| "grad_norm": 1.0229034423828125, |
| "learning_rate": 3.529411764705883e-05, |
| "loss": 1.6115, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.7423760523854069, |
| "grad_norm": 1.0173096656799316, |
| "learning_rate": 3.495798319327731e-05, |
| "loss": 1.719, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.7438727782974742, |
| "grad_norm": 1.0528109073638916, |
| "learning_rate": 3.46218487394958e-05, |
| "loss": 1.8406, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.7453695042095416, |
| "grad_norm": 1.131954312324524, |
| "learning_rate": 3.428571428571429e-05, |
| "loss": 1.7171, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.746866230121609, |
| "grad_norm": 1.2483571767807007, |
| "learning_rate": 3.3949579831932776e-05, |
| "loss": 1.6404, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.7483629560336763, |
| "grad_norm": 1.1184672117233276, |
| "learning_rate": 3.361344537815127e-05, |
| "loss": 1.7858, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.7483629560336763, |
| "eval_loss": 1.7139549255371094, |
| "eval_runtime": 9.9252, |
| "eval_samples_per_second": 6.448, |
| "eval_steps_per_second": 3.224, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.7498596819457437, |
| "grad_norm": 0.9178484082221985, |
| "learning_rate": 3.327731092436975e-05, |
| "loss": 1.8397, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.751356407857811, |
| "grad_norm": 0.9863389134407043, |
| "learning_rate": 3.294117647058824e-05, |
| "loss": 1.7721, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.7528531337698784, |
| "grad_norm": 1.0110721588134766, |
| "learning_rate": 3.2605042016806725e-05, |
| "loss": 1.8283, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.7543498596819458, |
| "grad_norm": 0.9291056394577026, |
| "learning_rate": 3.226890756302522e-05, |
| "loss": 1.8689, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.7558465855940131, |
| "grad_norm": 1.043022871017456, |
| "learning_rate": 3.1932773109243696e-05, |
| "loss": 1.6872, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.7573433115060805, |
| "grad_norm": 1.0019947290420532, |
| "learning_rate": 3.159663865546219e-05, |
| "loss": 1.7829, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.7588400374181478, |
| "grad_norm": 1.0303380489349365, |
| "learning_rate": 3.1260504201680673e-05, |
| "loss": 1.5978, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.7603367633302152, |
| "grad_norm": 1.1259996891021729, |
| "learning_rate": 3.0924369747899166e-05, |
| "loss": 1.78, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.7618334892422826, |
| "grad_norm": 1.0327061414718628, |
| "learning_rate": 3.058823529411765e-05, |
| "loss": 1.8328, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.7633302151543498, |
| "grad_norm": 1.0840046405792236, |
| "learning_rate": 3.0252100840336133e-05, |
| "loss": 1.8562, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.7648269410664172, |
| "grad_norm": 0.9591643214225769, |
| "learning_rate": 2.9915966386554622e-05, |
| "loss": 1.9301, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.7663236669784845, |
| "grad_norm": 0.9148833155632019, |
| "learning_rate": 2.957983193277311e-05, |
| "loss": 1.6514, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.7678203928905519, |
| "grad_norm": 1.2764103412628174, |
| "learning_rate": 2.92436974789916e-05, |
| "loss": 1.7717, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.7693171188026192, |
| "grad_norm": 0.9035190343856812, |
| "learning_rate": 2.890756302521009e-05, |
| "loss": 1.8695, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.7708138447146866, |
| "grad_norm": 1.047916293144226, |
| "learning_rate": 2.857142857142857e-05, |
| "loss": 1.7798, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.772310570626754, |
| "grad_norm": 1.090427279472351, |
| "learning_rate": 2.823529411764706e-05, |
| "loss": 1.7791, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.7738072965388213, |
| "grad_norm": 1.0227428674697876, |
| "learning_rate": 2.789915966386555e-05, |
| "loss": 1.8471, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.7753040224508887, |
| "grad_norm": 1.2380752563476562, |
| "learning_rate": 2.7563025210084037e-05, |
| "loss": 1.7979, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.776800748362956, |
| "grad_norm": 0.9750601649284363, |
| "learning_rate": 2.722689075630252e-05, |
| "loss": 1.7729, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.7782974742750234, |
| "grad_norm": 0.8714391589164734, |
| "learning_rate": 2.689075630252101e-05, |
| "loss": 1.8335, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.7797942001870908, |
| "grad_norm": 1.2003592252731323, |
| "learning_rate": 2.6554621848739497e-05, |
| "loss": 1.8318, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.7812909260991581, |
| "grad_norm": 1.1534149646759033, |
| "learning_rate": 2.6218487394957986e-05, |
| "loss": 1.8326, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.7827876520112255, |
| "grad_norm": 0.9890381693840027, |
| "learning_rate": 2.5882352941176475e-05, |
| "loss": 1.8477, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.7842843779232928, |
| "grad_norm": 1.1603058576583862, |
| "learning_rate": 2.5546218487394957e-05, |
| "loss": 1.7578, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.7857811038353602, |
| "grad_norm": 1.2228182554244995, |
| "learning_rate": 2.5210084033613446e-05, |
| "loss": 1.8909, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.7857811038353602, |
| "eval_loss": 1.7110382318496704, |
| "eval_runtime": 9.9304, |
| "eval_samples_per_second": 6.445, |
| "eval_steps_per_second": 3.222, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.7872778297474275, |
| "grad_norm": 1.1079294681549072, |
| "learning_rate": 2.4873949579831935e-05, |
| "loss": 1.8697, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.7887745556594948, |
| "grad_norm": 1.081797480583191, |
| "learning_rate": 2.453781512605042e-05, |
| "loss": 1.7759, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.7902712815715622, |
| "grad_norm": 0.9994049072265625, |
| "learning_rate": 2.420168067226891e-05, |
| "loss": 1.7292, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.7917680074836295, |
| "grad_norm": 1.032642126083374, |
| "learning_rate": 2.3865546218487398e-05, |
| "loss": 1.7072, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.7932647333956969, |
| "grad_norm": 0.9598051905632019, |
| "learning_rate": 2.3529411764705884e-05, |
| "loss": 1.9129, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.7947614593077643, |
| "grad_norm": 1.115540623664856, |
| "learning_rate": 2.3193277310924373e-05, |
| "loss": 1.8229, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.7962581852198316, |
| "grad_norm": 1.0516905784606934, |
| "learning_rate": 2.2857142857142858e-05, |
| "loss": 1.8115, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.797754911131899, |
| "grad_norm": 1.292768955230713, |
| "learning_rate": 2.2521008403361347e-05, |
| "loss": 1.5696, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.7992516370439663, |
| "grad_norm": 1.1004947423934937, |
| "learning_rate": 2.2184873949579832e-05, |
| "loss": 1.7799, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.8007483629560337, |
| "grad_norm": 1.0998330116271973, |
| "learning_rate": 2.184873949579832e-05, |
| "loss": 1.6451, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.802245088868101, |
| "grad_norm": 1.0818289518356323, |
| "learning_rate": 2.151260504201681e-05, |
| "loss": 1.7069, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.8037418147801684, |
| "grad_norm": 1.1351370811462402, |
| "learning_rate": 2.1176470588235296e-05, |
| "loss": 1.8398, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.8052385406922358, |
| "grad_norm": 1.030208945274353, |
| "learning_rate": 2.0840336134453785e-05, |
| "loss": 1.8557, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.8067352666043031, |
| "grad_norm": 1.0458731651306152, |
| "learning_rate": 2.050420168067227e-05, |
| "loss": 1.8046, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.8082319925163705, |
| "grad_norm": 1.2043875455856323, |
| "learning_rate": 2.016806722689076e-05, |
| "loss": 1.9645, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.8097287184284377, |
| "grad_norm": 1.0553354024887085, |
| "learning_rate": 1.9831932773109244e-05, |
| "loss": 1.821, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.8112254443405051, |
| "grad_norm": 0.9270862340927124, |
| "learning_rate": 1.9495798319327733e-05, |
| "loss": 1.6794, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.8127221702525725, |
| "grad_norm": 1.0153188705444336, |
| "learning_rate": 1.9159663865546222e-05, |
| "loss": 1.8425, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.8142188961646398, |
| "grad_norm": 0.8246691226959229, |
| "learning_rate": 1.8823529411764708e-05, |
| "loss": 1.8982, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.8157156220767072, |
| "grad_norm": 1.1857342720031738, |
| "learning_rate": 1.8487394957983196e-05, |
| "loss": 1.6989, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.8172123479887745, |
| "grad_norm": 0.9691339731216431, |
| "learning_rate": 1.8151260504201682e-05, |
| "loss": 1.7571, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.8187090739008419, |
| "grad_norm": 1.1744868755340576, |
| "learning_rate": 1.781512605042017e-05, |
| "loss": 1.7771, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.8202057998129093, |
| "grad_norm": 0.9755433201789856, |
| "learning_rate": 1.7478991596638656e-05, |
| "loss": 1.8593, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.8217025257249766, |
| "grad_norm": 1.1424980163574219, |
| "learning_rate": 1.7142857142857145e-05, |
| "loss": 1.6111, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.823199251637044, |
| "grad_norm": 0.9548054337501526, |
| "learning_rate": 1.6806722689075634e-05, |
| "loss": 1.9243, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.823199251637044, |
| "eval_loss": 1.7092292308807373, |
| "eval_runtime": 9.9161, |
| "eval_samples_per_second": 6.454, |
| "eval_steps_per_second": 3.227, |
| "step": 550 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 600, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.95873572023255e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|