| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 1875, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0016, |
| "grad_norm": 3.443783900713759, |
| "learning_rate": 0.0, |
| "loss": 0.9037, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0032, |
| "grad_norm": 3.6390305366298263, |
| "learning_rate": 1.0660980810234543e-08, |
| "loss": 0.8625, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0048, |
| "grad_norm": 3.659207091105885, |
| "learning_rate": 2.1321961620469085e-08, |
| "loss": 0.9171, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0064, |
| "grad_norm": 3.7880542872635505, |
| "learning_rate": 3.1982942430703625e-08, |
| "loss": 0.9464, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.008, |
| "grad_norm": 3.548747527929191, |
| "learning_rate": 4.264392324093817e-08, |
| "loss": 0.895, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0096, |
| "grad_norm": 3.759771231107573, |
| "learning_rate": 5.330490405117271e-08, |
| "loss": 0.941, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0112, |
| "grad_norm": 3.6420819920454246, |
| "learning_rate": 6.396588486140725e-08, |
| "loss": 0.8877, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0128, |
| "grad_norm": 3.6190454711556015, |
| "learning_rate": 7.462686567164179e-08, |
| "loss": 0.8941, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0144, |
| "grad_norm": 3.496287565902599, |
| "learning_rate": 8.528784648187634e-08, |
| "loss": 0.8912, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 3.4493514506690404, |
| "learning_rate": 9.59488272921109e-08, |
| "loss": 0.9044, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0176, |
| "grad_norm": 3.5793331150788594, |
| "learning_rate": 1.0660980810234542e-07, |
| "loss": 0.8867, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.0192, |
| "grad_norm": 3.575767404651874, |
| "learning_rate": 1.1727078891257997e-07, |
| "loss": 0.8774, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0208, |
| "grad_norm": 3.359144887048707, |
| "learning_rate": 1.279317697228145e-07, |
| "loss": 0.8802, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.0224, |
| "grad_norm": 3.5053119807841733, |
| "learning_rate": 1.3859275053304905e-07, |
| "loss": 0.9283, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.024, |
| "grad_norm": 3.359709007748787, |
| "learning_rate": 1.4925373134328358e-07, |
| "loss": 0.924, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0256, |
| "grad_norm": 3.477654638120491, |
| "learning_rate": 1.5991471215351813e-07, |
| "loss": 0.9346, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0272, |
| "grad_norm": 3.406446613844849, |
| "learning_rate": 1.7057569296375268e-07, |
| "loss": 0.9043, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.0288, |
| "grad_norm": 3.313197232515461, |
| "learning_rate": 1.812366737739872e-07, |
| "loss": 0.8843, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0304, |
| "grad_norm": 3.4020111065038345, |
| "learning_rate": 1.918976545842218e-07, |
| "loss": 0.8978, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 3.4825014260059666, |
| "learning_rate": 2.0255863539445632e-07, |
| "loss": 0.934, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0336, |
| "grad_norm": 3.0230420303734395, |
| "learning_rate": 2.1321961620469084e-07, |
| "loss": 0.8744, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.0352, |
| "grad_norm": 3.024255215285248, |
| "learning_rate": 2.2388059701492537e-07, |
| "loss": 0.9069, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.0368, |
| "grad_norm": 2.879070204798837, |
| "learning_rate": 2.3454157782515995e-07, |
| "loss": 0.9035, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.0384, |
| "grad_norm": 2.790682421381564, |
| "learning_rate": 2.4520255863539447e-07, |
| "loss": 0.8457, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 2.8724037770448234, |
| "learning_rate": 2.55863539445629e-07, |
| "loss": 0.8928, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.0416, |
| "grad_norm": 2.844540912682716, |
| "learning_rate": 2.665245202558635e-07, |
| "loss": 0.864, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.0432, |
| "grad_norm": 2.89503566391836, |
| "learning_rate": 2.771855010660981e-07, |
| "loss": 0.9328, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.0448, |
| "grad_norm": 2.759272854755458, |
| "learning_rate": 2.8784648187633263e-07, |
| "loss": 0.8844, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.0464, |
| "grad_norm": 2.2083388745590598, |
| "learning_rate": 2.9850746268656716e-07, |
| "loss": 0.8057, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 2.1699446591907217, |
| "learning_rate": 3.0916844349680174e-07, |
| "loss": 0.8607, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0496, |
| "grad_norm": 2.05296694589419, |
| "learning_rate": 3.1982942430703626e-07, |
| "loss": 0.838, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.0512, |
| "grad_norm": 2.121805383333904, |
| "learning_rate": 3.3049040511727084e-07, |
| "loss": 0.8602, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.0528, |
| "grad_norm": 2.011739875446632, |
| "learning_rate": 3.4115138592750537e-07, |
| "loss": 0.8457, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.0544, |
| "grad_norm": 1.9523453367834913, |
| "learning_rate": 3.518123667377399e-07, |
| "loss": 0.8293, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.056, |
| "grad_norm": 1.981107101239095, |
| "learning_rate": 3.624733475479744e-07, |
| "loss": 0.8453, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.0576, |
| "grad_norm": 1.9118185905806608, |
| "learning_rate": 3.7313432835820895e-07, |
| "loss": 0.8702, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.0592, |
| "grad_norm": 1.9059114664746757, |
| "learning_rate": 3.837953091684436e-07, |
| "loss": 0.8539, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.0608, |
| "grad_norm": 1.945989919146672, |
| "learning_rate": 3.944562899786781e-07, |
| "loss": 0.8725, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.0624, |
| "grad_norm": 2.5159843027781186, |
| "learning_rate": 4.0511727078891263e-07, |
| "loss": 0.8626, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 2.5829022218410174, |
| "learning_rate": 4.1577825159914716e-07, |
| "loss": 0.8457, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0656, |
| "grad_norm": 1.8750845002422516, |
| "learning_rate": 4.264392324093817e-07, |
| "loss": 0.7791, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.0672, |
| "grad_norm": 1.5188407462797207, |
| "learning_rate": 4.371002132196162e-07, |
| "loss": 0.8197, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.0688, |
| "grad_norm": 1.6521693591262758, |
| "learning_rate": 4.4776119402985074e-07, |
| "loss": 0.7645, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.0704, |
| "grad_norm": 1.815538999694159, |
| "learning_rate": 4.5842217484008537e-07, |
| "loss": 0.7951, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.072, |
| "grad_norm": 2.018098877914276, |
| "learning_rate": 4.690831556503199e-07, |
| "loss": 0.8396, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.0736, |
| "grad_norm": 1.8812363250114577, |
| "learning_rate": 4.797441364605544e-07, |
| "loss": 0.778, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.0752, |
| "grad_norm": 1.6856485907071797, |
| "learning_rate": 4.904051172707889e-07, |
| "loss": 0.8552, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.0768, |
| "grad_norm": 1.6105066317850283, |
| "learning_rate": 5.010660980810235e-07, |
| "loss": 0.7828, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.0784, |
| "grad_norm": 1.3835950677015068, |
| "learning_rate": 5.11727078891258e-07, |
| "loss": 0.7733, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 1.211651669134244, |
| "learning_rate": 5.223880597014925e-07, |
| "loss": 0.7857, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0816, |
| "grad_norm": 1.1514550282299225, |
| "learning_rate": 5.33049040511727e-07, |
| "loss": 0.8268, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.0832, |
| "grad_norm": 1.2595752472877826, |
| "learning_rate": 5.437100213219617e-07, |
| "loss": 0.8165, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.0848, |
| "grad_norm": 1.3240720993253337, |
| "learning_rate": 5.543710021321962e-07, |
| "loss": 0.791, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.0864, |
| "grad_norm": 1.4966442377104252, |
| "learning_rate": 5.650319829424307e-07, |
| "loss": 0.7705, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.088, |
| "grad_norm": 1.5713862721148337, |
| "learning_rate": 5.756929637526653e-07, |
| "loss": 0.7663, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.0896, |
| "grad_norm": 1.495777580142115, |
| "learning_rate": 5.863539445628998e-07, |
| "loss": 0.8122, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.0912, |
| "grad_norm": 1.2257308102132334, |
| "learning_rate": 5.970149253731343e-07, |
| "loss": 0.7651, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.0928, |
| "grad_norm": 1.0472266320437198, |
| "learning_rate": 6.076759061833689e-07, |
| "loss": 0.731, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.0944, |
| "grad_norm": 1.1407667161540418, |
| "learning_rate": 6.183368869936035e-07, |
| "loss": 0.7969, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 1.3216886366508096, |
| "learning_rate": 6.28997867803838e-07, |
| "loss": 0.7604, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0976, |
| "grad_norm": 1.3049071488059805, |
| "learning_rate": 6.396588486140725e-07, |
| "loss": 0.744, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.0992, |
| "grad_norm": 1.2834199728358815, |
| "learning_rate": 6.50319829424307e-07, |
| "loss": 0.7262, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.1008, |
| "grad_norm": 1.1410102831594338, |
| "learning_rate": 6.609808102345417e-07, |
| "loss": 0.7415, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.1024, |
| "grad_norm": 0.9677160586738733, |
| "learning_rate": 6.716417910447762e-07, |
| "loss": 0.7602, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.104, |
| "grad_norm": 0.9587451857542828, |
| "learning_rate": 6.823027718550107e-07, |
| "loss": 0.7119, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.1056, |
| "grad_norm": 0.9763862874537796, |
| "learning_rate": 6.929637526652453e-07, |
| "loss": 0.7234, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.1072, |
| "grad_norm": 1.077711976878752, |
| "learning_rate": 7.036247334754798e-07, |
| "loss": 0.7635, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.1088, |
| "grad_norm": 1.0324752460112436, |
| "learning_rate": 7.142857142857143e-07, |
| "loss": 0.69, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.1104, |
| "grad_norm": 0.9354890541545591, |
| "learning_rate": 7.249466950959488e-07, |
| "loss": 0.7202, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 0.7898520659815029, |
| "learning_rate": 7.356076759061834e-07, |
| "loss": 0.6578, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.1136, |
| "grad_norm": 0.8073968816858748, |
| "learning_rate": 7.462686567164179e-07, |
| "loss": 0.7249, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.1152, |
| "grad_norm": 0.7814000776187614, |
| "learning_rate": 7.569296375266526e-07, |
| "loss": 0.6911, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.1168, |
| "grad_norm": 0.8545484790533285, |
| "learning_rate": 7.675906183368872e-07, |
| "loss": 0.7123, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.1184, |
| "grad_norm": 0.8382679376651664, |
| "learning_rate": 7.782515991471217e-07, |
| "loss": 0.6986, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 0.8634452208398014, |
| "learning_rate": 7.889125799573562e-07, |
| "loss": 0.7119, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.1216, |
| "grad_norm": 0.811486309668032, |
| "learning_rate": 7.995735607675907e-07, |
| "loss": 0.6783, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.1232, |
| "grad_norm": 0.7304908349636826, |
| "learning_rate": 8.102345415778253e-07, |
| "loss": 0.6484, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.1248, |
| "grad_norm": 0.7949603313914639, |
| "learning_rate": 8.208955223880598e-07, |
| "loss": 0.6771, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.1264, |
| "grad_norm": 0.7688327380249824, |
| "learning_rate": 8.315565031982943e-07, |
| "loss": 0.6832, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 0.8764394953703323, |
| "learning_rate": 8.422174840085288e-07, |
| "loss": 0.7174, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.1296, |
| "grad_norm": 0.8220509077328104, |
| "learning_rate": 8.528784648187634e-07, |
| "loss": 0.6862, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.1312, |
| "grad_norm": 0.7947854466997331, |
| "learning_rate": 8.635394456289979e-07, |
| "loss": 0.6696, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.1328, |
| "grad_norm": 0.7349807053318366, |
| "learning_rate": 8.742004264392324e-07, |
| "loss": 0.6782, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.1344, |
| "grad_norm": 0.7171194189725714, |
| "learning_rate": 8.848614072494669e-07, |
| "loss": 0.6538, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.136, |
| "grad_norm": 0.7262304453867153, |
| "learning_rate": 8.955223880597015e-07, |
| "loss": 0.6716, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.1376, |
| "grad_norm": 0.7313472659286344, |
| "learning_rate": 9.06183368869936e-07, |
| "loss": 0.6937, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.1392, |
| "grad_norm": 0.770645562826172, |
| "learning_rate": 9.168443496801707e-07, |
| "loss": 0.6659, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.1408, |
| "grad_norm": 0.783346316746928, |
| "learning_rate": 9.275053304904053e-07, |
| "loss": 0.7053, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.1424, |
| "grad_norm": 0.6991667565530584, |
| "learning_rate": 9.381663113006398e-07, |
| "loss": 0.6163, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 0.6930907823765393, |
| "learning_rate": 9.488272921108743e-07, |
| "loss": 0.6844, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.1456, |
| "grad_norm": 0.7620232016264051, |
| "learning_rate": 9.594882729211088e-07, |
| "loss": 0.6752, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.1472, |
| "grad_norm": 0.743140636169742, |
| "learning_rate": 9.701492537313434e-07, |
| "loss": 0.6798, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.1488, |
| "grad_norm": 0.7042055777911003, |
| "learning_rate": 9.808102345415779e-07, |
| "loss": 0.6542, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.1504, |
| "grad_norm": 0.7050683591332227, |
| "learning_rate": 9.914712153518124e-07, |
| "loss": 0.6467, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.152, |
| "grad_norm": 0.7004797614822288, |
| "learning_rate": 1.002132196162047e-06, |
| "loss": 0.6561, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.1536, |
| "grad_norm": 0.7244574630433713, |
| "learning_rate": 1.0127931769722815e-06, |
| "loss": 0.6752, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.1552, |
| "grad_norm": 0.6961978517264288, |
| "learning_rate": 1.023454157782516e-06, |
| "loss": 0.6521, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.1568, |
| "grad_norm": 0.7180269667132341, |
| "learning_rate": 1.0341151385927505e-06, |
| "loss": 0.6491, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.1584, |
| "grad_norm": 0.7227268220641379, |
| "learning_rate": 1.044776119402985e-06, |
| "loss": 0.6808, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.7181870774111148, |
| "learning_rate": 1.0554371002132196e-06, |
| "loss": 0.6731, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1616, |
| "grad_norm": 0.728313466676549, |
| "learning_rate": 1.066098081023454e-06, |
| "loss": 0.6668, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.1632, |
| "grad_norm": 0.7371923851925003, |
| "learning_rate": 1.0767590618336886e-06, |
| "loss": 0.6443, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.1648, |
| "grad_norm": 0.7449273360955099, |
| "learning_rate": 1.0874200426439234e-06, |
| "loss": 0.6697, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.1664, |
| "grad_norm": 0.7087640871697763, |
| "learning_rate": 1.0980810234541579e-06, |
| "loss": 0.6868, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.168, |
| "grad_norm": 0.7336730798309535, |
| "learning_rate": 1.1087420042643924e-06, |
| "loss": 0.6464, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.1696, |
| "grad_norm": 0.7291761151685803, |
| "learning_rate": 1.119402985074627e-06, |
| "loss": 0.6577, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.1712, |
| "grad_norm": 0.7305168985226156, |
| "learning_rate": 1.1300639658848615e-06, |
| "loss": 0.7146, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.1728, |
| "grad_norm": 0.7296315597098482, |
| "learning_rate": 1.140724946695096e-06, |
| "loss": 0.6683, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.1744, |
| "grad_norm": 0.7097744903484696, |
| "learning_rate": 1.1513859275053305e-06, |
| "loss": 0.6398, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 0.7269961360067623, |
| "learning_rate": 1.162046908315565e-06, |
| "loss": 0.674, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.1776, |
| "grad_norm": 0.7170312350089441, |
| "learning_rate": 1.1727078891257996e-06, |
| "loss": 0.6785, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.1792, |
| "grad_norm": 0.7487760970400611, |
| "learning_rate": 1.183368869936034e-06, |
| "loss": 0.7089, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.1808, |
| "grad_norm": 0.7305797004043646, |
| "learning_rate": 1.1940298507462686e-06, |
| "loss": 0.6718, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.1824, |
| "grad_norm": 0.7069812667121284, |
| "learning_rate": 1.2046908315565034e-06, |
| "loss": 0.6402, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.184, |
| "grad_norm": 0.7504988512166512, |
| "learning_rate": 1.2153518123667379e-06, |
| "loss": 0.6572, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.1856, |
| "grad_norm": 0.7106950161112279, |
| "learning_rate": 1.2260127931769724e-06, |
| "loss": 0.6519, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.1872, |
| "grad_norm": 0.7708646405486708, |
| "learning_rate": 1.236673773987207e-06, |
| "loss": 0.6763, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.1888, |
| "grad_norm": 0.7035298180344508, |
| "learning_rate": 1.2473347547974415e-06, |
| "loss": 0.671, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.1904, |
| "grad_norm": 0.7354734585683664, |
| "learning_rate": 1.257995735607676e-06, |
| "loss": 0.6427, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 0.7100024068696028, |
| "learning_rate": 1.2686567164179105e-06, |
| "loss": 0.6256, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.1936, |
| "grad_norm": 0.7579478672252553, |
| "learning_rate": 1.279317697228145e-06, |
| "loss": 0.6447, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.1952, |
| "grad_norm": 0.7392544814192312, |
| "learning_rate": 1.2899786780383796e-06, |
| "loss": 0.6377, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.1968, |
| "grad_norm": 0.729164847634116, |
| "learning_rate": 1.300639658848614e-06, |
| "loss": 0.6494, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.1984, |
| "grad_norm": 0.690759653524007, |
| "learning_rate": 1.3113006396588488e-06, |
| "loss": 0.6443, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.7160920208659657, |
| "learning_rate": 1.3219616204690834e-06, |
| "loss": 0.6411, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.2016, |
| "grad_norm": 0.720471593378039, |
| "learning_rate": 1.3326226012793179e-06, |
| "loss": 0.6209, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.2032, |
| "grad_norm": 0.7223708699809386, |
| "learning_rate": 1.3432835820895524e-06, |
| "loss": 0.6365, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.2048, |
| "grad_norm": 0.7153278093036772, |
| "learning_rate": 1.353944562899787e-06, |
| "loss": 0.6345, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.2064, |
| "grad_norm": 0.7019982722136778, |
| "learning_rate": 1.3646055437100215e-06, |
| "loss": 0.6134, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 0.7075092004932616, |
| "learning_rate": 1.375266524520256e-06, |
| "loss": 0.6388, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.2096, |
| "grad_norm": 0.7077101361641048, |
| "learning_rate": 1.3859275053304905e-06, |
| "loss": 0.6159, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.2112, |
| "grad_norm": 0.7059765399735562, |
| "learning_rate": 1.396588486140725e-06, |
| "loss": 0.606, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.2128, |
| "grad_norm": 0.7103532905249046, |
| "learning_rate": 1.4072494669509596e-06, |
| "loss": 0.6356, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.2144, |
| "grad_norm": 0.7318321126082343, |
| "learning_rate": 1.417910447761194e-06, |
| "loss": 0.6263, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.216, |
| "grad_norm": 0.6844147627067427, |
| "learning_rate": 1.4285714285714286e-06, |
| "loss": 0.6287, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.2176, |
| "grad_norm": 0.7235841968233678, |
| "learning_rate": 1.4392324093816632e-06, |
| "loss": 0.6186, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.2192, |
| "grad_norm": 0.7319045516178274, |
| "learning_rate": 1.4498933901918977e-06, |
| "loss": 0.6425, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.2208, |
| "grad_norm": 0.7375286593228014, |
| "learning_rate": 1.4605543710021322e-06, |
| "loss": 0.6119, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.2224, |
| "grad_norm": 0.7275264913039928, |
| "learning_rate": 1.4712153518123667e-06, |
| "loss": 0.6107, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 0.7577612784375718, |
| "learning_rate": 1.4818763326226013e-06, |
| "loss": 0.6244, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.2256, |
| "grad_norm": 0.705941256323019, |
| "learning_rate": 1.4925373134328358e-06, |
| "loss": 0.6167, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.2272, |
| "grad_norm": 0.7186107088852769, |
| "learning_rate": 1.5031982942430705e-06, |
| "loss": 0.6305, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.2288, |
| "grad_norm": 0.7184996826843989, |
| "learning_rate": 1.5138592750533053e-06, |
| "loss": 0.6295, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.2304, |
| "grad_norm": 0.7099819162560698, |
| "learning_rate": 1.5245202558635398e-06, |
| "loss": 0.6287, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.232, |
| "grad_norm": 0.7389568794462862, |
| "learning_rate": 1.5351812366737743e-06, |
| "loss": 0.6396, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.2336, |
| "grad_norm": 0.7541550155510219, |
| "learning_rate": 1.5458422174840088e-06, |
| "loss": 0.6177, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.2352, |
| "grad_norm": 0.7651733268047308, |
| "learning_rate": 1.5565031982942434e-06, |
| "loss": 0.6444, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.2368, |
| "grad_norm": 0.7615011387468057, |
| "learning_rate": 1.5671641791044779e-06, |
| "loss": 0.6195, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.2384, |
| "grad_norm": 0.7414999526883134, |
| "learning_rate": 1.5778251599147124e-06, |
| "loss": 0.6369, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.7301319037721387, |
| "learning_rate": 1.588486140724947e-06, |
| "loss": 0.6438, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.2416, |
| "grad_norm": 0.7818881841606198, |
| "learning_rate": 1.5991471215351815e-06, |
| "loss": 0.623, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.2432, |
| "grad_norm": 0.7737881500179932, |
| "learning_rate": 1.609808102345416e-06, |
| "loss": 0.5829, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.2448, |
| "grad_norm": 0.7205381796324217, |
| "learning_rate": 1.6204690831556505e-06, |
| "loss": 0.6099, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.2464, |
| "grad_norm": 0.7318576981930963, |
| "learning_rate": 1.631130063965885e-06, |
| "loss": 0.6336, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.248, |
| "grad_norm": 0.8198291644853148, |
| "learning_rate": 1.6417910447761196e-06, |
| "loss": 0.6125, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.2496, |
| "grad_norm": 0.7485929239838601, |
| "learning_rate": 1.652452025586354e-06, |
| "loss": 0.6107, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.2512, |
| "grad_norm": 0.734476272045767, |
| "learning_rate": 1.6631130063965886e-06, |
| "loss": 0.5674, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.2528, |
| "grad_norm": 0.6993584392139643, |
| "learning_rate": 1.6737739872068232e-06, |
| "loss": 0.5984, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.2544, |
| "grad_norm": 0.7599971478300804, |
| "learning_rate": 1.6844349680170577e-06, |
| "loss": 0.6051, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 0.744260685014072, |
| "learning_rate": 1.6950959488272922e-06, |
| "loss": 0.6133, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.2576, |
| "grad_norm": 0.7192519279626429, |
| "learning_rate": 1.7057569296375267e-06, |
| "loss": 0.5707, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.2592, |
| "grad_norm": 0.7872870172965513, |
| "learning_rate": 1.7164179104477613e-06, |
| "loss": 0.6267, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.2608, |
| "grad_norm": 0.7707728775648776, |
| "learning_rate": 1.7270788912579958e-06, |
| "loss": 0.6311, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.2624, |
| "grad_norm": 0.8356371962280772, |
| "learning_rate": 1.7377398720682303e-06, |
| "loss": 0.6, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.264, |
| "grad_norm": 0.7530958899947432, |
| "learning_rate": 1.7484008528784648e-06, |
| "loss": 0.6228, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.2656, |
| "grad_norm": 0.7629164202614659, |
| "learning_rate": 1.7590618336886994e-06, |
| "loss": 0.6173, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.2672, |
| "grad_norm": 0.827098028261084, |
| "learning_rate": 1.7697228144989339e-06, |
| "loss": 0.6007, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.2688, |
| "grad_norm": 0.7589188696810613, |
| "learning_rate": 1.7803837953091684e-06, |
| "loss": 0.6229, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.2704, |
| "grad_norm": 0.7589601883183021, |
| "learning_rate": 1.791044776119403e-06, |
| "loss": 0.6205, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 0.8288996421143208, |
| "learning_rate": 1.8017057569296375e-06, |
| "loss": 0.6181, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.2736, |
| "grad_norm": 0.7406268388466966, |
| "learning_rate": 1.812366737739872e-06, |
| "loss": 0.6008, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.2752, |
| "grad_norm": 0.7167010511482258, |
| "learning_rate": 1.8230277185501067e-06, |
| "loss": 0.6138, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.2768, |
| "grad_norm": 0.7977129098669627, |
| "learning_rate": 1.8336886993603415e-06, |
| "loss": 0.6398, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.2784, |
| "grad_norm": 0.8566534837663508, |
| "learning_rate": 1.844349680170576e-06, |
| "loss": 0.6397, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 0.7579341388238693, |
| "learning_rate": 1.8550106609808105e-06, |
| "loss": 0.6258, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.2816, |
| "grad_norm": 0.833538686471727, |
| "learning_rate": 1.865671641791045e-06, |
| "loss": 0.6178, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.2832, |
| "grad_norm": 0.776175666432481, |
| "learning_rate": 1.8763326226012796e-06, |
| "loss": 0.5811, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.2848, |
| "grad_norm": 0.7346297393699228, |
| "learning_rate": 1.886993603411514e-06, |
| "loss": 0.6211, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.2864, |
| "grad_norm": 0.7684302974057745, |
| "learning_rate": 1.8976545842217486e-06, |
| "loss": 0.6055, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 0.7411137167271561, |
| "learning_rate": 1.908315565031983e-06, |
| "loss": 0.6144, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.2896, |
| "grad_norm": 0.7354676749120596, |
| "learning_rate": 1.9189765458422177e-06, |
| "loss": 0.5896, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.2912, |
| "grad_norm": 0.7782977909380668, |
| "learning_rate": 1.929637526652452e-06, |
| "loss": 0.6155, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.2928, |
| "grad_norm": 0.7356159662149813, |
| "learning_rate": 1.9402985074626867e-06, |
| "loss": 0.6058, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.2944, |
| "grad_norm": 0.7453299967245929, |
| "learning_rate": 1.9509594882729213e-06, |
| "loss": 0.5688, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.296, |
| "grad_norm": 0.7415112555793427, |
| "learning_rate": 1.9616204690831558e-06, |
| "loss": 0.5822, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.2976, |
| "grad_norm": 0.7099834115712084, |
| "learning_rate": 1.9722814498933903e-06, |
| "loss": 0.584, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.2992, |
| "grad_norm": 0.7741998117558853, |
| "learning_rate": 1.982942430703625e-06, |
| "loss": 0.6286, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.3008, |
| "grad_norm": 0.7703941304690587, |
| "learning_rate": 1.9936034115138594e-06, |
| "loss": 0.5885, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.3024, |
| "grad_norm": 0.6971250203143535, |
| "learning_rate": 2.004264392324094e-06, |
| "loss": 0.5636, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 0.7805552614079694, |
| "learning_rate": 2.0149253731343284e-06, |
| "loss": 0.6077, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.3056, |
| "grad_norm": 0.8106414675734367, |
| "learning_rate": 2.025586353944563e-06, |
| "loss": 0.6196, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.3072, |
| "grad_norm": 0.7898048465687718, |
| "learning_rate": 2.0362473347547975e-06, |
| "loss": 0.6203, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.3088, |
| "grad_norm": 0.795622798580363, |
| "learning_rate": 2.046908315565032e-06, |
| "loss": 0.6128, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.3104, |
| "grad_norm": 0.739615365221959, |
| "learning_rate": 2.0575692963752665e-06, |
| "loss": 0.5656, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.312, |
| "grad_norm": 0.7451740864151697, |
| "learning_rate": 2.068230277185501e-06, |
| "loss": 0.6456, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.3136, |
| "grad_norm": 0.7620174503836828, |
| "learning_rate": 2.0788912579957356e-06, |
| "loss": 0.6231, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.3152, |
| "grad_norm": 0.7854516907193481, |
| "learning_rate": 2.08955223880597e-06, |
| "loss": 0.6043, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.3168, |
| "grad_norm": 0.6584891538592907, |
| "learning_rate": 2.1002132196162046e-06, |
| "loss": 0.5464, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.3184, |
| "grad_norm": 0.776844868408368, |
| "learning_rate": 2.110874200426439e-06, |
| "loss": 0.6057, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.7850990380214087, |
| "learning_rate": 2.1215351812366737e-06, |
| "loss": 0.5612, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3216, |
| "grad_norm": 0.6885916851336799, |
| "learning_rate": 2.132196162046908e-06, |
| "loss": 0.6019, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.3232, |
| "grad_norm": 0.841176153584393, |
| "learning_rate": 2.1428571428571427e-06, |
| "loss": 0.5731, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.3248, |
| "grad_norm": 0.7846478827660619, |
| "learning_rate": 2.1535181236673773e-06, |
| "loss": 0.5828, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.3264, |
| "grad_norm": 0.7428540418442645, |
| "learning_rate": 2.1641791044776118e-06, |
| "loss": 0.5661, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.328, |
| "grad_norm": 0.870995701553606, |
| "learning_rate": 2.1748400852878467e-06, |
| "loss": 0.6067, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.3296, |
| "grad_norm": 0.7778388771110853, |
| "learning_rate": 2.1855010660980813e-06, |
| "loss": 0.5713, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.3312, |
| "grad_norm": 0.7808137509105751, |
| "learning_rate": 2.1961620469083158e-06, |
| "loss": 0.565, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.3328, |
| "grad_norm": 0.853167109393607, |
| "learning_rate": 2.2068230277185503e-06, |
| "loss": 0.5857, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.3344, |
| "grad_norm": 0.734590731677972, |
| "learning_rate": 2.217484008528785e-06, |
| "loss": 0.5687, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.336, |
| "grad_norm": 0.802493654740643, |
| "learning_rate": 2.2281449893390194e-06, |
| "loss": 0.5973, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.3376, |
| "grad_norm": 0.763288832765317, |
| "learning_rate": 2.238805970149254e-06, |
| "loss": 0.5878, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.3392, |
| "grad_norm": 0.8031347437576318, |
| "learning_rate": 2.2494669509594884e-06, |
| "loss": 0.6072, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.3408, |
| "grad_norm": 0.8434571464754188, |
| "learning_rate": 2.260127931769723e-06, |
| "loss": 0.5953, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.3424, |
| "grad_norm": 0.8355518526289846, |
| "learning_rate": 2.2707889125799575e-06, |
| "loss": 0.6361, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.344, |
| "grad_norm": 0.7706620507762623, |
| "learning_rate": 2.281449893390192e-06, |
| "loss": 0.5814, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.3456, |
| "grad_norm": 0.7638464771267637, |
| "learning_rate": 2.2921108742004265e-06, |
| "loss": 0.5506, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.3472, |
| "grad_norm": 0.7814296599045286, |
| "learning_rate": 2.302771855010661e-06, |
| "loss": 0.5826, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.3488, |
| "grad_norm": 0.7845339762934284, |
| "learning_rate": 2.3134328358208956e-06, |
| "loss": 0.6091, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.3504, |
| "grad_norm": 0.7560660254775938, |
| "learning_rate": 2.32409381663113e-06, |
| "loss": 0.5967, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 0.7508649264529814, |
| "learning_rate": 2.3347547974413646e-06, |
| "loss": 0.6053, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.3536, |
| "grad_norm": 0.7210458508613927, |
| "learning_rate": 2.345415778251599e-06, |
| "loss": 0.5642, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.3552, |
| "grad_norm": 0.736381434405332, |
| "learning_rate": 2.3560767590618337e-06, |
| "loss": 0.5673, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.3568, |
| "grad_norm": 0.7601497997101813, |
| "learning_rate": 2.366737739872068e-06, |
| "loss": 0.5991, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.3584, |
| "grad_norm": 0.7084448686385765, |
| "learning_rate": 2.3773987206823027e-06, |
| "loss": 0.5662, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 0.7961580690261183, |
| "learning_rate": 2.3880597014925373e-06, |
| "loss": 0.6292, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.3616, |
| "grad_norm": 0.74581691114757, |
| "learning_rate": 2.398720682302772e-06, |
| "loss": 0.545, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.3632, |
| "grad_norm": 0.8058971329947406, |
| "learning_rate": 2.4093816631130067e-06, |
| "loss": 0.5996, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.3648, |
| "grad_norm": 0.760064099858659, |
| "learning_rate": 2.4200426439232413e-06, |
| "loss": 0.603, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.3664, |
| "grad_norm": 0.7549868329039784, |
| "learning_rate": 2.4307036247334758e-06, |
| "loss": 0.6114, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.368, |
| "grad_norm": 0.7720717264723672, |
| "learning_rate": 2.4413646055437103e-06, |
| "loss": 0.5941, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.3696, |
| "grad_norm": 0.7616410748286908, |
| "learning_rate": 2.452025586353945e-06, |
| "loss": 0.5533, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.3712, |
| "grad_norm": 0.7504708721409487, |
| "learning_rate": 2.4626865671641794e-06, |
| "loss": 0.5766, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.3728, |
| "grad_norm": 0.7762657496338241, |
| "learning_rate": 2.473347547974414e-06, |
| "loss": 0.5978, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.3744, |
| "grad_norm": 0.7831198389239687, |
| "learning_rate": 2.4840085287846484e-06, |
| "loss": 0.6233, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.376, |
| "grad_norm": 0.7681970903498869, |
| "learning_rate": 2.494669509594883e-06, |
| "loss": 0.6009, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.3776, |
| "grad_norm": 0.7337967052937636, |
| "learning_rate": 2.5053304904051175e-06, |
| "loss": 0.5977, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.3792, |
| "grad_norm": 0.7813473994552436, |
| "learning_rate": 2.515991471215352e-06, |
| "loss": 0.5749, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.3808, |
| "grad_norm": 0.7161895053490572, |
| "learning_rate": 2.5266524520255865e-06, |
| "loss": 0.5761, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.3824, |
| "grad_norm": 0.8038905440863002, |
| "learning_rate": 2.537313432835821e-06, |
| "loss": 0.6717, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 0.7852844599544667, |
| "learning_rate": 2.5479744136460556e-06, |
| "loss": 0.5716, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.3856, |
| "grad_norm": 0.7950726788936678, |
| "learning_rate": 2.55863539445629e-06, |
| "loss": 0.5855, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.3872, |
| "grad_norm": 0.8502336760749962, |
| "learning_rate": 2.5692963752665246e-06, |
| "loss": 0.5564, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.3888, |
| "grad_norm": 0.7742437970001474, |
| "learning_rate": 2.579957356076759e-06, |
| "loss": 0.571, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.3904, |
| "grad_norm": 0.8648393139239822, |
| "learning_rate": 2.5906183368869937e-06, |
| "loss": 0.5728, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.392, |
| "grad_norm": 0.6979337264095263, |
| "learning_rate": 2.601279317697228e-06, |
| "loss": 0.5495, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.3936, |
| "grad_norm": 0.7256603223068586, |
| "learning_rate": 2.6119402985074627e-06, |
| "loss": 0.5439, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.3952, |
| "grad_norm": 0.7918254415993081, |
| "learning_rate": 2.6226012793176977e-06, |
| "loss": 0.5709, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.3968, |
| "grad_norm": 0.8347112020129184, |
| "learning_rate": 2.6332622601279318e-06, |
| "loss": 0.5987, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.3984, |
| "grad_norm": 0.7603267236888341, |
| "learning_rate": 2.6439232409381667e-06, |
| "loss": 0.5615, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.81780496643962, |
| "learning_rate": 2.654584221748401e-06, |
| "loss": 0.6007, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.4016, |
| "grad_norm": 0.7791704293627059, |
| "learning_rate": 2.6652452025586358e-06, |
| "loss": 0.5799, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.4032, |
| "grad_norm": 0.7488384390242884, |
| "learning_rate": 2.6759061833688703e-06, |
| "loss": 0.56, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.4048, |
| "grad_norm": 0.7597491390169727, |
| "learning_rate": 2.686567164179105e-06, |
| "loss": 0.5794, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.4064, |
| "grad_norm": 0.7923958441674507, |
| "learning_rate": 2.6972281449893394e-06, |
| "loss": 0.5708, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.408, |
| "grad_norm": 0.7729640423091064, |
| "learning_rate": 2.707889125799574e-06, |
| "loss": 0.6063, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.4096, |
| "grad_norm": 0.8053586551349151, |
| "learning_rate": 2.7185501066098084e-06, |
| "loss": 0.6016, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.4112, |
| "grad_norm": 0.8039885381135673, |
| "learning_rate": 2.729211087420043e-06, |
| "loss": 0.5848, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.4128, |
| "grad_norm": 0.9513014083370441, |
| "learning_rate": 2.7398720682302775e-06, |
| "loss": 0.6345, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.4144, |
| "grad_norm": 0.7742998578313859, |
| "learning_rate": 2.750533049040512e-06, |
| "loss": 0.5694, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 0.7641716911555947, |
| "learning_rate": 2.7611940298507465e-06, |
| "loss": 0.587, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.4176, |
| "grad_norm": 0.821831437603987, |
| "learning_rate": 2.771855010660981e-06, |
| "loss": 0.5875, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.4192, |
| "grad_norm": 0.7503591578304264, |
| "learning_rate": 2.7825159914712156e-06, |
| "loss": 0.6075, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.4208, |
| "grad_norm": 0.719957241909583, |
| "learning_rate": 2.79317697228145e-06, |
| "loss": 0.5803, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.4224, |
| "grad_norm": 0.7451469464502543, |
| "learning_rate": 2.8038379530916846e-06, |
| "loss": 0.5663, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.424, |
| "grad_norm": 0.80423744402681, |
| "learning_rate": 2.814498933901919e-06, |
| "loss": 0.6023, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.4256, |
| "grad_norm": 0.7971922540683725, |
| "learning_rate": 2.825159914712154e-06, |
| "loss": 0.5985, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.4272, |
| "grad_norm": 0.769676645099999, |
| "learning_rate": 2.835820895522388e-06, |
| "loss": 0.566, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.4288, |
| "grad_norm": 0.7646967688330923, |
| "learning_rate": 2.846481876332623e-06, |
| "loss": 0.5374, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.4304, |
| "grad_norm": 0.7833466676671014, |
| "learning_rate": 2.8571428571428573e-06, |
| "loss": 0.5523, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.432, |
| "grad_norm": 0.7391354432900153, |
| "learning_rate": 2.867803837953092e-06, |
| "loss": 0.5619, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.4336, |
| "grad_norm": 0.8465814174774607, |
| "learning_rate": 2.8784648187633263e-06, |
| "loss": 0.6245, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.4352, |
| "grad_norm": 0.7805615042465259, |
| "learning_rate": 2.8891257995735613e-06, |
| "loss": 0.6124, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.4368, |
| "grad_norm": 0.8174129655040298, |
| "learning_rate": 2.8997867803837954e-06, |
| "loss": 0.5967, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.4384, |
| "grad_norm": 0.7523355194081328, |
| "learning_rate": 2.9104477611940303e-06, |
| "loss": 0.5763, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 0.7377514276234232, |
| "learning_rate": 2.9211087420042644e-06, |
| "loss": 0.5545, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.4416, |
| "grad_norm": 0.8215970879872154, |
| "learning_rate": 2.9317697228144994e-06, |
| "loss": 0.5889, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.4432, |
| "grad_norm": 0.8344937751406882, |
| "learning_rate": 2.9424307036247335e-06, |
| "loss": 0.6108, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.4448, |
| "grad_norm": 0.7745579794081016, |
| "learning_rate": 2.9530916844349684e-06, |
| "loss": 0.5786, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.4464, |
| "grad_norm": 0.804786843475639, |
| "learning_rate": 2.9637526652452025e-06, |
| "loss": 0.5707, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 0.8181561905182807, |
| "learning_rate": 2.9744136460554375e-06, |
| "loss": 0.6074, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.4496, |
| "grad_norm": 0.8955915763396078, |
| "learning_rate": 2.9850746268656716e-06, |
| "loss": 0.5841, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.4512, |
| "grad_norm": 0.7284050946807041, |
| "learning_rate": 2.9957356076759065e-06, |
| "loss": 0.5554, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.4528, |
| "grad_norm": 0.8146518309559156, |
| "learning_rate": 3.006396588486141e-06, |
| "loss": 0.5881, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.4544, |
| "grad_norm": 0.7768178946322084, |
| "learning_rate": 3.0170575692963756e-06, |
| "loss": 0.5533, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.456, |
| "grad_norm": 0.7696150804059677, |
| "learning_rate": 3.0277185501066105e-06, |
| "loss": 0.573, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.4576, |
| "grad_norm": 0.7691229758539965, |
| "learning_rate": 3.0383795309168446e-06, |
| "loss": 0.5523, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.4592, |
| "grad_norm": 0.742131995359427, |
| "learning_rate": 3.0490405117270796e-06, |
| "loss": 0.5502, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.4608, |
| "grad_norm": 0.7655172897040609, |
| "learning_rate": 3.0597014925373137e-06, |
| "loss": 0.5758, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.4624, |
| "grad_norm": 0.7691849080534547, |
| "learning_rate": 3.0703624733475486e-06, |
| "loss": 0.5791, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.464, |
| "grad_norm": 0.7818739523919556, |
| "learning_rate": 3.0810234541577827e-06, |
| "loss": 0.5623, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.4656, |
| "grad_norm": 0.819560066863059, |
| "learning_rate": 3.0916844349680177e-06, |
| "loss": 0.5939, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.4672, |
| "grad_norm": 0.8169731024775341, |
| "learning_rate": 3.1023454157782518e-06, |
| "loss": 0.5894, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.4688, |
| "grad_norm": 0.7865620876740786, |
| "learning_rate": 3.1130063965884867e-06, |
| "loss": 0.5864, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.4704, |
| "grad_norm": 0.8084169911506447, |
| "learning_rate": 3.123667377398721e-06, |
| "loss": 0.5836, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.472, |
| "grad_norm": 0.8443142773932891, |
| "learning_rate": 3.1343283582089558e-06, |
| "loss": 0.587, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.4736, |
| "grad_norm": 0.8987651915438104, |
| "learning_rate": 3.14498933901919e-06, |
| "loss": 0.5771, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.4752, |
| "grad_norm": 0.860270543929545, |
| "learning_rate": 3.155650319829425e-06, |
| "loss": 0.5821, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.4768, |
| "grad_norm": 0.824939899329991, |
| "learning_rate": 3.166311300639659e-06, |
| "loss": 0.5481, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.4784, |
| "grad_norm": 0.8672862862240402, |
| "learning_rate": 3.176972281449894e-06, |
| "loss": 0.5575, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.8182300142439433, |
| "learning_rate": 3.187633262260128e-06, |
| "loss": 0.5815, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.4816, |
| "grad_norm": 0.7380772504538786, |
| "learning_rate": 3.198294243070363e-06, |
| "loss": 0.5893, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.4832, |
| "grad_norm": 0.7977879234410605, |
| "learning_rate": 3.208955223880597e-06, |
| "loss": 0.5423, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.4848, |
| "grad_norm": 0.7369770503071659, |
| "learning_rate": 3.219616204690832e-06, |
| "loss": 0.5707, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.4864, |
| "grad_norm": 0.8429735563676172, |
| "learning_rate": 3.230277185501066e-06, |
| "loss": 0.5371, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.488, |
| "grad_norm": 0.7462981202694815, |
| "learning_rate": 3.240938166311301e-06, |
| "loss": 0.5569, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.4896, |
| "grad_norm": 0.8635339096769287, |
| "learning_rate": 3.251599147121535e-06, |
| "loss": 0.5608, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.4912, |
| "grad_norm": 0.7803060364394415, |
| "learning_rate": 3.26226012793177e-06, |
| "loss": 0.5667, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.4928, |
| "grad_norm": 0.8494397388711882, |
| "learning_rate": 3.272921108742004e-06, |
| "loss": 0.5588, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.4944, |
| "grad_norm": 0.7832352945833961, |
| "learning_rate": 3.283582089552239e-06, |
| "loss": 0.5299, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.496, |
| "grad_norm": 0.8062104067281601, |
| "learning_rate": 3.2942430703624733e-06, |
| "loss": 0.5696, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.4976, |
| "grad_norm": 0.8054657716765189, |
| "learning_rate": 3.304904051172708e-06, |
| "loss": 0.5328, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.4992, |
| "grad_norm": 0.7876354196805521, |
| "learning_rate": 3.3155650319829423e-06, |
| "loss": 0.546, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.5008, |
| "grad_norm": 0.8126381266985822, |
| "learning_rate": 3.3262260127931773e-06, |
| "loss": 0.6014, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.5024, |
| "grad_norm": 0.8445398126335238, |
| "learning_rate": 3.336886993603412e-06, |
| "loss": 0.5658, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.504, |
| "grad_norm": 0.8304476975533555, |
| "learning_rate": 3.3475479744136463e-06, |
| "loss": 0.5805, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.5056, |
| "grad_norm": 0.7547981390881123, |
| "learning_rate": 3.3582089552238813e-06, |
| "loss": 0.5433, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.5072, |
| "grad_norm": 0.8181361264873499, |
| "learning_rate": 3.3688699360341154e-06, |
| "loss": 0.5738, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.5088, |
| "grad_norm": 0.8462943112139383, |
| "learning_rate": 3.3795309168443503e-06, |
| "loss": 0.5509, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.5104, |
| "grad_norm": 0.7922383013268821, |
| "learning_rate": 3.3901918976545844e-06, |
| "loss": 0.5693, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 0.8477252050554914, |
| "learning_rate": 3.4008528784648194e-06, |
| "loss": 0.5764, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.5136, |
| "grad_norm": 0.8094481547663196, |
| "learning_rate": 3.4115138592750535e-06, |
| "loss": 0.5353, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.5152, |
| "grad_norm": 0.8864054471322013, |
| "learning_rate": 3.4221748400852884e-06, |
| "loss": 0.575, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.5168, |
| "grad_norm": 0.7876415032718879, |
| "learning_rate": 3.4328358208955225e-06, |
| "loss": 0.5414, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.5184, |
| "grad_norm": 0.8210715320082055, |
| "learning_rate": 3.4434968017057575e-06, |
| "loss": 0.5521, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 0.8371168961714202, |
| "learning_rate": 3.4541577825159916e-06, |
| "loss": 0.5827, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.5216, |
| "grad_norm": 0.773292575986764, |
| "learning_rate": 3.4648187633262265e-06, |
| "loss": 0.5749, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.5232, |
| "grad_norm": 0.850689297987853, |
| "learning_rate": 3.4754797441364606e-06, |
| "loss": 0.5691, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.5248, |
| "grad_norm": 0.7507404127475026, |
| "learning_rate": 3.4861407249466956e-06, |
| "loss": 0.5269, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.5264, |
| "grad_norm": 0.7845396303400523, |
| "learning_rate": 3.4968017057569297e-06, |
| "loss": 0.5565, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.528, |
| "grad_norm": 0.7824098744828355, |
| "learning_rate": 3.5074626865671646e-06, |
| "loss": 0.5706, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.5296, |
| "grad_norm": 0.7342948796645827, |
| "learning_rate": 3.5181236673773987e-06, |
| "loss": 0.5321, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.5312, |
| "grad_norm": 0.8194834791920059, |
| "learning_rate": 3.5287846481876337e-06, |
| "loss": 0.5535, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.5328, |
| "grad_norm": 0.7847452627526185, |
| "learning_rate": 3.5394456289978678e-06, |
| "loss": 0.5584, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.5344, |
| "grad_norm": 0.8998666925556803, |
| "learning_rate": 3.5501066098081027e-06, |
| "loss": 0.5566, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.536, |
| "grad_norm": 0.7941633997637204, |
| "learning_rate": 3.560767590618337e-06, |
| "loss": 0.5991, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.5376, |
| "grad_norm": 0.881854884085909, |
| "learning_rate": 3.5714285714285718e-06, |
| "loss": 0.5689, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.5392, |
| "grad_norm": 0.7882024113715358, |
| "learning_rate": 3.582089552238806e-06, |
| "loss": 0.5642, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.5408, |
| "grad_norm": 0.8657625305194023, |
| "learning_rate": 3.592750533049041e-06, |
| "loss": 0.5461, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.5424, |
| "grad_norm": 0.8581207132915274, |
| "learning_rate": 3.603411513859275e-06, |
| "loss": 0.5724, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 0.8859736113637721, |
| "learning_rate": 3.61407249466951e-06, |
| "loss": 0.5302, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.5456, |
| "grad_norm": 0.8073037679646752, |
| "learning_rate": 3.624733475479744e-06, |
| "loss": 0.5682, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.5472, |
| "grad_norm": 0.8692275414543921, |
| "learning_rate": 3.635394456289979e-06, |
| "loss": 0.5393, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.5488, |
| "grad_norm": 0.8368478492119548, |
| "learning_rate": 3.6460554371002135e-06, |
| "loss": 0.5657, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.5504, |
| "grad_norm": 0.7995247958601864, |
| "learning_rate": 3.656716417910448e-06, |
| "loss": 0.5556, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.552, |
| "grad_norm": 0.8276090020791603, |
| "learning_rate": 3.667377398720683e-06, |
| "loss": 0.545, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.5536, |
| "grad_norm": 0.7681514100554063, |
| "learning_rate": 3.678038379530917e-06, |
| "loss": 0.5442, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.5552, |
| "grad_norm": 0.804481990476876, |
| "learning_rate": 3.688699360341152e-06, |
| "loss": 0.6083, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.5568, |
| "grad_norm": 0.7405084001120108, |
| "learning_rate": 3.699360341151386e-06, |
| "loss": 0.5594, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.5584, |
| "grad_norm": 0.7772627463789789, |
| "learning_rate": 3.710021321961621e-06, |
| "loss": 0.575, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.7789355204470892, |
| "learning_rate": 3.720682302771855e-06, |
| "loss": 0.5725, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.5616, |
| "grad_norm": 0.8454290799867418, |
| "learning_rate": 3.73134328358209e-06, |
| "loss": 0.5542, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.5632, |
| "grad_norm": 0.7990810798027119, |
| "learning_rate": 3.742004264392324e-06, |
| "loss": 0.5258, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.5648, |
| "grad_norm": 0.8167499165126225, |
| "learning_rate": 3.752665245202559e-06, |
| "loss": 0.5774, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.5664, |
| "grad_norm": 0.8063317607872015, |
| "learning_rate": 3.7633262260127933e-06, |
| "loss": 0.5362, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.568, |
| "grad_norm": 0.7932749827016408, |
| "learning_rate": 3.773987206823028e-06, |
| "loss": 0.5334, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.5696, |
| "grad_norm": 0.8031722120801229, |
| "learning_rate": 3.7846481876332623e-06, |
| "loss": 0.549, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.5712, |
| "grad_norm": 0.805457309127876, |
| "learning_rate": 3.7953091684434973e-06, |
| "loss": 0.5457, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.5728, |
| "grad_norm": 0.8063931320319081, |
| "learning_rate": 3.8059701492537314e-06, |
| "loss": 0.5713, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.5744, |
| "grad_norm": 0.7733195413815381, |
| "learning_rate": 3.816631130063966e-06, |
| "loss": 0.5186, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 0.8113756279778045, |
| "learning_rate": 3.827292110874201e-06, |
| "loss": 0.5601, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.5776, |
| "grad_norm": 0.9030002101601021, |
| "learning_rate": 3.837953091684435e-06, |
| "loss": 0.596, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.5792, |
| "grad_norm": 0.7984289955625544, |
| "learning_rate": 3.84861407249467e-06, |
| "loss": 0.5447, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.5808, |
| "grad_norm": 0.8131316696626657, |
| "learning_rate": 3.859275053304904e-06, |
| "loss": 0.5661, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.5824, |
| "grad_norm": 0.796620771283217, |
| "learning_rate": 3.869936034115139e-06, |
| "loss": 0.5412, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.584, |
| "grad_norm": 0.8315896370386743, |
| "learning_rate": 3.8805970149253735e-06, |
| "loss": 0.5421, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.5856, |
| "grad_norm": 0.8010472482295572, |
| "learning_rate": 3.891257995735608e-06, |
| "loss": 0.5545, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.5872, |
| "grad_norm": 0.85710233342375, |
| "learning_rate": 3.9019189765458425e-06, |
| "loss": 0.5292, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.5888, |
| "grad_norm": 0.7800981161456916, |
| "learning_rate": 3.912579957356077e-06, |
| "loss": 0.5722, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.5904, |
| "grad_norm": 0.8120785689836847, |
| "learning_rate": 3.9232409381663116e-06, |
| "loss": 0.5221, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.592, |
| "grad_norm": 0.7501430227392313, |
| "learning_rate": 3.933901918976546e-06, |
| "loss": 0.5066, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.5936, |
| "grad_norm": 0.7687524109570407, |
| "learning_rate": 3.944562899786781e-06, |
| "loss": 0.5429, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.5952, |
| "grad_norm": 0.7508289935518603, |
| "learning_rate": 3.955223880597015e-06, |
| "loss": 0.5238, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.5968, |
| "grad_norm": 0.7591693655529905, |
| "learning_rate": 3.96588486140725e-06, |
| "loss": 0.5444, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.5984, |
| "grad_norm": 0.8451880038162318, |
| "learning_rate": 3.976545842217484e-06, |
| "loss": 0.5713, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.758974892187254, |
| "learning_rate": 3.987206823027719e-06, |
| "loss": 0.5465, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.6016, |
| "grad_norm": 0.8953319848826586, |
| "learning_rate": 3.997867803837953e-06, |
| "loss": 0.5952, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.6032, |
| "grad_norm": 0.831194906832879, |
| "learning_rate": 4.008528784648188e-06, |
| "loss": 0.5198, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.6048, |
| "grad_norm": 0.9163151609769153, |
| "learning_rate": 4.019189765458423e-06, |
| "loss": 0.5415, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.6064, |
| "grad_norm": 0.869200099149563, |
| "learning_rate": 4.029850746268657e-06, |
| "loss": 0.5626, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 0.8404925217554066, |
| "learning_rate": 4.040511727078892e-06, |
| "loss": 0.5302, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.6096, |
| "grad_norm": 0.7826710011465347, |
| "learning_rate": 4.051172707889126e-06, |
| "loss": 0.5352, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.6112, |
| "grad_norm": 0.8241550929780408, |
| "learning_rate": 4.061833688699361e-06, |
| "loss": 0.5374, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.6128, |
| "grad_norm": 0.7510788009103369, |
| "learning_rate": 4.072494669509595e-06, |
| "loss": 0.5391, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.6144, |
| "grad_norm": 0.8061130294153335, |
| "learning_rate": 4.08315565031983e-06, |
| "loss": 0.596, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.616, |
| "grad_norm": 0.8080429382071841, |
| "learning_rate": 4.093816631130064e-06, |
| "loss": 0.5326, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.6176, |
| "grad_norm": 0.8170177676336794, |
| "learning_rate": 4.104477611940299e-06, |
| "loss": 0.5571, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.6192, |
| "grad_norm": 0.8160013454370157, |
| "learning_rate": 4.115138592750533e-06, |
| "loss": 0.5292, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.6208, |
| "grad_norm": 0.7967263545203118, |
| "learning_rate": 4.125799573560768e-06, |
| "loss": 0.5626, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.6224, |
| "grad_norm": 0.8568044544627053, |
| "learning_rate": 4.136460554371002e-06, |
| "loss": 0.5274, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.624, |
| "grad_norm": 0.8066358962082286, |
| "learning_rate": 4.1471215351812375e-06, |
| "loss": 0.5488, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.6256, |
| "grad_norm": 0.9667800237647179, |
| "learning_rate": 4.157782515991471e-06, |
| "loss": 0.5474, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.6272, |
| "grad_norm": 0.8056755321027613, |
| "learning_rate": 4.1684434968017065e-06, |
| "loss": 0.5483, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.6288, |
| "grad_norm": 1.000045905913509, |
| "learning_rate": 4.17910447761194e-06, |
| "loss": 0.5391, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.6304, |
| "grad_norm": 0.8010942641365666, |
| "learning_rate": 4.1897654584221756e-06, |
| "loss": 0.5361, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.632, |
| "grad_norm": 0.9451955462134625, |
| "learning_rate": 4.200426439232409e-06, |
| "loss": 0.5408, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.6336, |
| "grad_norm": 0.7798088066161465, |
| "learning_rate": 4.211087420042645e-06, |
| "loss": 0.5189, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.6352, |
| "grad_norm": 0.9464532007112202, |
| "learning_rate": 4.221748400852878e-06, |
| "loss": 0.5442, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.6368, |
| "grad_norm": 0.9021258173152447, |
| "learning_rate": 4.232409381663114e-06, |
| "loss": 0.5776, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.6384, |
| "grad_norm": 0.9646933747683049, |
| "learning_rate": 4.243070362473347e-06, |
| "loss": 0.5534, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.8688602817866495, |
| "learning_rate": 4.253731343283583e-06, |
| "loss": 0.5216, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.6416, |
| "grad_norm": 0.9892060933344063, |
| "learning_rate": 4.264392324093816e-06, |
| "loss": 0.5332, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.6432, |
| "grad_norm": 0.9385981933705216, |
| "learning_rate": 4.275053304904052e-06, |
| "loss": 0.5665, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.6448, |
| "grad_norm": 1.0764580640875403, |
| "learning_rate": 4.2857142857142855e-06, |
| "loss": 0.5349, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.6464, |
| "grad_norm": 1.0040496956050853, |
| "learning_rate": 4.296375266524521e-06, |
| "loss": 0.5404, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.648, |
| "grad_norm": 0.8827309331973633, |
| "learning_rate": 4.3070362473347545e-06, |
| "loss": 0.5868, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.6496, |
| "grad_norm": 0.9809937804440383, |
| "learning_rate": 4.31769722814499e-06, |
| "loss": 0.5546, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.6512, |
| "grad_norm": 0.884351522075039, |
| "learning_rate": 4.3283582089552236e-06, |
| "loss": 0.5345, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.6528, |
| "grad_norm": 0.9790501988552092, |
| "learning_rate": 4.339019189765459e-06, |
| "loss": 0.5599, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.6544, |
| "grad_norm": 0.9071527284238047, |
| "learning_rate": 4.3496801705756935e-06, |
| "loss": 0.5054, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.656, |
| "grad_norm": 0.8445713569492348, |
| "learning_rate": 4.360341151385928e-06, |
| "loss": 0.5219, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.6576, |
| "grad_norm": 0.8737956626335754, |
| "learning_rate": 4.3710021321961625e-06, |
| "loss": 0.5413, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.6592, |
| "grad_norm": 0.8261342813870577, |
| "learning_rate": 4.381663113006397e-06, |
| "loss": 0.5539, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.6608, |
| "grad_norm": 0.8184073447460407, |
| "learning_rate": 4.3923240938166316e-06, |
| "loss": 0.5426, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.6624, |
| "grad_norm": 0.8366177996309456, |
| "learning_rate": 4.402985074626866e-06, |
| "loss": 0.5554, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.664, |
| "grad_norm": 0.8280357274609805, |
| "learning_rate": 4.413646055437101e-06, |
| "loss": 0.5308, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.6656, |
| "grad_norm": 0.8377902715368897, |
| "learning_rate": 4.424307036247335e-06, |
| "loss": 0.5456, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.6672, |
| "grad_norm": 0.8510022057043192, |
| "learning_rate": 4.43496801705757e-06, |
| "loss": 0.5325, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.6688, |
| "grad_norm": 0.9329195540707577, |
| "learning_rate": 4.445628997867804e-06, |
| "loss": 0.542, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.6704, |
| "grad_norm": 0.8224587637626823, |
| "learning_rate": 4.456289978678039e-06, |
| "loss": 0.5593, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 0.8512952980107195, |
| "learning_rate": 4.466950959488273e-06, |
| "loss": 0.5118, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.6736, |
| "grad_norm": 0.8607386380556552, |
| "learning_rate": 4.477611940298508e-06, |
| "loss": 0.5473, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.6752, |
| "grad_norm": 0.8411448840445225, |
| "learning_rate": 4.488272921108742e-06, |
| "loss": 0.5114, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.6768, |
| "grad_norm": 0.827972617616303, |
| "learning_rate": 4.498933901918977e-06, |
| "loss": 0.5571, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.6784, |
| "grad_norm": 0.8375199395465468, |
| "learning_rate": 4.509594882729211e-06, |
| "loss": 0.5976, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 0.8110166668311388, |
| "learning_rate": 4.520255863539446e-06, |
| "loss": 0.5348, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.6816, |
| "grad_norm": 0.7420626199354639, |
| "learning_rate": 4.53091684434968e-06, |
| "loss": 0.532, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.6832, |
| "grad_norm": 0.899693608967382, |
| "learning_rate": 4.541577825159915e-06, |
| "loss": 0.573, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.6848, |
| "grad_norm": 0.7700059252057434, |
| "learning_rate": 4.5522388059701495e-06, |
| "loss": 0.5118, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.6864, |
| "grad_norm": 0.9380211710233359, |
| "learning_rate": 4.562899786780384e-06, |
| "loss": 0.5977, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.688, |
| "grad_norm": 0.8526769684369551, |
| "learning_rate": 4.5735607675906185e-06, |
| "loss": 0.5487, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.6896, |
| "grad_norm": 0.8710529228457372, |
| "learning_rate": 4.584221748400853e-06, |
| "loss": 0.5353, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.6912, |
| "grad_norm": 0.8141253356345576, |
| "learning_rate": 4.5948827292110876e-06, |
| "loss": 0.5141, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.6928, |
| "grad_norm": 0.8368614135087445, |
| "learning_rate": 4.605543710021322e-06, |
| "loss": 0.5263, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.6944, |
| "grad_norm": 0.8137340425586644, |
| "learning_rate": 4.616204690831557e-06, |
| "loss": 0.5548, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.696, |
| "grad_norm": 0.8459034172420522, |
| "learning_rate": 4.626865671641791e-06, |
| "loss": 0.5343, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.6976, |
| "grad_norm": 0.7649917545547742, |
| "learning_rate": 4.637526652452026e-06, |
| "loss": 0.5399, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.6992, |
| "grad_norm": 0.7892001175990301, |
| "learning_rate": 4.64818763326226e-06, |
| "loss": 0.5149, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.7008, |
| "grad_norm": 0.8002693345532315, |
| "learning_rate": 4.658848614072495e-06, |
| "loss": 0.5286, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.7024, |
| "grad_norm": 0.7990955003417757, |
| "learning_rate": 4.669509594882729e-06, |
| "loss": 0.5366, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 0.7790282352454362, |
| "learning_rate": 4.680170575692965e-06, |
| "loss": 0.5396, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.7056, |
| "grad_norm": 0.8545689926618768, |
| "learning_rate": 4.690831556503198e-06, |
| "loss": 0.5117, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.7072, |
| "grad_norm": 0.8207273201254476, |
| "learning_rate": 4.701492537313434e-06, |
| "loss": 0.5905, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.7088, |
| "grad_norm": 0.8399233633785136, |
| "learning_rate": 4.712153518123667e-06, |
| "loss": 0.516, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.7104, |
| "grad_norm": 0.8508399508493361, |
| "learning_rate": 4.722814498933903e-06, |
| "loss": 0.551, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.712, |
| "grad_norm": 0.8184981164850842, |
| "learning_rate": 4.733475479744136e-06, |
| "loss": 0.5851, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.7136, |
| "grad_norm": 0.8030486987368878, |
| "learning_rate": 4.744136460554372e-06, |
| "loss": 0.5452, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.7152, |
| "grad_norm": 0.8653743154740211, |
| "learning_rate": 4.7547974413646055e-06, |
| "loss": 0.5354, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.7168, |
| "grad_norm": 0.7283574727939222, |
| "learning_rate": 4.765458422174841e-06, |
| "loss": 0.5335, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.7184, |
| "grad_norm": 0.8315206489545724, |
| "learning_rate": 4.7761194029850745e-06, |
| "loss": 0.5433, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.8441786568165387, |
| "learning_rate": 4.78678038379531e-06, |
| "loss": 0.5642, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.7216, |
| "grad_norm": 0.8332152020628741, |
| "learning_rate": 4.797441364605544e-06, |
| "loss": 0.5816, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.7232, |
| "grad_norm": 0.9173593393357525, |
| "learning_rate": 4.808102345415779e-06, |
| "loss": 0.5252, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.7248, |
| "grad_norm": 0.8648604534406458, |
| "learning_rate": 4.8187633262260135e-06, |
| "loss": 0.519, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.7264, |
| "grad_norm": 0.8711058461886974, |
| "learning_rate": 4.829424307036248e-06, |
| "loss": 0.5751, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.728, |
| "grad_norm": 0.8420986638862816, |
| "learning_rate": 4.8400852878464825e-06, |
| "loss": 0.5334, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.7296, |
| "grad_norm": 0.7555701798663501, |
| "learning_rate": 4.850746268656717e-06, |
| "loss": 0.5149, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.7312, |
| "grad_norm": 0.7711499442433453, |
| "learning_rate": 4.8614072494669516e-06, |
| "loss": 0.5443, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.7328, |
| "grad_norm": 0.8613489542257805, |
| "learning_rate": 4.872068230277186e-06, |
| "loss": 0.5647, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.7344, |
| "grad_norm": 0.9106949411264529, |
| "learning_rate": 4.882729211087421e-06, |
| "loss": 0.5425, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 0.8638599204665492, |
| "learning_rate": 4.893390191897655e-06, |
| "loss": 0.5275, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.7376, |
| "grad_norm": 0.8785400163755627, |
| "learning_rate": 4.90405117270789e-06, |
| "loss": 0.5469, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.7392, |
| "grad_norm": 0.7875425137925275, |
| "learning_rate": 4.914712153518124e-06, |
| "loss": 0.5475, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.7408, |
| "grad_norm": 0.9410710482326542, |
| "learning_rate": 4.925373134328359e-06, |
| "loss": 0.5452, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.7424, |
| "grad_norm": 0.8002566346058072, |
| "learning_rate": 4.936034115138593e-06, |
| "loss": 0.5263, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.744, |
| "grad_norm": 0.8937768916403107, |
| "learning_rate": 4.946695095948828e-06, |
| "loss": 0.5818, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.7456, |
| "grad_norm": 0.7637459435110706, |
| "learning_rate": 4.957356076759062e-06, |
| "loss": 0.5436, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.7472, |
| "grad_norm": 0.8123190542135358, |
| "learning_rate": 4.968017057569297e-06, |
| "loss": 0.5709, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.7488, |
| "grad_norm": 0.8285740674633666, |
| "learning_rate": 4.978678038379531e-06, |
| "loss": 0.5609, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.7504, |
| "grad_norm": 0.8762082463372437, |
| "learning_rate": 4.989339019189766e-06, |
| "loss": 0.5885, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.752, |
| "grad_norm": 0.8953998312516339, |
| "learning_rate": 5e-06, |
| "loss": 0.5488, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.7536, |
| "grad_norm": 0.8613184172440235, |
| "learning_rate": 5.010660980810235e-06, |
| "loss": 0.5116, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.7552, |
| "grad_norm": 0.8082855515563323, |
| "learning_rate": 5.02132196162047e-06, |
| "loss": 0.5328, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.7568, |
| "grad_norm": 0.8580014764636912, |
| "learning_rate": 5.031982942430704e-06, |
| "loss": 0.5176, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.7584, |
| "grad_norm": 0.8397257602418947, |
| "learning_rate": 5.0426439232409385e-06, |
| "loss": 0.5538, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 0.8147459038098553, |
| "learning_rate": 5.053304904051173e-06, |
| "loss": 0.5419, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.7616, |
| "grad_norm": 0.8575872649069343, |
| "learning_rate": 5.063965884861408e-06, |
| "loss": 0.5379, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.7632, |
| "grad_norm": 0.8075822295459566, |
| "learning_rate": 5.074626865671642e-06, |
| "loss": 0.5249, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.7648, |
| "grad_norm": 0.8591596637488736, |
| "learning_rate": 5.085287846481877e-06, |
| "loss": 0.5503, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.7664, |
| "grad_norm": 0.7829151047023781, |
| "learning_rate": 5.095948827292111e-06, |
| "loss": 0.543, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 0.9089122932444886, |
| "learning_rate": 5.1066098081023465e-06, |
| "loss": 0.5379, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.7696, |
| "grad_norm": 0.8944528930331815, |
| "learning_rate": 5.11727078891258e-06, |
| "loss": 0.5501, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.7712, |
| "grad_norm": 0.8811832962850422, |
| "learning_rate": 5.127931769722815e-06, |
| "loss": 0.5087, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.7728, |
| "grad_norm": 0.9520455522966428, |
| "learning_rate": 5.138592750533049e-06, |
| "loss": 0.5754, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.7744, |
| "grad_norm": 0.9481383621935475, |
| "learning_rate": 5.149253731343285e-06, |
| "loss": 0.5402, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.776, |
| "grad_norm": 0.8412227940463143, |
| "learning_rate": 5.159914712153518e-06, |
| "loss": 0.5321, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.7776, |
| "grad_norm": 0.9454760034452969, |
| "learning_rate": 5.170575692963753e-06, |
| "loss": 0.5378, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.7792, |
| "grad_norm": 0.8533494620573859, |
| "learning_rate": 5.181236673773987e-06, |
| "loss": 0.5159, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.7808, |
| "grad_norm": 0.9361516013200838, |
| "learning_rate": 5.191897654584223e-06, |
| "loss": 0.515, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.7824, |
| "grad_norm": 0.8782329235928078, |
| "learning_rate": 5.202558635394456e-06, |
| "loss": 0.5107, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.784, |
| "grad_norm": 0.9947369706499299, |
| "learning_rate": 5.213219616204691e-06, |
| "loss": 0.5237, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.7856, |
| "grad_norm": 0.8189802535019133, |
| "learning_rate": 5.2238805970149255e-06, |
| "loss": 0.5588, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.7872, |
| "grad_norm": 1.0555978132118156, |
| "learning_rate": 5.234541577825161e-06, |
| "loss": 0.5253, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.7888, |
| "grad_norm": 0.8155783631263221, |
| "learning_rate": 5.245202558635395e-06, |
| "loss": 0.4902, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.7904, |
| "grad_norm": 0.9774352650284707, |
| "learning_rate": 5.255863539445629e-06, |
| "loss": 0.5559, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.792, |
| "grad_norm": 0.8976663445833297, |
| "learning_rate": 5.2665245202558636e-06, |
| "loss": 0.5431, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.7936, |
| "grad_norm": 0.9808412685023696, |
| "learning_rate": 5.277185501066099e-06, |
| "loss": 0.5518, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.7952, |
| "grad_norm": 0.8938141729801423, |
| "learning_rate": 5.2878464818763335e-06, |
| "loss": 0.5249, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.7968, |
| "grad_norm": 0.9043328808303781, |
| "learning_rate": 5.298507462686567e-06, |
| "loss": 0.5401, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.7984, |
| "grad_norm": 0.8623402415295424, |
| "learning_rate": 5.309168443496802e-06, |
| "loss": 0.5229, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.8750031520187871, |
| "learning_rate": 5.319829424307037e-06, |
| "loss": 0.5207, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.8016, |
| "grad_norm": 0.763266475972754, |
| "learning_rate": 5.3304904051172716e-06, |
| "loss": 0.5056, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.8032, |
| "grad_norm": 0.9198507138970855, |
| "learning_rate": 5.341151385927505e-06, |
| "loss": 0.538, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.8048, |
| "grad_norm": 0.7977495583319136, |
| "learning_rate": 5.351812366737741e-06, |
| "loss": 0.544, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.8064, |
| "grad_norm": 0.8338757340960701, |
| "learning_rate": 5.362473347547975e-06, |
| "loss": 0.5123, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.808, |
| "grad_norm": 0.8791799631125965, |
| "learning_rate": 5.37313432835821e-06, |
| "loss": 0.5491, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.8096, |
| "grad_norm": 0.8293019370822474, |
| "learning_rate": 5.383795309168443e-06, |
| "loss": 0.5212, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.8112, |
| "grad_norm": 0.8554435528861921, |
| "learning_rate": 5.394456289978679e-06, |
| "loss": 0.5045, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.8128, |
| "grad_norm": 0.8355974593461122, |
| "learning_rate": 5.405117270788913e-06, |
| "loss": 0.5413, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.8144, |
| "grad_norm": 0.77389969820565, |
| "learning_rate": 5.415778251599148e-06, |
| "loss": 0.5002, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.816, |
| "grad_norm": 0.8249599592546532, |
| "learning_rate": 5.4264392324093815e-06, |
| "loss": 0.5436, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.8176, |
| "grad_norm": 0.8228797504357074, |
| "learning_rate": 5.437100213219617e-06, |
| "loss": 0.5419, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.8192, |
| "grad_norm": 0.8300496904175892, |
| "learning_rate": 5.447761194029851e-06, |
| "loss": 0.5532, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.8208, |
| "grad_norm": 0.7994881849240946, |
| "learning_rate": 5.458422174840086e-06, |
| "loss": 0.5315, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.8224, |
| "grad_norm": 0.8694989380132128, |
| "learning_rate": 5.4690831556503196e-06, |
| "loss": 0.5375, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.824, |
| "grad_norm": 0.7776010437173337, |
| "learning_rate": 5.479744136460555e-06, |
| "loss": 0.5435, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.8256, |
| "grad_norm": 0.9503455442662696, |
| "learning_rate": 5.4904051172707895e-06, |
| "loss": 0.5034, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.8272, |
| "grad_norm": 0.8213271153418595, |
| "learning_rate": 5.501066098081024e-06, |
| "loss": 0.5644, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.8288, |
| "grad_norm": 0.8824829583017852, |
| "learning_rate": 5.511727078891258e-06, |
| "loss": 0.5265, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.8304, |
| "grad_norm": 0.8451810272877978, |
| "learning_rate": 5.522388059701493e-06, |
| "loss": 0.5502, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 0.8746711744310373, |
| "learning_rate": 5.5330490405117276e-06, |
| "loss": 0.5338, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.8336, |
| "grad_norm": 0.8679819152550929, |
| "learning_rate": 5.543710021321962e-06, |
| "loss": 0.5533, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.8352, |
| "grad_norm": 0.8679233312343541, |
| "learning_rate": 5.554371002132196e-06, |
| "loss": 0.5309, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.8368, |
| "grad_norm": 0.7910814695342003, |
| "learning_rate": 5.565031982942431e-06, |
| "loss": 0.5267, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.8384, |
| "grad_norm": 0.8342811987095753, |
| "learning_rate": 5.575692963752666e-06, |
| "loss": 0.5372, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 0.7834131537642278, |
| "learning_rate": 5.5863539445629e-06, |
| "loss": 0.5229, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.8416, |
| "grad_norm": 0.8614673719559317, |
| "learning_rate": 5.597014925373134e-06, |
| "loss": 0.5294, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.8432, |
| "grad_norm": 0.7798874148485664, |
| "learning_rate": 5.607675906183369e-06, |
| "loss": 0.5445, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.8448, |
| "grad_norm": 0.8596496489429021, |
| "learning_rate": 5.618336886993604e-06, |
| "loss": 0.5652, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.8464, |
| "grad_norm": 0.793807162427068, |
| "learning_rate": 5.628997867803838e-06, |
| "loss": 0.5343, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.848, |
| "grad_norm": 0.8449495883957229, |
| "learning_rate": 5.639658848614073e-06, |
| "loss": 0.5655, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.8496, |
| "grad_norm": 0.8182016637240901, |
| "learning_rate": 5.650319829424308e-06, |
| "loss": 0.5856, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.8512, |
| "grad_norm": 0.8109408391980678, |
| "learning_rate": 5.660980810234542e-06, |
| "loss": 0.5121, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.8528, |
| "grad_norm": 0.8265194308547619, |
| "learning_rate": 5.671641791044776e-06, |
| "loss": 0.5358, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.8544, |
| "grad_norm": 0.8204400281580362, |
| "learning_rate": 5.682302771855012e-06, |
| "loss": 0.559, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.856, |
| "grad_norm": 0.8270305363765442, |
| "learning_rate": 5.692963752665246e-06, |
| "loss": 0.5239, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.8576, |
| "grad_norm": 0.8714688502657366, |
| "learning_rate": 5.70362473347548e-06, |
| "loss": 0.5606, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.8592, |
| "grad_norm": 0.8266359853649553, |
| "learning_rate": 5.7142857142857145e-06, |
| "loss": 0.5529, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.8608, |
| "grad_norm": 0.7423922030019656, |
| "learning_rate": 5.72494669509595e-06, |
| "loss": 0.5439, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.8624, |
| "grad_norm": 0.7948415966659854, |
| "learning_rate": 5.735607675906184e-06, |
| "loss": 0.5104, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 0.8184234776238962, |
| "learning_rate": 5.746268656716418e-06, |
| "loss": 0.5512, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.8656, |
| "grad_norm": 0.8735831797222657, |
| "learning_rate": 5.756929637526653e-06, |
| "loss": 0.5489, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.8672, |
| "grad_norm": 0.7696842650627593, |
| "learning_rate": 5.767590618336888e-06, |
| "loss": 0.5367, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.8688, |
| "grad_norm": 0.8541064682006467, |
| "learning_rate": 5.7782515991471225e-06, |
| "loss": 0.5463, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.8704, |
| "grad_norm": 0.893401210150427, |
| "learning_rate": 5.788912579957356e-06, |
| "loss": 0.4927, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.872, |
| "grad_norm": 0.9094618080161737, |
| "learning_rate": 5.799573560767591e-06, |
| "loss": 0.5112, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.8736, |
| "grad_norm": 0.9163620143155544, |
| "learning_rate": 5.810234541577826e-06, |
| "loss": 0.5242, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.8752, |
| "grad_norm": 0.854627870380798, |
| "learning_rate": 5.820895522388061e-06, |
| "loss": 0.5097, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.8768, |
| "grad_norm": 0.9049857621042802, |
| "learning_rate": 5.831556503198294e-06, |
| "loss": 0.4897, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.8784, |
| "grad_norm": 0.841595814510566, |
| "learning_rate": 5.842217484008529e-06, |
| "loss": 0.5235, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.981541003468602, |
| "learning_rate": 5.852878464818764e-06, |
| "loss": 0.5183, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.8816, |
| "grad_norm": 0.888025361245545, |
| "learning_rate": 5.863539445628999e-06, |
| "loss": 0.5338, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.8832, |
| "grad_norm": 0.8532402860379611, |
| "learning_rate": 5.874200426439232e-06, |
| "loss": 0.5877, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.8848, |
| "grad_norm": 0.827294528054697, |
| "learning_rate": 5.884861407249467e-06, |
| "loss": 0.4924, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.8864, |
| "grad_norm": 0.9526621297802707, |
| "learning_rate": 5.895522388059702e-06, |
| "loss": 0.5455, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.888, |
| "grad_norm": 0.9046496486378539, |
| "learning_rate": 5.906183368869937e-06, |
| "loss": 0.5171, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.8896, |
| "grad_norm": 0.9025775673297872, |
| "learning_rate": 5.9168443496801705e-06, |
| "loss": 0.4893, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.8912, |
| "grad_norm": 0.8574022155093447, |
| "learning_rate": 5.927505330490405e-06, |
| "loss": 0.5433, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.8928, |
| "grad_norm": 0.8231304371582934, |
| "learning_rate": 5.93816631130064e-06, |
| "loss": 0.4815, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.8944, |
| "grad_norm": 0.9173680501366946, |
| "learning_rate": 5.948827292110875e-06, |
| "loss": 0.5544, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 0.8785255714629714, |
| "learning_rate": 5.959488272921109e-06, |
| "loss": 0.5251, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.8976, |
| "grad_norm": 0.8539243959014604, |
| "learning_rate": 5.970149253731343e-06, |
| "loss": 0.558, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.8992, |
| "grad_norm": 0.7961416140257315, |
| "learning_rate": 5.9808102345415785e-06, |
| "loss": 0.5193, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.9008, |
| "grad_norm": 0.9080999868996099, |
| "learning_rate": 5.991471215351813e-06, |
| "loss": 0.5312, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.9024, |
| "grad_norm": 0.813439181463869, |
| "learning_rate": 6.002132196162047e-06, |
| "loss": 0.4965, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.904, |
| "grad_norm": 0.9219168145673883, |
| "learning_rate": 6.012793176972282e-06, |
| "loss": 0.5396, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.9056, |
| "grad_norm": 0.794480434630688, |
| "learning_rate": 6.023454157782517e-06, |
| "loss": 0.5393, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.9072, |
| "grad_norm": 0.8548750027758196, |
| "learning_rate": 6.034115138592751e-06, |
| "loss": 0.5026, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.9088, |
| "grad_norm": 0.8251007973212211, |
| "learning_rate": 6.044776119402986e-06, |
| "loss": 0.5099, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.9104, |
| "grad_norm": 0.8345484986699218, |
| "learning_rate": 6.055437100213221e-06, |
| "loss": 0.5399, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.912, |
| "grad_norm": 0.8483664276369052, |
| "learning_rate": 6.066098081023455e-06, |
| "loss": 0.5209, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.9136, |
| "grad_norm": 0.8108353261117107, |
| "learning_rate": 6.076759061833689e-06, |
| "loss": 0.5167, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.9152, |
| "grad_norm": 0.8063591795914883, |
| "learning_rate": 6.087420042643924e-06, |
| "loss": 0.4935, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.9168, |
| "grad_norm": 0.828769688833686, |
| "learning_rate": 6.098081023454159e-06, |
| "loss": 0.5483, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.9184, |
| "grad_norm": 0.8555238456757491, |
| "learning_rate": 6.108742004264393e-06, |
| "loss": 0.5534, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 0.8315415336747674, |
| "learning_rate": 6.119402985074627e-06, |
| "loss": 0.5221, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.9216, |
| "grad_norm": 0.8565368931538652, |
| "learning_rate": 6.130063965884862e-06, |
| "loss": 0.46, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.9232, |
| "grad_norm": 0.8259783667037305, |
| "learning_rate": 6.140724946695097e-06, |
| "loss": 0.5222, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.9248, |
| "grad_norm": 0.8933835874543237, |
| "learning_rate": 6.151385927505331e-06, |
| "loss": 0.5229, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.9264, |
| "grad_norm": 0.8299341146771261, |
| "learning_rate": 6.1620469083155655e-06, |
| "loss": 0.5162, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.928, |
| "grad_norm": 0.851826227347469, |
| "learning_rate": 6.1727078891258e-06, |
| "loss": 0.5294, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.9296, |
| "grad_norm": 0.9771804477958785, |
| "learning_rate": 6.183368869936035e-06, |
| "loss": 0.5493, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.9312, |
| "grad_norm": 0.844552453653095, |
| "learning_rate": 6.194029850746269e-06, |
| "loss": 0.5122, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.9328, |
| "grad_norm": 0.8985001503860374, |
| "learning_rate": 6.2046908315565036e-06, |
| "loss": 0.5209, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.9344, |
| "grad_norm": 0.8405023016845977, |
| "learning_rate": 6.215351812366738e-06, |
| "loss": 0.5229, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.936, |
| "grad_norm": 0.9290507990979149, |
| "learning_rate": 6.2260127931769735e-06, |
| "loss": 0.5177, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.9376, |
| "grad_norm": 0.8051113618708801, |
| "learning_rate": 6.236673773987207e-06, |
| "loss": 0.5399, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.9392, |
| "grad_norm": 0.8643207774567074, |
| "learning_rate": 6.247334754797442e-06, |
| "loss": 0.5239, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.9408, |
| "grad_norm": 0.7556872262805366, |
| "learning_rate": 6.257995735607676e-06, |
| "loss": 0.5382, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.9424, |
| "grad_norm": 0.9526333574647944, |
| "learning_rate": 6.2686567164179116e-06, |
| "loss": 0.5377, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.944, |
| "grad_norm": 0.7958583963770268, |
| "learning_rate": 6.279317697228145e-06, |
| "loss": 0.4945, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.9456, |
| "grad_norm": 0.9757428010352267, |
| "learning_rate": 6.28997867803838e-06, |
| "loss": 0.531, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.9472, |
| "grad_norm": 0.9484280544374282, |
| "learning_rate": 6.300639658848614e-06, |
| "loss": 0.5344, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.9488, |
| "grad_norm": 0.8953536000746694, |
| "learning_rate": 6.31130063965885e-06, |
| "loss": 0.4969, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.9504, |
| "grad_norm": 1.0762465973095379, |
| "learning_rate": 6.321961620469083e-06, |
| "loss": 0.5615, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.952, |
| "grad_norm": 0.8868282604227625, |
| "learning_rate": 6.332622601279318e-06, |
| "loss": 0.5015, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.9536, |
| "grad_norm": 0.9387462058082829, |
| "learning_rate": 6.343283582089553e-06, |
| "loss": 0.5384, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.9552, |
| "grad_norm": 1.038161905784565, |
| "learning_rate": 6.353944562899788e-06, |
| "loss": 0.5406, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.9568, |
| "grad_norm": 0.8349063964387037, |
| "learning_rate": 6.3646055437100215e-06, |
| "loss": 0.5128, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.9584, |
| "grad_norm": 0.891273791046686, |
| "learning_rate": 6.375266524520256e-06, |
| "loss": 0.5145, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.8487119519500591, |
| "learning_rate": 6.385927505330491e-06, |
| "loss": 0.5433, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.9616, |
| "grad_norm": 0.8727449705126482, |
| "learning_rate": 6.396588486140726e-06, |
| "loss": 0.5415, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.9632, |
| "grad_norm": 0.7867763723105303, |
| "learning_rate": 6.4072494669509596e-06, |
| "loss": 0.4936, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.9648, |
| "grad_norm": 0.8287779412757438, |
| "learning_rate": 6.417910447761194e-06, |
| "loss": 0.5278, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.9664, |
| "grad_norm": 0.7745857956565567, |
| "learning_rate": 6.4285714285714295e-06, |
| "loss": 0.495, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.968, |
| "grad_norm": 0.8270024054653756, |
| "learning_rate": 6.439232409381664e-06, |
| "loss": 0.5222, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.9696, |
| "grad_norm": 0.8290003392577493, |
| "learning_rate": 6.449893390191898e-06, |
| "loss": 0.4942, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.9712, |
| "grad_norm": 0.753920920589916, |
| "learning_rate": 6.460554371002132e-06, |
| "loss": 0.5188, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.9728, |
| "grad_norm": 0.8358847966610979, |
| "learning_rate": 6.4712153518123676e-06, |
| "loss": 0.5526, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.9744, |
| "grad_norm": 0.8114863070028855, |
| "learning_rate": 6.481876332622602e-06, |
| "loss": 0.5377, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.976, |
| "grad_norm": 0.8195628044086989, |
| "learning_rate": 6.492537313432837e-06, |
| "loss": 0.502, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.9776, |
| "grad_norm": 0.8377558444331323, |
| "learning_rate": 6.50319829424307e-06, |
| "loss": 0.4701, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.9792, |
| "grad_norm": 0.8016725399438086, |
| "learning_rate": 6.513859275053306e-06, |
| "loss": 0.5309, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.9808, |
| "grad_norm": 0.7755966838681521, |
| "learning_rate": 6.52452025586354e-06, |
| "loss": 0.5171, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.9824, |
| "grad_norm": 0.7654081600201667, |
| "learning_rate": 6.535181236673775e-06, |
| "loss": 0.5103, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.984, |
| "grad_norm": 0.788504199793285, |
| "learning_rate": 6.545842217484008e-06, |
| "loss": 0.4946, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.9856, |
| "grad_norm": 0.8245717363647899, |
| "learning_rate": 6.556503198294244e-06, |
| "loss": 0.5527, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.9872, |
| "grad_norm": 0.8245960934834444, |
| "learning_rate": 6.567164179104478e-06, |
| "loss": 0.5363, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.9888, |
| "grad_norm": 0.727073026640822, |
| "learning_rate": 6.577825159914713e-06, |
| "loss": 0.532, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.9904, |
| "grad_norm": 0.8600642583259648, |
| "learning_rate": 6.5884861407249465e-06, |
| "loss": 0.5376, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.992, |
| "grad_norm": 0.821299436298884, |
| "learning_rate": 6.599147121535182e-06, |
| "loss": 0.5306, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.9936, |
| "grad_norm": 0.8493937107942993, |
| "learning_rate": 6.609808102345416e-06, |
| "loss": 0.5425, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.9952, |
| "grad_norm": 0.8117156613742099, |
| "learning_rate": 6.620469083155651e-06, |
| "loss": 0.5148, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.9968, |
| "grad_norm": 0.8106067040078264, |
| "learning_rate": 6.631130063965885e-06, |
| "loss": 0.5088, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.9984, |
| "grad_norm": 0.9006626802499182, |
| "learning_rate": 6.64179104477612e-06, |
| "loss": 0.5348, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.7955489761135057, |
| "learning_rate": 6.6524520255863545e-06, |
| "loss": 0.526, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.0016, |
| "grad_norm": 0.8939049889815265, |
| "learning_rate": 6.663113006396589e-06, |
| "loss": 0.4748, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.0032, |
| "grad_norm": 0.9142102536005957, |
| "learning_rate": 6.673773987206824e-06, |
| "loss": 0.4791, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.0048, |
| "grad_norm": 0.85000434786646, |
| "learning_rate": 6.684434968017058e-06, |
| "loss": 0.5234, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.0064, |
| "grad_norm": 0.9585313430390451, |
| "learning_rate": 6.695095948827293e-06, |
| "loss": 0.4802, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.008, |
| "grad_norm": 0.8750443177760309, |
| "learning_rate": 6.705756929637527e-06, |
| "loss": 0.5185, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.0096, |
| "grad_norm": 0.9477247147352341, |
| "learning_rate": 6.7164179104477625e-06, |
| "loss": 0.5025, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.0112, |
| "grad_norm": 0.904368055639127, |
| "learning_rate": 6.727078891257996e-06, |
| "loss": 0.4737, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.0128, |
| "grad_norm": 0.929987383446468, |
| "learning_rate": 6.737739872068231e-06, |
| "loss": 0.521, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.0144, |
| "grad_norm": 0.8091474393380501, |
| "learning_rate": 6.748400852878465e-06, |
| "loss": 0.4827, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.016, |
| "grad_norm": 0.8707887258583898, |
| "learning_rate": 6.759061833688701e-06, |
| "loss": 0.4707, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.0176, |
| "grad_norm": 0.9182994454586503, |
| "learning_rate": 6.769722814498934e-06, |
| "loss": 0.4966, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.0192, |
| "grad_norm": 0.8821203820370487, |
| "learning_rate": 6.780383795309169e-06, |
| "loss": 0.4895, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.0208, |
| "grad_norm": 0.8392554797468061, |
| "learning_rate": 6.791044776119403e-06, |
| "loss": 0.5143, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.0224, |
| "grad_norm": 0.9493234827147355, |
| "learning_rate": 6.801705756929639e-06, |
| "loss": 0.4879, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.024, |
| "grad_norm": 0.8932952086378858, |
| "learning_rate": 6.812366737739872e-06, |
| "loss": 0.488, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.0256, |
| "grad_norm": 0.8970192831510824, |
| "learning_rate": 6.823027718550107e-06, |
| "loss": 0.4812, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.0272, |
| "grad_norm": 0.8934554057803764, |
| "learning_rate": 6.8336886993603415e-06, |
| "loss": 0.4937, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.0288, |
| "grad_norm": 0.8735754866413923, |
| "learning_rate": 6.844349680170577e-06, |
| "loss": 0.4763, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.0304, |
| "grad_norm": 0.8742904882080139, |
| "learning_rate": 6.8550106609808105e-06, |
| "loss": 0.49, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.032, |
| "grad_norm": 0.8328916277112867, |
| "learning_rate": 6.865671641791045e-06, |
| "loss": 0.4996, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.0336, |
| "grad_norm": 0.9184053544404314, |
| "learning_rate": 6.8763326226012796e-06, |
| "loss": 0.5048, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.0352, |
| "grad_norm": 0.8456606611788858, |
| "learning_rate": 6.886993603411515e-06, |
| "loss": 0.4898, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.0368, |
| "grad_norm": 0.8284969357938996, |
| "learning_rate": 6.8976545842217495e-06, |
| "loss": 0.5172, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.0384, |
| "grad_norm": 0.8782530091874459, |
| "learning_rate": 6.908315565031983e-06, |
| "loss": 0.4988, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.04, |
| "grad_norm": 0.8545255868928607, |
| "learning_rate": 6.918976545842218e-06, |
| "loss": 0.4855, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.0416, |
| "grad_norm": 0.9449070971234702, |
| "learning_rate": 6.929637526652453e-06, |
| "loss": 0.525, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.0432, |
| "grad_norm": 0.8329960687657197, |
| "learning_rate": 6.9402985074626876e-06, |
| "loss": 0.4852, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.0448, |
| "grad_norm": 0.858653034494543, |
| "learning_rate": 6.950959488272921e-06, |
| "loss": 0.5175, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.0464, |
| "grad_norm": 0.7785586676865667, |
| "learning_rate": 6.961620469083156e-06, |
| "loss": 0.4627, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.048, |
| "grad_norm": 0.8613855083022859, |
| "learning_rate": 6.972281449893391e-06, |
| "loss": 0.5209, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.0496, |
| "grad_norm": 0.8286890577089111, |
| "learning_rate": 6.982942430703626e-06, |
| "loss": 0.5325, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.0512, |
| "grad_norm": 0.8112752029689932, |
| "learning_rate": 6.993603411513859e-06, |
| "loss": 0.4905, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.0528, |
| "grad_norm": 0.8565051990117192, |
| "learning_rate": 7.004264392324095e-06, |
| "loss": 0.4939, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.0544, |
| "grad_norm": 0.7784473305249311, |
| "learning_rate": 7.014925373134329e-06, |
| "loss": 0.4703, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.056, |
| "grad_norm": 0.8561933854878488, |
| "learning_rate": 7.025586353944564e-06, |
| "loss": 0.4892, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.0576, |
| "grad_norm": 0.8522962801371073, |
| "learning_rate": 7.0362473347547975e-06, |
| "loss": 0.5085, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.0592, |
| "grad_norm": 0.8604350686873483, |
| "learning_rate": 7.046908315565033e-06, |
| "loss": 0.473, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.0608, |
| "grad_norm": 0.8675460649549699, |
| "learning_rate": 7.057569296375267e-06, |
| "loss": 0.4949, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.0624, |
| "grad_norm": 0.8594099163899366, |
| "learning_rate": 7.068230277185502e-06, |
| "loss": 0.5117, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.064, |
| "grad_norm": 0.8640881532732602, |
| "learning_rate": 7.0788912579957356e-06, |
| "loss": 0.4783, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.0656, |
| "grad_norm": 0.8919589834064163, |
| "learning_rate": 7.089552238805971e-06, |
| "loss": 0.5246, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.0672, |
| "grad_norm": 0.9750324339852304, |
| "learning_rate": 7.1002132196162055e-06, |
| "loss": 0.4973, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.0688, |
| "grad_norm": 0.9957384605818356, |
| "learning_rate": 7.11087420042644e-06, |
| "loss": 0.4966, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.0704, |
| "grad_norm": 0.7997520245450188, |
| "learning_rate": 7.121535181236674e-06, |
| "loss": 0.4865, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.072, |
| "grad_norm": 0.9582197449325179, |
| "learning_rate": 7.132196162046909e-06, |
| "loss": 0.5023, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.0735999999999999, |
| "grad_norm": 0.9272567743413159, |
| "learning_rate": 7.1428571428571436e-06, |
| "loss": 0.5332, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.0752, |
| "grad_norm": 0.9017820260888095, |
| "learning_rate": 7.153518123667378e-06, |
| "loss": 0.4644, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.0768, |
| "grad_norm": 0.8858975000261681, |
| "learning_rate": 7.164179104477612e-06, |
| "loss": 0.4837, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.0784, |
| "grad_norm": 0.8930445495465718, |
| "learning_rate": 7.174840085287847e-06, |
| "loss": 0.5044, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.08, |
| "grad_norm": 0.9323861547341619, |
| "learning_rate": 7.185501066098082e-06, |
| "loss": 0.4975, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.0816, |
| "grad_norm": 0.8695592279737832, |
| "learning_rate": 7.196162046908316e-06, |
| "loss": 0.497, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.0832, |
| "grad_norm": 0.8810401640135046, |
| "learning_rate": 7.20682302771855e-06, |
| "loss": 0.4884, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.0848, |
| "grad_norm": 0.9117589561225861, |
| "learning_rate": 7.217484008528785e-06, |
| "loss": 0.4944, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.0864, |
| "grad_norm": 0.8400188680420124, |
| "learning_rate": 7.22814498933902e-06, |
| "loss": 0.5221, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.088, |
| "grad_norm": 0.8683054433248262, |
| "learning_rate": 7.238805970149254e-06, |
| "loss": 0.4992, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.0896, |
| "grad_norm": 0.8532920431348728, |
| "learning_rate": 7.249466950959488e-06, |
| "loss": 0.4894, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.0912, |
| "grad_norm": 0.8656995078586304, |
| "learning_rate": 7.260127931769723e-06, |
| "loss": 0.4946, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.0928, |
| "grad_norm": 0.8472745906446407, |
| "learning_rate": 7.270788912579958e-06, |
| "loss": 0.4866, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.0944, |
| "grad_norm": 0.8865029623698553, |
| "learning_rate": 7.281449893390192e-06, |
| "loss": 0.4859, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.096, |
| "grad_norm": 0.8938595923880026, |
| "learning_rate": 7.292110874200427e-06, |
| "loss": 0.5124, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.0976, |
| "grad_norm": 0.9001915895636022, |
| "learning_rate": 7.302771855010662e-06, |
| "loss": 0.505, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.0992, |
| "grad_norm": 0.8648181883246533, |
| "learning_rate": 7.313432835820896e-06, |
| "loss": 0.4547, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.1008, |
| "grad_norm": 0.9013716761336116, |
| "learning_rate": 7.3240938166311305e-06, |
| "loss": 0.5068, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.1024, |
| "grad_norm": 0.8897699929901375, |
| "learning_rate": 7.334754797441366e-06, |
| "loss": 0.4699, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.104, |
| "grad_norm": 0.9274052628271223, |
| "learning_rate": 7.3454157782516e-06, |
| "loss": 0.5205, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.1056, |
| "grad_norm": 0.8904130581010617, |
| "learning_rate": 7.356076759061834e-06, |
| "loss": 0.4974, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.1072, |
| "grad_norm": 0.9248330646869078, |
| "learning_rate": 7.366737739872069e-06, |
| "loss": 0.487, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.1088, |
| "grad_norm": 0.8849242860814813, |
| "learning_rate": 7.377398720682304e-06, |
| "loss": 0.4862, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.1104, |
| "grad_norm": 0.8404508145439668, |
| "learning_rate": 7.3880597014925385e-06, |
| "loss": 0.5115, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.112, |
| "grad_norm": 0.9210103265284583, |
| "learning_rate": 7.398720682302772e-06, |
| "loss": 0.5137, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.1136, |
| "grad_norm": 0.9028642559788765, |
| "learning_rate": 7.409381663113007e-06, |
| "loss": 0.4994, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.1152, |
| "grad_norm": 0.8622481549136872, |
| "learning_rate": 7.420042643923242e-06, |
| "loss": 0.4894, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.1168, |
| "grad_norm": 0.8580340963349197, |
| "learning_rate": 7.430703624733477e-06, |
| "loss": 0.4795, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.1184, |
| "grad_norm": 0.8555460870305887, |
| "learning_rate": 7.44136460554371e-06, |
| "loss": 0.5014, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 0.923140841505284, |
| "learning_rate": 7.452025586353945e-06, |
| "loss": 0.4562, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.1216, |
| "grad_norm": 0.8100751043736653, |
| "learning_rate": 7.46268656716418e-06, |
| "loss": 0.4733, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.1232, |
| "grad_norm": 0.9147204854059461, |
| "learning_rate": 7.473347547974415e-06, |
| "loss": 0.4752, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.1248, |
| "grad_norm": 0.8419294026515683, |
| "learning_rate": 7.484008528784648e-06, |
| "loss": 0.486, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.1264, |
| "grad_norm": 0.8643702806570176, |
| "learning_rate": 7.494669509594883e-06, |
| "loss": 0.4613, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.1280000000000001, |
| "grad_norm": 0.949302912936103, |
| "learning_rate": 7.505330490405118e-06, |
| "loss": 0.5118, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.1296, |
| "grad_norm": 0.7783378740632438, |
| "learning_rate": 7.515991471215353e-06, |
| "loss": 0.4741, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.1312, |
| "grad_norm": 0.9194591494916258, |
| "learning_rate": 7.5266524520255865e-06, |
| "loss": 0.4782, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.1328, |
| "grad_norm": 0.8084257080509638, |
| "learning_rate": 7.537313432835821e-06, |
| "loss": 0.4789, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.1344, |
| "grad_norm": 0.9370225113345622, |
| "learning_rate": 7.547974413646056e-06, |
| "loss": 0.5038, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.1360000000000001, |
| "grad_norm": 0.8967784446409596, |
| "learning_rate": 7.558635394456291e-06, |
| "loss": 0.5123, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.1376, |
| "grad_norm": 0.8353568264432971, |
| "learning_rate": 7.569296375266525e-06, |
| "loss": 0.4981, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.1392, |
| "grad_norm": 1.0148502497573662, |
| "learning_rate": 7.579957356076759e-06, |
| "loss": 0.5154, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.1408, |
| "grad_norm": 0.794448266298346, |
| "learning_rate": 7.5906183368869945e-06, |
| "loss": 0.4914, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.1424, |
| "grad_norm": 0.931979450219089, |
| "learning_rate": 7.601279317697229e-06, |
| "loss": 0.4814, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.144, |
| "grad_norm": 0.7837557760281342, |
| "learning_rate": 7.611940298507463e-06, |
| "loss": 0.4665, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.1456, |
| "grad_norm": 0.8079659200165911, |
| "learning_rate": 7.622601279317697e-06, |
| "loss": 0.5002, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.1472, |
| "grad_norm": 0.8433060870697712, |
| "learning_rate": 7.633262260127933e-06, |
| "loss": 0.4924, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.1488, |
| "grad_norm": 0.8562511609970845, |
| "learning_rate": 7.643923240938167e-06, |
| "loss": 0.4909, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.1504, |
| "grad_norm": 0.8170143423224167, |
| "learning_rate": 7.654584221748402e-06, |
| "loss": 0.493, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.152, |
| "grad_norm": 0.8988104256313746, |
| "learning_rate": 7.665245202558636e-06, |
| "loss": 0.4858, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.1536, |
| "grad_norm": 0.8926410072505501, |
| "learning_rate": 7.67590618336887e-06, |
| "loss": 0.5134, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.1552, |
| "grad_norm": 0.8063108281175257, |
| "learning_rate": 7.686567164179105e-06, |
| "loss": 0.4504, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.1568, |
| "grad_norm": 0.9031510431095552, |
| "learning_rate": 7.69722814498934e-06, |
| "loss": 0.4897, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.1584, |
| "grad_norm": 0.8000465406306086, |
| "learning_rate": 7.707889125799574e-06, |
| "loss": 0.5103, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.16, |
| "grad_norm": 0.8807137653031148, |
| "learning_rate": 7.718550106609809e-06, |
| "loss": 0.5188, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.1616, |
| "grad_norm": 0.8529454057323239, |
| "learning_rate": 7.729211087420043e-06, |
| "loss": 0.4901, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.1632, |
| "grad_norm": 0.8865930965204165, |
| "learning_rate": 7.739872068230278e-06, |
| "loss": 0.5172, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.1648, |
| "grad_norm": 0.8489456406347604, |
| "learning_rate": 7.750533049040512e-06, |
| "loss": 0.5318, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.1663999999999999, |
| "grad_norm": 0.8389695400553605, |
| "learning_rate": 7.761194029850747e-06, |
| "loss": 0.4847, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.168, |
| "grad_norm": 0.9129636581931746, |
| "learning_rate": 7.771855010660981e-06, |
| "loss": 0.5036, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.1696, |
| "grad_norm": 0.8689782710805041, |
| "learning_rate": 7.782515991471216e-06, |
| "loss": 0.4621, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.1712, |
| "grad_norm": 0.8366002015417364, |
| "learning_rate": 7.79317697228145e-06, |
| "loss": 0.4919, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.1728, |
| "grad_norm": 0.862046670020214, |
| "learning_rate": 7.803837953091685e-06, |
| "loss": 0.505, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.1743999999999999, |
| "grad_norm": 0.894624237535269, |
| "learning_rate": 7.81449893390192e-06, |
| "loss": 0.4719, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.176, |
| "grad_norm": 0.8185676529493747, |
| "learning_rate": 7.825159914712154e-06, |
| "loss": 0.5157, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.1776, |
| "grad_norm": 0.9507916410459172, |
| "learning_rate": 7.835820895522389e-06, |
| "loss": 0.4884, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.1792, |
| "grad_norm": 0.8167479057507443, |
| "learning_rate": 7.846481876332623e-06, |
| "loss": 0.4666, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.1808, |
| "grad_norm": 0.8135227530782192, |
| "learning_rate": 7.857142857142858e-06, |
| "loss": 0.509, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.1824, |
| "grad_norm": 0.9340892372683886, |
| "learning_rate": 7.867803837953092e-06, |
| "loss": 0.471, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.184, |
| "grad_norm": 0.8646629530041654, |
| "learning_rate": 7.878464818763327e-06, |
| "loss": 0.4923, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.1856, |
| "grad_norm": 0.997273455214431, |
| "learning_rate": 7.889125799573561e-06, |
| "loss": 0.4873, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.1872, |
| "grad_norm": 0.9219572311600239, |
| "learning_rate": 7.899786780383796e-06, |
| "loss": 0.5069, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.1888, |
| "grad_norm": 0.7934354225458706, |
| "learning_rate": 7.91044776119403e-06, |
| "loss": 0.4763, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.1904, |
| "grad_norm": 0.8988143693580111, |
| "learning_rate": 7.921108742004265e-06, |
| "loss": 0.5059, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.192, |
| "grad_norm": 0.8620595700348912, |
| "learning_rate": 7.9317697228145e-06, |
| "loss": 0.4875, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.1936, |
| "grad_norm": 0.8926860784613844, |
| "learning_rate": 7.942430703624734e-06, |
| "loss": 0.4911, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.1952, |
| "grad_norm": 0.9287528764007131, |
| "learning_rate": 7.953091684434968e-06, |
| "loss": 0.4801, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.1968, |
| "grad_norm": 0.8261534615904715, |
| "learning_rate": 7.963752665245203e-06, |
| "loss": 0.4893, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.1984, |
| "grad_norm": 0.8625619754008521, |
| "learning_rate": 7.974413646055437e-06, |
| "loss": 0.4817, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.8660545440351582, |
| "learning_rate": 7.985074626865672e-06, |
| "loss": 0.5194, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.2016, |
| "grad_norm": 0.9225217211510944, |
| "learning_rate": 7.995735607675907e-06, |
| "loss": 0.5066, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.2032, |
| "grad_norm": 0.91250700607779, |
| "learning_rate": 8.006396588486141e-06, |
| "loss": 0.4978, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.2048, |
| "grad_norm": 0.8165175944409211, |
| "learning_rate": 8.017057569296376e-06, |
| "loss": 0.4945, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.2064, |
| "grad_norm": 0.9576031761806364, |
| "learning_rate": 8.02771855010661e-06, |
| "loss": 0.5141, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.208, |
| "grad_norm": 0.8270917187108142, |
| "learning_rate": 8.038379530916846e-06, |
| "loss": 0.4736, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.2096, |
| "grad_norm": 0.8035593655771217, |
| "learning_rate": 8.049040511727079e-06, |
| "loss": 0.4772, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.2112, |
| "grad_norm": 0.8255593987600729, |
| "learning_rate": 8.059701492537314e-06, |
| "loss": 0.4688, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.2128, |
| "grad_norm": 0.8335731681930802, |
| "learning_rate": 8.070362473347548e-06, |
| "loss": 0.4711, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.2144, |
| "grad_norm": 0.8194251984563062, |
| "learning_rate": 8.081023454157784e-06, |
| "loss": 0.4454, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.216, |
| "grad_norm": 0.8702856100363334, |
| "learning_rate": 8.091684434968017e-06, |
| "loss": 0.5012, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.2176, |
| "grad_norm": 0.9075441167906476, |
| "learning_rate": 8.102345415778252e-06, |
| "loss": 0.4892, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.2192, |
| "grad_norm": 0.8005134308272335, |
| "learning_rate": 8.113006396588486e-06, |
| "loss": 0.5026, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.2208, |
| "grad_norm": 0.7771323696086689, |
| "learning_rate": 8.123667377398723e-06, |
| "loss": 0.4886, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.2224, |
| "grad_norm": 0.78717220004623, |
| "learning_rate": 8.134328358208955e-06, |
| "loss": 0.4892, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.224, |
| "grad_norm": 0.7628253673408774, |
| "learning_rate": 8.14498933901919e-06, |
| "loss": 0.4567, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.2256, |
| "grad_norm": 0.7703212238803351, |
| "learning_rate": 8.155650319829424e-06, |
| "loss": 0.4509, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.2272, |
| "grad_norm": 0.9016403747699305, |
| "learning_rate": 8.16631130063966e-06, |
| "loss": 0.4967, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.2288000000000001, |
| "grad_norm": 0.8393187669313528, |
| "learning_rate": 8.176972281449893e-06, |
| "loss": 0.4923, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.2304, |
| "grad_norm": 0.8364970899148162, |
| "learning_rate": 8.187633262260128e-06, |
| "loss": 0.4933, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.232, |
| "grad_norm": 1.036196569024722, |
| "learning_rate": 8.198294243070363e-06, |
| "loss": 0.5072, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.2336, |
| "grad_norm": 0.784372659669558, |
| "learning_rate": 8.208955223880599e-06, |
| "loss": 0.4837, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.2352, |
| "grad_norm": 0.8506324572550054, |
| "learning_rate": 8.219616204690832e-06, |
| "loss": 0.486, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.2368000000000001, |
| "grad_norm": 0.8299386307354703, |
| "learning_rate": 8.230277185501066e-06, |
| "loss": 0.4742, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.2384, |
| "grad_norm": 0.7949690986265776, |
| "learning_rate": 8.2409381663113e-06, |
| "loss": 0.5153, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.24, |
| "grad_norm": 0.81322959927153, |
| "learning_rate": 8.251599147121537e-06, |
| "loss": 0.4916, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.2416, |
| "grad_norm": 0.7506748375530281, |
| "learning_rate": 8.26226012793177e-06, |
| "loss": 0.4909, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.2432, |
| "grad_norm": 0.869003804560298, |
| "learning_rate": 8.272921108742004e-06, |
| "loss": 0.4952, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.2448, |
| "grad_norm": 0.7828796386986692, |
| "learning_rate": 8.283582089552239e-06, |
| "loss": 0.491, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.2464, |
| "grad_norm": 0.943803876658911, |
| "learning_rate": 8.294243070362475e-06, |
| "loss": 0.521, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.248, |
| "grad_norm": 0.9284743039559729, |
| "learning_rate": 8.304904051172708e-06, |
| "loss": 0.5005, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.2496, |
| "grad_norm": 0.8563783132882432, |
| "learning_rate": 8.315565031982942e-06, |
| "loss": 0.4965, |
| "step": 781 |
| }, |
| { |
| "epoch": 1.2511999999999999, |
| "grad_norm": 0.9414147179698323, |
| "learning_rate": 8.326226012793177e-06, |
| "loss": 0.5101, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.2528000000000001, |
| "grad_norm": 0.847176708925908, |
| "learning_rate": 8.336886993603413e-06, |
| "loss": 0.4997, |
| "step": 783 |
| }, |
| { |
| "epoch": 1.2544, |
| "grad_norm": 0.8922585077194567, |
| "learning_rate": 8.347547974413648e-06, |
| "loss": 0.4812, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.256, |
| "grad_norm": 0.8492575450825625, |
| "learning_rate": 8.35820895522388e-06, |
| "loss": 0.4973, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.2576, |
| "grad_norm": 0.769955130113286, |
| "learning_rate": 8.368869936034117e-06, |
| "loss": 0.4798, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.2591999999999999, |
| "grad_norm": 0.8808043798910176, |
| "learning_rate": 8.379530916844351e-06, |
| "loss": 0.4881, |
| "step": 787 |
| }, |
| { |
| "epoch": 1.2608, |
| "grad_norm": 0.7911441145492251, |
| "learning_rate": 8.390191897654586e-06, |
| "loss": 0.4781, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.2624, |
| "grad_norm": 0.8831191122354372, |
| "learning_rate": 8.400852878464819e-06, |
| "loss": 0.5249, |
| "step": 789 |
| }, |
| { |
| "epoch": 1.264, |
| "grad_norm": 0.8490944822273508, |
| "learning_rate": 8.411513859275055e-06, |
| "loss": 0.496, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.2656, |
| "grad_norm": 0.9034453286543956, |
| "learning_rate": 8.42217484008529e-06, |
| "loss": 0.4998, |
| "step": 791 |
| }, |
| { |
| "epoch": 1.2671999999999999, |
| "grad_norm": 0.8186106542485699, |
| "learning_rate": 8.432835820895524e-06, |
| "loss": 0.4612, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.2688, |
| "grad_norm": 0.8617138344306301, |
| "learning_rate": 8.443496801705757e-06, |
| "loss": 0.4476, |
| "step": 793 |
| }, |
| { |
| "epoch": 1.2704, |
| "grad_norm": 0.7710685418084188, |
| "learning_rate": 8.454157782515993e-06, |
| "loss": 0.5233, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.272, |
| "grad_norm": 0.8538109064578701, |
| "learning_rate": 8.464818763326227e-06, |
| "loss": 0.4895, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.2736, |
| "grad_norm": 0.807083428196643, |
| "learning_rate": 8.475479744136462e-06, |
| "loss": 0.5115, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.2752, |
| "grad_norm": 0.7951784742912327, |
| "learning_rate": 8.486140724946695e-06, |
| "loss": 0.4875, |
| "step": 797 |
| }, |
| { |
| "epoch": 1.2768, |
| "grad_norm": 0.8166159349941166, |
| "learning_rate": 8.496801705756931e-06, |
| "loss": 0.4889, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.2784, |
| "grad_norm": 0.7561811800967494, |
| "learning_rate": 8.507462686567165e-06, |
| "loss": 0.4948, |
| "step": 799 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 0.8170354309076975, |
| "learning_rate": 8.5181236673774e-06, |
| "loss": 0.4801, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.2816, |
| "grad_norm": 0.8325732227308348, |
| "learning_rate": 8.528784648187633e-06, |
| "loss": 0.4771, |
| "step": 801 |
| }, |
| { |
| "epoch": 1.2832, |
| "grad_norm": 0.8532852379008969, |
| "learning_rate": 8.539445628997869e-06, |
| "loss": 0.5032, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.2848, |
| "grad_norm": 0.8364157169622278, |
| "learning_rate": 8.550106609808104e-06, |
| "loss": 0.4926, |
| "step": 803 |
| }, |
| { |
| "epoch": 1.2864, |
| "grad_norm": 0.8198265344617243, |
| "learning_rate": 8.560767590618338e-06, |
| "loss": 0.4953, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.288, |
| "grad_norm": 0.7676253771661686, |
| "learning_rate": 8.571428571428571e-06, |
| "loss": 0.5121, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.2896, |
| "grad_norm": 0.8178141149780725, |
| "learning_rate": 8.582089552238807e-06, |
| "loss": 0.4941, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.2912, |
| "grad_norm": 0.727794607570312, |
| "learning_rate": 8.592750533049042e-06, |
| "loss": 0.4691, |
| "step": 807 |
| }, |
| { |
| "epoch": 1.2928, |
| "grad_norm": 0.8254744891795319, |
| "learning_rate": 8.603411513859276e-06, |
| "loss": 0.4754, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.2944, |
| "grad_norm": 0.8217573350822549, |
| "learning_rate": 8.614072494669509e-06, |
| "loss": 0.5127, |
| "step": 809 |
| }, |
| { |
| "epoch": 1.296, |
| "grad_norm": 0.8681921933879128, |
| "learning_rate": 8.624733475479745e-06, |
| "loss": 0.4767, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.2976, |
| "grad_norm": 0.9449479811028989, |
| "learning_rate": 8.63539445628998e-06, |
| "loss": 0.4863, |
| "step": 811 |
| }, |
| { |
| "epoch": 1.2992, |
| "grad_norm": 0.8262347272337431, |
| "learning_rate": 8.646055437100214e-06, |
| "loss": 0.4775, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.3008, |
| "grad_norm": 0.8851702609211491, |
| "learning_rate": 8.656716417910447e-06, |
| "loss": 0.4906, |
| "step": 813 |
| }, |
| { |
| "epoch": 1.3024, |
| "grad_norm": 0.8049544924013474, |
| "learning_rate": 8.667377398720683e-06, |
| "loss": 0.4872, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.304, |
| "grad_norm": 0.9884021117715628, |
| "learning_rate": 8.678038379530918e-06, |
| "loss": 0.5038, |
| "step": 815 |
| }, |
| { |
| "epoch": 1.3056, |
| "grad_norm": 0.7869006872785753, |
| "learning_rate": 8.688699360341152e-06, |
| "loss": 0.4882, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.3072, |
| "grad_norm": 0.7954379611740962, |
| "learning_rate": 8.699360341151387e-06, |
| "loss": 0.5194, |
| "step": 817 |
| }, |
| { |
| "epoch": 1.3088, |
| "grad_norm": 0.8058906484306712, |
| "learning_rate": 8.710021321961621e-06, |
| "loss": 0.4767, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.3104, |
| "grad_norm": 0.7928405824005407, |
| "learning_rate": 8.720682302771856e-06, |
| "loss": 0.4954, |
| "step": 819 |
| }, |
| { |
| "epoch": 1.312, |
| "grad_norm": 0.8268634860285111, |
| "learning_rate": 8.73134328358209e-06, |
| "loss": 0.5298, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.3136, |
| "grad_norm": 0.7680714308955621, |
| "learning_rate": 8.742004264392325e-06, |
| "loss": 0.4745, |
| "step": 821 |
| }, |
| { |
| "epoch": 1.3152, |
| "grad_norm": 0.8226721900025242, |
| "learning_rate": 8.75266524520256e-06, |
| "loss": 0.4834, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.3168, |
| "grad_norm": 0.8588142531758319, |
| "learning_rate": 8.763326226012794e-06, |
| "loss": 0.4995, |
| "step": 823 |
| }, |
| { |
| "epoch": 1.3184, |
| "grad_norm": 0.7973825718924447, |
| "learning_rate": 8.773987206823029e-06, |
| "loss": 0.471, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.32, |
| "grad_norm": 0.8196008578787768, |
| "learning_rate": 8.784648187633263e-06, |
| "loss": 0.4434, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.3216, |
| "grad_norm": 0.821898919951824, |
| "learning_rate": 8.795309168443498e-06, |
| "loss": 0.4932, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.3232, |
| "grad_norm": 0.7429051792355805, |
| "learning_rate": 8.805970149253732e-06, |
| "loss": 0.487, |
| "step": 827 |
| }, |
| { |
| "epoch": 1.3248, |
| "grad_norm": 0.7808144550211977, |
| "learning_rate": 8.816631130063967e-06, |
| "loss": 0.4848, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.3264, |
| "grad_norm": 0.75406464373619, |
| "learning_rate": 8.827292110874201e-06, |
| "loss": 0.4797, |
| "step": 829 |
| }, |
| { |
| "epoch": 1.328, |
| "grad_norm": 0.8474425214756218, |
| "learning_rate": 8.837953091684436e-06, |
| "loss": 0.4983, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.3296000000000001, |
| "grad_norm": 0.8801019534613489, |
| "learning_rate": 8.84861407249467e-06, |
| "loss": 0.5018, |
| "step": 831 |
| }, |
| { |
| "epoch": 1.3312, |
| "grad_norm": 0.7435162835174438, |
| "learning_rate": 8.859275053304905e-06, |
| "loss": 0.4638, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.3328, |
| "grad_norm": 1.0349062751171558, |
| "learning_rate": 8.86993603411514e-06, |
| "loss": 0.5137, |
| "step": 833 |
| }, |
| { |
| "epoch": 1.3344, |
| "grad_norm": 0.8071414440849783, |
| "learning_rate": 8.880597014925374e-06, |
| "loss": 0.5009, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.336, |
| "grad_norm": 0.9477676903674344, |
| "learning_rate": 8.891257995735608e-06, |
| "loss": 0.4892, |
| "step": 835 |
| }, |
| { |
| "epoch": 1.3376000000000001, |
| "grad_norm": 0.8771818428184452, |
| "learning_rate": 8.901918976545843e-06, |
| "loss": 0.4853, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.3392, |
| "grad_norm": 0.9525100546819315, |
| "learning_rate": 8.912579957356077e-06, |
| "loss": 0.5087, |
| "step": 837 |
| }, |
| { |
| "epoch": 1.3408, |
| "grad_norm": 1.0250030593240957, |
| "learning_rate": 8.923240938166312e-06, |
| "loss": 0.4976, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.3424, |
| "grad_norm": 0.8329762799790746, |
| "learning_rate": 8.933901918976547e-06, |
| "loss": 0.4512, |
| "step": 839 |
| }, |
| { |
| "epoch": 1.3439999999999999, |
| "grad_norm": 0.9725783256416181, |
| "learning_rate": 8.944562899786781e-06, |
| "loss": 0.4723, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.3456000000000001, |
| "grad_norm": 0.7926950757963382, |
| "learning_rate": 8.955223880597016e-06, |
| "loss": 0.4812, |
| "step": 841 |
| }, |
| { |
| "epoch": 1.3472, |
| "grad_norm": 0.9029107439242626, |
| "learning_rate": 8.96588486140725e-06, |
| "loss": 0.4734, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.3488, |
| "grad_norm": 0.8609612617899417, |
| "learning_rate": 8.976545842217485e-06, |
| "loss": 0.4706, |
| "step": 843 |
| }, |
| { |
| "epoch": 1.3504, |
| "grad_norm": 0.8609639077025752, |
| "learning_rate": 8.987206823027719e-06, |
| "loss": 0.4698, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.3519999999999999, |
| "grad_norm": 0.8751160804112281, |
| "learning_rate": 8.997867803837954e-06, |
| "loss": 0.4864, |
| "step": 845 |
| }, |
| { |
| "epoch": 1.3536000000000001, |
| "grad_norm": 0.9006318823556566, |
| "learning_rate": 9.008528784648188e-06, |
| "loss": 0.5023, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.3552, |
| "grad_norm": 0.8527123853283762, |
| "learning_rate": 9.019189765458423e-06, |
| "loss": 0.4994, |
| "step": 847 |
| }, |
| { |
| "epoch": 1.3568, |
| "grad_norm": 0.8599406865687744, |
| "learning_rate": 9.029850746268657e-06, |
| "loss": 0.4874, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.3584, |
| "grad_norm": 0.8722361781773674, |
| "learning_rate": 9.040511727078892e-06, |
| "loss": 0.5227, |
| "step": 849 |
| }, |
| { |
| "epoch": 1.3599999999999999, |
| "grad_norm": 0.8870612314576228, |
| "learning_rate": 9.051172707889126e-06, |
| "loss": 0.5151, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.3616, |
| "grad_norm": 0.8134429554681971, |
| "learning_rate": 9.06183368869936e-06, |
| "loss": 0.4919, |
| "step": 851 |
| }, |
| { |
| "epoch": 1.3632, |
| "grad_norm": 0.7863555698546318, |
| "learning_rate": 9.072494669509595e-06, |
| "loss": 0.4766, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.3648, |
| "grad_norm": 0.8186764343933153, |
| "learning_rate": 9.08315565031983e-06, |
| "loss": 0.5021, |
| "step": 853 |
| }, |
| { |
| "epoch": 1.3664, |
| "grad_norm": 0.8219695239391486, |
| "learning_rate": 9.093816631130064e-06, |
| "loss": 0.5007, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.3679999999999999, |
| "grad_norm": 0.841769164632584, |
| "learning_rate": 9.104477611940299e-06, |
| "loss": 0.4914, |
| "step": 855 |
| }, |
| { |
| "epoch": 1.3696, |
| "grad_norm": 0.8330514656539206, |
| "learning_rate": 9.115138592750533e-06, |
| "loss": 0.4987, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.3712, |
| "grad_norm": 0.811133989438271, |
| "learning_rate": 9.125799573560768e-06, |
| "loss": 0.462, |
| "step": 857 |
| }, |
| { |
| "epoch": 1.3728, |
| "grad_norm": 0.8633384492304504, |
| "learning_rate": 9.136460554371003e-06, |
| "loss": 0.4963, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.3744, |
| "grad_norm": 0.8410989072416641, |
| "learning_rate": 9.147121535181237e-06, |
| "loss": 0.4977, |
| "step": 859 |
| }, |
| { |
| "epoch": 1.376, |
| "grad_norm": 0.806728983493438, |
| "learning_rate": 9.157782515991472e-06, |
| "loss": 0.4624, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.3776, |
| "grad_norm": 0.806341154096036, |
| "learning_rate": 9.168443496801706e-06, |
| "loss": 0.4658, |
| "step": 861 |
| }, |
| { |
| "epoch": 1.3792, |
| "grad_norm": 0.8038719681175753, |
| "learning_rate": 9.17910447761194e-06, |
| "loss": 0.4697, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.3808, |
| "grad_norm": 0.8714151874327415, |
| "learning_rate": 9.189765458422175e-06, |
| "loss": 0.5101, |
| "step": 863 |
| }, |
| { |
| "epoch": 1.3824, |
| "grad_norm": 0.8780911686986344, |
| "learning_rate": 9.200426439232411e-06, |
| "loss": 0.5252, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.384, |
| "grad_norm": 0.8516702000502967, |
| "learning_rate": 9.211087420042644e-06, |
| "loss": 0.4838, |
| "step": 865 |
| }, |
| { |
| "epoch": 1.3856, |
| "grad_norm": 1.0025506106485897, |
| "learning_rate": 9.221748400852879e-06, |
| "loss": 0.5069, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.3872, |
| "grad_norm": 0.7996027724204295, |
| "learning_rate": 9.232409381663113e-06, |
| "loss": 0.5178, |
| "step": 867 |
| }, |
| { |
| "epoch": 1.3888, |
| "grad_norm": 1.080490918542818, |
| "learning_rate": 9.24307036247335e-06, |
| "loss": 0.5032, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.3904, |
| "grad_norm": 0.8444844807264513, |
| "learning_rate": 9.253731343283582e-06, |
| "loss": 0.4701, |
| "step": 869 |
| }, |
| { |
| "epoch": 1.392, |
| "grad_norm": 0.899927637543412, |
| "learning_rate": 9.264392324093817e-06, |
| "loss": 0.4904, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.3936, |
| "grad_norm": 0.8718461575504228, |
| "learning_rate": 9.275053304904051e-06, |
| "loss": 0.4855, |
| "step": 871 |
| }, |
| { |
| "epoch": 1.3952, |
| "grad_norm": 0.9337795759969514, |
| "learning_rate": 9.285714285714288e-06, |
| "loss": 0.5186, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.3968, |
| "grad_norm": 0.8199842573828481, |
| "learning_rate": 9.29637526652452e-06, |
| "loss": 0.4707, |
| "step": 873 |
| }, |
| { |
| "epoch": 1.3984, |
| "grad_norm": 0.8652313612196955, |
| "learning_rate": 9.307036247334755e-06, |
| "loss": 0.4997, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.8176745862255748, |
| "learning_rate": 9.31769722814499e-06, |
| "loss": 0.4759, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.4016, |
| "grad_norm": 0.9753918720629351, |
| "learning_rate": 9.328358208955226e-06, |
| "loss": 0.4631, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.4032, |
| "grad_norm": 0.8891976160972742, |
| "learning_rate": 9.339019189765458e-06, |
| "loss": 0.5, |
| "step": 877 |
| }, |
| { |
| "epoch": 1.4048, |
| "grad_norm": 0.8913833986097975, |
| "learning_rate": 9.349680170575693e-06, |
| "loss": 0.5187, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.4064, |
| "grad_norm": 0.9274476926254841, |
| "learning_rate": 9.36034115138593e-06, |
| "loss": 0.4998, |
| "step": 879 |
| }, |
| { |
| "epoch": 1.408, |
| "grad_norm": 0.787290628319696, |
| "learning_rate": 9.371002132196164e-06, |
| "loss": 0.4704, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.4096, |
| "grad_norm": 0.7814721109331324, |
| "learning_rate": 9.381663113006397e-06, |
| "loss": 0.4694, |
| "step": 881 |
| }, |
| { |
| "epoch": 1.4112, |
| "grad_norm": 0.7991738854610555, |
| "learning_rate": 9.392324093816631e-06, |
| "loss": 0.4449, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.4128, |
| "grad_norm": 0.8389584825409092, |
| "learning_rate": 9.402985074626867e-06, |
| "loss": 0.4828, |
| "step": 883 |
| }, |
| { |
| "epoch": 1.4144, |
| "grad_norm": 0.8594585961944166, |
| "learning_rate": 9.413646055437102e-06, |
| "loss": 0.4592, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.416, |
| "grad_norm": 0.8488703802616425, |
| "learning_rate": 9.424307036247335e-06, |
| "loss": 0.4867, |
| "step": 885 |
| }, |
| { |
| "epoch": 1.4176, |
| "grad_norm": 0.7909492063396593, |
| "learning_rate": 9.43496801705757e-06, |
| "loss": 0.4714, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.4192, |
| "grad_norm": 0.8665486508087874, |
| "learning_rate": 9.445628997867805e-06, |
| "loss": 0.4703, |
| "step": 887 |
| }, |
| { |
| "epoch": 1.4208, |
| "grad_norm": 0.7916642549445045, |
| "learning_rate": 9.45628997867804e-06, |
| "loss": 0.4897, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.4224, |
| "grad_norm": 0.8124562469349942, |
| "learning_rate": 9.466950959488273e-06, |
| "loss": 0.474, |
| "step": 889 |
| }, |
| { |
| "epoch": 1.424, |
| "grad_norm": 0.7619798562274529, |
| "learning_rate": 9.477611940298507e-06, |
| "loss": 0.4799, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.4256, |
| "grad_norm": 0.785232437222393, |
| "learning_rate": 9.488272921108744e-06, |
| "loss": 0.5038, |
| "step": 891 |
| }, |
| { |
| "epoch": 1.4272, |
| "grad_norm": 0.8071136715131856, |
| "learning_rate": 9.498933901918978e-06, |
| "loss": 0.5083, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.4288, |
| "grad_norm": 0.75611163321337, |
| "learning_rate": 9.509594882729211e-06, |
| "loss": 0.47, |
| "step": 893 |
| }, |
| { |
| "epoch": 1.4304000000000001, |
| "grad_norm": 0.8947031000453353, |
| "learning_rate": 9.520255863539445e-06, |
| "loss": 0.5253, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.432, |
| "grad_norm": 0.7777694756552822, |
| "learning_rate": 9.530916844349682e-06, |
| "loss": 0.5, |
| "step": 895 |
| }, |
| { |
| "epoch": 1.4336, |
| "grad_norm": 0.8413775097766173, |
| "learning_rate": 9.541577825159916e-06, |
| "loss": 0.5227, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.4352, |
| "grad_norm": 0.7595043259234036, |
| "learning_rate": 9.552238805970149e-06, |
| "loss": 0.4772, |
| "step": 897 |
| }, |
| { |
| "epoch": 1.4368, |
| "grad_norm": 0.8124697261241832, |
| "learning_rate": 9.562899786780384e-06, |
| "loss": 0.4981, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.4384000000000001, |
| "grad_norm": 0.859236848802032, |
| "learning_rate": 9.57356076759062e-06, |
| "loss": 0.4693, |
| "step": 899 |
| }, |
| { |
| "epoch": 1.44, |
| "grad_norm": 0.8104200026032801, |
| "learning_rate": 9.584221748400854e-06, |
| "loss": 0.4771, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.4416, |
| "grad_norm": 0.8584614348837653, |
| "learning_rate": 9.594882729211089e-06, |
| "loss": 0.5066, |
| "step": 901 |
| }, |
| { |
| "epoch": 1.4432, |
| "grad_norm": 0.8259724591743942, |
| "learning_rate": 9.605543710021322e-06, |
| "loss": 0.477, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.4447999999999999, |
| "grad_norm": 0.817674349808855, |
| "learning_rate": 9.616204690831558e-06, |
| "loss": 0.4648, |
| "step": 903 |
| }, |
| { |
| "epoch": 1.4464000000000001, |
| "grad_norm": 0.8859816874161797, |
| "learning_rate": 9.626865671641792e-06, |
| "loss": 0.4618, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.448, |
| "grad_norm": 0.8575821423889298, |
| "learning_rate": 9.637526652452027e-06, |
| "loss": 0.5056, |
| "step": 905 |
| }, |
| { |
| "epoch": 1.4496, |
| "grad_norm": 0.9607868269874424, |
| "learning_rate": 9.64818763326226e-06, |
| "loss": 0.4691, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.4512, |
| "grad_norm": 0.8672657067709886, |
| "learning_rate": 9.658848614072496e-06, |
| "loss": 0.4962, |
| "step": 907 |
| }, |
| { |
| "epoch": 1.4527999999999999, |
| "grad_norm": 1.0445509426721418, |
| "learning_rate": 9.66950959488273e-06, |
| "loss": 0.476, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.4544000000000001, |
| "grad_norm": 0.9684439200424949, |
| "learning_rate": 9.680170575692965e-06, |
| "loss": 0.4726, |
| "step": 909 |
| }, |
| { |
| "epoch": 1.456, |
| "grad_norm": 0.7742572368999419, |
| "learning_rate": 9.6908315565032e-06, |
| "loss": 0.4998, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.4576, |
| "grad_norm": 0.8676632527642012, |
| "learning_rate": 9.701492537313434e-06, |
| "loss": 0.4948, |
| "step": 911 |
| }, |
| { |
| "epoch": 1.4592, |
| "grad_norm": 0.7629886760452764, |
| "learning_rate": 9.712153518123669e-06, |
| "loss": 0.4777, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.4607999999999999, |
| "grad_norm": 0.9709918958387708, |
| "learning_rate": 9.722814498933903e-06, |
| "loss": 0.4813, |
| "step": 913 |
| }, |
| { |
| "epoch": 1.4624, |
| "grad_norm": 0.8319565078199239, |
| "learning_rate": 9.733475479744138e-06, |
| "loss": 0.483, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.464, |
| "grad_norm": 0.9560500791255669, |
| "learning_rate": 9.744136460554372e-06, |
| "loss": 0.4597, |
| "step": 915 |
| }, |
| { |
| "epoch": 1.4656, |
| "grad_norm": 0.8551324757472956, |
| "learning_rate": 9.754797441364607e-06, |
| "loss": 0.483, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.4672, |
| "grad_norm": 0.8819372002315914, |
| "learning_rate": 9.765458422174841e-06, |
| "loss": 0.5022, |
| "step": 917 |
| }, |
| { |
| "epoch": 1.4687999999999999, |
| "grad_norm": 0.8919454392102223, |
| "learning_rate": 9.776119402985076e-06, |
| "loss": 0.4811, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.4704, |
| "grad_norm": 0.7956928740696931, |
| "learning_rate": 9.78678038379531e-06, |
| "loss": 0.5032, |
| "step": 919 |
| }, |
| { |
| "epoch": 1.472, |
| "grad_norm": 0.9460431224219428, |
| "learning_rate": 9.797441364605545e-06, |
| "loss": 0.5013, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.4736, |
| "grad_norm": 0.7563920215009783, |
| "learning_rate": 9.80810234541578e-06, |
| "loss": 0.4912, |
| "step": 921 |
| }, |
| { |
| "epoch": 1.4752, |
| "grad_norm": 0.7958797181312411, |
| "learning_rate": 9.818763326226014e-06, |
| "loss": 0.4845, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.4768, |
| "grad_norm": 0.8749244767024981, |
| "learning_rate": 9.829424307036248e-06, |
| "loss": 0.4977, |
| "step": 923 |
| }, |
| { |
| "epoch": 1.4784, |
| "grad_norm": 0.7973735882306109, |
| "learning_rate": 9.840085287846483e-06, |
| "loss": 0.4984, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.48, |
| "grad_norm": 0.8868550866351855, |
| "learning_rate": 9.850746268656717e-06, |
| "loss": 0.4926, |
| "step": 925 |
| }, |
| { |
| "epoch": 1.4816, |
| "grad_norm": 0.9107461226553807, |
| "learning_rate": 9.861407249466952e-06, |
| "loss": 0.4739, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.4832, |
| "grad_norm": 0.9588502326001234, |
| "learning_rate": 9.872068230277187e-06, |
| "loss": 0.4782, |
| "step": 927 |
| }, |
| { |
| "epoch": 1.4848, |
| "grad_norm": 0.8364354427376426, |
| "learning_rate": 9.882729211087421e-06, |
| "loss": 0.4477, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.4864, |
| "grad_norm": 0.7874286019641639, |
| "learning_rate": 9.893390191897656e-06, |
| "loss": 0.4413, |
| "step": 929 |
| }, |
| { |
| "epoch": 1.488, |
| "grad_norm": 0.978506341009771, |
| "learning_rate": 9.90405117270789e-06, |
| "loss": 0.4612, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.4896, |
| "grad_norm": 0.8490652879114352, |
| "learning_rate": 9.914712153518125e-06, |
| "loss": 0.4751, |
| "step": 931 |
| }, |
| { |
| "epoch": 1.4912, |
| "grad_norm": 1.0018664232526466, |
| "learning_rate": 9.925373134328359e-06, |
| "loss": 0.4671, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.4928, |
| "grad_norm": 0.8614595050477257, |
| "learning_rate": 9.936034115138594e-06, |
| "loss": 0.4939, |
| "step": 933 |
| }, |
| { |
| "epoch": 1.4944, |
| "grad_norm": 0.8324806829999502, |
| "learning_rate": 9.946695095948828e-06, |
| "loss": 0.457, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.496, |
| "grad_norm": 0.8503951859763951, |
| "learning_rate": 9.957356076759063e-06, |
| "loss": 0.4725, |
| "step": 935 |
| }, |
| { |
| "epoch": 1.4976, |
| "grad_norm": 0.8755011425954693, |
| "learning_rate": 9.968017057569297e-06, |
| "loss": 0.4851, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.4992, |
| "grad_norm": 0.9263738559864301, |
| "learning_rate": 9.978678038379532e-06, |
| "loss": 0.4885, |
| "step": 937 |
| }, |
| { |
| "epoch": 1.5008, |
| "grad_norm": 0.9673864844853349, |
| "learning_rate": 9.989339019189766e-06, |
| "loss": 0.4881, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.5024, |
| "grad_norm": 0.9290777102985807, |
| "learning_rate": 1e-05, |
| "loss": 0.4821, |
| "step": 939 |
| }, |
| { |
| "epoch": 1.504, |
| "grad_norm": 0.8458709982219547, |
| "learning_rate": 9.999971896515836e-06, |
| "loss": 0.4915, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.5056, |
| "grad_norm": 1.0858511586378827, |
| "learning_rate": 9.999887586379264e-06, |
| "loss": 0.4756, |
| "step": 941 |
| }, |
| { |
| "epoch": 1.5072, |
| "grad_norm": 0.8354556740923118, |
| "learning_rate": 9.99974707053805e-06, |
| "loss": 0.4847, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.5088, |
| "grad_norm": 0.9898179436259126, |
| "learning_rate": 9.999550350571785e-06, |
| "loss": 0.4801, |
| "step": 943 |
| }, |
| { |
| "epoch": 1.5104, |
| "grad_norm": 0.8793163891262439, |
| "learning_rate": 9.999297428691878e-06, |
| "loss": 0.4699, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.512, |
| "grad_norm": 0.9930914467085877, |
| "learning_rate": 9.998988307741521e-06, |
| "loss": 0.5036, |
| "step": 945 |
| }, |
| { |
| "epoch": 1.5135999999999998, |
| "grad_norm": 0.8909325985734913, |
| "learning_rate": 9.998622991195668e-06, |
| "loss": 0.4843, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.5152, |
| "grad_norm": 0.8906776966778176, |
| "learning_rate": 9.998201483160981e-06, |
| "loss": 0.4924, |
| "step": 947 |
| }, |
| { |
| "epoch": 1.5168, |
| "grad_norm": 1.0210001302818335, |
| "learning_rate": 9.997723788375803e-06, |
| "loss": 0.4908, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.5184, |
| "grad_norm": 0.8343669235401521, |
| "learning_rate": 9.997189912210086e-06, |
| "loss": 0.4665, |
| "step": 949 |
| }, |
| { |
| "epoch": 1.52, |
| "grad_norm": 0.8811832399885232, |
| "learning_rate": 9.996599860665342e-06, |
| "loss": 0.4589, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.5215999999999998, |
| "grad_norm": 0.8437250504573268, |
| "learning_rate": 9.995953640374574e-06, |
| "loss": 0.5018, |
| "step": 951 |
| }, |
| { |
| "epoch": 1.5232, |
| "grad_norm": 0.9152349301031418, |
| "learning_rate": 9.9952512586022e-06, |
| "loss": 0.488, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.5248, |
| "grad_norm": 0.7795464645786981, |
| "learning_rate": 9.994492723243965e-06, |
| "loss": 0.4933, |
| "step": 953 |
| }, |
| { |
| "epoch": 1.5264, |
| "grad_norm": 0.763019952273118, |
| "learning_rate": 9.993678042826869e-06, |
| "loss": 0.4714, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.528, |
| "grad_norm": 0.886034738901285, |
| "learning_rate": 9.99280722650905e-06, |
| "loss": 0.5121, |
| "step": 955 |
| }, |
| { |
| "epoch": 1.5295999999999998, |
| "grad_norm": 0.7455654076044084, |
| "learning_rate": 9.991880284079704e-06, |
| "loss": 0.4568, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.5312000000000001, |
| "grad_norm": 0.8682016590039219, |
| "learning_rate": 9.99089722595895e-06, |
| "loss": 0.4554, |
| "step": 957 |
| }, |
| { |
| "epoch": 1.5328, |
| "grad_norm": 0.8015242545658919, |
| "learning_rate": 9.989858063197735e-06, |
| "loss": 0.4814, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.5344, |
| "grad_norm": 0.8341406535742181, |
| "learning_rate": 9.988762807477694e-06, |
| "loss": 0.4821, |
| "step": 959 |
| }, |
| { |
| "epoch": 1.536, |
| "grad_norm": 0.8709933534074977, |
| "learning_rate": 9.987611471111027e-06, |
| "loss": 0.4877, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.5375999999999999, |
| "grad_norm": 0.7987019576052976, |
| "learning_rate": 9.986404067040363e-06, |
| "loss": 0.4707, |
| "step": 961 |
| }, |
| { |
| "epoch": 1.5392000000000001, |
| "grad_norm": 0.8902420538131095, |
| "learning_rate": 9.985140608838604e-06, |
| "loss": 0.5248, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.5408, |
| "grad_norm": 0.7170194508042936, |
| "learning_rate": 9.98382111070878e-06, |
| "loss": 0.424, |
| "step": 963 |
| }, |
| { |
| "epoch": 1.5424, |
| "grad_norm": 0.914961108671794, |
| "learning_rate": 9.982445587483893e-06, |
| "loss": 0.4805, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.544, |
| "grad_norm": 0.9088053940764901, |
| "learning_rate": 9.981014054626737e-06, |
| "loss": 0.5042, |
| "step": 965 |
| }, |
| { |
| "epoch": 1.5455999999999999, |
| "grad_norm": 0.8489619285484816, |
| "learning_rate": 9.979526528229737e-06, |
| "loss": 0.4956, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.5472000000000001, |
| "grad_norm": 0.8164706702959176, |
| "learning_rate": 9.977983025014765e-06, |
| "loss": 0.4689, |
| "step": 967 |
| }, |
| { |
| "epoch": 1.5488, |
| "grad_norm": 0.7566483029226344, |
| "learning_rate": 9.976383562332946e-06, |
| "loss": 0.5193, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.5504, |
| "grad_norm": 0.880276594890666, |
| "learning_rate": 9.974728158164471e-06, |
| "loss": 0.4655, |
| "step": 969 |
| }, |
| { |
| "epoch": 1.552, |
| "grad_norm": 0.8081458173299156, |
| "learning_rate": 9.973016831118389e-06, |
| "loss": 0.5069, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.5535999999999999, |
| "grad_norm": 0.8544537513815259, |
| "learning_rate": 9.971249600432403e-06, |
| "loss": 0.4798, |
| "step": 971 |
| }, |
| { |
| "epoch": 1.5552000000000001, |
| "grad_norm": 0.7504450860493904, |
| "learning_rate": 9.969426485972645e-06, |
| "loss": 0.4867, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.5568, |
| "grad_norm": 0.8701881540311115, |
| "learning_rate": 9.967547508233466e-06, |
| "loss": 0.4684, |
| "step": 973 |
| }, |
| { |
| "epoch": 1.5584, |
| "grad_norm": 0.8852314246097367, |
| "learning_rate": 9.965612688337194e-06, |
| "loss": 0.4855, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.56, |
| "grad_norm": 0.8242831459153037, |
| "learning_rate": 9.9636220480339e-06, |
| "loss": 0.5001, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.5615999999999999, |
| "grad_norm": 1.005604417980349, |
| "learning_rate": 9.961575609701154e-06, |
| "loss": 0.4891, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.5632000000000001, |
| "grad_norm": 0.8862228251379858, |
| "learning_rate": 9.959473396343777e-06, |
| "loss": 0.4831, |
| "step": 977 |
| }, |
| { |
| "epoch": 1.5648, |
| "grad_norm": 0.864096678336817, |
| "learning_rate": 9.957315431593578e-06, |
| "loss": 0.4843, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.5664, |
| "grad_norm": 0.776906764480194, |
| "learning_rate": 9.955101739709085e-06, |
| "loss": 0.4616, |
| "step": 979 |
| }, |
| { |
| "epoch": 1.568, |
| "grad_norm": 0.8029486812052213, |
| "learning_rate": 9.952832345575283e-06, |
| "loss": 0.4757, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.5695999999999999, |
| "grad_norm": 0.8036715371775551, |
| "learning_rate": 9.950507274703323e-06, |
| "loss": 0.5217, |
| "step": 981 |
| }, |
| { |
| "epoch": 1.5712000000000002, |
| "grad_norm": 0.7765331989278372, |
| "learning_rate": 9.948126553230242e-06, |
| "loss": 0.5046, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.5728, |
| "grad_norm": 0.8150699203250963, |
| "learning_rate": 9.945690207918667e-06, |
| "loss": 0.4506, |
| "step": 983 |
| }, |
| { |
| "epoch": 1.5744, |
| "grad_norm": 0.7255927188827002, |
| "learning_rate": 9.943198266156517e-06, |
| "loss": 0.4603, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.576, |
| "grad_norm": 0.8155544836444186, |
| "learning_rate": 9.940650755956686e-06, |
| "loss": 0.4495, |
| "step": 985 |
| }, |
| { |
| "epoch": 1.5776, |
| "grad_norm": 0.7461262093206759, |
| "learning_rate": 9.938047705956746e-06, |
| "loss": 0.4772, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.5792000000000002, |
| "grad_norm": 0.8761054409345614, |
| "learning_rate": 9.935389145418599e-06, |
| "loss": 0.4763, |
| "step": 987 |
| }, |
| { |
| "epoch": 1.5808, |
| "grad_norm": 0.7085545139303683, |
| "learning_rate": 9.932675104228177e-06, |
| "loss": 0.4716, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.5824, |
| "grad_norm": 0.8608971225616455, |
| "learning_rate": 9.929905612895082e-06, |
| "loss": 0.5045, |
| "step": 989 |
| }, |
| { |
| "epoch": 1.584, |
| "grad_norm": 0.8489045991866748, |
| "learning_rate": 9.927080702552256e-06, |
| "loss": 0.4897, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.5856, |
| "grad_norm": 0.7519848393673628, |
| "learning_rate": 9.924200404955628e-06, |
| "loss": 0.5145, |
| "step": 991 |
| }, |
| { |
| "epoch": 1.5872000000000002, |
| "grad_norm": 0.7892016944926196, |
| "learning_rate": 9.921264752483761e-06, |
| "loss": 0.488, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.5888, |
| "grad_norm": 0.7995854543733288, |
| "learning_rate": 9.918273778137477e-06, |
| "loss": 0.4903, |
| "step": 993 |
| }, |
| { |
| "epoch": 1.5904, |
| "grad_norm": 0.7423383199728011, |
| "learning_rate": 9.915227515539497e-06, |
| "loss": 0.4656, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.592, |
| "grad_norm": 0.8102371092720678, |
| "learning_rate": 9.912125998934055e-06, |
| "loss": 0.5035, |
| "step": 995 |
| }, |
| { |
| "epoch": 1.5936, |
| "grad_norm": 0.8743193972829729, |
| "learning_rate": 9.908969263186525e-06, |
| "loss": 0.4819, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.5952, |
| "grad_norm": 0.832621075746147, |
| "learning_rate": 9.905757343783014e-06, |
| "loss": 0.4652, |
| "step": 997 |
| }, |
| { |
| "epoch": 1.5968, |
| "grad_norm": 0.8268268770260372, |
| "learning_rate": 9.90249027682997e-06, |
| "loss": 0.4914, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.5984, |
| "grad_norm": 0.8225814361360733, |
| "learning_rate": 9.899168099053784e-06, |
| "loss": 0.4919, |
| "step": 999 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.8339861968131481, |
| "learning_rate": 9.895790847800361e-06, |
| "loss": 0.5045, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.6016, |
| "grad_norm": 0.7750417430083946, |
| "learning_rate": 9.892358561034713e-06, |
| "loss": 0.5089, |
| "step": 1001 |
| }, |
| { |
| "epoch": 1.6032, |
| "grad_norm": 0.8973432178284, |
| "learning_rate": 9.888871277340522e-06, |
| "loss": 0.4907, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.6048, |
| "grad_norm": 0.8396657295268389, |
| "learning_rate": 9.885329035919724e-06, |
| "loss": 0.4591, |
| "step": 1003 |
| }, |
| { |
| "epoch": 1.6064, |
| "grad_norm": 0.8393541454238976, |
| "learning_rate": 9.881731876592046e-06, |
| "loss": 0.4808, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.608, |
| "grad_norm": 0.9385582992222241, |
| "learning_rate": 9.878079839794572e-06, |
| "loss": 0.494, |
| "step": 1005 |
| }, |
| { |
| "epoch": 1.6096, |
| "grad_norm": 0.830952348808366, |
| "learning_rate": 9.874372966581285e-06, |
| "loss": 0.4515, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.6112, |
| "grad_norm": 0.9056178883886478, |
| "learning_rate": 9.870611298622606e-06, |
| "loss": 0.4644, |
| "step": 1007 |
| }, |
| { |
| "epoch": 1.6128, |
| "grad_norm": 0.8817177962821087, |
| "learning_rate": 9.866794878204926e-06, |
| "loss": 0.4801, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.6143999999999998, |
| "grad_norm": 0.8449838869033015, |
| "learning_rate": 9.862923748230128e-06, |
| "loss": 0.5034, |
| "step": 1009 |
| }, |
| { |
| "epoch": 1.616, |
| "grad_norm": 0.8231314388625361, |
| "learning_rate": 9.858997952215112e-06, |
| "loss": 0.4919, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.6176, |
| "grad_norm": 0.9054712168233684, |
| "learning_rate": 9.855017534291293e-06, |
| "loss": 0.4702, |
| "step": 1011 |
| }, |
| { |
| "epoch": 1.6192, |
| "grad_norm": 0.8603610256941876, |
| "learning_rate": 9.850982539204115e-06, |
| "loss": 0.494, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.6208, |
| "grad_norm": 0.7903765631524319, |
| "learning_rate": 9.846893012312549e-06, |
| "loss": 0.4865, |
| "step": 1013 |
| }, |
| { |
| "epoch": 1.6223999999999998, |
| "grad_norm": 0.8246003235011296, |
| "learning_rate": 9.842748999588575e-06, |
| "loss": 0.4781, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.624, |
| "grad_norm": 0.875643686096793, |
| "learning_rate": 9.838550547616671e-06, |
| "loss": 0.4808, |
| "step": 1015 |
| }, |
| { |
| "epoch": 1.6256, |
| "grad_norm": 0.881772767604039, |
| "learning_rate": 9.83429770359329e-06, |
| "loss": 0.5087, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.6272, |
| "grad_norm": 0.786175611896203, |
| "learning_rate": 9.829990515326324e-06, |
| "loss": 0.4822, |
| "step": 1017 |
| }, |
| { |
| "epoch": 1.6288, |
| "grad_norm": 0.832941696772607, |
| "learning_rate": 9.825629031234574e-06, |
| "loss": 0.4578, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.6303999999999998, |
| "grad_norm": 0.8394407049227415, |
| "learning_rate": 9.821213300347198e-06, |
| "loss": 0.4995, |
| "step": 1019 |
| }, |
| { |
| "epoch": 1.6320000000000001, |
| "grad_norm": 0.8861442201395248, |
| "learning_rate": 9.816743372303166e-06, |
| "loss": 0.4272, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.6336, |
| "grad_norm": 0.8465108245209845, |
| "learning_rate": 9.812219297350699e-06, |
| "loss": 0.5211, |
| "step": 1021 |
| }, |
| { |
| "epoch": 1.6352, |
| "grad_norm": 0.9345603525137967, |
| "learning_rate": 9.807641126346704e-06, |
| "loss": 0.4861, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.6368, |
| "grad_norm": 0.7714038775151155, |
| "learning_rate": 9.803008910756203e-06, |
| "loss": 0.5068, |
| "step": 1023 |
| }, |
| { |
| "epoch": 1.6383999999999999, |
| "grad_norm": 0.9111951235060974, |
| "learning_rate": 9.798322702651754e-06, |
| "loss": 0.4941, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.6400000000000001, |
| "grad_norm": 0.7219012597652522, |
| "learning_rate": 9.793582554712873e-06, |
| "loss": 0.4612, |
| "step": 1025 |
| }, |
| { |
| "epoch": 1.6416, |
| "grad_norm": 0.9229091704285989, |
| "learning_rate": 9.788788520225421e-06, |
| "loss": 0.4793, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.6432, |
| "grad_norm": 0.883303948978402, |
| "learning_rate": 9.783940653081033e-06, |
| "loss": 0.4766, |
| "step": 1027 |
| }, |
| { |
| "epoch": 1.6448, |
| "grad_norm": 0.8053642115782714, |
| "learning_rate": 9.779039007776487e-06, |
| "loss": 0.4932, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.6463999999999999, |
| "grad_norm": 0.904105581732504, |
| "learning_rate": 9.774083639413112e-06, |
| "loss": 0.4911, |
| "step": 1029 |
| }, |
| { |
| "epoch": 1.6480000000000001, |
| "grad_norm": 0.7208310136248466, |
| "learning_rate": 9.769074603696153e-06, |
| "loss": 0.4462, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.6496, |
| "grad_norm": 0.8215946897369514, |
| "learning_rate": 9.764011956934153e-06, |
| "loss": 0.4784, |
| "step": 1031 |
| }, |
| { |
| "epoch": 1.6512, |
| "grad_norm": 0.7917541871283812, |
| "learning_rate": 9.758895756038314e-06, |
| "loss": 0.4666, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.6528, |
| "grad_norm": 0.7759384663457598, |
| "learning_rate": 9.753726058521868e-06, |
| "loss": 0.4807, |
| "step": 1033 |
| }, |
| { |
| "epoch": 1.6543999999999999, |
| "grad_norm": 0.6925254734409707, |
| "learning_rate": 9.748502922499418e-06, |
| "loss": 0.4503, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.6560000000000001, |
| "grad_norm": 0.7794587615717855, |
| "learning_rate": 9.743226406686293e-06, |
| "loss": 0.4834, |
| "step": 1035 |
| }, |
| { |
| "epoch": 1.6576, |
| "grad_norm": 0.7395717362574595, |
| "learning_rate": 9.737896570397885e-06, |
| "loss": 0.4474, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.6592, |
| "grad_norm": 0.7921173834301597, |
| "learning_rate": 9.73251347354898e-06, |
| "loss": 0.4673, |
| "step": 1037 |
| }, |
| { |
| "epoch": 1.6608, |
| "grad_norm": 0.7735229936168149, |
| "learning_rate": 9.72707717665309e-06, |
| "loss": 0.4767, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.6623999999999999, |
| "grad_norm": 0.7323984798081137, |
| "learning_rate": 9.721587740821768e-06, |
| "loss": 0.4533, |
| "step": 1039 |
| }, |
| { |
| "epoch": 1.6640000000000001, |
| "grad_norm": 0.7996750771350366, |
| "learning_rate": 9.716045227763923e-06, |
| "loss": 0.5183, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.6656, |
| "grad_norm": 0.755207320032317, |
| "learning_rate": 9.71044969978513e-06, |
| "loss": 0.525, |
| "step": 1041 |
| }, |
| { |
| "epoch": 1.6672, |
| "grad_norm": 0.8031777726974793, |
| "learning_rate": 9.704801219786915e-06, |
| "loss": 0.5058, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.6688, |
| "grad_norm": 0.7202638009085313, |
| "learning_rate": 9.699099851266071e-06, |
| "loss": 0.4838, |
| "step": 1043 |
| }, |
| { |
| "epoch": 1.6703999999999999, |
| "grad_norm": 0.7882504924842982, |
| "learning_rate": 9.693345658313923e-06, |
| "loss": 0.4797, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.6720000000000002, |
| "grad_norm": 0.7207002076199619, |
| "learning_rate": 9.68753870561562e-06, |
| "loss": 0.4625, |
| "step": 1045 |
| }, |
| { |
| "epoch": 1.6736, |
| "grad_norm": 0.8360369650104762, |
| "learning_rate": 9.681679058449402e-06, |
| "loss": 0.4921, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.6752, |
| "grad_norm": 0.7392880407370019, |
| "learning_rate": 9.675766782685874e-06, |
| "loss": 0.4671, |
| "step": 1047 |
| }, |
| { |
| "epoch": 1.6768, |
| "grad_norm": 0.901397058863532, |
| "learning_rate": 9.669801944787249e-06, |
| "loss": 0.4909, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.6784, |
| "grad_norm": 0.7554033748327394, |
| "learning_rate": 9.663784611806624e-06, |
| "loss": 0.4583, |
| "step": 1049 |
| }, |
| { |
| "epoch": 1.6800000000000002, |
| "grad_norm": 0.8472780441657755, |
| "learning_rate": 9.657714851387204e-06, |
| "loss": 0.4892, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.6816, |
| "grad_norm": 0.7982580606887658, |
| "learning_rate": 9.651592731761554e-06, |
| "loss": 0.4884, |
| "step": 1051 |
| }, |
| { |
| "epoch": 1.6832, |
| "grad_norm": 0.9345405844469842, |
| "learning_rate": 9.645418321750835e-06, |
| "loss": 0.4774, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.6848, |
| "grad_norm": 0.8228296409803656, |
| "learning_rate": 9.639191690764018e-06, |
| "loss": 0.4693, |
| "step": 1053 |
| }, |
| { |
| "epoch": 1.6864, |
| "grad_norm": 0.9276080068135167, |
| "learning_rate": 9.632912908797116e-06, |
| "loss": 0.4657, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.688, |
| "grad_norm": 0.8426690179390168, |
| "learning_rate": 9.626582046432384e-06, |
| "loss": 0.4787, |
| "step": 1055 |
| }, |
| { |
| "epoch": 1.6896, |
| "grad_norm": 0.8977757427025327, |
| "learning_rate": 9.620199174837542e-06, |
| "loss": 0.4835, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.6912, |
| "grad_norm": 0.7919301046680037, |
| "learning_rate": 9.61376436576496e-06, |
| "loss": 0.4829, |
| "step": 1057 |
| }, |
| { |
| "epoch": 1.6928, |
| "grad_norm": 0.8129688641139422, |
| "learning_rate": 9.607277691550862e-06, |
| "loss": 0.4712, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.6944, |
| "grad_norm": 0.8450952852686898, |
| "learning_rate": 9.600739225114506e-06, |
| "loss": 0.4912, |
| "step": 1059 |
| }, |
| { |
| "epoch": 1.696, |
| "grad_norm": 0.7760494775184295, |
| "learning_rate": 9.594149039957366e-06, |
| "loss": 0.4566, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.6976, |
| "grad_norm": 0.8362616732216648, |
| "learning_rate": 9.587507210162307e-06, |
| "loss": 0.4674, |
| "step": 1061 |
| }, |
| { |
| "epoch": 1.6992, |
| "grad_norm": 0.7923720181156818, |
| "learning_rate": 9.580813810392755e-06, |
| "loss": 0.4911, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.7008, |
| "grad_norm": 0.76930025123472, |
| "learning_rate": 9.57406891589185e-06, |
| "loss": 0.4443, |
| "step": 1063 |
| }, |
| { |
| "epoch": 1.7024, |
| "grad_norm": 0.7388257976016034, |
| "learning_rate": 9.567272602481607e-06, |
| "loss": 0.4625, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.704, |
| "grad_norm": 0.7435395961998326, |
| "learning_rate": 9.56042494656206e-06, |
| "loss": 0.4629, |
| "step": 1065 |
| }, |
| { |
| "epoch": 1.7056, |
| "grad_norm": 0.7445853385287796, |
| "learning_rate": 9.553526025110404e-06, |
| "loss": 0.51, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.7072, |
| "grad_norm": 0.7566025320046942, |
| "learning_rate": 9.546575915680134e-06, |
| "loss": 0.5005, |
| "step": 1067 |
| }, |
| { |
| "epoch": 1.7088, |
| "grad_norm": 0.7759422448278186, |
| "learning_rate": 9.539574696400165e-06, |
| "loss": 0.5031, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.7104, |
| "grad_norm": 0.7543277771064258, |
| "learning_rate": 9.532522445973956e-06, |
| "loss": 0.4995, |
| "step": 1069 |
| }, |
| { |
| "epoch": 1.712, |
| "grad_norm": 0.7341510333436678, |
| "learning_rate": 9.525419243678633e-06, |
| "loss": 0.4624, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.7136, |
| "grad_norm": 0.7875756776882786, |
| "learning_rate": 9.51826516936409e-06, |
| "loss": 0.4972, |
| "step": 1071 |
| }, |
| { |
| "epoch": 1.7151999999999998, |
| "grad_norm": 0.7843503644841715, |
| "learning_rate": 9.51106030345209e-06, |
| "loss": 0.4795, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.7168, |
| "grad_norm": 0.7237225873861687, |
| "learning_rate": 9.503804726935369e-06, |
| "loss": 0.4818, |
| "step": 1073 |
| }, |
| { |
| "epoch": 1.7184, |
| "grad_norm": 0.7787633601269695, |
| "learning_rate": 9.496498521376718e-06, |
| "loss": 0.495, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.72, |
| "grad_norm": 0.6977403808538022, |
| "learning_rate": 9.48914176890807e-06, |
| "loss": 0.4472, |
| "step": 1075 |
| }, |
| { |
| "epoch": 1.7216, |
| "grad_norm": 0.7696335166426286, |
| "learning_rate": 9.481734552229578e-06, |
| "loss": 0.455, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.7231999999999998, |
| "grad_norm": 0.7112621901106786, |
| "learning_rate": 9.474276954608677e-06, |
| "loss": 0.5013, |
| "step": 1077 |
| }, |
| { |
| "epoch": 1.7248, |
| "grad_norm": 0.9176947750950379, |
| "learning_rate": 9.46676905987916e-06, |
| "loss": 0.5003, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.7264, |
| "grad_norm": 1.3248973520670817, |
| "learning_rate": 9.459210952440226e-06, |
| "loss": 0.4855, |
| "step": 1079 |
| }, |
| { |
| "epoch": 1.728, |
| "grad_norm": 0.8812839955457178, |
| "learning_rate": 9.451602717255536e-06, |
| "loss": 0.4756, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.7296, |
| "grad_norm": 0.7358226984295164, |
| "learning_rate": 9.44394443985226e-06, |
| "loss": 0.4564, |
| "step": 1081 |
| }, |
| { |
| "epoch": 1.7311999999999999, |
| "grad_norm": 0.7730761571200185, |
| "learning_rate": 9.436236206320104e-06, |
| "loss": 0.4766, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.7328000000000001, |
| "grad_norm": 0.7509702127464707, |
| "learning_rate": 9.428478103310358e-06, |
| "loss": 0.4867, |
| "step": 1083 |
| }, |
| { |
| "epoch": 1.7344, |
| "grad_norm": 0.8109776958271638, |
| "learning_rate": 9.420670218034913e-06, |
| "loss": 0.4955, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.736, |
| "grad_norm": 0.7674813988649232, |
| "learning_rate": 9.412812638265279e-06, |
| "loss": 0.4891, |
| "step": 1085 |
| }, |
| { |
| "epoch": 1.7376, |
| "grad_norm": 0.8525778346012978, |
| "learning_rate": 9.404905452331605e-06, |
| "loss": 0.4575, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.7391999999999999, |
| "grad_norm": 0.7941716219885411, |
| "learning_rate": 9.39694874912168e-06, |
| "loss": 0.4816, |
| "step": 1087 |
| }, |
| { |
| "epoch": 1.7408000000000001, |
| "grad_norm": 0.7524721878502354, |
| "learning_rate": 9.38894261807994e-06, |
| "loss": 0.4736, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.7424, |
| "grad_norm": 0.7673717141057368, |
| "learning_rate": 9.380887149206453e-06, |
| "loss": 0.4751, |
| "step": 1089 |
| }, |
| { |
| "epoch": 1.744, |
| "grad_norm": 0.8733631085887028, |
| "learning_rate": 9.372782433055915e-06, |
| "loss": 0.4983, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.7456, |
| "grad_norm": 0.7790443134514797, |
| "learning_rate": 9.364628560736631e-06, |
| "loss": 0.4634, |
| "step": 1091 |
| }, |
| { |
| "epoch": 1.7471999999999999, |
| "grad_norm": 0.7347132470320543, |
| "learning_rate": 9.356425623909493e-06, |
| "loss": 0.4538, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.7488000000000001, |
| "grad_norm": 0.7984639832990776, |
| "learning_rate": 9.34817371478694e-06, |
| "loss": 0.4576, |
| "step": 1093 |
| }, |
| { |
| "epoch": 1.7504, |
| "grad_norm": 0.7949795614582291, |
| "learning_rate": 9.33987292613193e-06, |
| "loss": 0.494, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.752, |
| "grad_norm": 0.8528298981207325, |
| "learning_rate": 9.331523351256898e-06, |
| "loss": 0.4719, |
| "step": 1095 |
| }, |
| { |
| "epoch": 1.7536, |
| "grad_norm": 0.8273161679245847, |
| "learning_rate": 9.323125084022701e-06, |
| "loss": 0.5114, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.7551999999999999, |
| "grad_norm": 0.8177732465705448, |
| "learning_rate": 9.31467821883757e-06, |
| "loss": 0.4693, |
| "step": 1097 |
| }, |
| { |
| "epoch": 1.7568000000000001, |
| "grad_norm": 0.7929842641850815, |
| "learning_rate": 9.306182850656037e-06, |
| "loss": 0.5143, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.7584, |
| "grad_norm": 0.7983649940388804, |
| "learning_rate": 9.297639074977885e-06, |
| "loss": 0.4477, |
| "step": 1099 |
| }, |
| { |
| "epoch": 1.76, |
| "grad_norm": 0.8030706601108953, |
| "learning_rate": 9.289046987847058e-06, |
| "loss": 0.4946, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.7616, |
| "grad_norm": 0.7414340357556809, |
| "learning_rate": 9.280406685850587e-06, |
| "loss": 0.4696, |
| "step": 1101 |
| }, |
| { |
| "epoch": 1.7631999999999999, |
| "grad_norm": 0.7824443709979357, |
| "learning_rate": 9.271718266117512e-06, |
| "loss": 0.4979, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.7648000000000001, |
| "grad_norm": 0.7926314063991798, |
| "learning_rate": 9.262981826317778e-06, |
| "loss": 0.4782, |
| "step": 1103 |
| }, |
| { |
| "epoch": 1.7664, |
| "grad_norm": 0.7604584164739576, |
| "learning_rate": 9.254197464661143e-06, |
| "loss": 0.4895, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.768, |
| "grad_norm": 0.761316814462793, |
| "learning_rate": 9.245365279896077e-06, |
| "loss": 0.4837, |
| "step": 1105 |
| }, |
| { |
| "epoch": 1.7696, |
| "grad_norm": 0.7209835263967452, |
| "learning_rate": 9.236485371308642e-06, |
| "loss": 0.4837, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.7711999999999999, |
| "grad_norm": 0.8213247627372725, |
| "learning_rate": 9.227557838721391e-06, |
| "loss": 0.4822, |
| "step": 1107 |
| }, |
| { |
| "epoch": 1.7728000000000002, |
| "grad_norm": 0.7460338028112166, |
| "learning_rate": 9.218582782492228e-06, |
| "loss": 0.495, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.7744, |
| "grad_norm": 0.7267567965373588, |
| "learning_rate": 9.209560303513296e-06, |
| "loss": 0.4368, |
| "step": 1109 |
| }, |
| { |
| "epoch": 1.776, |
| "grad_norm": 0.7211707144951695, |
| "learning_rate": 9.200490503209831e-06, |
| "loss": 0.4996, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.7776, |
| "grad_norm": 0.7257292962586889, |
| "learning_rate": 9.191373483539032e-06, |
| "loss": 0.4519, |
| "step": 1111 |
| }, |
| { |
| "epoch": 1.7792, |
| "grad_norm": 0.7181000434476451, |
| "learning_rate": 9.182209346988901e-06, |
| "loss": 0.4877, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.7808000000000002, |
| "grad_norm": 0.6998913729441977, |
| "learning_rate": 9.17299819657711e-06, |
| "loss": 0.4756, |
| "step": 1113 |
| }, |
| { |
| "epoch": 1.7824, |
| "grad_norm": 0.7585859336115078, |
| "learning_rate": 9.163740135849824e-06, |
| "loss": 0.4832, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.784, |
| "grad_norm": 0.7835474770178205, |
| "learning_rate": 9.154435268880547e-06, |
| "loss": 0.4748, |
| "step": 1115 |
| }, |
| { |
| "epoch": 1.7856, |
| "grad_norm": 0.7508571411491034, |
| "learning_rate": 9.145083700268955e-06, |
| "loss": 0.4649, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.7872, |
| "grad_norm": 0.8020723261386411, |
| "learning_rate": 9.135685535139709e-06, |
| "loss": 0.5001, |
| "step": 1117 |
| }, |
| { |
| "epoch": 1.7888, |
| "grad_norm": 0.6910426818371657, |
| "learning_rate": 9.126240879141286e-06, |
| "loss": 0.4466, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.7904, |
| "grad_norm": 0.8312177303482557, |
| "learning_rate": 9.116749838444778e-06, |
| "loss": 0.5066, |
| "step": 1119 |
| }, |
| { |
| "epoch": 1.792, |
| "grad_norm": 0.8798938353858576, |
| "learning_rate": 9.107212519742714e-06, |
| "loss": 0.4837, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.7936, |
| "grad_norm": 0.7398989429279293, |
| "learning_rate": 9.097629030247846e-06, |
| "loss": 0.5078, |
| "step": 1121 |
| }, |
| { |
| "epoch": 1.7952, |
| "grad_norm": 0.7872104783544963, |
| "learning_rate": 9.087999477691953e-06, |
| "loss": 0.4552, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.7968, |
| "grad_norm": 0.8673266664400423, |
| "learning_rate": 9.078323970324626e-06, |
| "loss": 0.4481, |
| "step": 1123 |
| }, |
| { |
| "epoch": 1.7984, |
| "grad_norm": 0.7759002350665339, |
| "learning_rate": 9.06860261691205e-06, |
| "loss": 0.4497, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.8384770692839796, |
| "learning_rate": 9.058835526735788e-06, |
| "loss": 0.4474, |
| "step": 1125 |
| }, |
| { |
| "epoch": 1.8016, |
| "grad_norm": 0.7322304789714222, |
| "learning_rate": 9.049022809591546e-06, |
| "loss": 0.4449, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.8032, |
| "grad_norm": 0.7937302647096102, |
| "learning_rate": 9.039164575787937e-06, |
| "loss": 0.5042, |
| "step": 1127 |
| }, |
| { |
| "epoch": 1.8048, |
| "grad_norm": 0.8346903570431222, |
| "learning_rate": 9.029260936145252e-06, |
| "loss": 0.4603, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.8064, |
| "grad_norm": 0.7557433180719302, |
| "learning_rate": 9.019312001994203e-06, |
| "loss": 0.4576, |
| "step": 1129 |
| }, |
| { |
| "epoch": 1.808, |
| "grad_norm": 0.9072781260749982, |
| "learning_rate": 9.009317885174672e-06, |
| "loss": 0.4964, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.8096, |
| "grad_norm": 0.7673078175366279, |
| "learning_rate": 8.999278698034462e-06, |
| "loss": 0.4591, |
| "step": 1131 |
| }, |
| { |
| "epoch": 1.8112, |
| "grad_norm": 0.9578376130833234, |
| "learning_rate": 8.989194553428028e-06, |
| "loss": 0.4822, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.8128, |
| "grad_norm": 0.8435660382315425, |
| "learning_rate": 8.979065564715209e-06, |
| "loss": 0.4764, |
| "step": 1133 |
| }, |
| { |
| "epoch": 1.8144, |
| "grad_norm": 0.9384898259414467, |
| "learning_rate": 8.968891845759955e-06, |
| "loss": 0.4693, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.8159999999999998, |
| "grad_norm": 0.7740493192334655, |
| "learning_rate": 8.958673510929046e-06, |
| "loss": 0.4879, |
| "step": 1135 |
| }, |
| { |
| "epoch": 1.8176, |
| "grad_norm": 1.202185398672549, |
| "learning_rate": 8.948410675090807e-06, |
| "loss": 0.4947, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.8192, |
| "grad_norm": 0.792379067255798, |
| "learning_rate": 8.938103453613814e-06, |
| "loss": 0.4771, |
| "step": 1137 |
| }, |
| { |
| "epoch": 1.8208, |
| "grad_norm": 0.8306544602637841, |
| "learning_rate": 8.927751962365603e-06, |
| "loss": 0.4794, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.8224, |
| "grad_norm": 0.81541668232239, |
| "learning_rate": 8.917356317711359e-06, |
| "loss": 0.4741, |
| "step": 1139 |
| }, |
| { |
| "epoch": 1.8239999999999998, |
| "grad_norm": 0.7683318265954167, |
| "learning_rate": 8.906916636512618e-06, |
| "loss": 0.4822, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.8256000000000001, |
| "grad_norm": 0.8849502579326289, |
| "learning_rate": 8.89643303612595e-06, |
| "loss": 0.4685, |
| "step": 1141 |
| }, |
| { |
| "epoch": 1.8272, |
| "grad_norm": 0.7608968464316281, |
| "learning_rate": 8.885905634401629e-06, |
| "loss": 0.4666, |
| "step": 1142 |
| }, |
| { |
| "epoch": 1.8288, |
| "grad_norm": 0.8097542549953858, |
| "learning_rate": 8.875334549682322e-06, |
| "loss": 0.4783, |
| "step": 1143 |
| }, |
| { |
| "epoch": 1.8304, |
| "grad_norm": 0.8216152980420542, |
| "learning_rate": 8.864719900801755e-06, |
| "loss": 0.4584, |
| "step": 1144 |
| }, |
| { |
| "epoch": 1.8319999999999999, |
| "grad_norm": 0.8656307140884373, |
| "learning_rate": 8.854061807083376e-06, |
| "loss": 0.504, |
| "step": 1145 |
| }, |
| { |
| "epoch": 1.8336000000000001, |
| "grad_norm": 0.8224033400645684, |
| "learning_rate": 8.84336038833901e-06, |
| "loss": 0.4727, |
| "step": 1146 |
| }, |
| { |
| "epoch": 1.8352, |
| "grad_norm": 0.8363949626195645, |
| "learning_rate": 8.832615764867521e-06, |
| "loss": 0.4804, |
| "step": 1147 |
| }, |
| { |
| "epoch": 1.8368, |
| "grad_norm": 0.8119714327103984, |
| "learning_rate": 8.821828057453448e-06, |
| "loss": 0.5037, |
| "step": 1148 |
| }, |
| { |
| "epoch": 1.8384, |
| "grad_norm": 0.8058780927155984, |
| "learning_rate": 8.810997387365656e-06, |
| "loss": 0.4653, |
| "step": 1149 |
| }, |
| { |
| "epoch": 1.8399999999999999, |
| "grad_norm": 0.8968949671889885, |
| "learning_rate": 8.800123876355976e-06, |
| "loss": 0.4854, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.8416000000000001, |
| "grad_norm": 0.7386589358702967, |
| "learning_rate": 8.789207646657823e-06, |
| "loss": 0.4835, |
| "step": 1151 |
| }, |
| { |
| "epoch": 1.8432, |
| "grad_norm": 0.7837448106837076, |
| "learning_rate": 8.778248820984829e-06, |
| "loss": 0.4884, |
| "step": 1152 |
| }, |
| { |
| "epoch": 1.8448, |
| "grad_norm": 0.7522920292251475, |
| "learning_rate": 8.767247522529473e-06, |
| "loss": 0.485, |
| "step": 1153 |
| }, |
| { |
| "epoch": 1.8464, |
| "grad_norm": 0.8199450680001841, |
| "learning_rate": 8.75620387496168e-06, |
| "loss": 0.5207, |
| "step": 1154 |
| }, |
| { |
| "epoch": 1.8479999999999999, |
| "grad_norm": 0.7464684959146133, |
| "learning_rate": 8.74511800242744e-06, |
| "loss": 0.4942, |
| "step": 1155 |
| }, |
| { |
| "epoch": 1.8496000000000001, |
| "grad_norm": 0.8258051483853871, |
| "learning_rate": 8.733990029547408e-06, |
| "loss": 0.4882, |
| "step": 1156 |
| }, |
| { |
| "epoch": 1.8512, |
| "grad_norm": 0.6732953958913235, |
| "learning_rate": 8.72282008141551e-06, |
| "loss": 0.439, |
| "step": 1157 |
| }, |
| { |
| "epoch": 1.8528, |
| "grad_norm": 0.7402695579684151, |
| "learning_rate": 8.71160828359753e-06, |
| "loss": 0.4768, |
| "step": 1158 |
| }, |
| { |
| "epoch": 1.8544, |
| "grad_norm": 0.7040926189197414, |
| "learning_rate": 8.7003547621297e-06, |
| "loss": 0.4648, |
| "step": 1159 |
| }, |
| { |
| "epoch": 1.8559999999999999, |
| "grad_norm": 0.7250897200570943, |
| "learning_rate": 8.689059643517285e-06, |
| "loss": 0.4718, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.8576000000000001, |
| "grad_norm": 0.7012526603280671, |
| "learning_rate": 8.677723054733163e-06, |
| "loss": 0.449, |
| "step": 1161 |
| }, |
| { |
| "epoch": 1.8592, |
| "grad_norm": 0.8723369562580054, |
| "learning_rate": 8.666345123216387e-06, |
| "loss": 0.4954, |
| "step": 1162 |
| }, |
| { |
| "epoch": 1.8608, |
| "grad_norm": 0.7295611805392298, |
| "learning_rate": 8.654925976870766e-06, |
| "loss": 0.465, |
| "step": 1163 |
| }, |
| { |
| "epoch": 1.8624, |
| "grad_norm": 0.73472158103287, |
| "learning_rate": 8.64346574406342e-06, |
| "loss": 0.4837, |
| "step": 1164 |
| }, |
| { |
| "epoch": 1.8639999999999999, |
| "grad_norm": 0.7455549509974911, |
| "learning_rate": 8.631964553623336e-06, |
| "loss": 0.4818, |
| "step": 1165 |
| }, |
| { |
| "epoch": 1.8656000000000001, |
| "grad_norm": 0.7115678369682501, |
| "learning_rate": 8.620422534839925e-06, |
| "loss": 0.463, |
| "step": 1166 |
| }, |
| { |
| "epoch": 1.8672, |
| "grad_norm": 0.7857802546484215, |
| "learning_rate": 8.608839817461565e-06, |
| "loss": 0.4725, |
| "step": 1167 |
| }, |
| { |
| "epoch": 1.8688, |
| "grad_norm": 0.768644071166201, |
| "learning_rate": 8.597216531694136e-06, |
| "loss": 0.4873, |
| "step": 1168 |
| }, |
| { |
| "epoch": 1.8704, |
| "grad_norm": 0.7846588618058573, |
| "learning_rate": 8.585552808199577e-06, |
| "loss": 0.4738, |
| "step": 1169 |
| }, |
| { |
| "epoch": 1.8719999999999999, |
| "grad_norm": 0.7471086835732865, |
| "learning_rate": 8.57384877809439e-06, |
| "loss": 0.4537, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.8736000000000002, |
| "grad_norm": 0.8648658959127388, |
| "learning_rate": 8.562104572948185e-06, |
| "loss": 0.4698, |
| "step": 1171 |
| }, |
| { |
| "epoch": 1.8752, |
| "grad_norm": 0.7109035208423685, |
| "learning_rate": 8.550320324782198e-06, |
| "loss": 0.4553, |
| "step": 1172 |
| }, |
| { |
| "epoch": 1.8768, |
| "grad_norm": 0.8015327722654481, |
| "learning_rate": 8.538496166067798e-06, |
| "loss": 0.4774, |
| "step": 1173 |
| }, |
| { |
| "epoch": 1.8784, |
| "grad_norm": 0.7306504452949123, |
| "learning_rate": 8.526632229725012e-06, |
| "loss": 0.4827, |
| "step": 1174 |
| }, |
| { |
| "epoch": 1.88, |
| "grad_norm": 0.7004149184104759, |
| "learning_rate": 8.514728649121017e-06, |
| "loss": 0.4659, |
| "step": 1175 |
| }, |
| { |
| "epoch": 1.8816000000000002, |
| "grad_norm": 0.7913013945698448, |
| "learning_rate": 8.50278555806865e-06, |
| "loss": 0.4684, |
| "step": 1176 |
| }, |
| { |
| "epoch": 1.8832, |
| "grad_norm": 0.7801219375804972, |
| "learning_rate": 8.490803090824895e-06, |
| "loss": 0.4905, |
| "step": 1177 |
| }, |
| { |
| "epoch": 1.8848, |
| "grad_norm": 0.7351394597035321, |
| "learning_rate": 8.478781382089387e-06, |
| "loss": 0.469, |
| "step": 1178 |
| }, |
| { |
| "epoch": 1.8864, |
| "grad_norm": 0.7707855237564337, |
| "learning_rate": 8.466720567002887e-06, |
| "loss": 0.4831, |
| "step": 1179 |
| }, |
| { |
| "epoch": 1.888, |
| "grad_norm": 0.7208606345948122, |
| "learning_rate": 8.454620781145761e-06, |
| "loss": 0.4742, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.8896, |
| "grad_norm": 0.79848628475022, |
| "learning_rate": 8.442482160536469e-06, |
| "loss": 0.4863, |
| "step": 1181 |
| }, |
| { |
| "epoch": 1.8912, |
| "grad_norm": 0.6769590611616342, |
| "learning_rate": 8.430304841630024e-06, |
| "loss": 0.4649, |
| "step": 1182 |
| }, |
| { |
| "epoch": 1.8928, |
| "grad_norm": 0.7530969549620007, |
| "learning_rate": 8.418088961316459e-06, |
| "loss": 0.471, |
| "step": 1183 |
| }, |
| { |
| "epoch": 1.8944, |
| "grad_norm": 0.7114720544049936, |
| "learning_rate": 8.405834656919295e-06, |
| "loss": 0.4826, |
| "step": 1184 |
| }, |
| { |
| "epoch": 1.896, |
| "grad_norm": 0.7328135064170878, |
| "learning_rate": 8.393542066193994e-06, |
| "loss": 0.491, |
| "step": 1185 |
| }, |
| { |
| "epoch": 1.8976, |
| "grad_norm": 0.692361493124172, |
| "learning_rate": 8.381211327326403e-06, |
| "loss": 0.4407, |
| "step": 1186 |
| }, |
| { |
| "epoch": 1.8992, |
| "grad_norm": 0.7657820423628003, |
| "learning_rate": 8.368842578931214e-06, |
| "loss": 0.4808, |
| "step": 1187 |
| }, |
| { |
| "epoch": 1.9008, |
| "grad_norm": 0.7493563054639181, |
| "learning_rate": 8.356435960050398e-06, |
| "loss": 0.4588, |
| "step": 1188 |
| }, |
| { |
| "epoch": 1.9024, |
| "grad_norm": 0.7076961903732508, |
| "learning_rate": 8.34399161015164e-06, |
| "loss": 0.4603, |
| "step": 1189 |
| }, |
| { |
| "epoch": 1.904, |
| "grad_norm": 0.8155352599270661, |
| "learning_rate": 8.331509669126778e-06, |
| "loss": 0.4588, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.9056, |
| "grad_norm": 0.7246019082046947, |
| "learning_rate": 8.318990277290224e-06, |
| "loss": 0.462, |
| "step": 1191 |
| }, |
| { |
| "epoch": 1.9072, |
| "grad_norm": 0.8329063517847972, |
| "learning_rate": 8.306433575377388e-06, |
| "loss": 0.4558, |
| "step": 1192 |
| }, |
| { |
| "epoch": 1.9088, |
| "grad_norm": 0.6981008593587018, |
| "learning_rate": 8.293839704543103e-06, |
| "loss": 0.4834, |
| "step": 1193 |
| }, |
| { |
| "epoch": 1.9104, |
| "grad_norm": 0.8653080211979014, |
| "learning_rate": 8.281208806360028e-06, |
| "loss": 0.4507, |
| "step": 1194 |
| }, |
| { |
| "epoch": 1.912, |
| "grad_norm": 0.95604800095903, |
| "learning_rate": 8.268541022817058e-06, |
| "loss": 0.5203, |
| "step": 1195 |
| }, |
| { |
| "epoch": 1.9136, |
| "grad_norm": 0.7952132943771812, |
| "learning_rate": 8.255836496317739e-06, |
| "loss": 0.445, |
| "step": 1196 |
| }, |
| { |
| "epoch": 1.9152, |
| "grad_norm": 0.7661599790643271, |
| "learning_rate": 8.243095369678653e-06, |
| "loss": 0.45, |
| "step": 1197 |
| }, |
| { |
| "epoch": 1.9167999999999998, |
| "grad_norm": 0.736039388177421, |
| "learning_rate": 8.230317786127822e-06, |
| "loss": 0.4813, |
| "step": 1198 |
| }, |
| { |
| "epoch": 1.9184, |
| "grad_norm": 0.8051710006606744, |
| "learning_rate": 8.217503889303088e-06, |
| "loss": 0.4595, |
| "step": 1199 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 0.7836426970956726, |
| "learning_rate": 8.204653823250516e-06, |
| "loss": 0.4831, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.9216, |
| "grad_norm": 0.7734510329192413, |
| "learning_rate": 8.191767732422754e-06, |
| "loss": 0.4809, |
| "step": 1201 |
| }, |
| { |
| "epoch": 1.9232, |
| "grad_norm": 0.7187700406077102, |
| "learning_rate": 8.17884576167742e-06, |
| "loss": 0.4409, |
| "step": 1202 |
| }, |
| { |
| "epoch": 1.9247999999999998, |
| "grad_norm": 0.745714087535346, |
| "learning_rate": 8.165888056275478e-06, |
| "loss": 0.443, |
| "step": 1203 |
| }, |
| { |
| "epoch": 1.9264000000000001, |
| "grad_norm": 0.7048072632404838, |
| "learning_rate": 8.152894761879593e-06, |
| "loss": 0.4838, |
| "step": 1204 |
| }, |
| { |
| "epoch": 1.928, |
| "grad_norm": 0.7264527631172872, |
| "learning_rate": 8.1398660245525e-06, |
| "loss": 0.4613, |
| "step": 1205 |
| }, |
| { |
| "epoch": 1.9296, |
| "grad_norm": 0.7195940478277059, |
| "learning_rate": 8.126801990755371e-06, |
| "loss": 0.4459, |
| "step": 1206 |
| }, |
| { |
| "epoch": 1.9312, |
| "grad_norm": 0.7553387112860547, |
| "learning_rate": 8.113702807346147e-06, |
| "loss": 0.4886, |
| "step": 1207 |
| }, |
| { |
| "epoch": 1.9327999999999999, |
| "grad_norm": 0.6954464450652126, |
| "learning_rate": 8.100568621577907e-06, |
| "loss": 0.4666, |
| "step": 1208 |
| }, |
| { |
| "epoch": 1.9344000000000001, |
| "grad_norm": 0.7304012484152707, |
| "learning_rate": 8.087399581097205e-06, |
| "loss": 0.4621, |
| "step": 1209 |
| }, |
| { |
| "epoch": 1.936, |
| "grad_norm": 1.6917118741128514, |
| "learning_rate": 8.074195833942405e-06, |
| "loss": 0.4705, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.9376, |
| "grad_norm": 0.7656279018780524, |
| "learning_rate": 8.060957528542032e-06, |
| "loss": 0.4889, |
| "step": 1211 |
| }, |
| { |
| "epoch": 1.9392, |
| "grad_norm": 0.6513452251174792, |
| "learning_rate": 8.047684813713086e-06, |
| "loss": 0.4582, |
| "step": 1212 |
| }, |
| { |
| "epoch": 1.9407999999999999, |
| "grad_norm": 1.0730911684214248, |
| "learning_rate": 8.03437783865938e-06, |
| "loss": 0.4389, |
| "step": 1213 |
| }, |
| { |
| "epoch": 1.9424000000000001, |
| "grad_norm": 0.7023367058796101, |
| "learning_rate": 8.021036752969859e-06, |
| "loss": 0.445, |
| "step": 1214 |
| }, |
| { |
| "epoch": 1.944, |
| "grad_norm": 0.6984689461295697, |
| "learning_rate": 8.007661706616919e-06, |
| "loss": 0.4888, |
| "step": 1215 |
| }, |
| { |
| "epoch": 1.9456, |
| "grad_norm": 0.7502708537039777, |
| "learning_rate": 7.99425284995472e-06, |
| "loss": 0.4468, |
| "step": 1216 |
| }, |
| { |
| "epoch": 1.9472, |
| "grad_norm": 0.7168320783099587, |
| "learning_rate": 7.980810333717499e-06, |
| "loss": 0.4701, |
| "step": 1217 |
| }, |
| { |
| "epoch": 1.9487999999999999, |
| "grad_norm": 0.8045767509305717, |
| "learning_rate": 7.967334309017876e-06, |
| "loss": 0.4439, |
| "step": 1218 |
| }, |
| { |
| "epoch": 1.9504000000000001, |
| "grad_norm": 0.8849219260640486, |
| "learning_rate": 7.953824927345146e-06, |
| "loss": 0.5201, |
| "step": 1219 |
| }, |
| { |
| "epoch": 1.952, |
| "grad_norm": 0.6787271238107349, |
| "learning_rate": 7.940282340563586e-06, |
| "loss": 0.466, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.9536, |
| "grad_norm": 0.7690530385600742, |
| "learning_rate": 7.92670670091075e-06, |
| "loss": 0.4524, |
| "step": 1221 |
| }, |
| { |
| "epoch": 1.9552, |
| "grad_norm": 0.6686012178909408, |
| "learning_rate": 7.913098160995742e-06, |
| "loss": 0.443, |
| "step": 1222 |
| }, |
| { |
| "epoch": 1.9567999999999999, |
| "grad_norm": 0.6996879963962194, |
| "learning_rate": 7.899456873797519e-06, |
| "loss": 0.4637, |
| "step": 1223 |
| }, |
| { |
| "epoch": 1.9584000000000001, |
| "grad_norm": 0.7150953281715766, |
| "learning_rate": 7.885782992663162e-06, |
| "loss": 0.4832, |
| "step": 1224 |
| }, |
| { |
| "epoch": 1.96, |
| "grad_norm": 0.6918952092029659, |
| "learning_rate": 7.87207667130615e-06, |
| "loss": 0.476, |
| "step": 1225 |
| }, |
| { |
| "epoch": 1.9616, |
| "grad_norm": 0.7115691290659868, |
| "learning_rate": 7.858338063804638e-06, |
| "loss": 0.499, |
| "step": 1226 |
| }, |
| { |
| "epoch": 1.9632, |
| "grad_norm": 0.6555011571772331, |
| "learning_rate": 7.84456732459972e-06, |
| "loss": 0.4843, |
| "step": 1227 |
| }, |
| { |
| "epoch": 1.9647999999999999, |
| "grad_norm": 0.715962295702684, |
| "learning_rate": 7.830764608493697e-06, |
| "loss": 0.4946, |
| "step": 1228 |
| }, |
| { |
| "epoch": 1.9664000000000001, |
| "grad_norm": 0.7541602992549761, |
| "learning_rate": 7.816930070648335e-06, |
| "loss": 0.4666, |
| "step": 1229 |
| }, |
| { |
| "epoch": 1.968, |
| "grad_norm": 0.720723686996417, |
| "learning_rate": 7.803063866583119e-06, |
| "loss": 0.4752, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.9696, |
| "grad_norm": 0.8413086585547764, |
| "learning_rate": 7.789166152173508e-06, |
| "loss": 0.4664, |
| "step": 1231 |
| }, |
| { |
| "epoch": 1.9712, |
| "grad_norm": 0.6792387613339629, |
| "learning_rate": 7.775237083649182e-06, |
| "loss": 0.4477, |
| "step": 1232 |
| }, |
| { |
| "epoch": 1.9727999999999999, |
| "grad_norm": 0.773696750431894, |
| "learning_rate": 7.761276817592283e-06, |
| "loss": 0.4861, |
| "step": 1233 |
| }, |
| { |
| "epoch": 1.9744000000000002, |
| "grad_norm": 0.696941964362057, |
| "learning_rate": 7.747285510935654e-06, |
| "loss": 0.4598, |
| "step": 1234 |
| }, |
| { |
| "epoch": 1.976, |
| "grad_norm": 0.7080059593102934, |
| "learning_rate": 7.733263320961087e-06, |
| "loss": 0.4656, |
| "step": 1235 |
| }, |
| { |
| "epoch": 1.9776, |
| "grad_norm": 0.7186736221997335, |
| "learning_rate": 7.719210405297537e-06, |
| "loss": 0.4651, |
| "step": 1236 |
| }, |
| { |
| "epoch": 1.9792, |
| "grad_norm": 0.7170560204985752, |
| "learning_rate": 7.705126921919358e-06, |
| "loss": 0.4561, |
| "step": 1237 |
| }, |
| { |
| "epoch": 1.9808, |
| "grad_norm": 0.7414750990700175, |
| "learning_rate": 7.691013029144535e-06, |
| "loss": 0.4574, |
| "step": 1238 |
| }, |
| { |
| "epoch": 1.9824000000000002, |
| "grad_norm": 0.7559504861881715, |
| "learning_rate": 7.676868885632893e-06, |
| "loss": 0.4478, |
| "step": 1239 |
| }, |
| { |
| "epoch": 1.984, |
| "grad_norm": 0.7159506812568182, |
| "learning_rate": 7.662694650384315e-06, |
| "loss": 0.4738, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.9856, |
| "grad_norm": 0.7148912940280229, |
| "learning_rate": 7.648490482736959e-06, |
| "loss": 0.4716, |
| "step": 1241 |
| }, |
| { |
| "epoch": 1.9872, |
| "grad_norm": 0.6786983608203037, |
| "learning_rate": 7.634256542365468e-06, |
| "loss": 0.4797, |
| "step": 1242 |
| }, |
| { |
| "epoch": 1.9888, |
| "grad_norm": 0.8016066893153321, |
| "learning_rate": 7.6199929892791666e-06, |
| "loss": 0.5211, |
| "step": 1243 |
| }, |
| { |
| "epoch": 1.9904, |
| "grad_norm": 0.6830168298566425, |
| "learning_rate": 7.60569998382027e-06, |
| "loss": 0.4985, |
| "step": 1244 |
| }, |
| { |
| "epoch": 1.992, |
| "grad_norm": 0.812026382596777, |
| "learning_rate": 7.591377686662081e-06, |
| "loss": 0.4719, |
| "step": 1245 |
| }, |
| { |
| "epoch": 1.9936, |
| "grad_norm": 0.7132848502425145, |
| "learning_rate": 7.577026258807181e-06, |
| "loss": 0.4802, |
| "step": 1246 |
| }, |
| { |
| "epoch": 1.9952, |
| "grad_norm": 0.7187112930479812, |
| "learning_rate": 7.562645861585615e-06, |
| "loss": 0.4658, |
| "step": 1247 |
| }, |
| { |
| "epoch": 1.9968, |
| "grad_norm": 0.7218289482909626, |
| "learning_rate": 7.548236656653095e-06, |
| "loss": 0.4692, |
| "step": 1248 |
| }, |
| { |
| "epoch": 1.9984, |
| "grad_norm": 0.6893057203147361, |
| "learning_rate": 7.533798805989165e-06, |
| "loss": 0.479, |
| "step": 1249 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.6811147668238169, |
| "learning_rate": 7.519332471895384e-06, |
| "loss": 0.4228, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.0016, |
| "grad_norm": 0.942142555999305, |
| "learning_rate": 7.504837816993513e-06, |
| "loss": 0.4094, |
| "step": 1251 |
| }, |
| { |
| "epoch": 2.0032, |
| "grad_norm": 0.7740007104758746, |
| "learning_rate": 7.490315004223672e-06, |
| "loss": 0.4108, |
| "step": 1252 |
| }, |
| { |
| "epoch": 2.0048, |
| "grad_norm": 0.8169139096512836, |
| "learning_rate": 7.475764196842516e-06, |
| "loss": 0.4433, |
| "step": 1253 |
| }, |
| { |
| "epoch": 2.0064, |
| "grad_norm": 0.8209520669424164, |
| "learning_rate": 7.4611855584214e-06, |
| "loss": 0.3826, |
| "step": 1254 |
| }, |
| { |
| "epoch": 2.008, |
| "grad_norm": 0.8798367918362482, |
| "learning_rate": 7.446579252844536e-06, |
| "loss": 0.4224, |
| "step": 1255 |
| }, |
| { |
| "epoch": 2.0096, |
| "grad_norm": 0.8691405382550204, |
| "learning_rate": 7.431945444307157e-06, |
| "loss": 0.4289, |
| "step": 1256 |
| }, |
| { |
| "epoch": 2.0112, |
| "grad_norm": 0.800556536362489, |
| "learning_rate": 7.417284297313665e-06, |
| "loss": 0.4023, |
| "step": 1257 |
| }, |
| { |
| "epoch": 2.0128, |
| "grad_norm": 0.7728720252217788, |
| "learning_rate": 7.402595976675785e-06, |
| "loss": 0.4122, |
| "step": 1258 |
| }, |
| { |
| "epoch": 2.0144, |
| "grad_norm": 0.7536268800020502, |
| "learning_rate": 7.387880647510709e-06, |
| "loss": 0.395, |
| "step": 1259 |
| }, |
| { |
| "epoch": 2.016, |
| "grad_norm": 0.7495826111062508, |
| "learning_rate": 7.37313847523925e-06, |
| "loss": 0.4156, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.0176, |
| "grad_norm": 0.7977316124513112, |
| "learning_rate": 7.358369625583966e-06, |
| "loss": 0.4342, |
| "step": 1261 |
| }, |
| { |
| "epoch": 2.0192, |
| "grad_norm": 0.763736878663743, |
| "learning_rate": 7.343574264567311e-06, |
| "loss": 0.403, |
| "step": 1262 |
| }, |
| { |
| "epoch": 2.0208, |
| "grad_norm": 0.7318474686470899, |
| "learning_rate": 7.3287525585097615e-06, |
| "loss": 0.3909, |
| "step": 1263 |
| }, |
| { |
| "epoch": 2.0224, |
| "grad_norm": 0.7605413484257524, |
| "learning_rate": 7.313904674027954e-06, |
| "loss": 0.3885, |
| "step": 1264 |
| }, |
| { |
| "epoch": 2.024, |
| "grad_norm": 0.7279511815872336, |
| "learning_rate": 7.299030778032799e-06, |
| "loss": 0.4048, |
| "step": 1265 |
| }, |
| { |
| "epoch": 2.0256, |
| "grad_norm": 0.7523015788911891, |
| "learning_rate": 7.284131037727618e-06, |
| "loss": 0.4022, |
| "step": 1266 |
| }, |
| { |
| "epoch": 2.0272, |
| "grad_norm": 0.7599105856257794, |
| "learning_rate": 7.269205620606259e-06, |
| "loss": 0.3668, |
| "step": 1267 |
| }, |
| { |
| "epoch": 2.0288, |
| "grad_norm": 0.7438421120510148, |
| "learning_rate": 7.2542546944512106e-06, |
| "loss": 0.3808, |
| "step": 1268 |
| }, |
| { |
| "epoch": 2.0304, |
| "grad_norm": 0.727359932201372, |
| "learning_rate": 7.239278427331718e-06, |
| "loss": 0.3785, |
| "step": 1269 |
| }, |
| { |
| "epoch": 2.032, |
| "grad_norm": 0.6937857053944503, |
| "learning_rate": 7.224276987601895e-06, |
| "loss": 0.3757, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.0336, |
| "grad_norm": 0.780207663850111, |
| "learning_rate": 7.209250543898834e-06, |
| "loss": 0.4329, |
| "step": 1271 |
| }, |
| { |
| "epoch": 2.0352, |
| "grad_norm": 0.7309618417958235, |
| "learning_rate": 7.194199265140701e-06, |
| "loss": 0.4289, |
| "step": 1272 |
| }, |
| { |
| "epoch": 2.0368, |
| "grad_norm": 0.756730142412508, |
| "learning_rate": 7.179123320524848e-06, |
| "loss": 0.4011, |
| "step": 1273 |
| }, |
| { |
| "epoch": 2.0384, |
| "grad_norm": 0.7748502996836906, |
| "learning_rate": 7.1640228795259025e-06, |
| "loss": 0.4166, |
| "step": 1274 |
| }, |
| { |
| "epoch": 2.04, |
| "grad_norm": 0.7138213951471011, |
| "learning_rate": 7.148898111893867e-06, |
| "loss": 0.4211, |
| "step": 1275 |
| }, |
| { |
| "epoch": 2.0416, |
| "grad_norm": 0.7096395677980951, |
| "learning_rate": 7.133749187652208e-06, |
| "loss": 0.4351, |
| "step": 1276 |
| }, |
| { |
| "epoch": 2.0432, |
| "grad_norm": 0.7598339781728084, |
| "learning_rate": 7.118576277095944e-06, |
| "loss": 0.4173, |
| "step": 1277 |
| }, |
| { |
| "epoch": 2.0448, |
| "grad_norm": 0.7318720964428549, |
| "learning_rate": 7.103379550789741e-06, |
| "loss": 0.3985, |
| "step": 1278 |
| }, |
| { |
| "epoch": 2.0464, |
| "grad_norm": 0.7207495438632298, |
| "learning_rate": 7.088159179565978e-06, |
| "loss": 0.3681, |
| "step": 1279 |
| }, |
| { |
| "epoch": 2.048, |
| "grad_norm": 0.7495804691040051, |
| "learning_rate": 7.07291533452284e-06, |
| "loss": 0.4188, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.0496, |
| "grad_norm": 0.7464935959309549, |
| "learning_rate": 7.05764818702239e-06, |
| "loss": 0.387, |
| "step": 1281 |
| }, |
| { |
| "epoch": 2.0512, |
| "grad_norm": 0.7699151568223941, |
| "learning_rate": 7.042357908688646e-06, |
| "loss": 0.4167, |
| "step": 1282 |
| }, |
| { |
| "epoch": 2.0528, |
| "grad_norm": 0.7495209441991714, |
| "learning_rate": 7.027044671405643e-06, |
| "loss": 0.4072, |
| "step": 1283 |
| }, |
| { |
| "epoch": 2.0544, |
| "grad_norm": 0.6843159358600286, |
| "learning_rate": 7.0117086473155095e-06, |
| "loss": 0.3873, |
| "step": 1284 |
| }, |
| { |
| "epoch": 2.056, |
| "grad_norm": 0.7506916305788107, |
| "learning_rate": 6.996350008816532e-06, |
| "loss": 0.416, |
| "step": 1285 |
| }, |
| { |
| "epoch": 2.0576, |
| "grad_norm": 0.6939939433254977, |
| "learning_rate": 6.980968928561209e-06, |
| "loss": 0.3881, |
| "step": 1286 |
| }, |
| { |
| "epoch": 2.0592, |
| "grad_norm": 0.736883088527874, |
| "learning_rate": 6.965565579454322e-06, |
| "loss": 0.4142, |
| "step": 1287 |
| }, |
| { |
| "epoch": 2.0608, |
| "grad_norm": 0.7892335221877158, |
| "learning_rate": 6.9501401346509786e-06, |
| "loss": 0.4345, |
| "step": 1288 |
| }, |
| { |
| "epoch": 2.0624, |
| "grad_norm": 0.7166910275994461, |
| "learning_rate": 6.934692767554679e-06, |
| "loss": 0.409, |
| "step": 1289 |
| }, |
| { |
| "epoch": 2.064, |
| "grad_norm": 0.7612787510568383, |
| "learning_rate": 6.9192236518153566e-06, |
| "loss": 0.4234, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.0656, |
| "grad_norm": 0.7385227507104376, |
| "learning_rate": 6.903732961327432e-06, |
| "loss": 0.4231, |
| "step": 1291 |
| }, |
| { |
| "epoch": 2.0672, |
| "grad_norm": 0.7287381426108751, |
| "learning_rate": 6.888220870227853e-06, |
| "loss": 0.4269, |
| "step": 1292 |
| }, |
| { |
| "epoch": 2.0688, |
| "grad_norm": 0.7260025903032917, |
| "learning_rate": 6.872687552894145e-06, |
| "loss": 0.4167, |
| "step": 1293 |
| }, |
| { |
| "epoch": 2.0704, |
| "grad_norm": 0.6962157986575372, |
| "learning_rate": 6.857133183942442e-06, |
| "loss": 0.4125, |
| "step": 1294 |
| }, |
| { |
| "epoch": 2.072, |
| "grad_norm": 0.7451887904030556, |
| "learning_rate": 6.841557938225527e-06, |
| "loss": 0.4403, |
| "step": 1295 |
| }, |
| { |
| "epoch": 2.0736, |
| "grad_norm": 0.7219167742786095, |
| "learning_rate": 6.825961990830871e-06, |
| "loss": 0.4155, |
| "step": 1296 |
| }, |
| { |
| "epoch": 2.0752, |
| "grad_norm": 0.6884631866006362, |
| "learning_rate": 6.810345517078657e-06, |
| "loss": 0.413, |
| "step": 1297 |
| }, |
| { |
| "epoch": 2.0768, |
| "grad_norm": 0.7660646636597886, |
| "learning_rate": 6.794708692519815e-06, |
| "loss": 0.4048, |
| "step": 1298 |
| }, |
| { |
| "epoch": 2.0784, |
| "grad_norm": 0.7096954819778793, |
| "learning_rate": 6.779051692934043e-06, |
| "loss": 0.4095, |
| "step": 1299 |
| }, |
| { |
| "epoch": 2.08, |
| "grad_norm": 0.7507337262396936, |
| "learning_rate": 6.76337469432784e-06, |
| "loss": 0.4588, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.0816, |
| "grad_norm": 0.7695804036957934, |
| "learning_rate": 6.747677872932519e-06, |
| "loss": 0.4387, |
| "step": 1301 |
| }, |
| { |
| "epoch": 2.0832, |
| "grad_norm": 0.6821148233816933, |
| "learning_rate": 6.731961405202224e-06, |
| "loss": 0.4015, |
| "step": 1302 |
| }, |
| { |
| "epoch": 2.0848, |
| "grad_norm": 0.8274265466470279, |
| "learning_rate": 6.716225467811961e-06, |
| "loss": 0.4157, |
| "step": 1303 |
| }, |
| { |
| "epoch": 2.0864, |
| "grad_norm": 0.7695532034604734, |
| "learning_rate": 6.700470237655597e-06, |
| "loss": 0.424, |
| "step": 1304 |
| }, |
| { |
| "epoch": 2.088, |
| "grad_norm": 0.7574175438272212, |
| "learning_rate": 6.684695891843871e-06, |
| "loss": 0.4273, |
| "step": 1305 |
| }, |
| { |
| "epoch": 2.0896, |
| "grad_norm": 0.7106036941292224, |
| "learning_rate": 6.668902607702419e-06, |
| "loss": 0.4033, |
| "step": 1306 |
| }, |
| { |
| "epoch": 2.0912, |
| "grad_norm": 0.746696090756007, |
| "learning_rate": 6.653090562769764e-06, |
| "loss": 0.4172, |
| "step": 1307 |
| }, |
| { |
| "epoch": 2.0928, |
| "grad_norm": 0.7065553861543276, |
| "learning_rate": 6.637259934795328e-06, |
| "loss": 0.4036, |
| "step": 1308 |
| }, |
| { |
| "epoch": 2.0944, |
| "grad_norm": 0.7827478356395094, |
| "learning_rate": 6.6214109017374306e-06, |
| "loss": 0.4124, |
| "step": 1309 |
| }, |
| { |
| "epoch": 2.096, |
| "grad_norm": 0.7013377808905202, |
| "learning_rate": 6.605543641761293e-06, |
| "loss": 0.401, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.0976, |
| "grad_norm": 0.7311854764718958, |
| "learning_rate": 6.589658333237031e-06, |
| "loss": 0.3862, |
| "step": 1311 |
| }, |
| { |
| "epoch": 2.0992, |
| "grad_norm": 0.7372615193443035, |
| "learning_rate": 6.573755154737651e-06, |
| "loss": 0.4055, |
| "step": 1312 |
| }, |
| { |
| "epoch": 2.1008, |
| "grad_norm": 0.7291518334922953, |
| "learning_rate": 6.5578342850370415e-06, |
| "loss": 0.4261, |
| "step": 1313 |
| }, |
| { |
| "epoch": 2.1024, |
| "grad_norm": 0.8324400879041759, |
| "learning_rate": 6.54189590310797e-06, |
| "loss": 0.4466, |
| "step": 1314 |
| }, |
| { |
| "epoch": 2.104, |
| "grad_norm": 0.7392933627356066, |
| "learning_rate": 6.525940188120059e-06, |
| "loss": 0.4085, |
| "step": 1315 |
| }, |
| { |
| "epoch": 2.1056, |
| "grad_norm": 0.7991093699398838, |
| "learning_rate": 6.509967319437782e-06, |
| "loss": 0.3926, |
| "step": 1316 |
| }, |
| { |
| "epoch": 2.1072, |
| "grad_norm": 0.7382714582408669, |
| "learning_rate": 6.493977476618445e-06, |
| "loss": 0.3966, |
| "step": 1317 |
| }, |
| { |
| "epoch": 2.1088, |
| "grad_norm": 0.8306869188995181, |
| "learning_rate": 6.477970839410166e-06, |
| "loss": 0.3873, |
| "step": 1318 |
| }, |
| { |
| "epoch": 2.1104, |
| "grad_norm": 0.7363308972189302, |
| "learning_rate": 6.461947587749855e-06, |
| "loss": 0.4211, |
| "step": 1319 |
| }, |
| { |
| "epoch": 2.112, |
| "grad_norm": 0.7422749242705443, |
| "learning_rate": 6.445907901761189e-06, |
| "loss": 0.4142, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.1136, |
| "grad_norm": 0.822246172800683, |
| "learning_rate": 6.429851961752597e-06, |
| "loss": 0.4132, |
| "step": 1321 |
| }, |
| { |
| "epoch": 2.1152, |
| "grad_norm": 0.7378843525152939, |
| "learning_rate": 6.413779948215218e-06, |
| "loss": 0.4128, |
| "step": 1322 |
| }, |
| { |
| "epoch": 2.1168, |
| "grad_norm": 0.8531643497470118, |
| "learning_rate": 6.397692041820885e-06, |
| "loss": 0.4171, |
| "step": 1323 |
| }, |
| { |
| "epoch": 2.1184, |
| "grad_norm": 0.7333781651418984, |
| "learning_rate": 6.381588423420085e-06, |
| "loss": 0.4092, |
| "step": 1324 |
| }, |
| { |
| "epoch": 2.12, |
| "grad_norm": 0.7895400176236084, |
| "learning_rate": 6.365469274039936e-06, |
| "loss": 0.3994, |
| "step": 1325 |
| }, |
| { |
| "epoch": 2.1216, |
| "grad_norm": 0.7977366495086916, |
| "learning_rate": 6.349334774882136e-06, |
| "loss": 0.4004, |
| "step": 1326 |
| }, |
| { |
| "epoch": 2.1232, |
| "grad_norm": 0.7203694849580552, |
| "learning_rate": 6.333185107320945e-06, |
| "loss": 0.3949, |
| "step": 1327 |
| }, |
| { |
| "epoch": 2.1248, |
| "grad_norm": 0.7660899144917048, |
| "learning_rate": 6.317020452901134e-06, |
| "loss": 0.3907, |
| "step": 1328 |
| }, |
| { |
| "epoch": 2.1264, |
| "grad_norm": 0.7346196820608468, |
| "learning_rate": 6.300840993335945e-06, |
| "loss": 0.4109, |
| "step": 1329 |
| }, |
| { |
| "epoch": 2.128, |
| "grad_norm": 0.7605130816243495, |
| "learning_rate": 6.2846469105050545e-06, |
| "loss": 0.3845, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.1296, |
| "grad_norm": 0.763565798253721, |
| "learning_rate": 6.26843838645252e-06, |
| "loss": 0.4275, |
| "step": 1331 |
| }, |
| { |
| "epoch": 2.1312, |
| "grad_norm": 0.7635222394945375, |
| "learning_rate": 6.2522156033847435e-06, |
| "loss": 0.4019, |
| "step": 1332 |
| }, |
| { |
| "epoch": 2.1328, |
| "grad_norm": 0.7122514894717155, |
| "learning_rate": 6.235978743668415e-06, |
| "loss": 0.3992, |
| "step": 1333 |
| }, |
| { |
| "epoch": 2.1344, |
| "grad_norm": 0.7634523079399845, |
| "learning_rate": 6.219727989828466e-06, |
| "loss": 0.3962, |
| "step": 1334 |
| }, |
| { |
| "epoch": 2.136, |
| "grad_norm": 0.7411828984909158, |
| "learning_rate": 6.203463524546017e-06, |
| "loss": 0.4274, |
| "step": 1335 |
| }, |
| { |
| "epoch": 2.1376, |
| "grad_norm": 0.6811926949135216, |
| "learning_rate": 6.187185530656327e-06, |
| "loss": 0.3777, |
| "step": 1336 |
| }, |
| { |
| "epoch": 2.1391999999999998, |
| "grad_norm": 0.7346885484792756, |
| "learning_rate": 6.1708941911467335e-06, |
| "loss": 0.4176, |
| "step": 1337 |
| }, |
| { |
| "epoch": 2.1408, |
| "grad_norm": 0.700465605432178, |
| "learning_rate": 6.154589689154594e-06, |
| "loss": 0.3943, |
| "step": 1338 |
| }, |
| { |
| "epoch": 2.1424, |
| "grad_norm": 0.6885094603302031, |
| "learning_rate": 6.138272207965238e-06, |
| "loss": 0.4284, |
| "step": 1339 |
| }, |
| { |
| "epoch": 2.144, |
| "grad_norm": 0.7409839744147182, |
| "learning_rate": 6.121941931009894e-06, |
| "loss": 0.4261, |
| "step": 1340 |
| }, |
| { |
| "epoch": 2.1456, |
| "grad_norm": 0.7569295619787667, |
| "learning_rate": 6.105599041863631e-06, |
| "loss": 0.4386, |
| "step": 1341 |
| }, |
| { |
| "epoch": 2.1471999999999998, |
| "grad_norm": 0.7014645801815328, |
| "learning_rate": 6.089243724243303e-06, |
| "loss": 0.3973, |
| "step": 1342 |
| }, |
| { |
| "epoch": 2.1488, |
| "grad_norm": 0.6854172780386442, |
| "learning_rate": 6.072876162005474e-06, |
| "loss": 0.4007, |
| "step": 1343 |
| }, |
| { |
| "epoch": 2.1504, |
| "grad_norm": 0.69782675982927, |
| "learning_rate": 6.056496539144351e-06, |
| "loss": 0.3958, |
| "step": 1344 |
| }, |
| { |
| "epoch": 2.152, |
| "grad_norm": 0.665761808333572, |
| "learning_rate": 6.040105039789726e-06, |
| "loss": 0.4382, |
| "step": 1345 |
| }, |
| { |
| "epoch": 2.1536, |
| "grad_norm": 0.6715297399502295, |
| "learning_rate": 6.023701848204893e-06, |
| "loss": 0.3722, |
| "step": 1346 |
| }, |
| { |
| "epoch": 2.1552, |
| "grad_norm": 0.6703345368133065, |
| "learning_rate": 6.007287148784591e-06, |
| "loss": 0.3935, |
| "step": 1347 |
| }, |
| { |
| "epoch": 2.1568, |
| "grad_norm": 0.7079035209974975, |
| "learning_rate": 5.990861126052914e-06, |
| "loss": 0.3882, |
| "step": 1348 |
| }, |
| { |
| "epoch": 2.1584, |
| "grad_norm": 0.7252107816623263, |
| "learning_rate": 5.974423964661249e-06, |
| "loss": 0.421, |
| "step": 1349 |
| }, |
| { |
| "epoch": 2.16, |
| "grad_norm": 0.6859455329532241, |
| "learning_rate": 5.957975849386202e-06, |
| "loss": 0.3898, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.1616, |
| "grad_norm": 0.658909658030749, |
| "learning_rate": 5.941516965127509e-06, |
| "loss": 0.4145, |
| "step": 1351 |
| }, |
| { |
| "epoch": 2.1632, |
| "grad_norm": 0.6964614246833752, |
| "learning_rate": 5.925047496905968e-06, |
| "loss": 0.4198, |
| "step": 1352 |
| }, |
| { |
| "epoch": 2.1648, |
| "grad_norm": 0.7142994737245355, |
| "learning_rate": 5.908567629861354e-06, |
| "loss": 0.4222, |
| "step": 1353 |
| }, |
| { |
| "epoch": 2.1664, |
| "grad_norm": 0.6826892894964197, |
| "learning_rate": 5.892077549250341e-06, |
| "loss": 0.388, |
| "step": 1354 |
| }, |
| { |
| "epoch": 2.168, |
| "grad_norm": 0.6947181993249654, |
| "learning_rate": 5.8755774404444175e-06, |
| "loss": 0.4027, |
| "step": 1355 |
| }, |
| { |
| "epoch": 2.1696, |
| "grad_norm": 0.6623191040439221, |
| "learning_rate": 5.8590674889278e-06, |
| "loss": 0.3933, |
| "step": 1356 |
| }, |
| { |
| "epoch": 2.1712, |
| "grad_norm": 0.7076610375413062, |
| "learning_rate": 5.842547880295353e-06, |
| "loss": 0.4218, |
| "step": 1357 |
| }, |
| { |
| "epoch": 2.1728, |
| "grad_norm": 0.670755940027531, |
| "learning_rate": 5.8260188002505034e-06, |
| "loss": 0.4177, |
| "step": 1358 |
| }, |
| { |
| "epoch": 2.1744, |
| "grad_norm": 0.6831146691129467, |
| "learning_rate": 5.809480434603143e-06, |
| "loss": 0.3685, |
| "step": 1359 |
| }, |
| { |
| "epoch": 2.176, |
| "grad_norm": 0.726705280178454, |
| "learning_rate": 5.792932969267553e-06, |
| "loss": 0.4068, |
| "step": 1360 |
| }, |
| { |
| "epoch": 2.1776, |
| "grad_norm": 0.7488174281934294, |
| "learning_rate": 5.776376590260306e-06, |
| "loss": 0.4223, |
| "step": 1361 |
| }, |
| { |
| "epoch": 2.1792, |
| "grad_norm": 0.7355960902533142, |
| "learning_rate": 5.759811483698173e-06, |
| "loss": 0.398, |
| "step": 1362 |
| }, |
| { |
| "epoch": 2.1808, |
| "grad_norm": 0.687800573239867, |
| "learning_rate": 5.743237835796042e-06, |
| "loss": 0.3723, |
| "step": 1363 |
| }, |
| { |
| "epoch": 2.1824, |
| "grad_norm": 0.7153679038108298, |
| "learning_rate": 5.726655832864809e-06, |
| "loss": 0.403, |
| "step": 1364 |
| }, |
| { |
| "epoch": 2.184, |
| "grad_norm": 0.6856145780856769, |
| "learning_rate": 5.7100656613093005e-06, |
| "loss": 0.3883, |
| "step": 1365 |
| }, |
| { |
| "epoch": 2.1856, |
| "grad_norm": 0.699073434830909, |
| "learning_rate": 5.693467507626165e-06, |
| "loss": 0.4006, |
| "step": 1366 |
| }, |
| { |
| "epoch": 2.1872, |
| "grad_norm": 0.7413382057726158, |
| "learning_rate": 5.67686155840178e-06, |
| "loss": 0.3986, |
| "step": 1367 |
| }, |
| { |
| "epoch": 2.1888, |
| "grad_norm": 0.696181735941245, |
| "learning_rate": 5.660248000310162e-06, |
| "loss": 0.4102, |
| "step": 1368 |
| }, |
| { |
| "epoch": 2.1904, |
| "grad_norm": 0.6633405370718249, |
| "learning_rate": 5.643627020110855e-06, |
| "loss": 0.3799, |
| "step": 1369 |
| }, |
| { |
| "epoch": 2.192, |
| "grad_norm": 0.73029597277892, |
| "learning_rate": 5.626998804646842e-06, |
| "loss": 0.3803, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.1936, |
| "grad_norm": 0.6845929372793488, |
| "learning_rate": 5.610363540842435e-06, |
| "loss": 0.418, |
| "step": 1371 |
| }, |
| { |
| "epoch": 2.1952, |
| "grad_norm": 0.7142592582014309, |
| "learning_rate": 5.593721415701189e-06, |
| "loss": 0.4162, |
| "step": 1372 |
| }, |
| { |
| "epoch": 2.1968, |
| "grad_norm": 0.6786135519346064, |
| "learning_rate": 5.577072616303779e-06, |
| "loss": 0.4161, |
| "step": 1373 |
| }, |
| { |
| "epoch": 2.1984, |
| "grad_norm": 0.6783654100973778, |
| "learning_rate": 5.560417329805916e-06, |
| "loss": 0.4124, |
| "step": 1374 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 0.6819976648386019, |
| "learning_rate": 5.543755743436231e-06, |
| "loss": 0.3737, |
| "step": 1375 |
| }, |
| { |
| "epoch": 2.2016, |
| "grad_norm": 0.6592168446016077, |
| "learning_rate": 5.527088044494176e-06, |
| "loss": 0.3682, |
| "step": 1376 |
| }, |
| { |
| "epoch": 2.2032, |
| "grad_norm": 0.6952606563442375, |
| "learning_rate": 5.510414420347918e-06, |
| "loss": 0.4014, |
| "step": 1377 |
| }, |
| { |
| "epoch": 2.2048, |
| "grad_norm": 0.7695855630431729, |
| "learning_rate": 5.493735058432227e-06, |
| "loss": 0.4068, |
| "step": 1378 |
| }, |
| { |
| "epoch": 2.2064, |
| "grad_norm": 0.7088645745459955, |
| "learning_rate": 5.477050146246379e-06, |
| "loss": 0.404, |
| "step": 1379 |
| }, |
| { |
| "epoch": 2.208, |
| "grad_norm": 0.7210274092132056, |
| "learning_rate": 5.4603598713520354e-06, |
| "loss": 0.4213, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.2096, |
| "grad_norm": 0.6867630862942176, |
| "learning_rate": 5.443664421371153e-06, |
| "loss": 0.4103, |
| "step": 1381 |
| }, |
| { |
| "epoch": 2.2112, |
| "grad_norm": 0.7375263347537202, |
| "learning_rate": 5.426963983983853e-06, |
| "loss": 0.3992, |
| "step": 1382 |
| }, |
| { |
| "epoch": 2.2128, |
| "grad_norm": 0.7295547316150184, |
| "learning_rate": 5.410258746926328e-06, |
| "loss": 0.4223, |
| "step": 1383 |
| }, |
| { |
| "epoch": 2.2144, |
| "grad_norm": 0.7223275500404251, |
| "learning_rate": 5.393548897988724e-06, |
| "loss": 0.4034, |
| "step": 1384 |
| }, |
| { |
| "epoch": 2.216, |
| "grad_norm": 0.7243637838300921, |
| "learning_rate": 5.376834625013031e-06, |
| "loss": 0.4177, |
| "step": 1385 |
| }, |
| { |
| "epoch": 2.2176, |
| "grad_norm": 0.7173220811903077, |
| "learning_rate": 5.360116115890972e-06, |
| "loss": 0.3634, |
| "step": 1386 |
| }, |
| { |
| "epoch": 2.2192, |
| "grad_norm": 0.7305499937559662, |
| "learning_rate": 5.343393558561888e-06, |
| "loss": 0.4021, |
| "step": 1387 |
| }, |
| { |
| "epoch": 2.2208, |
| "grad_norm": 0.7035111004271161, |
| "learning_rate": 5.3266671410106306e-06, |
| "loss": 0.3911, |
| "step": 1388 |
| }, |
| { |
| "epoch": 2.2224, |
| "grad_norm": 0.7142086611702474, |
| "learning_rate": 5.309937051265443e-06, |
| "loss": 0.4013, |
| "step": 1389 |
| }, |
| { |
| "epoch": 2.224, |
| "grad_norm": 0.6912644551993837, |
| "learning_rate": 5.293203477395851e-06, |
| "loss": 0.3718, |
| "step": 1390 |
| }, |
| { |
| "epoch": 2.2256, |
| "grad_norm": 0.7557855577623867, |
| "learning_rate": 5.276466607510544e-06, |
| "loss": 0.4226, |
| "step": 1391 |
| }, |
| { |
| "epoch": 2.2272, |
| "grad_norm": 0.6771738582039201, |
| "learning_rate": 5.259726629755268e-06, |
| "loss": 0.4066, |
| "step": 1392 |
| }, |
| { |
| "epoch": 2.2288, |
| "grad_norm": 0.7163773998662869, |
| "learning_rate": 5.2429837323107005e-06, |
| "loss": 0.4141, |
| "step": 1393 |
| }, |
| { |
| "epoch": 2.2304, |
| "grad_norm": 0.7453580760304513, |
| "learning_rate": 5.226238103390343e-06, |
| "loss": 0.3945, |
| "step": 1394 |
| }, |
| { |
| "epoch": 2.232, |
| "grad_norm": 0.6675446305575985, |
| "learning_rate": 5.209489931238405e-06, |
| "loss": 0.4206, |
| "step": 1395 |
| }, |
| { |
| "epoch": 2.2336, |
| "grad_norm": 0.7179812275637177, |
| "learning_rate": 5.192739404127679e-06, |
| "loss": 0.4039, |
| "step": 1396 |
| }, |
| { |
| "epoch": 2.2352, |
| "grad_norm": 0.6819937541847448, |
| "learning_rate": 5.175986710357439e-06, |
| "loss": 0.3953, |
| "step": 1397 |
| }, |
| { |
| "epoch": 2.2368, |
| "grad_norm": 0.708633520110563, |
| "learning_rate": 5.159232038251305e-06, |
| "loss": 0.4016, |
| "step": 1398 |
| }, |
| { |
| "epoch": 2.2384, |
| "grad_norm": 0.7089142714394822, |
| "learning_rate": 5.142475576155146e-06, |
| "loss": 0.4128, |
| "step": 1399 |
| }, |
| { |
| "epoch": 2.24, |
| "grad_norm": 0.682097796087665, |
| "learning_rate": 5.125717512434947e-06, |
| "loss": 0.3925, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.2416, |
| "grad_norm": 0.6873278404458673, |
| "learning_rate": 5.108958035474703e-06, |
| "loss": 0.3986, |
| "step": 1401 |
| }, |
| { |
| "epoch": 2.2432, |
| "grad_norm": 0.6833518258535074, |
| "learning_rate": 5.092197333674286e-06, |
| "loss": 0.3849, |
| "step": 1402 |
| }, |
| { |
| "epoch": 2.2448, |
| "grad_norm": 0.6899471206939742, |
| "learning_rate": 5.075435595447346e-06, |
| "loss": 0.395, |
| "step": 1403 |
| }, |
| { |
| "epoch": 2.2464, |
| "grad_norm": 0.7060680016467793, |
| "learning_rate": 5.0586730092191835e-06, |
| "loss": 0.4117, |
| "step": 1404 |
| }, |
| { |
| "epoch": 2.248, |
| "grad_norm": 0.6691116987846536, |
| "learning_rate": 5.041909763424625e-06, |
| "loss": 0.3869, |
| "step": 1405 |
| }, |
| { |
| "epoch": 2.2496, |
| "grad_norm": 0.6726090508188094, |
| "learning_rate": 5.0251460465059175e-06, |
| "loss": 0.3973, |
| "step": 1406 |
| }, |
| { |
| "epoch": 2.2512, |
| "grad_norm": 0.7261719342392827, |
| "learning_rate": 5.0083820469106016e-06, |
| "loss": 0.3885, |
| "step": 1407 |
| }, |
| { |
| "epoch": 2.2528, |
| "grad_norm": 0.7045454425538864, |
| "learning_rate": 4.991617953089399e-06, |
| "loss": 0.4085, |
| "step": 1408 |
| }, |
| { |
| "epoch": 2.2544, |
| "grad_norm": 0.7268522978922883, |
| "learning_rate": 4.9748539534940825e-06, |
| "loss": 0.3925, |
| "step": 1409 |
| }, |
| { |
| "epoch": 2.2560000000000002, |
| "grad_norm": 0.7293107359251259, |
| "learning_rate": 4.9580902365753765e-06, |
| "loss": 0.413, |
| "step": 1410 |
| }, |
| { |
| "epoch": 2.2576, |
| "grad_norm": 0.7306458660459159, |
| "learning_rate": 4.941326990780819e-06, |
| "loss": 0.3819, |
| "step": 1411 |
| }, |
| { |
| "epoch": 2.2592, |
| "grad_norm": 0.7453668668217899, |
| "learning_rate": 4.9245644045526546e-06, |
| "loss": 0.4128, |
| "step": 1412 |
| }, |
| { |
| "epoch": 2.2608, |
| "grad_norm": 0.7327780131800568, |
| "learning_rate": 4.907802666325716e-06, |
| "loss": 0.3984, |
| "step": 1413 |
| }, |
| { |
| "epoch": 2.2624, |
| "grad_norm": 0.6705661754023295, |
| "learning_rate": 4.891041964525301e-06, |
| "loss": 0.363, |
| "step": 1414 |
| }, |
| { |
| "epoch": 2.2640000000000002, |
| "grad_norm": 0.6705860102552303, |
| "learning_rate": 4.874282487565053e-06, |
| "loss": 0.4082, |
| "step": 1415 |
| }, |
| { |
| "epoch": 2.2656, |
| "grad_norm": 0.6754476261858788, |
| "learning_rate": 4.857524423844855e-06, |
| "loss": 0.4024, |
| "step": 1416 |
| }, |
| { |
| "epoch": 2.2672, |
| "grad_norm": 0.6943591916964985, |
| "learning_rate": 4.840767961748697e-06, |
| "loss": 0.4042, |
| "step": 1417 |
| }, |
| { |
| "epoch": 2.2688, |
| "grad_norm": 0.6479606601514696, |
| "learning_rate": 4.824013289642563e-06, |
| "loss": 0.4014, |
| "step": 1418 |
| }, |
| { |
| "epoch": 2.2704, |
| "grad_norm": 0.7011706070992588, |
| "learning_rate": 4.807260595872322e-06, |
| "loss": 0.4016, |
| "step": 1419 |
| }, |
| { |
| "epoch": 2.2720000000000002, |
| "grad_norm": 0.661445156193842, |
| "learning_rate": 4.790510068761596e-06, |
| "loss": 0.4132, |
| "step": 1420 |
| }, |
| { |
| "epoch": 2.2736, |
| "grad_norm": 0.6418778504051741, |
| "learning_rate": 4.773761896609658e-06, |
| "loss": 0.3758, |
| "step": 1421 |
| }, |
| { |
| "epoch": 2.2752, |
| "grad_norm": 0.693742601668366, |
| "learning_rate": 4.757016267689302e-06, |
| "loss": 0.3709, |
| "step": 1422 |
| }, |
| { |
| "epoch": 2.2768, |
| "grad_norm": 0.6749659667015893, |
| "learning_rate": 4.740273370244734e-06, |
| "loss": 0.3955, |
| "step": 1423 |
| }, |
| { |
| "epoch": 2.2784, |
| "grad_norm": 0.7249497133831025, |
| "learning_rate": 4.723533392489457e-06, |
| "loss": 0.3826, |
| "step": 1424 |
| }, |
| { |
| "epoch": 2.2800000000000002, |
| "grad_norm": 0.638182443419821, |
| "learning_rate": 4.706796522604152e-06, |
| "loss": 0.3764, |
| "step": 1425 |
| }, |
| { |
| "epoch": 2.2816, |
| "grad_norm": 0.7091242913232427, |
| "learning_rate": 4.690062948734558e-06, |
| "loss": 0.4075, |
| "step": 1426 |
| }, |
| { |
| "epoch": 2.2832, |
| "grad_norm": 0.7643436712622311, |
| "learning_rate": 4.673332858989371e-06, |
| "loss": 0.4033, |
| "step": 1427 |
| }, |
| { |
| "epoch": 2.2848, |
| "grad_norm": 0.6519541942146109, |
| "learning_rate": 4.656606441438114e-06, |
| "loss": 0.4077, |
| "step": 1428 |
| }, |
| { |
| "epoch": 2.2864, |
| "grad_norm": 0.6789895179066501, |
| "learning_rate": 4.639883884109029e-06, |
| "loss": 0.3892, |
| "step": 1429 |
| }, |
| { |
| "epoch": 2.288, |
| "grad_norm": 0.7086470439814316, |
| "learning_rate": 4.623165374986971e-06, |
| "loss": 0.3915, |
| "step": 1430 |
| }, |
| { |
| "epoch": 2.2896, |
| "grad_norm": 0.7074373363966382, |
| "learning_rate": 4.606451102011278e-06, |
| "loss": 0.4109, |
| "step": 1431 |
| }, |
| { |
| "epoch": 2.2912, |
| "grad_norm": 0.6975980679382322, |
| "learning_rate": 4.589741253073673e-06, |
| "loss": 0.4318, |
| "step": 1432 |
| }, |
| { |
| "epoch": 2.2928, |
| "grad_norm": 0.7023847612883237, |
| "learning_rate": 4.573036016016149e-06, |
| "loss": 0.3808, |
| "step": 1433 |
| }, |
| { |
| "epoch": 2.2944, |
| "grad_norm": 0.7036622385598531, |
| "learning_rate": 4.556335578628849e-06, |
| "loss": 0.4258, |
| "step": 1434 |
| }, |
| { |
| "epoch": 2.296, |
| "grad_norm": 0.673940239730178, |
| "learning_rate": 4.539640128647965e-06, |
| "loss": 0.4067, |
| "step": 1435 |
| }, |
| { |
| "epoch": 2.2976, |
| "grad_norm": 0.7483025545223577, |
| "learning_rate": 4.522949853753624e-06, |
| "loss": 0.411, |
| "step": 1436 |
| }, |
| { |
| "epoch": 2.2992, |
| "grad_norm": 0.7146685489644412, |
| "learning_rate": 4.506264941567774e-06, |
| "loss": 0.4396, |
| "step": 1437 |
| }, |
| { |
| "epoch": 2.3008, |
| "grad_norm": 0.668252809567489, |
| "learning_rate": 4.489585579652083e-06, |
| "loss": 0.3773, |
| "step": 1438 |
| }, |
| { |
| "epoch": 2.3024, |
| "grad_norm": 0.7823858754349284, |
| "learning_rate": 4.472911955505825e-06, |
| "loss": 0.4211, |
| "step": 1439 |
| }, |
| { |
| "epoch": 2.304, |
| "grad_norm": 0.6753315804492159, |
| "learning_rate": 4.456244256563769e-06, |
| "loss": 0.3964, |
| "step": 1440 |
| }, |
| { |
| "epoch": 2.3056, |
| "grad_norm": 0.6821633560700455, |
| "learning_rate": 4.439582670194086e-06, |
| "loss": 0.3763, |
| "step": 1441 |
| }, |
| { |
| "epoch": 2.3072, |
| "grad_norm": 0.7103390458787602, |
| "learning_rate": 4.422927383696224e-06, |
| "loss": 0.4258, |
| "step": 1442 |
| }, |
| { |
| "epoch": 2.3088, |
| "grad_norm": 0.6720513053935617, |
| "learning_rate": 4.406278584298813e-06, |
| "loss": 0.4202, |
| "step": 1443 |
| }, |
| { |
| "epoch": 2.3104, |
| "grad_norm": 0.6805679607043243, |
| "learning_rate": 4.389636459157567e-06, |
| "loss": 0.41, |
| "step": 1444 |
| }, |
| { |
| "epoch": 2.312, |
| "grad_norm": 0.6819644157134925, |
| "learning_rate": 4.373001195353159e-06, |
| "loss": 0.3685, |
| "step": 1445 |
| }, |
| { |
| "epoch": 2.3136, |
| "grad_norm": 0.6802122050642477, |
| "learning_rate": 4.356372979889146e-06, |
| "loss": 0.4091, |
| "step": 1446 |
| }, |
| { |
| "epoch": 2.3152, |
| "grad_norm": 0.6841376043715974, |
| "learning_rate": 4.339751999689839e-06, |
| "loss": 0.4062, |
| "step": 1447 |
| }, |
| { |
| "epoch": 2.3168, |
| "grad_norm": 0.7051365598389653, |
| "learning_rate": 4.323138441598219e-06, |
| "loss": 0.4131, |
| "step": 1448 |
| }, |
| { |
| "epoch": 2.3184, |
| "grad_norm": 0.6850129830520767, |
| "learning_rate": 4.306532492373836e-06, |
| "loss": 0.3851, |
| "step": 1449 |
| }, |
| { |
| "epoch": 2.32, |
| "grad_norm": 0.6813794267985254, |
| "learning_rate": 4.2899343386907e-06, |
| "loss": 0.4046, |
| "step": 1450 |
| }, |
| { |
| "epoch": 2.3216, |
| "grad_norm": 0.8127582644849977, |
| "learning_rate": 4.273344167135191e-06, |
| "loss": 0.4326, |
| "step": 1451 |
| }, |
| { |
| "epoch": 2.3232, |
| "grad_norm": 0.6877511095487422, |
| "learning_rate": 4.25676216420396e-06, |
| "loss": 0.3993, |
| "step": 1452 |
| }, |
| { |
| "epoch": 2.3247999999999998, |
| "grad_norm": 0.6755597194123087, |
| "learning_rate": 4.240188516301829e-06, |
| "loss": 0.3774, |
| "step": 1453 |
| }, |
| { |
| "epoch": 2.3264, |
| "grad_norm": 0.6712226844945036, |
| "learning_rate": 4.223623409739695e-06, |
| "loss": 0.4036, |
| "step": 1454 |
| }, |
| { |
| "epoch": 2.328, |
| "grad_norm": 0.6909899770747273, |
| "learning_rate": 4.207067030732449e-06, |
| "loss": 0.4003, |
| "step": 1455 |
| }, |
| { |
| "epoch": 2.3296, |
| "grad_norm": 0.6253345765747766, |
| "learning_rate": 4.190519565396859e-06, |
| "loss": 0.3804, |
| "step": 1456 |
| }, |
| { |
| "epoch": 2.3312, |
| "grad_norm": 0.6458198181452498, |
| "learning_rate": 4.173981199749498e-06, |
| "loss": 0.396, |
| "step": 1457 |
| }, |
| { |
| "epoch": 2.3327999999999998, |
| "grad_norm": 0.6704496718692096, |
| "learning_rate": 4.157452119704648e-06, |
| "loss": 0.3901, |
| "step": 1458 |
| }, |
| { |
| "epoch": 2.3344, |
| "grad_norm": 0.6493221379438446, |
| "learning_rate": 4.140932511072201e-06, |
| "loss": 0.3735, |
| "step": 1459 |
| }, |
| { |
| "epoch": 2.336, |
| "grad_norm": 0.6295406358467901, |
| "learning_rate": 4.124422559555584e-06, |
| "loss": 0.3564, |
| "step": 1460 |
| }, |
| { |
| "epoch": 2.3376, |
| "grad_norm": 0.7077651554025548, |
| "learning_rate": 4.10792245074966e-06, |
| "loss": 0.4208, |
| "step": 1461 |
| }, |
| { |
| "epoch": 2.3392, |
| "grad_norm": 0.6567217376957504, |
| "learning_rate": 4.091432370138646e-06, |
| "loss": 0.3922, |
| "step": 1462 |
| }, |
| { |
| "epoch": 2.3407999999999998, |
| "grad_norm": 0.6556453133870094, |
| "learning_rate": 4.0749525030940335e-06, |
| "loss": 0.4057, |
| "step": 1463 |
| }, |
| { |
| "epoch": 2.3424, |
| "grad_norm": 0.6724886679673604, |
| "learning_rate": 4.058483034872493e-06, |
| "loss": 0.3861, |
| "step": 1464 |
| }, |
| { |
| "epoch": 2.344, |
| "grad_norm": 0.6855294133077684, |
| "learning_rate": 4.042024150613798e-06, |
| "loss": 0.3774, |
| "step": 1465 |
| }, |
| { |
| "epoch": 2.3456, |
| "grad_norm": 0.6635423226767675, |
| "learning_rate": 4.025576035338752e-06, |
| "loss": 0.3946, |
| "step": 1466 |
| }, |
| { |
| "epoch": 2.3472, |
| "grad_norm": 0.7423848787310959, |
| "learning_rate": 4.009138873947089e-06, |
| "loss": 0.3914, |
| "step": 1467 |
| }, |
| { |
| "epoch": 2.3487999999999998, |
| "grad_norm": 0.6431595325825293, |
| "learning_rate": 3.992712851215411e-06, |
| "loss": 0.3826, |
| "step": 1468 |
| }, |
| { |
| "epoch": 2.3504, |
| "grad_norm": 0.6878948261458981, |
| "learning_rate": 3.976298151795107e-06, |
| "loss": 0.4079, |
| "step": 1469 |
| }, |
| { |
| "epoch": 2.352, |
| "grad_norm": 0.7080433729321897, |
| "learning_rate": 3.959894960210275e-06, |
| "loss": 0.4105, |
| "step": 1470 |
| }, |
| { |
| "epoch": 2.3536, |
| "grad_norm": 0.6683034505218146, |
| "learning_rate": 3.9435034608556505e-06, |
| "loss": 0.3808, |
| "step": 1471 |
| }, |
| { |
| "epoch": 2.3552, |
| "grad_norm": 0.6960317935323845, |
| "learning_rate": 3.9271238379945285e-06, |
| "loss": 0.4075, |
| "step": 1472 |
| }, |
| { |
| "epoch": 2.3568, |
| "grad_norm": 0.6424432168544976, |
| "learning_rate": 3.9107562757566975e-06, |
| "loss": 0.3995, |
| "step": 1473 |
| }, |
| { |
| "epoch": 2.3584, |
| "grad_norm": 0.6589890194361783, |
| "learning_rate": 3.8944009581363696e-06, |
| "loss": 0.3845, |
| "step": 1474 |
| }, |
| { |
| "epoch": 2.36, |
| "grad_norm": 0.6952275153409164, |
| "learning_rate": 3.87805806899011e-06, |
| "loss": 0.395, |
| "step": 1475 |
| }, |
| { |
| "epoch": 2.3616, |
| "grad_norm": 0.6648463342159043, |
| "learning_rate": 3.861727792034762e-06, |
| "loss": 0.3829, |
| "step": 1476 |
| }, |
| { |
| "epoch": 2.3632, |
| "grad_norm": 0.7094582369296529, |
| "learning_rate": 3.8454103108454075e-06, |
| "loss": 0.394, |
| "step": 1477 |
| }, |
| { |
| "epoch": 2.3648, |
| "grad_norm": 0.6262697064772254, |
| "learning_rate": 3.82910580885327e-06, |
| "loss": 0.4001, |
| "step": 1478 |
| }, |
| { |
| "epoch": 2.3664, |
| "grad_norm": 0.6503012682537402, |
| "learning_rate": 3.8128144693436743e-06, |
| "loss": 0.3826, |
| "step": 1479 |
| }, |
| { |
| "epoch": 2.368, |
| "grad_norm": 0.7263227516132823, |
| "learning_rate": 3.7965364754539845e-06, |
| "loss": 0.4194, |
| "step": 1480 |
| }, |
| { |
| "epoch": 2.3696, |
| "grad_norm": 0.6982469981323273, |
| "learning_rate": 3.7802720101715355e-06, |
| "loss": 0.4261, |
| "step": 1481 |
| }, |
| { |
| "epoch": 2.3712, |
| "grad_norm": 0.638292800352635, |
| "learning_rate": 3.764021256331587e-06, |
| "loss": 0.3899, |
| "step": 1482 |
| }, |
| { |
| "epoch": 2.3728, |
| "grad_norm": 0.6877066110692359, |
| "learning_rate": 3.747784396615258e-06, |
| "loss": 0.4094, |
| "step": 1483 |
| }, |
| { |
| "epoch": 2.3744, |
| "grad_norm": 0.6903168810990971, |
| "learning_rate": 3.731561613547481e-06, |
| "loss": 0.404, |
| "step": 1484 |
| }, |
| { |
| "epoch": 2.376, |
| "grad_norm": 0.7006555167018877, |
| "learning_rate": 3.7153530894949476e-06, |
| "loss": 0.3846, |
| "step": 1485 |
| }, |
| { |
| "epoch": 2.3776, |
| "grad_norm": 0.672790227815282, |
| "learning_rate": 3.699159006664056e-06, |
| "loss": 0.3967, |
| "step": 1486 |
| }, |
| { |
| "epoch": 2.3792, |
| "grad_norm": 0.6525571495063067, |
| "learning_rate": 3.682979547098867e-06, |
| "loss": 0.3889, |
| "step": 1487 |
| }, |
| { |
| "epoch": 2.3808, |
| "grad_norm": 0.6814511121574097, |
| "learning_rate": 3.6668148926790557e-06, |
| "loss": 0.4082, |
| "step": 1488 |
| }, |
| { |
| "epoch": 2.3824, |
| "grad_norm": 0.6920829456162518, |
| "learning_rate": 3.6506652251178663e-06, |
| "loss": 0.3912, |
| "step": 1489 |
| }, |
| { |
| "epoch": 2.384, |
| "grad_norm": 0.6793947033978027, |
| "learning_rate": 3.6345307259600657e-06, |
| "loss": 0.4029, |
| "step": 1490 |
| }, |
| { |
| "epoch": 2.3856, |
| "grad_norm": 0.6885773294894112, |
| "learning_rate": 3.618411576579916e-06, |
| "loss": 0.3924, |
| "step": 1491 |
| }, |
| { |
| "epoch": 2.3872, |
| "grad_norm": 0.6528553289067818, |
| "learning_rate": 3.602307958179117e-06, |
| "loss": 0.3756, |
| "step": 1492 |
| }, |
| { |
| "epoch": 2.3888, |
| "grad_norm": 0.6846371926456436, |
| "learning_rate": 3.586220051784783e-06, |
| "loss": 0.4027, |
| "step": 1493 |
| }, |
| { |
| "epoch": 2.3904, |
| "grad_norm": 0.658940194551754, |
| "learning_rate": 3.5701480382474047e-06, |
| "loss": 0.3877, |
| "step": 1494 |
| }, |
| { |
| "epoch": 2.392, |
| "grad_norm": 0.649993839146253, |
| "learning_rate": 3.554092098238811e-06, |
| "loss": 0.3749, |
| "step": 1495 |
| }, |
| { |
| "epoch": 2.3936, |
| "grad_norm": 0.6997098749775951, |
| "learning_rate": 3.538052412250147e-06, |
| "loss": 0.4402, |
| "step": 1496 |
| }, |
| { |
| "epoch": 2.3952, |
| "grad_norm": 0.6723568514834078, |
| "learning_rate": 3.5220291605898354e-06, |
| "loss": 0.3833, |
| "step": 1497 |
| }, |
| { |
| "epoch": 2.3968, |
| "grad_norm": 0.6446746094646758, |
| "learning_rate": 3.5060225233815554e-06, |
| "loss": 0.3913, |
| "step": 1498 |
| }, |
| { |
| "epoch": 2.3984, |
| "grad_norm": 0.6788233040081576, |
| "learning_rate": 3.4900326805622185e-06, |
| "loss": 0.4148, |
| "step": 1499 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.6450106514990551, |
| "learning_rate": 3.474059811879944e-06, |
| "loss": 0.366, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.4016, |
| "grad_norm": 0.6634551584509225, |
| "learning_rate": 3.458104096892031e-06, |
| "loss": 0.3909, |
| "step": 1501 |
| }, |
| { |
| "epoch": 2.4032, |
| "grad_norm": 0.6619386717966989, |
| "learning_rate": 3.4421657149629593e-06, |
| "loss": 0.3943, |
| "step": 1502 |
| }, |
| { |
| "epoch": 2.4048, |
| "grad_norm": 0.6424827311751734, |
| "learning_rate": 3.4262448452623514e-06, |
| "loss": 0.3719, |
| "step": 1503 |
| }, |
| { |
| "epoch": 2.4064, |
| "grad_norm": 0.6658069057214353, |
| "learning_rate": 3.410341666762971e-06, |
| "loss": 0.3898, |
| "step": 1504 |
| }, |
| { |
| "epoch": 2.408, |
| "grad_norm": 0.6687417884193916, |
| "learning_rate": 3.3944563582387084e-06, |
| "loss": 0.3798, |
| "step": 1505 |
| }, |
| { |
| "epoch": 2.4096, |
| "grad_norm": 0.6681859209001135, |
| "learning_rate": 3.3785890982625702e-06, |
| "loss": 0.3919, |
| "step": 1506 |
| }, |
| { |
| "epoch": 2.4112, |
| "grad_norm": 0.6902976293558055, |
| "learning_rate": 3.3627400652046736e-06, |
| "loss": 0.4113, |
| "step": 1507 |
| }, |
| { |
| "epoch": 2.4128, |
| "grad_norm": 0.6567954118535304, |
| "learning_rate": 3.3469094372302374e-06, |
| "loss": 0.3871, |
| "step": 1508 |
| }, |
| { |
| "epoch": 2.4144, |
| "grad_norm": 0.6462346211089492, |
| "learning_rate": 3.331097392297582e-06, |
| "loss": 0.3721, |
| "step": 1509 |
| }, |
| { |
| "epoch": 2.416, |
| "grad_norm": 0.6980015096147874, |
| "learning_rate": 3.31530410815613e-06, |
| "loss": 0.3966, |
| "step": 1510 |
| }, |
| { |
| "epoch": 2.4176, |
| "grad_norm": 0.6468230714773202, |
| "learning_rate": 3.2995297623444067e-06, |
| "loss": 0.3988, |
| "step": 1511 |
| }, |
| { |
| "epoch": 2.4192, |
| "grad_norm": 0.6223331150007658, |
| "learning_rate": 3.283774532188039e-06, |
| "loss": 0.3783, |
| "step": 1512 |
| }, |
| { |
| "epoch": 2.4208, |
| "grad_norm": 0.6945157649484203, |
| "learning_rate": 3.268038594797777e-06, |
| "loss": 0.4102, |
| "step": 1513 |
| }, |
| { |
| "epoch": 2.4224, |
| "grad_norm": 0.6609439584259712, |
| "learning_rate": 3.2523221270674845e-06, |
| "loss": 0.4097, |
| "step": 1514 |
| }, |
| { |
| "epoch": 2.424, |
| "grad_norm": 0.6551730823477213, |
| "learning_rate": 3.2366253056721607e-06, |
| "loss": 0.3776, |
| "step": 1515 |
| }, |
| { |
| "epoch": 2.4256, |
| "grad_norm": 0.7236681230147213, |
| "learning_rate": 3.220948307065959e-06, |
| "loss": 0.4086, |
| "step": 1516 |
| }, |
| { |
| "epoch": 2.4272, |
| "grad_norm": 0.6480195720845133, |
| "learning_rate": 3.2052913074801876e-06, |
| "loss": 0.4135, |
| "step": 1517 |
| }, |
| { |
| "epoch": 2.4288, |
| "grad_norm": 0.6703274388094672, |
| "learning_rate": 3.1896544829213444e-06, |
| "loss": 0.3984, |
| "step": 1518 |
| }, |
| { |
| "epoch": 2.4304, |
| "grad_norm": 0.7483782518618497, |
| "learning_rate": 3.17403800916913e-06, |
| "loss": 0.4037, |
| "step": 1519 |
| }, |
| { |
| "epoch": 2.432, |
| "grad_norm": 0.66746703547942, |
| "learning_rate": 3.1584420617744737e-06, |
| "loss": 0.4035, |
| "step": 1520 |
| }, |
| { |
| "epoch": 2.4336, |
| "grad_norm": 0.6401406179681777, |
| "learning_rate": 3.142866816057559e-06, |
| "loss": 0.4157, |
| "step": 1521 |
| }, |
| { |
| "epoch": 2.4352, |
| "grad_norm": 0.6455361206817064, |
| "learning_rate": 3.1273124471058567e-06, |
| "loss": 0.3962, |
| "step": 1522 |
| }, |
| { |
| "epoch": 2.4368, |
| "grad_norm": 0.6384977031003991, |
| "learning_rate": 3.1117791297721468e-06, |
| "loss": 0.3905, |
| "step": 1523 |
| }, |
| { |
| "epoch": 2.4384, |
| "grad_norm": 0.6725488719932304, |
| "learning_rate": 3.09626703867257e-06, |
| "loss": 0.4252, |
| "step": 1524 |
| }, |
| { |
| "epoch": 2.44, |
| "grad_norm": 0.6605135568050072, |
| "learning_rate": 3.0807763481846455e-06, |
| "loss": 0.3826, |
| "step": 1525 |
| }, |
| { |
| "epoch": 2.4416, |
| "grad_norm": 0.6721958666254848, |
| "learning_rate": 3.0653072324453226e-06, |
| "loss": 0.3962, |
| "step": 1526 |
| }, |
| { |
| "epoch": 2.4432, |
| "grad_norm": 0.6780206249071795, |
| "learning_rate": 3.049859865349023e-06, |
| "loss": 0.3751, |
| "step": 1527 |
| }, |
| { |
| "epoch": 2.4448, |
| "grad_norm": 0.6238495618746309, |
| "learning_rate": 3.0344344205456807e-06, |
| "loss": 0.3903, |
| "step": 1528 |
| }, |
| { |
| "epoch": 2.4464, |
| "grad_norm": 0.6513304236677114, |
| "learning_rate": 3.0190310714387914e-06, |
| "loss": 0.3975, |
| "step": 1529 |
| }, |
| { |
| "epoch": 2.448, |
| "grad_norm": 0.7177034625230926, |
| "learning_rate": 3.00364999118347e-06, |
| "loss": 0.4159, |
| "step": 1530 |
| }, |
| { |
| "epoch": 2.4496, |
| "grad_norm": 0.6507837034685228, |
| "learning_rate": 2.988291352684491e-06, |
| "loss": 0.378, |
| "step": 1531 |
| }, |
| { |
| "epoch": 2.4512, |
| "grad_norm": 0.6118401749337214, |
| "learning_rate": 2.9729553285943587e-06, |
| "loss": 0.3795, |
| "step": 1532 |
| }, |
| { |
| "epoch": 2.4528, |
| "grad_norm": 0.6395089221435761, |
| "learning_rate": 2.9576420913113568e-06, |
| "loss": 0.4056, |
| "step": 1533 |
| }, |
| { |
| "epoch": 2.4544, |
| "grad_norm": 0.625923794552961, |
| "learning_rate": 2.9423518129776095e-06, |
| "loss": 0.37, |
| "step": 1534 |
| }, |
| { |
| "epoch": 2.456, |
| "grad_norm": 0.6827282105564476, |
| "learning_rate": 2.927084665477162e-06, |
| "loss": 0.4257, |
| "step": 1535 |
| }, |
| { |
| "epoch": 2.4576000000000002, |
| "grad_norm": 0.6605488868655838, |
| "learning_rate": 2.9118408204340244e-06, |
| "loss": 0.3719, |
| "step": 1536 |
| }, |
| { |
| "epoch": 2.4592, |
| "grad_norm": 0.6406328281031711, |
| "learning_rate": 2.8966204492102606e-06, |
| "loss": 0.3965, |
| "step": 1537 |
| }, |
| { |
| "epoch": 2.4608, |
| "grad_norm": 0.6326830223678251, |
| "learning_rate": 2.8814237229040556e-06, |
| "loss": 0.3783, |
| "step": 1538 |
| }, |
| { |
| "epoch": 2.4624, |
| "grad_norm": 0.6219392243662581, |
| "learning_rate": 2.866250812347795e-06, |
| "loss": 0.3932, |
| "step": 1539 |
| }, |
| { |
| "epoch": 2.464, |
| "grad_norm": 0.6251015621535402, |
| "learning_rate": 2.8511018881061347e-06, |
| "loss": 0.3815, |
| "step": 1540 |
| }, |
| { |
| "epoch": 2.4656000000000002, |
| "grad_norm": 0.6179759318368455, |
| "learning_rate": 2.8359771204741e-06, |
| "loss": 0.3849, |
| "step": 1541 |
| }, |
| { |
| "epoch": 2.4672, |
| "grad_norm": 0.652891973703576, |
| "learning_rate": 2.8208766794751518e-06, |
| "loss": 0.3752, |
| "step": 1542 |
| }, |
| { |
| "epoch": 2.4688, |
| "grad_norm": 0.6357740525025578, |
| "learning_rate": 2.8058007348593003e-06, |
| "loss": 0.401, |
| "step": 1543 |
| }, |
| { |
| "epoch": 2.4704, |
| "grad_norm": 0.6395930935434486, |
| "learning_rate": 2.7907494561011693e-06, |
| "loss": 0.381, |
| "step": 1544 |
| }, |
| { |
| "epoch": 2.472, |
| "grad_norm": 0.6512737399321729, |
| "learning_rate": 2.775723012398107e-06, |
| "loss": 0.3814, |
| "step": 1545 |
| }, |
| { |
| "epoch": 2.4736000000000002, |
| "grad_norm": 0.6583217552351495, |
| "learning_rate": 2.760721572668284e-06, |
| "loss": 0.4403, |
| "step": 1546 |
| }, |
| { |
| "epoch": 2.4752, |
| "grad_norm": 0.6372114454208074, |
| "learning_rate": 2.745745305548793e-06, |
| "loss": 0.4075, |
| "step": 1547 |
| }, |
| { |
| "epoch": 2.4768, |
| "grad_norm": 0.6407639143002334, |
| "learning_rate": 2.730794379393742e-06, |
| "loss": 0.394, |
| "step": 1548 |
| }, |
| { |
| "epoch": 2.4784, |
| "grad_norm": 0.6329406769881196, |
| "learning_rate": 2.7158689622723816e-06, |
| "loss": 0.3912, |
| "step": 1549 |
| }, |
| { |
| "epoch": 2.48, |
| "grad_norm": 0.6195750421768822, |
| "learning_rate": 2.7009692219672025e-06, |
| "loss": 0.373, |
| "step": 1550 |
| }, |
| { |
| "epoch": 2.4816, |
| "grad_norm": 0.6192216383642458, |
| "learning_rate": 2.6860953259720473e-06, |
| "loss": 0.3829, |
| "step": 1551 |
| }, |
| { |
| "epoch": 2.4832, |
| "grad_norm": 0.6850583448382874, |
| "learning_rate": 2.67124744149024e-06, |
| "loss": 0.4015, |
| "step": 1552 |
| }, |
| { |
| "epoch": 2.4848, |
| "grad_norm": 0.6562865958799802, |
| "learning_rate": 2.6564257354326915e-06, |
| "loss": 0.4099, |
| "step": 1553 |
| }, |
| { |
| "epoch": 2.4864, |
| "grad_norm": 0.6511507259040457, |
| "learning_rate": 2.641630374416036e-06, |
| "loss": 0.4062, |
| "step": 1554 |
| }, |
| { |
| "epoch": 2.488, |
| "grad_norm": 0.640268506310053, |
| "learning_rate": 2.6268615247607533e-06, |
| "loss": 0.405, |
| "step": 1555 |
| }, |
| { |
| "epoch": 2.4896, |
| "grad_norm": 0.6211246689771531, |
| "learning_rate": 2.612119352489292e-06, |
| "loss": 0.3971, |
| "step": 1556 |
| }, |
| { |
| "epoch": 2.4912, |
| "grad_norm": 0.6368488390713289, |
| "learning_rate": 2.597404023324217e-06, |
| "loss": 0.4017, |
| "step": 1557 |
| }, |
| { |
| "epoch": 2.4928, |
| "grad_norm": 0.7336452901332965, |
| "learning_rate": 2.582715702686337e-06, |
| "loss": 0.407, |
| "step": 1558 |
| }, |
| { |
| "epoch": 2.4944, |
| "grad_norm": 0.616540685351605, |
| "learning_rate": 2.5680545556928438e-06, |
| "loss": 0.3558, |
| "step": 1559 |
| }, |
| { |
| "epoch": 2.496, |
| "grad_norm": 0.6577552715026054, |
| "learning_rate": 2.5534207471554644e-06, |
| "loss": 0.3981, |
| "step": 1560 |
| }, |
| { |
| "epoch": 2.4976, |
| "grad_norm": 0.6549969308925954, |
| "learning_rate": 2.5388144415786026e-06, |
| "loss": 0.4143, |
| "step": 1561 |
| }, |
| { |
| "epoch": 2.4992, |
| "grad_norm": 0.6288196948164876, |
| "learning_rate": 2.5242358031574853e-06, |
| "loss": 0.3885, |
| "step": 1562 |
| }, |
| { |
| "epoch": 2.5008, |
| "grad_norm": 0.6126052917870453, |
| "learning_rate": 2.509684995776329e-06, |
| "loss": 0.3689, |
| "step": 1563 |
| }, |
| { |
| "epoch": 2.5023999999999997, |
| "grad_norm": 0.6876081050348912, |
| "learning_rate": 2.4951621830064887e-06, |
| "loss": 0.4122, |
| "step": 1564 |
| }, |
| { |
| "epoch": 2.504, |
| "grad_norm": 0.6426287104445904, |
| "learning_rate": 2.480667528104617e-06, |
| "loss": 0.3971, |
| "step": 1565 |
| }, |
| { |
| "epoch": 2.5056000000000003, |
| "grad_norm": 0.6187941851724073, |
| "learning_rate": 2.4662011940108383e-06, |
| "loss": 0.3863, |
| "step": 1566 |
| }, |
| { |
| "epoch": 2.5072, |
| "grad_norm": 0.6466697588066401, |
| "learning_rate": 2.4517633433469062e-06, |
| "loss": 0.388, |
| "step": 1567 |
| }, |
| { |
| "epoch": 2.5088, |
| "grad_norm": 0.6482939191272499, |
| "learning_rate": 2.437354138414385e-06, |
| "loss": 0.3903, |
| "step": 1568 |
| }, |
| { |
| "epoch": 2.5103999999999997, |
| "grad_norm": 0.6396499447362041, |
| "learning_rate": 2.4229737411928222e-06, |
| "loss": 0.4091, |
| "step": 1569 |
| }, |
| { |
| "epoch": 2.512, |
| "grad_norm": 0.6245065552164869, |
| "learning_rate": 2.40862231333792e-06, |
| "loss": 0.395, |
| "step": 1570 |
| }, |
| { |
| "epoch": 2.5136, |
| "grad_norm": 0.6399504852316908, |
| "learning_rate": 2.3943000161797304e-06, |
| "loss": 0.3986, |
| "step": 1571 |
| }, |
| { |
| "epoch": 2.5152, |
| "grad_norm": 0.6291806247948137, |
| "learning_rate": 2.3800070107208355e-06, |
| "loss": 0.3804, |
| "step": 1572 |
| }, |
| { |
| "epoch": 2.5168, |
| "grad_norm": 0.6423976811330442, |
| "learning_rate": 2.365743457634533e-06, |
| "loss": 0.3722, |
| "step": 1573 |
| }, |
| { |
| "epoch": 2.5183999999999997, |
| "grad_norm": 0.6631554110429378, |
| "learning_rate": 2.351509517263041e-06, |
| "loss": 0.3978, |
| "step": 1574 |
| }, |
| { |
| "epoch": 2.52, |
| "grad_norm": 0.6585162256686338, |
| "learning_rate": 2.3373053496156865e-06, |
| "loss": 0.4175, |
| "step": 1575 |
| }, |
| { |
| "epoch": 2.5216, |
| "grad_norm": 0.6774760409466307, |
| "learning_rate": 2.3231311143671077e-06, |
| "loss": 0.3994, |
| "step": 1576 |
| }, |
| { |
| "epoch": 2.5232, |
| "grad_norm": 0.6518443745193649, |
| "learning_rate": 2.308986970855466e-06, |
| "loss": 0.3857, |
| "step": 1577 |
| }, |
| { |
| "epoch": 2.5248, |
| "grad_norm": 0.6235079803424979, |
| "learning_rate": 2.2948730780806407e-06, |
| "loss": 0.3818, |
| "step": 1578 |
| }, |
| { |
| "epoch": 2.5263999999999998, |
| "grad_norm": 0.6965629135690532, |
| "learning_rate": 2.2807895947024643e-06, |
| "loss": 0.3796, |
| "step": 1579 |
| }, |
| { |
| "epoch": 2.528, |
| "grad_norm": 0.6217783322318544, |
| "learning_rate": 2.2667366790389152e-06, |
| "loss": 0.3862, |
| "step": 1580 |
| }, |
| { |
| "epoch": 2.5296, |
| "grad_norm": 0.618178271536513, |
| "learning_rate": 2.2527144890643465e-06, |
| "loss": 0.4029, |
| "step": 1581 |
| }, |
| { |
| "epoch": 2.5312, |
| "grad_norm": 0.64690974988133, |
| "learning_rate": 2.2387231824077188e-06, |
| "loss": 0.3964, |
| "step": 1582 |
| }, |
| { |
| "epoch": 2.5328, |
| "grad_norm": 0.6322672998920709, |
| "learning_rate": 2.2247629163508207e-06, |
| "loss": 0.385, |
| "step": 1583 |
| }, |
| { |
| "epoch": 2.5343999999999998, |
| "grad_norm": 0.6406596537736634, |
| "learning_rate": 2.2108338478264934e-06, |
| "loss": 0.3845, |
| "step": 1584 |
| }, |
| { |
| "epoch": 2.536, |
| "grad_norm": 0.6721282769087035, |
| "learning_rate": 2.196936133416882e-06, |
| "loss": 0.4234, |
| "step": 1585 |
| }, |
| { |
| "epoch": 2.5376, |
| "grad_norm": 0.6412509414173777, |
| "learning_rate": 2.1830699293516677e-06, |
| "loss": 0.4255, |
| "step": 1586 |
| }, |
| { |
| "epoch": 2.5392, |
| "grad_norm": 0.6913729833010835, |
| "learning_rate": 2.1692353915063047e-06, |
| "loss": 0.4186, |
| "step": 1587 |
| }, |
| { |
| "epoch": 2.5408, |
| "grad_norm": 0.6694757232889293, |
| "learning_rate": 2.155432675400283e-06, |
| "loss": 0.3556, |
| "step": 1588 |
| }, |
| { |
| "epoch": 2.5423999999999998, |
| "grad_norm": 0.6715304054251654, |
| "learning_rate": 2.141661936195364e-06, |
| "loss": 0.4319, |
| "step": 1589 |
| }, |
| { |
| "epoch": 2.544, |
| "grad_norm": 0.6653104930547877, |
| "learning_rate": 2.1279233286938503e-06, |
| "loss": 0.4249, |
| "step": 1590 |
| }, |
| { |
| "epoch": 2.5456, |
| "grad_norm": 0.6268563379389929, |
| "learning_rate": 2.1142170073368396e-06, |
| "loss": 0.3891, |
| "step": 1591 |
| }, |
| { |
| "epoch": 2.5472, |
| "grad_norm": 0.6643197899692231, |
| "learning_rate": 2.100543126202481e-06, |
| "loss": 0.4063, |
| "step": 1592 |
| }, |
| { |
| "epoch": 2.5488, |
| "grad_norm": 0.6342692376125808, |
| "learning_rate": 2.0869018390042588e-06, |
| "loss": 0.3992, |
| "step": 1593 |
| }, |
| { |
| "epoch": 2.5504, |
| "grad_norm": 0.5948066156047811, |
| "learning_rate": 2.0732932990892528e-06, |
| "loss": 0.3673, |
| "step": 1594 |
| }, |
| { |
| "epoch": 2.552, |
| "grad_norm": 0.6694485740217448, |
| "learning_rate": 2.059717659436415e-06, |
| "loss": 0.397, |
| "step": 1595 |
| }, |
| { |
| "epoch": 2.5536, |
| "grad_norm": 0.6308509391397539, |
| "learning_rate": 2.0461750726548558e-06, |
| "loss": 0.407, |
| "step": 1596 |
| }, |
| { |
| "epoch": 2.5552, |
| "grad_norm": 0.6261729443272391, |
| "learning_rate": 2.032665690982126e-06, |
| "loss": 0.3773, |
| "step": 1597 |
| }, |
| { |
| "epoch": 2.5568, |
| "grad_norm": 0.6346772474236525, |
| "learning_rate": 2.0191896662825012e-06, |
| "loss": 0.3941, |
| "step": 1598 |
| }, |
| { |
| "epoch": 2.5584, |
| "grad_norm": 0.6376245729227132, |
| "learning_rate": 2.0057471500452822e-06, |
| "loss": 0.4113, |
| "step": 1599 |
| }, |
| { |
| "epoch": 2.56, |
| "grad_norm": 0.641020908385709, |
| "learning_rate": 1.9923382933830836e-06, |
| "loss": 0.4098, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.5616, |
| "grad_norm": 0.6275775456371993, |
| "learning_rate": 1.9789632470301423e-06, |
| "loss": 0.3992, |
| "step": 1601 |
| }, |
| { |
| "epoch": 2.5632, |
| "grad_norm": 0.6453400022000114, |
| "learning_rate": 1.9656221613406217e-06, |
| "loss": 0.3961, |
| "step": 1602 |
| }, |
| { |
| "epoch": 2.5648, |
| "grad_norm": 0.6728800160345044, |
| "learning_rate": 1.952315186286915e-06, |
| "loss": 0.4069, |
| "step": 1603 |
| }, |
| { |
| "epoch": 2.5664, |
| "grad_norm": 0.5922663369940925, |
| "learning_rate": 1.9390424714579683e-06, |
| "loss": 0.3796, |
| "step": 1604 |
| }, |
| { |
| "epoch": 2.568, |
| "grad_norm": 0.6055756534765394, |
| "learning_rate": 1.925804166057596e-06, |
| "loss": 0.3734, |
| "step": 1605 |
| }, |
| { |
| "epoch": 2.5696, |
| "grad_norm": 0.6337357094885002, |
| "learning_rate": 1.9126004189027975e-06, |
| "loss": 0.382, |
| "step": 1606 |
| }, |
| { |
| "epoch": 2.5712, |
| "grad_norm": 0.6223091168842378, |
| "learning_rate": 1.8994313784220942e-06, |
| "loss": 0.3899, |
| "step": 1607 |
| }, |
| { |
| "epoch": 2.5728, |
| "grad_norm": 0.6304486561080737, |
| "learning_rate": 1.8862971926538553e-06, |
| "loss": 0.3935, |
| "step": 1608 |
| }, |
| { |
| "epoch": 2.5744, |
| "grad_norm": 0.6638930774983509, |
| "learning_rate": 1.8731980092446305e-06, |
| "loss": 0.4189, |
| "step": 1609 |
| }, |
| { |
| "epoch": 2.576, |
| "grad_norm": 0.6609598736574402, |
| "learning_rate": 1.8601339754475007e-06, |
| "loss": 0.4043, |
| "step": 1610 |
| }, |
| { |
| "epoch": 2.5776, |
| "grad_norm": 0.6460433585689176, |
| "learning_rate": 1.8471052381204091e-06, |
| "loss": 0.4289, |
| "step": 1611 |
| }, |
| { |
| "epoch": 2.5792, |
| "grad_norm": 0.6284239142431678, |
| "learning_rate": 1.8341119437245231e-06, |
| "loss": 0.4009, |
| "step": 1612 |
| }, |
| { |
| "epoch": 2.5808, |
| "grad_norm": 0.5788234045672292, |
| "learning_rate": 1.8211542383225811e-06, |
| "loss": 0.3798, |
| "step": 1613 |
| }, |
| { |
| "epoch": 2.5824, |
| "grad_norm": 0.6168466381836182, |
| "learning_rate": 1.8082322675772478e-06, |
| "loss": 0.3871, |
| "step": 1614 |
| }, |
| { |
| "epoch": 2.584, |
| "grad_norm": 0.5918253150989333, |
| "learning_rate": 1.795346176749484e-06, |
| "loss": 0.3745, |
| "step": 1615 |
| }, |
| { |
| "epoch": 2.5856, |
| "grad_norm": 0.6069246083161115, |
| "learning_rate": 1.7824961106969124e-06, |
| "loss": 0.403, |
| "step": 1616 |
| }, |
| { |
| "epoch": 2.5872, |
| "grad_norm": 0.5930680357250637, |
| "learning_rate": 1.7696822138721798e-06, |
| "loss": 0.4001, |
| "step": 1617 |
| }, |
| { |
| "epoch": 2.5888, |
| "grad_norm": 0.6129454651899682, |
| "learning_rate": 1.756904630321347e-06, |
| "loss": 0.3801, |
| "step": 1618 |
| }, |
| { |
| "epoch": 2.5904, |
| "grad_norm": 0.6189740075978353, |
| "learning_rate": 1.7441635036822624e-06, |
| "loss": 0.3943, |
| "step": 1619 |
| }, |
| { |
| "epoch": 2.592, |
| "grad_norm": 0.6274624006210813, |
| "learning_rate": 1.7314589771829426e-06, |
| "loss": 0.4132, |
| "step": 1620 |
| }, |
| { |
| "epoch": 2.5936, |
| "grad_norm": 0.9954803403258453, |
| "learning_rate": 1.718791193639973e-06, |
| "loss": 0.3763, |
| "step": 1621 |
| }, |
| { |
| "epoch": 2.5952, |
| "grad_norm": 0.619882141163173, |
| "learning_rate": 1.706160295456898e-06, |
| "loss": 0.4138, |
| "step": 1622 |
| }, |
| { |
| "epoch": 2.5968, |
| "grad_norm": 0.6228088213238236, |
| "learning_rate": 1.693566424622612e-06, |
| "loss": 0.3804, |
| "step": 1623 |
| }, |
| { |
| "epoch": 2.5984, |
| "grad_norm": 0.6265320543753518, |
| "learning_rate": 1.6810097227097782e-06, |
| "loss": 0.3911, |
| "step": 1624 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 0.6203707060758011, |
| "learning_rate": 1.668490330873223e-06, |
| "loss": 0.39, |
| "step": 1625 |
| }, |
| { |
| "epoch": 2.6016, |
| "grad_norm": 0.621294828917456, |
| "learning_rate": 1.6560083898483598e-06, |
| "loss": 0.4142, |
| "step": 1626 |
| }, |
| { |
| "epoch": 2.6032, |
| "grad_norm": 0.6101531315821962, |
| "learning_rate": 1.6435640399496033e-06, |
| "loss": 0.3725, |
| "step": 1627 |
| }, |
| { |
| "epoch": 2.6048, |
| "grad_norm": 0.6863906992808965, |
| "learning_rate": 1.6311574210687865e-06, |
| "loss": 0.4023, |
| "step": 1628 |
| }, |
| { |
| "epoch": 2.6064, |
| "grad_norm": 0.6426995722369155, |
| "learning_rate": 1.618788672673598e-06, |
| "loss": 0.4103, |
| "step": 1629 |
| }, |
| { |
| "epoch": 2.608, |
| "grad_norm": 0.6533732877012939, |
| "learning_rate": 1.6064579338060088e-06, |
| "loss": 0.4059, |
| "step": 1630 |
| }, |
| { |
| "epoch": 2.6096, |
| "grad_norm": 0.6078243562020307, |
| "learning_rate": 1.5941653430807052e-06, |
| "loss": 0.3763, |
| "step": 1631 |
| }, |
| { |
| "epoch": 2.6112, |
| "grad_norm": 0.5944464717669109, |
| "learning_rate": 1.5819110386835413e-06, |
| "loss": 0.3768, |
| "step": 1632 |
| }, |
| { |
| "epoch": 2.6128, |
| "grad_norm": 0.623580671550532, |
| "learning_rate": 1.5696951583699776e-06, |
| "loss": 0.3875, |
| "step": 1633 |
| }, |
| { |
| "epoch": 2.6144, |
| "grad_norm": 0.5922332226181343, |
| "learning_rate": 1.5575178394635315e-06, |
| "loss": 0.4053, |
| "step": 1634 |
| }, |
| { |
| "epoch": 2.616, |
| "grad_norm": 0.5845659932749108, |
| "learning_rate": 1.545379218854241e-06, |
| "loss": 0.3871, |
| "step": 1635 |
| }, |
| { |
| "epoch": 2.6176, |
| "grad_norm": 0.6268357052713963, |
| "learning_rate": 1.5332794329971157e-06, |
| "loss": 0.4103, |
| "step": 1636 |
| }, |
| { |
| "epoch": 2.6192, |
| "grad_norm": 0.7478861047169991, |
| "learning_rate": 1.5212186179106142e-06, |
| "loss": 0.4175, |
| "step": 1637 |
| }, |
| { |
| "epoch": 2.6208, |
| "grad_norm": 0.6192499205634353, |
| "learning_rate": 1.5091969091751073e-06, |
| "loss": 0.3843, |
| "step": 1638 |
| }, |
| { |
| "epoch": 2.6224, |
| "grad_norm": 0.6166702089908779, |
| "learning_rate": 1.4972144419313528e-06, |
| "loss": 0.3806, |
| "step": 1639 |
| }, |
| { |
| "epoch": 2.624, |
| "grad_norm": 0.6349397684516485, |
| "learning_rate": 1.4852713508789835e-06, |
| "loss": 0.3863, |
| "step": 1640 |
| }, |
| { |
| "epoch": 2.6256, |
| "grad_norm": 0.5927888932635257, |
| "learning_rate": 1.4733677702749894e-06, |
| "loss": 0.3655, |
| "step": 1641 |
| }, |
| { |
| "epoch": 2.6272, |
| "grad_norm": 0.6026968586334261, |
| "learning_rate": 1.4615038339322025e-06, |
| "loss": 0.3685, |
| "step": 1642 |
| }, |
| { |
| "epoch": 2.6288, |
| "grad_norm": 0.6543361088043362, |
| "learning_rate": 1.4496796752178032e-06, |
| "loss": 0.3983, |
| "step": 1643 |
| }, |
| { |
| "epoch": 2.6304, |
| "grad_norm": 0.6458512084445657, |
| "learning_rate": 1.4378954270518169e-06, |
| "loss": 0.3945, |
| "step": 1644 |
| }, |
| { |
| "epoch": 2.632, |
| "grad_norm": 0.6374123540148537, |
| "learning_rate": 1.4261512219056118e-06, |
| "loss": 0.3941, |
| "step": 1645 |
| }, |
| { |
| "epoch": 2.6336, |
| "grad_norm": 0.6904183914760732, |
| "learning_rate": 1.4144471918004255e-06, |
| "loss": 0.4231, |
| "step": 1646 |
| }, |
| { |
| "epoch": 2.6352, |
| "grad_norm": 0.62514925414384, |
| "learning_rate": 1.402783468305864e-06, |
| "loss": 0.4042, |
| "step": 1647 |
| }, |
| { |
| "epoch": 2.6368, |
| "grad_norm": 0.6250879922886552, |
| "learning_rate": 1.391160182538437e-06, |
| "loss": 0.3839, |
| "step": 1648 |
| }, |
| { |
| "epoch": 2.6384, |
| "grad_norm": 0.6251249723911877, |
| "learning_rate": 1.3795774651600757e-06, |
| "loss": 0.4204, |
| "step": 1649 |
| }, |
| { |
| "epoch": 2.64, |
| "grad_norm": 0.6287593747093052, |
| "learning_rate": 1.3680354463766642e-06, |
| "loss": 0.4043, |
| "step": 1650 |
| }, |
| { |
| "epoch": 2.6416, |
| "grad_norm": 0.64865146086009, |
| "learning_rate": 1.3565342559365808e-06, |
| "loss": 0.4074, |
| "step": 1651 |
| }, |
| { |
| "epoch": 2.6432, |
| "grad_norm": 0.6267476637499041, |
| "learning_rate": 1.3450740231292354e-06, |
| "loss": 0.3842, |
| "step": 1652 |
| }, |
| { |
| "epoch": 2.6448, |
| "grad_norm": 0.6293389567269077, |
| "learning_rate": 1.3336548767836144e-06, |
| "loss": 0.3762, |
| "step": 1653 |
| }, |
| { |
| "epoch": 2.6464, |
| "grad_norm": 0.6058061494114316, |
| "learning_rate": 1.3222769452668382e-06, |
| "loss": 0.4058, |
| "step": 1654 |
| }, |
| { |
| "epoch": 2.648, |
| "grad_norm": 0.5997822993884304, |
| "learning_rate": 1.3109403564827155e-06, |
| "loss": 0.39, |
| "step": 1655 |
| }, |
| { |
| "epoch": 2.6496, |
| "grad_norm": 0.5798357022123661, |
| "learning_rate": 1.2996452378703013e-06, |
| "loss": 0.3567, |
| "step": 1656 |
| }, |
| { |
| "epoch": 2.6512000000000002, |
| "grad_norm": 0.592566773403498, |
| "learning_rate": 1.2883917164024722e-06, |
| "loss": 0.3867, |
| "step": 1657 |
| }, |
| { |
| "epoch": 2.6528, |
| "grad_norm": 0.5931018419806823, |
| "learning_rate": 1.2771799185844913e-06, |
| "loss": 0.4138, |
| "step": 1658 |
| }, |
| { |
| "epoch": 2.6544, |
| "grad_norm": 0.6085959754789083, |
| "learning_rate": 1.266009970452593e-06, |
| "loss": 0.3784, |
| "step": 1659 |
| }, |
| { |
| "epoch": 2.656, |
| "grad_norm": 0.6183384483894584, |
| "learning_rate": 1.2548819975725624e-06, |
| "loss": 0.3888, |
| "step": 1660 |
| }, |
| { |
| "epoch": 2.6576, |
| "grad_norm": 0.6214291059574565, |
| "learning_rate": 1.2437961250383207e-06, |
| "loss": 0.4044, |
| "step": 1661 |
| }, |
| { |
| "epoch": 2.6592000000000002, |
| "grad_norm": 0.6325046429616501, |
| "learning_rate": 1.2327524774705268e-06, |
| "loss": 0.3855, |
| "step": 1662 |
| }, |
| { |
| "epoch": 2.6608, |
| "grad_norm": 0.6197427462940891, |
| "learning_rate": 1.221751179015172e-06, |
| "loss": 0.379, |
| "step": 1663 |
| }, |
| { |
| "epoch": 2.6624, |
| "grad_norm": 0.6206703848814074, |
| "learning_rate": 1.2107923533421795e-06, |
| "loss": 0.3981, |
| "step": 1664 |
| }, |
| { |
| "epoch": 2.664, |
| "grad_norm": 0.5929949370443285, |
| "learning_rate": 1.1998761236440248e-06, |
| "loss": 0.4103, |
| "step": 1665 |
| }, |
| { |
| "epoch": 2.6656, |
| "grad_norm": 0.6376212703380394, |
| "learning_rate": 1.1890026126343446e-06, |
| "loss": 0.4125, |
| "step": 1666 |
| }, |
| { |
| "epoch": 2.6672000000000002, |
| "grad_norm": 0.6369696343567768, |
| "learning_rate": 1.1781719425465538e-06, |
| "loss": 0.4099, |
| "step": 1667 |
| }, |
| { |
| "epoch": 2.6688, |
| "grad_norm": 0.6405197317366532, |
| "learning_rate": 1.1673842351324816e-06, |
| "loss": 0.3802, |
| "step": 1668 |
| }, |
| { |
| "epoch": 2.6704, |
| "grad_norm": 0.6262504215886, |
| "learning_rate": 1.1566396116609907e-06, |
| "loss": 0.39, |
| "step": 1669 |
| }, |
| { |
| "epoch": 2.672, |
| "grad_norm": 0.6304118500726281, |
| "learning_rate": 1.1459381929166251e-06, |
| "loss": 0.3854, |
| "step": 1670 |
| }, |
| { |
| "epoch": 2.6736, |
| "grad_norm": 0.6147956319129659, |
| "learning_rate": 1.1352800991982467e-06, |
| "loss": 0.3829, |
| "step": 1671 |
| }, |
| { |
| "epoch": 2.6752000000000002, |
| "grad_norm": 0.6083959768111769, |
| "learning_rate": 1.1246654503176795e-06, |
| "loss": 0.3824, |
| "step": 1672 |
| }, |
| { |
| "epoch": 2.6768, |
| "grad_norm": 0.6474014155941403, |
| "learning_rate": 1.1140943655983727e-06, |
| "loss": 0.384, |
| "step": 1673 |
| }, |
| { |
| "epoch": 2.6784, |
| "grad_norm": 0.661173342284011, |
| "learning_rate": 1.103566963874052e-06, |
| "loss": 0.3671, |
| "step": 1674 |
| }, |
| { |
| "epoch": 2.68, |
| "grad_norm": 0.6671292261409086, |
| "learning_rate": 1.0930833634873811e-06, |
| "loss": 0.4108, |
| "step": 1675 |
| }, |
| { |
| "epoch": 2.6816, |
| "grad_norm": 0.5919654658407986, |
| "learning_rate": 1.082643682288641e-06, |
| "loss": 0.3861, |
| "step": 1676 |
| }, |
| { |
| "epoch": 2.6832000000000003, |
| "grad_norm": 0.606017708841482, |
| "learning_rate": 1.0722480376343997e-06, |
| "loss": 0.3838, |
| "step": 1677 |
| }, |
| { |
| "epoch": 2.6848, |
| "grad_norm": 0.6111833117856191, |
| "learning_rate": 1.0618965463861868e-06, |
| "loss": 0.3959, |
| "step": 1678 |
| }, |
| { |
| "epoch": 2.6864, |
| "grad_norm": 0.66229175827883, |
| "learning_rate": 1.0515893249091936e-06, |
| "loss": 0.3954, |
| "step": 1679 |
| }, |
| { |
| "epoch": 2.6879999999999997, |
| "grad_norm": 0.6225602473696187, |
| "learning_rate": 1.0413264890709546e-06, |
| "loss": 0.3915, |
| "step": 1680 |
| }, |
| { |
| "epoch": 2.6896, |
| "grad_norm": 0.6324295682096658, |
| "learning_rate": 1.0311081542400452e-06, |
| "loss": 0.4035, |
| "step": 1681 |
| }, |
| { |
| "epoch": 2.6912000000000003, |
| "grad_norm": 0.5782408195023804, |
| "learning_rate": 1.0209344352847923e-06, |
| "loss": 0.3814, |
| "step": 1682 |
| }, |
| { |
| "epoch": 2.6928, |
| "grad_norm": 0.5981292289007456, |
| "learning_rate": 1.0108054465719736e-06, |
| "loss": 0.3843, |
| "step": 1683 |
| }, |
| { |
| "epoch": 2.6944, |
| "grad_norm": 0.6386070296775564, |
| "learning_rate": 1.0007213019655393e-06, |
| "loss": 0.3925, |
| "step": 1684 |
| }, |
| { |
| "epoch": 2.6959999999999997, |
| "grad_norm": 0.5986251172966329, |
| "learning_rate": 9.906821148253303e-07, |
| "loss": 0.3624, |
| "step": 1685 |
| }, |
| { |
| "epoch": 2.6976, |
| "grad_norm": 0.6107708577953296, |
| "learning_rate": 9.806879980057993e-07, |
| "loss": 0.3783, |
| "step": 1686 |
| }, |
| { |
| "epoch": 2.6992000000000003, |
| "grad_norm": 0.6231399319138614, |
| "learning_rate": 9.707390638547482e-07, |
| "loss": 0.3763, |
| "step": 1687 |
| }, |
| { |
| "epoch": 2.7008, |
| "grad_norm": 0.6088089708455741, |
| "learning_rate": 9.608354242120637e-07, |
| "loss": 0.3989, |
| "step": 1688 |
| }, |
| { |
| "epoch": 2.7024, |
| "grad_norm": 0.6207549015074908, |
| "learning_rate": 9.509771904084558e-07, |
| "loss": 0.3808, |
| "step": 1689 |
| }, |
| { |
| "epoch": 2.7039999999999997, |
| "grad_norm": 0.6591205512689258, |
| "learning_rate": 9.411644732642122e-07, |
| "loss": 0.416, |
| "step": 1690 |
| }, |
| { |
| "epoch": 2.7056, |
| "grad_norm": 0.6045094661220406, |
| "learning_rate": 9.313973830879514e-07, |
| "loss": 0.3996, |
| "step": 1691 |
| }, |
| { |
| "epoch": 2.7072000000000003, |
| "grad_norm": 0.6017543127945517, |
| "learning_rate": 9.216760296753758e-07, |
| "loss": 0.3624, |
| "step": 1692 |
| }, |
| { |
| "epoch": 2.7088, |
| "grad_norm": 0.6051557230372354, |
| "learning_rate": 9.120005223080486e-07, |
| "loss": 0.3934, |
| "step": 1693 |
| }, |
| { |
| "epoch": 2.7104, |
| "grad_norm": 0.6055162741880877, |
| "learning_rate": 9.023709697521543e-07, |
| "loss": 0.3886, |
| "step": 1694 |
| }, |
| { |
| "epoch": 2.7119999999999997, |
| "grad_norm": 0.6102023101677977, |
| "learning_rate": 8.927874802572861e-07, |
| "loss": 0.4042, |
| "step": 1695 |
| }, |
| { |
| "epoch": 2.7136, |
| "grad_norm": 0.5776267937267195, |
| "learning_rate": 8.832501615552225e-07, |
| "loss": 0.3803, |
| "step": 1696 |
| }, |
| { |
| "epoch": 2.7152, |
| "grad_norm": 0.615711936633695, |
| "learning_rate": 8.737591208587159e-07, |
| "loss": 0.405, |
| "step": 1697 |
| }, |
| { |
| "epoch": 2.7168, |
| "grad_norm": 0.6455560899948569, |
| "learning_rate": 8.643144648602913e-07, |
| "loss": 0.4374, |
| "step": 1698 |
| }, |
| { |
| "epoch": 2.7184, |
| "grad_norm": 0.5905261653895563, |
| "learning_rate": 8.549162997310467e-07, |
| "loss": 0.3716, |
| "step": 1699 |
| }, |
| { |
| "epoch": 2.7199999999999998, |
| "grad_norm": 0.6112904373809765, |
| "learning_rate": 8.455647311194537e-07, |
| "loss": 0.386, |
| "step": 1700 |
| }, |
| { |
| "epoch": 2.7216, |
| "grad_norm": 0.6030522579380936, |
| "learning_rate": 8.362598641501774e-07, |
| "loss": 0.3966, |
| "step": 1701 |
| }, |
| { |
| "epoch": 2.7232, |
| "grad_norm": 0.5883945298360702, |
| "learning_rate": 8.270018034228916e-07, |
| "loss": 0.3901, |
| "step": 1702 |
| }, |
| { |
| "epoch": 2.7248, |
| "grad_norm": 0.5791821954738914, |
| "learning_rate": 8.177906530110996e-07, |
| "loss": 0.3498, |
| "step": 1703 |
| }, |
| { |
| "epoch": 2.7264, |
| "grad_norm": 0.5825217026653828, |
| "learning_rate": 8.086265164609708e-07, |
| "loss": 0.3869, |
| "step": 1704 |
| }, |
| { |
| "epoch": 2.7279999999999998, |
| "grad_norm": 0.5976749528633419, |
| "learning_rate": 7.995094967901701e-07, |
| "loss": 0.3849, |
| "step": 1705 |
| }, |
| { |
| "epoch": 2.7296, |
| "grad_norm": 0.6606592974479811, |
| "learning_rate": 7.90439696486705e-07, |
| "loss": 0.3983, |
| "step": 1706 |
| }, |
| { |
| "epoch": 2.7312, |
| "grad_norm": 0.5862915441222696, |
| "learning_rate": 7.814172175077738e-07, |
| "loss": 0.3682, |
| "step": 1707 |
| }, |
| { |
| "epoch": 2.7328, |
| "grad_norm": 0.6004467759052456, |
| "learning_rate": 7.724421612786109e-07, |
| "loss": 0.403, |
| "step": 1708 |
| }, |
| { |
| "epoch": 2.7344, |
| "grad_norm": 0.5893551587311924, |
| "learning_rate": 7.635146286913587e-07, |
| "loss": 0.3847, |
| "step": 1709 |
| }, |
| { |
| "epoch": 2.7359999999999998, |
| "grad_norm": 0.6081689478494531, |
| "learning_rate": 7.546347201039255e-07, |
| "loss": 0.3931, |
| "step": 1710 |
| }, |
| { |
| "epoch": 2.7376, |
| "grad_norm": 0.6174886614429648, |
| "learning_rate": 7.458025353388592e-07, |
| "loss": 0.3985, |
| "step": 1711 |
| }, |
| { |
| "epoch": 2.7392, |
| "grad_norm": 0.634124853662826, |
| "learning_rate": 7.37018173682223e-07, |
| "loss": 0.392, |
| "step": 1712 |
| }, |
| { |
| "epoch": 2.7408, |
| "grad_norm": 0.6588212500354214, |
| "learning_rate": 7.282817338824893e-07, |
| "loss": 0.4133, |
| "step": 1713 |
| }, |
| { |
| "epoch": 2.7424, |
| "grad_norm": 0.5955630737541837, |
| "learning_rate": 7.195933141494133e-07, |
| "loss": 0.3701, |
| "step": 1714 |
| }, |
| { |
| "epoch": 2.7439999999999998, |
| "grad_norm": 0.60471648702862, |
| "learning_rate": 7.109530121529439e-07, |
| "loss": 0.3921, |
| "step": 1715 |
| }, |
| { |
| "epoch": 2.7456, |
| "grad_norm": 0.6234697420391768, |
| "learning_rate": 7.023609250221153e-07, |
| "loss": 0.3805, |
| "step": 1716 |
| }, |
| { |
| "epoch": 2.7472, |
| "grad_norm": 0.5909780981357816, |
| "learning_rate": 6.938171493439622e-07, |
| "loss": 0.3822, |
| "step": 1717 |
| }, |
| { |
| "epoch": 2.7488, |
| "grad_norm": 0.5916412097862507, |
| "learning_rate": 6.853217811624313e-07, |
| "loss": 0.3896, |
| "step": 1718 |
| }, |
| { |
| "epoch": 2.7504, |
| "grad_norm": 0.5701027407029094, |
| "learning_rate": 6.768749159772992e-07, |
| "loss": 0.3841, |
| "step": 1719 |
| }, |
| { |
| "epoch": 2.752, |
| "grad_norm": 0.6224648857193659, |
| "learning_rate": 6.684766487431027e-07, |
| "loss": 0.411, |
| "step": 1720 |
| }, |
| { |
| "epoch": 2.7536, |
| "grad_norm": 0.6062600883850013, |
| "learning_rate": 6.601270738680721e-07, |
| "loss": 0.3896, |
| "step": 1721 |
| }, |
| { |
| "epoch": 2.7552, |
| "grad_norm": 0.5825798421411809, |
| "learning_rate": 6.518262852130625e-07, |
| "loss": 0.3469, |
| "step": 1722 |
| }, |
| { |
| "epoch": 2.7568, |
| "grad_norm": 0.6245531507828768, |
| "learning_rate": 6.435743760905083e-07, |
| "loss": 0.4002, |
| "step": 1723 |
| }, |
| { |
| "epoch": 2.7584, |
| "grad_norm": 0.6012872581602149, |
| "learning_rate": 6.353714392633698e-07, |
| "loss": 0.3946, |
| "step": 1724 |
| }, |
| { |
| "epoch": 2.76, |
| "grad_norm": 0.600518750459418, |
| "learning_rate": 6.272175669440861e-07, |
| "loss": 0.4028, |
| "step": 1725 |
| }, |
| { |
| "epoch": 2.7616, |
| "grad_norm": 0.6565378768747119, |
| "learning_rate": 6.191128507935479e-07, |
| "loss": 0.4181, |
| "step": 1726 |
| }, |
| { |
| "epoch": 2.7632, |
| "grad_norm": 0.5880124521353837, |
| "learning_rate": 6.110573819200605e-07, |
| "loss": 0.3771, |
| "step": 1727 |
| }, |
| { |
| "epoch": 2.7648, |
| "grad_norm": 0.6116040078923606, |
| "learning_rate": 6.030512508783187e-07, |
| "loss": 0.4195, |
| "step": 1728 |
| }, |
| { |
| "epoch": 2.7664, |
| "grad_norm": 0.6014679070546552, |
| "learning_rate": 5.950945476683955e-07, |
| "loss": 0.3974, |
| "step": 1729 |
| }, |
| { |
| "epoch": 2.768, |
| "grad_norm": 0.6009701852260378, |
| "learning_rate": 5.871873617347218e-07, |
| "loss": 0.3785, |
| "step": 1730 |
| }, |
| { |
| "epoch": 2.7696, |
| "grad_norm": 0.5859448462013398, |
| "learning_rate": 5.793297819650884e-07, |
| "loss": 0.3762, |
| "step": 1731 |
| }, |
| { |
| "epoch": 2.7712, |
| "grad_norm": 0.6291628272389389, |
| "learning_rate": 5.715218966896435e-07, |
| "loss": 0.3854, |
| "step": 1732 |
| }, |
| { |
| "epoch": 2.7728, |
| "grad_norm": 0.6197404357834241, |
| "learning_rate": 5.637637936798978e-07, |
| "loss": 0.3818, |
| "step": 1733 |
| }, |
| { |
| "epoch": 2.7744, |
| "grad_norm": 0.5905004221255729, |
| "learning_rate": 5.560555601477418e-07, |
| "loss": 0.3767, |
| "step": 1734 |
| }, |
| { |
| "epoch": 2.776, |
| "grad_norm": 0.5958395135943388, |
| "learning_rate": 5.483972827444645e-07, |
| "loss": 0.3937, |
| "step": 1735 |
| }, |
| { |
| "epoch": 2.7776, |
| "grad_norm": 0.5996780255952401, |
| "learning_rate": 5.407890475597761e-07, |
| "loss": 0.3896, |
| "step": 1736 |
| }, |
| { |
| "epoch": 2.7792, |
| "grad_norm": 0.5669617446118168, |
| "learning_rate": 5.332309401208407e-07, |
| "loss": 0.3726, |
| "step": 1737 |
| }, |
| { |
| "epoch": 2.7808, |
| "grad_norm": 0.6268307738196953, |
| "learning_rate": 5.257230453913237e-07, |
| "loss": 0.4233, |
| "step": 1738 |
| }, |
| { |
| "epoch": 2.7824, |
| "grad_norm": 0.5864553870059951, |
| "learning_rate": 5.182654477704229e-07, |
| "loss": 0.3846, |
| "step": 1739 |
| }, |
| { |
| "epoch": 2.784, |
| "grad_norm": 0.6036664703348141, |
| "learning_rate": 5.108582310919302e-07, |
| "loss": 0.3928, |
| "step": 1740 |
| }, |
| { |
| "epoch": 2.7856, |
| "grad_norm": 0.6031516892613834, |
| "learning_rate": 5.035014786232828e-07, |
| "loss": 0.3925, |
| "step": 1741 |
| }, |
| { |
| "epoch": 2.7872, |
| "grad_norm": 0.5866800018560955, |
| "learning_rate": 4.961952730646319e-07, |
| "loss": 0.3859, |
| "step": 1742 |
| }, |
| { |
| "epoch": 2.7888, |
| "grad_norm": 0.5877381777569246, |
| "learning_rate": 4.889396965479115e-07, |
| "loss": 0.3673, |
| "step": 1743 |
| }, |
| { |
| "epoch": 2.7904, |
| "grad_norm": 0.6209548847901005, |
| "learning_rate": 4.817348306359121e-07, |
| "loss": 0.4131, |
| "step": 1744 |
| }, |
| { |
| "epoch": 2.792, |
| "grad_norm": 0.6046029653791415, |
| "learning_rate": 4.745807563213678e-07, |
| "loss": 0.391, |
| "step": 1745 |
| }, |
| { |
| "epoch": 2.7936, |
| "grad_norm": 0.6135800654153217, |
| "learning_rate": 4.6747755402604565e-07, |
| "loss": 0.3988, |
| "step": 1746 |
| }, |
| { |
| "epoch": 2.7952, |
| "grad_norm": 0.5864985386522907, |
| "learning_rate": 4.6042530359983793e-07, |
| "loss": 0.3784, |
| "step": 1747 |
| }, |
| { |
| "epoch": 2.7968, |
| "grad_norm": 0.6125567210434135, |
| "learning_rate": 4.534240843198662e-07, |
| "loss": 0.4236, |
| "step": 1748 |
| }, |
| { |
| "epoch": 2.7984, |
| "grad_norm": 0.5972475717825011, |
| "learning_rate": 4.464739748895963e-07, |
| "loss": 0.4029, |
| "step": 1749 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 0.6051136361996698, |
| "learning_rate": 4.3957505343794115e-07, |
| "loss": 0.3704, |
| "step": 1750 |
| }, |
| { |
| "epoch": 2.8016, |
| "grad_norm": 0.5813134121600891, |
| "learning_rate": 4.327273975183949e-07, |
| "loss": 0.3789, |
| "step": 1751 |
| }, |
| { |
| "epoch": 2.8032, |
| "grad_norm": 0.6234931456274084, |
| "learning_rate": 4.259310841081515e-07, |
| "loss": 0.3879, |
| "step": 1752 |
| }, |
| { |
| "epoch": 2.8048, |
| "grad_norm": 0.5915396255605055, |
| "learning_rate": 4.191861896072458e-07, |
| "loss": 0.3942, |
| "step": 1753 |
| }, |
| { |
| "epoch": 2.8064, |
| "grad_norm": 0.6236188832413802, |
| "learning_rate": 4.1249278983769405e-07, |
| "loss": 0.3993, |
| "step": 1754 |
| }, |
| { |
| "epoch": 2.808, |
| "grad_norm": 0.589419057067984, |
| "learning_rate": 4.058509600426358e-07, |
| "loss": 0.3699, |
| "step": 1755 |
| }, |
| { |
| "epoch": 2.8096, |
| "grad_norm": 0.5920683961204296, |
| "learning_rate": 3.9926077488549543e-07, |
| "loss": 0.3723, |
| "step": 1756 |
| }, |
| { |
| "epoch": 2.8112, |
| "grad_norm": 0.5927044128760407, |
| "learning_rate": 3.9272230844913884e-07, |
| "loss": 0.415, |
| "step": 1757 |
| }, |
| { |
| "epoch": 2.8128, |
| "grad_norm": 0.5867650416517225, |
| "learning_rate": 3.8623563423504094e-07, |
| "loss": 0.3812, |
| "step": 1758 |
| }, |
| { |
| "epoch": 2.8144, |
| "grad_norm": 0.6011267395145728, |
| "learning_rate": 3.798008251624585e-07, |
| "loss": 0.3897, |
| "step": 1759 |
| }, |
| { |
| "epoch": 2.816, |
| "grad_norm": 0.5903530577818467, |
| "learning_rate": 3.734179535676169e-07, |
| "loss": 0.3712, |
| "step": 1760 |
| }, |
| { |
| "epoch": 2.8176, |
| "grad_norm": 0.6233995273902431, |
| "learning_rate": 3.6708709120288564e-07, |
| "loss": 0.4169, |
| "step": 1761 |
| }, |
| { |
| "epoch": 2.8192, |
| "grad_norm": 0.5819666810024561, |
| "learning_rate": 3.6080830923598266e-07, |
| "loss": 0.4034, |
| "step": 1762 |
| }, |
| { |
| "epoch": 2.8208, |
| "grad_norm": 0.624654571648536, |
| "learning_rate": 3.545816782491657e-07, |
| "loss": 0.407, |
| "step": 1763 |
| }, |
| { |
| "epoch": 2.8224, |
| "grad_norm": 0.556916942200227, |
| "learning_rate": 3.484072682384465e-07, |
| "loss": 0.38, |
| "step": 1764 |
| }, |
| { |
| "epoch": 2.824, |
| "grad_norm": 0.5783913767205718, |
| "learning_rate": 3.422851486127987e-07, |
| "loss": 0.3636, |
| "step": 1765 |
| }, |
| { |
| "epoch": 2.8256, |
| "grad_norm": 0.5950530497276595, |
| "learning_rate": 3.3621538819337776e-07, |
| "loss": 0.3936, |
| "step": 1766 |
| }, |
| { |
| "epoch": 2.8272, |
| "grad_norm": 0.5729311169002653, |
| "learning_rate": 3.301980552127509e-07, |
| "loss": 0.3953, |
| "step": 1767 |
| }, |
| { |
| "epoch": 2.8288, |
| "grad_norm": 0.6104157298212889, |
| "learning_rate": 3.2423321731412774e-07, |
| "loss": 0.392, |
| "step": 1768 |
| }, |
| { |
| "epoch": 2.8304, |
| "grad_norm": 0.6151767806763212, |
| "learning_rate": 3.1832094155059776e-07, |
| "loss": 0.409, |
| "step": 1769 |
| }, |
| { |
| "epoch": 2.832, |
| "grad_norm": 0.5970876033807373, |
| "learning_rate": 3.1246129438438076e-07, |
| "loss": 0.3966, |
| "step": 1770 |
| }, |
| { |
| "epoch": 2.8336, |
| "grad_norm": 0.5906164353444058, |
| "learning_rate": 3.0665434168607846e-07, |
| "loss": 0.3647, |
| "step": 1771 |
| }, |
| { |
| "epoch": 2.8352, |
| "grad_norm": 0.6423459013932351, |
| "learning_rate": 3.009001487339308e-07, |
| "loss": 0.3946, |
| "step": 1772 |
| }, |
| { |
| "epoch": 2.8368, |
| "grad_norm": 0.618731392362854, |
| "learning_rate": 2.9519878021308624e-07, |
| "loss": 0.4115, |
| "step": 1773 |
| }, |
| { |
| "epoch": 2.8384, |
| "grad_norm": 0.5631497037610242, |
| "learning_rate": 2.8955030021487254e-07, |
| "loss": 0.376, |
| "step": 1774 |
| }, |
| { |
| "epoch": 2.84, |
| "grad_norm": 0.5686353998203753, |
| "learning_rate": 2.839547722360769e-07, |
| "loss": 0.4046, |
| "step": 1775 |
| }, |
| { |
| "epoch": 2.8416, |
| "grad_norm": 0.5868907407507975, |
| "learning_rate": 2.7841225917823347e-07, |
| "loss": 0.3994, |
| "step": 1776 |
| }, |
| { |
| "epoch": 2.8432, |
| "grad_norm": 0.6131155477454447, |
| "learning_rate": 2.7292282334691167e-07, |
| "loss": 0.407, |
| "step": 1777 |
| }, |
| { |
| "epoch": 2.8448, |
| "grad_norm": 0.5611882012761621, |
| "learning_rate": 2.674865264510218e-07, |
| "loss": 0.383, |
| "step": 1778 |
| }, |
| { |
| "epoch": 2.8464, |
| "grad_norm": 0.6360575911404578, |
| "learning_rate": 2.6210342960211744e-07, |
| "loss": 0.4001, |
| "step": 1779 |
| }, |
| { |
| "epoch": 2.848, |
| "grad_norm": 0.5910729354119789, |
| "learning_rate": 2.5677359331370834e-07, |
| "loss": 0.3948, |
| "step": 1780 |
| }, |
| { |
| "epoch": 2.8496, |
| "grad_norm": 0.6193866816475463, |
| "learning_rate": 2.5149707750058316e-07, |
| "loss": 0.4185, |
| "step": 1781 |
| }, |
| { |
| "epoch": 2.8512, |
| "grad_norm": 0.5952981628387326, |
| "learning_rate": 2.462739414781334e-07, |
| "loss": 0.384, |
| "step": 1782 |
| }, |
| { |
| "epoch": 2.8528000000000002, |
| "grad_norm": 0.5766562432375145, |
| "learning_rate": 2.411042439616873e-07, |
| "loss": 0.3718, |
| "step": 1783 |
| }, |
| { |
| "epoch": 2.8544, |
| "grad_norm": 0.6042455836579004, |
| "learning_rate": 2.3598804306584843e-07, |
| "loss": 0.3972, |
| "step": 1784 |
| }, |
| { |
| "epoch": 2.856, |
| "grad_norm": 0.5832791231533607, |
| "learning_rate": 2.309253963038477e-07, |
| "loss": 0.3884, |
| "step": 1785 |
| }, |
| { |
| "epoch": 2.8576, |
| "grad_norm": 0.57155021302307, |
| "learning_rate": 2.2591636058688804e-07, |
| "loss": 0.3726, |
| "step": 1786 |
| }, |
| { |
| "epoch": 2.8592, |
| "grad_norm": 0.584952063487649, |
| "learning_rate": 2.2096099222351343e-07, |
| "loss": 0.3761, |
| "step": 1787 |
| }, |
| { |
| "epoch": 2.8608000000000002, |
| "grad_norm": 0.6044143923654395, |
| "learning_rate": 2.1605934691896868e-07, |
| "loss": 0.422, |
| "step": 1788 |
| }, |
| { |
| "epoch": 2.8624, |
| "grad_norm": 0.5983876466668332, |
| "learning_rate": 2.1121147977457956e-07, |
| "loss": 0.3915, |
| "step": 1789 |
| }, |
| { |
| "epoch": 2.864, |
| "grad_norm": 0.578092623216434, |
| "learning_rate": 2.0641744528712925e-07, |
| "loss": 0.3715, |
| "step": 1790 |
| }, |
| { |
| "epoch": 2.8656, |
| "grad_norm": 0.6189519979180191, |
| "learning_rate": 2.0167729734824558e-07, |
| "loss": 0.3964, |
| "step": 1791 |
| }, |
| { |
| "epoch": 2.8672, |
| "grad_norm": 0.6329375967863802, |
| "learning_rate": 1.9699108924379818e-07, |
| "loss": 0.3965, |
| "step": 1792 |
| }, |
| { |
| "epoch": 2.8688000000000002, |
| "grad_norm": 0.5936327910809372, |
| "learning_rate": 1.9235887365329774e-07, |
| "loss": 0.3969, |
| "step": 1793 |
| }, |
| { |
| "epoch": 2.8704, |
| "grad_norm": 0.6062843542755041, |
| "learning_rate": 1.877807026493028e-07, |
| "loss": 0.374, |
| "step": 1794 |
| }, |
| { |
| "epoch": 2.872, |
| "grad_norm": 0.6064985451141988, |
| "learning_rate": 1.832566276968345e-07, |
| "loss": 0.4009, |
| "step": 1795 |
| }, |
| { |
| "epoch": 2.8736, |
| "grad_norm": 0.5791394670396515, |
| "learning_rate": 1.7878669965280315e-07, |
| "loss": 0.4081, |
| "step": 1796 |
| }, |
| { |
| "epoch": 2.8752, |
| "grad_norm": 0.5772849682819012, |
| "learning_rate": 1.7437096876542713e-07, |
| "loss": 0.3877, |
| "step": 1797 |
| }, |
| { |
| "epoch": 2.8768000000000002, |
| "grad_norm": 0.5755792836414835, |
| "learning_rate": 1.7000948467367718e-07, |
| "loss": 0.3717, |
| "step": 1798 |
| }, |
| { |
| "epoch": 2.8784, |
| "grad_norm": 0.5641764856486274, |
| "learning_rate": 1.657022964067112e-07, |
| "loss": 0.3744, |
| "step": 1799 |
| }, |
| { |
| "epoch": 2.88, |
| "grad_norm": 0.591931491296406, |
| "learning_rate": 1.6144945238332987e-07, |
| "loss": 0.3807, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.8816, |
| "grad_norm": 0.6021597426570837, |
| "learning_rate": 1.5725100041142694e-07, |
| "loss": 0.3961, |
| "step": 1801 |
| }, |
| { |
| "epoch": 2.8832, |
| "grad_norm": 0.5737465714509197, |
| "learning_rate": 1.5310698768745247e-07, |
| "loss": 0.3732, |
| "step": 1802 |
| }, |
| { |
| "epoch": 2.8848000000000003, |
| "grad_norm": 0.5892387520983471, |
| "learning_rate": 1.4901746079588552e-07, |
| "loss": 0.3896, |
| "step": 1803 |
| }, |
| { |
| "epoch": 2.8864, |
| "grad_norm": 0.5870420700599931, |
| "learning_rate": 1.4498246570870843e-07, |
| "loss": 0.3829, |
| "step": 1804 |
| }, |
| { |
| "epoch": 2.888, |
| "grad_norm": 0.578380767083382, |
| "learning_rate": 1.4100204778488947e-07, |
| "loss": 0.3827, |
| "step": 1805 |
| }, |
| { |
| "epoch": 2.8895999999999997, |
| "grad_norm": 0.561509751999802, |
| "learning_rate": 1.370762517698715e-07, |
| "loss": 0.3697, |
| "step": 1806 |
| }, |
| { |
| "epoch": 2.8912, |
| "grad_norm": 0.6183409039095087, |
| "learning_rate": 1.3320512179507528e-07, |
| "loss": 0.402, |
| "step": 1807 |
| }, |
| { |
| "epoch": 2.8928000000000003, |
| "grad_norm": 0.5875985516283856, |
| "learning_rate": 1.293887013773959e-07, |
| "loss": 0.3915, |
| "step": 1808 |
| }, |
| { |
| "epoch": 2.8944, |
| "grad_norm": 0.5828817132400705, |
| "learning_rate": 1.2562703341871708e-07, |
| "loss": 0.3962, |
| "step": 1809 |
| }, |
| { |
| "epoch": 2.896, |
| "grad_norm": 0.5709156992366151, |
| "learning_rate": 1.2192016020542986e-07, |
| "loss": 0.4018, |
| "step": 1810 |
| }, |
| { |
| "epoch": 2.8975999999999997, |
| "grad_norm": 0.5675316884964438, |
| "learning_rate": 1.1826812340795524e-07, |
| "loss": 0.3889, |
| "step": 1811 |
| }, |
| { |
| "epoch": 2.8992, |
| "grad_norm": 0.5660206983512364, |
| "learning_rate": 1.1467096408027678e-07, |
| "loss": 0.3797, |
| "step": 1812 |
| }, |
| { |
| "epoch": 2.9008000000000003, |
| "grad_norm": 0.5759501694299175, |
| "learning_rate": 1.1112872265947816e-07, |
| "loss": 0.3673, |
| "step": 1813 |
| }, |
| { |
| "epoch": 2.9024, |
| "grad_norm": 0.6136831172005237, |
| "learning_rate": 1.0764143896528967e-07, |
| "loss": 0.4292, |
| "step": 1814 |
| }, |
| { |
| "epoch": 2.904, |
| "grad_norm": 0.6035412046125581, |
| "learning_rate": 1.0420915219964023e-07, |
| "loss": 0.3758, |
| "step": 1815 |
| }, |
| { |
| "epoch": 2.9055999999999997, |
| "grad_norm": 0.5939026598018314, |
| "learning_rate": 1.008319009462172e-07, |
| "loss": 0.3907, |
| "step": 1816 |
| }, |
| { |
| "epoch": 2.9072, |
| "grad_norm": 0.5915764633109246, |
| "learning_rate": 9.75097231700295e-08, |
| "loss": 0.4082, |
| "step": 1817 |
| }, |
| { |
| "epoch": 2.9088000000000003, |
| "grad_norm": 0.5878852986273059, |
| "learning_rate": 9.424265621698736e-08, |
| "loss": 0.4037, |
| "step": 1818 |
| }, |
| { |
| "epoch": 2.9104, |
| "grad_norm": 0.6045378997343863, |
| "learning_rate": 9.103073681347607e-08, |
| "loss": 0.3877, |
| "step": 1819 |
| }, |
| { |
| "epoch": 2.912, |
| "grad_norm": 0.6137673568136339, |
| "learning_rate": 8.787400106594568e-08, |
| "loss": 0.3996, |
| "step": 1820 |
| }, |
| { |
| "epoch": 2.9135999999999997, |
| "grad_norm": 0.5874825425674062, |
| "learning_rate": 8.477248446050523e-08, |
| "loss": 0.4075, |
| "step": 1821 |
| }, |
| { |
| "epoch": 2.9152, |
| "grad_norm": 0.6191104133551966, |
| "learning_rate": 8.172622186252421e-08, |
| "loss": 0.4128, |
| "step": 1822 |
| }, |
| { |
| "epoch": 2.9168, |
| "grad_norm": 0.5986660459769473, |
| "learning_rate": 7.873524751624006e-08, |
| "loss": 0.3945, |
| "step": 1823 |
| }, |
| { |
| "epoch": 2.9184, |
| "grad_norm": 0.5814253850836604, |
| "learning_rate": 7.579959504437184e-08, |
| "loss": 0.3744, |
| "step": 1824 |
| }, |
| { |
| "epoch": 2.92, |
| "grad_norm": 0.6408180368914321, |
| "learning_rate": 7.291929744774495e-08, |
| "loss": 0.3957, |
| "step": 1825 |
| }, |
| { |
| "epoch": 2.9215999999999998, |
| "grad_norm": 0.5940898505743869, |
| "learning_rate": 7.009438710491978e-08, |
| "loss": 0.3974, |
| "step": 1826 |
| }, |
| { |
| "epoch": 2.9232, |
| "grad_norm": 0.5748885742134016, |
| "learning_rate": 6.732489577182422e-08, |
| "loss": 0.3863, |
| "step": 1827 |
| }, |
| { |
| "epoch": 2.9248, |
| "grad_norm": 0.5928273265231164, |
| "learning_rate": 6.461085458140059e-08, |
| "loss": 0.4054, |
| "step": 1828 |
| }, |
| { |
| "epoch": 2.9264, |
| "grad_norm": 0.61406609505904, |
| "learning_rate": 6.195229404325542e-08, |
| "loss": 0.4038, |
| "step": 1829 |
| }, |
| { |
| "epoch": 2.928, |
| "grad_norm": 0.5982594770367062, |
| "learning_rate": 5.934924404331355e-08, |
| "loss": 0.3942, |
| "step": 1830 |
| }, |
| { |
| "epoch": 2.9295999999999998, |
| "grad_norm": 0.5833835611307522, |
| "learning_rate": 5.680173384348453e-08, |
| "loss": 0.3814, |
| "step": 1831 |
| }, |
| { |
| "epoch": 2.9312, |
| "grad_norm": 0.5953616192217653, |
| "learning_rate": 5.4309792081334024e-08, |
| "loss": 0.378, |
| "step": 1832 |
| }, |
| { |
| "epoch": 2.9328, |
| "grad_norm": 0.5788430998553257, |
| "learning_rate": 5.187344676976014e-08, |
| "loss": 0.374, |
| "step": 1833 |
| }, |
| { |
| "epoch": 2.9344, |
| "grad_norm": 0.5983250963476545, |
| "learning_rate": 4.949272529667926e-08, |
| "loss": 0.3908, |
| "step": 1834 |
| }, |
| { |
| "epoch": 2.936, |
| "grad_norm": 0.6329342686357748, |
| "learning_rate": 4.716765442471849e-08, |
| "loss": 0.4017, |
| "step": 1835 |
| }, |
| { |
| "epoch": 2.9375999999999998, |
| "grad_norm": 0.6104543471470529, |
| "learning_rate": 4.489826029091593e-08, |
| "loss": 0.3935, |
| "step": 1836 |
| }, |
| { |
| "epoch": 2.9392, |
| "grad_norm": 0.6183565501070613, |
| "learning_rate": 4.2684568406423656e-08, |
| "loss": 0.3858, |
| "step": 1837 |
| }, |
| { |
| "epoch": 2.9408, |
| "grad_norm": 0.6040788483466192, |
| "learning_rate": 4.0526603656223515e-08, |
| "loss": 0.3921, |
| "step": 1838 |
| }, |
| { |
| "epoch": 2.9424, |
| "grad_norm": 0.59530129492777, |
| "learning_rate": 3.8424390298846815e-08, |
| "loss": 0.3872, |
| "step": 1839 |
| }, |
| { |
| "epoch": 2.944, |
| "grad_norm": 0.5905525722251078, |
| "learning_rate": 3.637795196610228e-08, |
| "loss": 0.4187, |
| "step": 1840 |
| }, |
| { |
| "epoch": 2.9455999999999998, |
| "grad_norm": 0.5954984827514153, |
| "learning_rate": 3.4387311662807396e-08, |
| "loss": 0.3882, |
| "step": 1841 |
| }, |
| { |
| "epoch": 2.9472, |
| "grad_norm": 0.5893859257500954, |
| "learning_rate": 3.24524917665342e-08, |
| "loss": 0.3845, |
| "step": 1842 |
| }, |
| { |
| "epoch": 2.9488, |
| "grad_norm": 0.5854141760253769, |
| "learning_rate": 3.0573514027355535e-08, |
| "loss": 0.385, |
| "step": 1843 |
| }, |
| { |
| "epoch": 2.9504, |
| "grad_norm": 0.5973858670348429, |
| "learning_rate": 2.8750399567599174e-08, |
| "loss": 0.3865, |
| "step": 1844 |
| }, |
| { |
| "epoch": 2.952, |
| "grad_norm": 0.5688150622764607, |
| "learning_rate": 2.6983168881611897e-08, |
| "loss": 0.3834, |
| "step": 1845 |
| }, |
| { |
| "epoch": 2.9536, |
| "grad_norm": 0.5866592155654836, |
| "learning_rate": 2.527184183553022e-08, |
| "loss": 0.3843, |
| "step": 1846 |
| }, |
| { |
| "epoch": 2.9552, |
| "grad_norm": 0.5943566654704281, |
| "learning_rate": 2.3616437667055014e-08, |
| "loss": 0.3991, |
| "step": 1847 |
| }, |
| { |
| "epoch": 2.9568, |
| "grad_norm": 0.5610313719080197, |
| "learning_rate": 2.2016974985236695e-08, |
| "loss": 0.3783, |
| "step": 1848 |
| }, |
| { |
| "epoch": 2.9584, |
| "grad_norm": 0.574680562576564, |
| "learning_rate": 2.047347177026371e-08, |
| "loss": 0.3757, |
| "step": 1849 |
| }, |
| { |
| "epoch": 2.96, |
| "grad_norm": 0.5775662336109608, |
| "learning_rate": 1.898594537326437e-08, |
| "loss": 0.3792, |
| "step": 1850 |
| }, |
| { |
| "epoch": 2.9616, |
| "grad_norm": 0.5896786369541753, |
| "learning_rate": 1.7554412516108678e-08, |
| "loss": 0.3852, |
| "step": 1851 |
| }, |
| { |
| "epoch": 2.9632, |
| "grad_norm": 0.5635671620269883, |
| "learning_rate": 1.6178889291220135e-08, |
| "loss": 0.3837, |
| "step": 1852 |
| }, |
| { |
| "epoch": 2.9648, |
| "grad_norm": 0.5856894931575174, |
| "learning_rate": 1.4859391161397008e-08, |
| "loss": 0.3809, |
| "step": 1853 |
| }, |
| { |
| "epoch": 2.9664, |
| "grad_norm": 0.5700795764285082, |
| "learning_rate": 1.3595932959638015e-08, |
| "loss": 0.3938, |
| "step": 1854 |
| }, |
| { |
| "epoch": 2.968, |
| "grad_norm": 0.5617189246959258, |
| "learning_rate": 1.2388528888973017e-08, |
| "loss": 0.3528, |
| "step": 1855 |
| }, |
| { |
| "epoch": 2.9696, |
| "grad_norm": 0.6603954428250471, |
| "learning_rate": 1.1237192522307594e-08, |
| "loss": 0.3925, |
| "step": 1856 |
| }, |
| { |
| "epoch": 2.9712, |
| "grad_norm": 0.6090255027413676, |
| "learning_rate": 1.014193680226594e-08, |
| "loss": 0.3624, |
| "step": 1857 |
| }, |
| { |
| "epoch": 2.9728, |
| "grad_norm": 0.5901397399448918, |
| "learning_rate": 9.102774041049867e-09, |
| "loss": 0.4002, |
| "step": 1858 |
| }, |
| { |
| "epoch": 2.9744, |
| "grad_norm": 0.5963203048784672, |
| "learning_rate": 8.119715920296145e-09, |
| "loss": 0.3811, |
| "step": 1859 |
| }, |
| { |
| "epoch": 2.976, |
| "grad_norm": 0.5860303110406762, |
| "learning_rate": 7.1927734909488235e-09, |
| "loss": 0.3954, |
| "step": 1860 |
| }, |
| { |
| "epoch": 2.9776, |
| "grad_norm": 0.5906482897156549, |
| "learning_rate": 6.321957173132665e-09, |
| "loss": 0.3966, |
| "step": 1861 |
| }, |
| { |
| "epoch": 2.9792, |
| "grad_norm": 0.6018549097676631, |
| "learning_rate": 5.507276756036018e-09, |
| "loss": 0.3831, |
| "step": 1862 |
| }, |
| { |
| "epoch": 2.9808, |
| "grad_norm": 0.5958485094010272, |
| "learning_rate": 4.74874139780257e-09, |
| "loss": 0.3624, |
| "step": 1863 |
| }, |
| { |
| "epoch": 2.9824, |
| "grad_norm": 0.5802974238961234, |
| "learning_rate": 4.046359625426988e-09, |
| "loss": 0.3913, |
| "step": 1864 |
| }, |
| { |
| "epoch": 2.984, |
| "grad_norm": 0.5941602413219743, |
| "learning_rate": 3.400139334658881e-09, |
| "loss": 0.4107, |
| "step": 1865 |
| }, |
| { |
| "epoch": 2.9856, |
| "grad_norm": 0.5949666726420274, |
| "learning_rate": 2.81008778991565e-09, |
| "loss": 0.3898, |
| "step": 1866 |
| }, |
| { |
| "epoch": 2.9872, |
| "grad_norm": 0.5706501152872799, |
| "learning_rate": 2.27621162419811e-09, |
| "loss": 0.3801, |
| "step": 1867 |
| }, |
| { |
| "epoch": 2.9888, |
| "grad_norm": 0.5920080363506407, |
| "learning_rate": 1.7985168390194375e-09, |
| "loss": 0.4038, |
| "step": 1868 |
| }, |
| { |
| "epoch": 2.9904, |
| "grad_norm": 0.5923134764523963, |
| "learning_rate": 1.3770088043335573e-09, |
| "loss": 0.4083, |
| "step": 1869 |
| }, |
| { |
| "epoch": 2.992, |
| "grad_norm": 0.5650835745974974, |
| "learning_rate": 1.01169225847908e-09, |
| "loss": 0.3843, |
| "step": 1870 |
| }, |
| { |
| "epoch": 2.9936, |
| "grad_norm": 0.577196003203904, |
| "learning_rate": 7.025713081232343e-10, |
| "loss": 0.3686, |
| "step": 1871 |
| }, |
| { |
| "epoch": 2.9952, |
| "grad_norm": 0.5902254985223518, |
| "learning_rate": 4.496494282157926e-10, |
| "loss": 0.3921, |
| "step": 1872 |
| }, |
| { |
| "epoch": 2.9968, |
| "grad_norm": 0.5784728891063586, |
| "learning_rate": 2.529294619513234e-10, |
| "loss": 0.4188, |
| "step": 1873 |
| }, |
| { |
| "epoch": 2.9984, |
| "grad_norm": 0.5636393556114256, |
| "learning_rate": 1.1241362073588502e-10, |
| "loss": 0.3836, |
| "step": 1874 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.5863281249393232, |
| "learning_rate": 2.8103484164820894e-11, |
| "loss": 0.3853, |
| "step": 1875 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1875, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 2000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1063007016714240.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|