| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.2, |
| "eval_steps": 500, |
| "global_step": 600, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002, |
| "grad_norm": 9.074012756347656, |
| "learning_rate": 0.0, |
| "loss": 1.1121, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.004, |
| "grad_norm": 10.233168601989746, |
| "learning_rate": 6.666666666666668e-08, |
| "loss": 1.2991, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.006, |
| "grad_norm": 10.353023529052734, |
| "learning_rate": 1.3333333333333336e-07, |
| "loss": 1.3845, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.008, |
| "grad_norm": 9.337516784667969, |
| "learning_rate": 2.0000000000000002e-07, |
| "loss": 1.2514, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 10.30583667755127, |
| "learning_rate": 2.666666666666667e-07, |
| "loss": 1.3357, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.012, |
| "grad_norm": 9.072500228881836, |
| "learning_rate": 3.3333333333333335e-07, |
| "loss": 1.1423, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.014, |
| "grad_norm": 9.9359712600708, |
| "learning_rate": 4.0000000000000003e-07, |
| "loss": 1.2476, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 9.5555419921875, |
| "learning_rate": 4.666666666666667e-07, |
| "loss": 1.2722, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.018, |
| "grad_norm": 11.1956148147583, |
| "learning_rate": 5.333333333333335e-07, |
| "loss": 1.4133, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 9.675064086914062, |
| "learning_rate": 6.000000000000001e-07, |
| "loss": 1.3254, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.022, |
| "grad_norm": 8.407377243041992, |
| "learning_rate": 6.666666666666667e-07, |
| "loss": 1.2715, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.024, |
| "grad_norm": 8.663851737976074, |
| "learning_rate": 7.333333333333334e-07, |
| "loss": 1.2211, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.026, |
| "grad_norm": 6.422065258026123, |
| "learning_rate": 8.000000000000001e-07, |
| "loss": 1.1171, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.028, |
| "grad_norm": 5.890502452850342, |
| "learning_rate": 8.666666666666668e-07, |
| "loss": 1.1156, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 6.316369533538818, |
| "learning_rate": 9.333333333333334e-07, |
| "loss": 1.2838, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 5.3838911056518555, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 1.0919, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.034, |
| "grad_norm": 3.512526273727417, |
| "learning_rate": 1.066666666666667e-06, |
| "loss": 0.9868, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.036, |
| "grad_norm": 3.633479595184326, |
| "learning_rate": 1.1333333333333334e-06, |
| "loss": 1.154, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.038, |
| "grad_norm": 3.0341765880584717, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": 0.9785, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 3.596338987350464, |
| "learning_rate": 1.2666666666666669e-06, |
| "loss": 1.0783, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.042, |
| "grad_norm": 3.751676082611084, |
| "learning_rate": 1.3333333333333334e-06, |
| "loss": 1.0437, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.044, |
| "grad_norm": 2.806690216064453, |
| "learning_rate": 1.4000000000000001e-06, |
| "loss": 1.0281, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.046, |
| "grad_norm": 2.822889566421509, |
| "learning_rate": 1.4666666666666669e-06, |
| "loss": 0.9858, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 4.2820024490356445, |
| "learning_rate": 1.5333333333333334e-06, |
| "loss": 1.1258, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 4.147153377532959, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 0.9762, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.052, |
| "grad_norm": 3.698709726333618, |
| "learning_rate": 1.6666666666666667e-06, |
| "loss": 0.9955, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.054, |
| "grad_norm": 3.4206106662750244, |
| "learning_rate": 1.7333333333333336e-06, |
| "loss": 0.9448, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.056, |
| "grad_norm": 2.970322608947754, |
| "learning_rate": 1.8000000000000001e-06, |
| "loss": 0.9204, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.058, |
| "grad_norm": 2.804030179977417, |
| "learning_rate": 1.8666666666666669e-06, |
| "loss": 0.9797, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 2.511726140975952, |
| "learning_rate": 1.9333333333333336e-06, |
| "loss": 0.9715, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.062, |
| "grad_norm": 2.303981304168701, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.9446, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 2.4346749782562256, |
| "learning_rate": 2.0666666666666666e-06, |
| "loss": 0.9981, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.066, |
| "grad_norm": 2.267148494720459, |
| "learning_rate": 2.133333333333334e-06, |
| "loss": 0.9105, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.068, |
| "grad_norm": 2.305480718612671, |
| "learning_rate": 2.2e-06, |
| "loss": 0.9894, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 2.2832963466644287, |
| "learning_rate": 2.266666666666667e-06, |
| "loss": 0.9252, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.072, |
| "grad_norm": 2.43243408203125, |
| "learning_rate": 2.3333333333333336e-06, |
| "loss": 0.9257, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.074, |
| "grad_norm": 2.2071030139923096, |
| "learning_rate": 2.4000000000000003e-06, |
| "loss": 0.9159, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.076, |
| "grad_norm": 2.364295482635498, |
| "learning_rate": 2.466666666666667e-06, |
| "loss": 0.9814, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.078, |
| "grad_norm": 2.3316822052001953, |
| "learning_rate": 2.5333333333333338e-06, |
| "loss": 0.8988, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.122209310531616, |
| "learning_rate": 2.6e-06, |
| "loss": 0.843, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.082, |
| "grad_norm": 2.1770570278167725, |
| "learning_rate": 2.666666666666667e-06, |
| "loss": 0.9904, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.084, |
| "grad_norm": 1.8280588388442993, |
| "learning_rate": 2.7333333333333336e-06, |
| "loss": 0.8662, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.086, |
| "grad_norm": 2.038935422897339, |
| "learning_rate": 2.8000000000000003e-06, |
| "loss": 0.9375, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.088, |
| "grad_norm": 1.918784260749817, |
| "learning_rate": 2.866666666666667e-06, |
| "loss": 0.8811, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 2.156174421310425, |
| "learning_rate": 2.9333333333333338e-06, |
| "loss": 0.9608, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.092, |
| "grad_norm": 1.9383772611618042, |
| "learning_rate": 3e-06, |
| "loss": 0.7929, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.094, |
| "grad_norm": 2.064772605895996, |
| "learning_rate": 3.066666666666667e-06, |
| "loss": 0.7918, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 1.6674835681915283, |
| "learning_rate": 3.133333333333334e-06, |
| "loss": 0.781, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.098, |
| "grad_norm": 1.9640334844589233, |
| "learning_rate": 3.2000000000000003e-06, |
| "loss": 0.8781, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 1.7003456354141235, |
| "learning_rate": 3.266666666666667e-06, |
| "loss": 0.8295, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.102, |
| "grad_norm": 2.007157325744629, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 0.7991, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.104, |
| "grad_norm": 1.923763632774353, |
| "learning_rate": 3.4000000000000005e-06, |
| "loss": 0.8986, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.106, |
| "grad_norm": 1.9844627380371094, |
| "learning_rate": 3.4666666666666672e-06, |
| "loss": 0.8552, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.108, |
| "grad_norm": 1.7688173055648804, |
| "learning_rate": 3.5333333333333335e-06, |
| "loss": 0.8094, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 2.014064311981201, |
| "learning_rate": 3.6000000000000003e-06, |
| "loss": 0.8492, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 1.7487848997116089, |
| "learning_rate": 3.6666666666666666e-06, |
| "loss": 0.8027, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.114, |
| "grad_norm": 2.024191379547119, |
| "learning_rate": 3.7333333333333337e-06, |
| "loss": 0.7526, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.116, |
| "grad_norm": 1.862096905708313, |
| "learning_rate": 3.8000000000000005e-06, |
| "loss": 0.822, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.118, |
| "grad_norm": 1.8116499185562134, |
| "learning_rate": 3.866666666666667e-06, |
| "loss": 0.8448, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 2.121875762939453, |
| "learning_rate": 3.9333333333333335e-06, |
| "loss": 0.8509, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.122, |
| "grad_norm": 1.9175318479537964, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.7965, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.124, |
| "grad_norm": 1.937557339668274, |
| "learning_rate": 4.066666666666667e-06, |
| "loss": 0.8288, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.126, |
| "grad_norm": 1.920708417892456, |
| "learning_rate": 4.133333333333333e-06, |
| "loss": 0.837, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 1.8257769346237183, |
| "learning_rate": 4.2000000000000004e-06, |
| "loss": 0.8358, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 2.02504825592041, |
| "learning_rate": 4.266666666666668e-06, |
| "loss": 0.8389, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.132, |
| "grad_norm": 1.7633429765701294, |
| "learning_rate": 4.333333333333334e-06, |
| "loss": 0.838, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.134, |
| "grad_norm": 1.7253111600875854, |
| "learning_rate": 4.4e-06, |
| "loss": 0.7638, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.136, |
| "grad_norm": 1.757811188697815, |
| "learning_rate": 4.4666666666666665e-06, |
| "loss": 0.8302, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.138, |
| "grad_norm": 1.8107619285583496, |
| "learning_rate": 4.533333333333334e-06, |
| "loss": 0.8787, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 2.0448291301727295, |
| "learning_rate": 4.600000000000001e-06, |
| "loss": 0.844, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.142, |
| "grad_norm": 1.9952377080917358, |
| "learning_rate": 4.666666666666667e-06, |
| "loss": 0.8028, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 1.8734209537506104, |
| "learning_rate": 4.7333333333333335e-06, |
| "loss": 0.8019, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.146, |
| "grad_norm": 1.9044350385665894, |
| "learning_rate": 4.800000000000001e-06, |
| "loss": 0.8009, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.148, |
| "grad_norm": 1.6113317012786865, |
| "learning_rate": 4.866666666666667e-06, |
| "loss": 0.7548, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 1.8096058368682861, |
| "learning_rate": 4.933333333333334e-06, |
| "loss": 0.8315, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.152, |
| "grad_norm": 1.917984962463379, |
| "learning_rate": 5e-06, |
| "loss": 0.6815, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.154, |
| "grad_norm": 1.8071333169937134, |
| "learning_rate": 5.0666666666666676e-06, |
| "loss": 0.8477, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.156, |
| "grad_norm": 1.7420942783355713, |
| "learning_rate": 5.133333333333334e-06, |
| "loss": 0.8223, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.158, |
| "grad_norm": 1.6158325672149658, |
| "learning_rate": 5.2e-06, |
| "loss": 0.7425, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 1.6784685850143433, |
| "learning_rate": 5.2666666666666665e-06, |
| "loss": 0.7937, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.162, |
| "grad_norm": 1.7116400003433228, |
| "learning_rate": 5.333333333333334e-06, |
| "loss": 0.7972, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.164, |
| "grad_norm": 1.817854881286621, |
| "learning_rate": 5.400000000000001e-06, |
| "loss": 0.7349, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.166, |
| "grad_norm": 1.6824537515640259, |
| "learning_rate": 5.466666666666667e-06, |
| "loss": 0.7843, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.168, |
| "grad_norm": 1.8841086626052856, |
| "learning_rate": 5.533333333333334e-06, |
| "loss": 0.7751, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 1.7656822204589844, |
| "learning_rate": 5.600000000000001e-06, |
| "loss": 0.8324, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.172, |
| "grad_norm": 1.8588966131210327, |
| "learning_rate": 5.666666666666667e-06, |
| "loss": 0.7741, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.174, |
| "grad_norm": 1.826100468635559, |
| "learning_rate": 5.733333333333334e-06, |
| "loss": 0.7904, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 2.0713677406311035, |
| "learning_rate": 5.8e-06, |
| "loss": 0.7186, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.178, |
| "grad_norm": 1.842650294303894, |
| "learning_rate": 5.8666666666666675e-06, |
| "loss": 0.7625, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 1.7636773586273193, |
| "learning_rate": 5.933333333333335e-06, |
| "loss": 0.7325, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.182, |
| "grad_norm": 1.9637341499328613, |
| "learning_rate": 6e-06, |
| "loss": 0.7395, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.184, |
| "grad_norm": 2.0242807865142822, |
| "learning_rate": 6.066666666666667e-06, |
| "loss": 0.7709, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.186, |
| "grad_norm": 1.6189954280853271, |
| "learning_rate": 6.133333333333334e-06, |
| "loss": 0.7162, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.188, |
| "grad_norm": 1.6651357412338257, |
| "learning_rate": 6.200000000000001e-06, |
| "loss": 0.6606, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 1.5688307285308838, |
| "learning_rate": 6.266666666666668e-06, |
| "loss": 0.6288, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 1.7620747089385986, |
| "learning_rate": 6.333333333333333e-06, |
| "loss": 0.7872, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.194, |
| "grad_norm": 1.949406385421753, |
| "learning_rate": 6.4000000000000006e-06, |
| "loss": 0.7401, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.196, |
| "grad_norm": 1.9228166341781616, |
| "learning_rate": 6.466666666666667e-06, |
| "loss": 0.8483, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.198, |
| "grad_norm": 1.688018798828125, |
| "learning_rate": 6.533333333333334e-06, |
| "loss": 0.7293, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 1.861033320426941, |
| "learning_rate": 6.600000000000001e-06, |
| "loss": 0.7434, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.202, |
| "grad_norm": 1.5343120098114014, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 0.7408, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.204, |
| "grad_norm": 2.037724256515503, |
| "learning_rate": 6.733333333333334e-06, |
| "loss": 0.7478, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.206, |
| "grad_norm": 1.756672739982605, |
| "learning_rate": 6.800000000000001e-06, |
| "loss": 0.7826, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 1.841848373413086, |
| "learning_rate": 6.866666666666667e-06, |
| "loss": 0.6668, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 1.9532805681228638, |
| "learning_rate": 6.9333333333333344e-06, |
| "loss": 0.7651, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.212, |
| "grad_norm": 2.2724688053131104, |
| "learning_rate": 7e-06, |
| "loss": 0.8298, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.214, |
| "grad_norm": 2.0979325771331787, |
| "learning_rate": 7.066666666666667e-06, |
| "loss": 0.7218, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.216, |
| "grad_norm": 2.240551710128784, |
| "learning_rate": 7.133333333333334e-06, |
| "loss": 0.7275, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.218, |
| "grad_norm": 1.7819173336029053, |
| "learning_rate": 7.2000000000000005e-06, |
| "loss": 0.7546, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 1.8862335681915283, |
| "learning_rate": 7.266666666666668e-06, |
| "loss": 0.7409, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.222, |
| "grad_norm": 2.2147364616394043, |
| "learning_rate": 7.333333333333333e-06, |
| "loss": 0.8455, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 1.6574546098709106, |
| "learning_rate": 7.4e-06, |
| "loss": 0.6964, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.226, |
| "grad_norm": 1.7560558319091797, |
| "learning_rate": 7.4666666666666675e-06, |
| "loss": 0.6683, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.228, |
| "grad_norm": 1.7830218076705933, |
| "learning_rate": 7.533333333333334e-06, |
| "loss": 0.7328, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 1.626004695892334, |
| "learning_rate": 7.600000000000001e-06, |
| "loss": 0.7094, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.232, |
| "grad_norm": 1.7532685995101929, |
| "learning_rate": 7.666666666666667e-06, |
| "loss": 0.6798, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.234, |
| "grad_norm": 1.8709031343460083, |
| "learning_rate": 7.733333333333334e-06, |
| "loss": 0.7504, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.236, |
| "grad_norm": 1.9467530250549316, |
| "learning_rate": 7.800000000000002e-06, |
| "loss": 0.6752, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.238, |
| "grad_norm": 1.9866005182266235, |
| "learning_rate": 7.866666666666667e-06, |
| "loss": 0.7465, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 2.031965970993042, |
| "learning_rate": 7.933333333333334e-06, |
| "loss": 0.6898, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.242, |
| "grad_norm": 2.1669931411743164, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.8142, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.244, |
| "grad_norm": 2.127737045288086, |
| "learning_rate": 8.066666666666667e-06, |
| "loss": 0.6916, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.246, |
| "grad_norm": 1.7279045581817627, |
| "learning_rate": 8.133333333333334e-06, |
| "loss": 0.6211, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.248, |
| "grad_norm": 1.8746819496154785, |
| "learning_rate": 8.2e-06, |
| "loss": 0.7103, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 1.6384533643722534, |
| "learning_rate": 8.266666666666667e-06, |
| "loss": 0.7519, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.252, |
| "grad_norm": 1.7692192792892456, |
| "learning_rate": 8.333333333333334e-06, |
| "loss": 0.6414, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.254, |
| "grad_norm": 2.125847339630127, |
| "learning_rate": 8.400000000000001e-06, |
| "loss": 0.7211, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 1.9361572265625, |
| "learning_rate": 8.466666666666668e-06, |
| "loss": 0.7447, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.258, |
| "grad_norm": 1.7561177015304565, |
| "learning_rate": 8.533333333333335e-06, |
| "loss": 0.8059, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 1.6317267417907715, |
| "learning_rate": 8.6e-06, |
| "loss": 0.6754, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.262, |
| "grad_norm": 1.769873023033142, |
| "learning_rate": 8.666666666666668e-06, |
| "loss": 0.7687, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.264, |
| "grad_norm": 1.6142659187316895, |
| "learning_rate": 8.733333333333333e-06, |
| "loss": 0.6884, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.266, |
| "grad_norm": 1.97073495388031, |
| "learning_rate": 8.8e-06, |
| "loss": 0.6229, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.268, |
| "grad_norm": 1.826964020729065, |
| "learning_rate": 8.866666666666668e-06, |
| "loss": 0.6991, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 1.6792782545089722, |
| "learning_rate": 8.933333333333333e-06, |
| "loss": 0.6273, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 2.159935474395752, |
| "learning_rate": 9e-06, |
| "loss": 0.6598, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.274, |
| "grad_norm": 1.8708237409591675, |
| "learning_rate": 9.066666666666667e-06, |
| "loss": 0.6693, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.276, |
| "grad_norm": 1.965039849281311, |
| "learning_rate": 9.133333333333335e-06, |
| "loss": 0.7231, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.278, |
| "grad_norm": 1.9504064321517944, |
| "learning_rate": 9.200000000000002e-06, |
| "loss": 0.7306, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 1.9902204275131226, |
| "learning_rate": 9.266666666666667e-06, |
| "loss": 0.712, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.282, |
| "grad_norm": 2.1614973545074463, |
| "learning_rate": 9.333333333333334e-06, |
| "loss": 0.6246, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.284, |
| "grad_norm": 2.2333343029022217, |
| "learning_rate": 9.4e-06, |
| "loss": 0.6273, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.286, |
| "grad_norm": 1.8635379076004028, |
| "learning_rate": 9.466666666666667e-06, |
| "loss": 0.6877, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 2.0924274921417236, |
| "learning_rate": 9.533333333333334e-06, |
| "loss": 0.6652, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 1.9547358751296997, |
| "learning_rate": 9.600000000000001e-06, |
| "loss": 0.6344, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.292, |
| "grad_norm": 2.028376817703247, |
| "learning_rate": 9.666666666666667e-06, |
| "loss": 0.6835, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.294, |
| "grad_norm": 2.3274097442626953, |
| "learning_rate": 9.733333333333334e-06, |
| "loss": 0.6882, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.296, |
| "grad_norm": 3.013546943664551, |
| "learning_rate": 9.800000000000001e-06, |
| "loss": 0.7537, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.298, |
| "grad_norm": 1.912866234779358, |
| "learning_rate": 9.866666666666668e-06, |
| "loss": 0.6437, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.8834062814712524, |
| "learning_rate": 9.933333333333334e-06, |
| "loss": 0.6877, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.302, |
| "grad_norm": 2.4105477333068848, |
| "learning_rate": 1e-05, |
| "loss": 0.7029, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 2.0727202892303467, |
| "learning_rate": 9.99998646145412e-06, |
| "loss": 0.6817, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.306, |
| "grad_norm": 1.970862627029419, |
| "learning_rate": 9.999945845889795e-06, |
| "loss": 0.7434, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.308, |
| "grad_norm": 1.9560319185256958, |
| "learning_rate": 9.999878153526974e-06, |
| "loss": 0.6499, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 1.9579132795333862, |
| "learning_rate": 9.999783384732242e-06, |
| "loss": 0.6387, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.312, |
| "grad_norm": 1.92261803150177, |
| "learning_rate": 9.999661540018812e-06, |
| "loss": 0.7011, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.314, |
| "grad_norm": 2.063448905944824, |
| "learning_rate": 9.999512620046523e-06, |
| "loss": 0.6473, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.316, |
| "grad_norm": 1.7672314643859863, |
| "learning_rate": 9.999336625621836e-06, |
| "loss": 0.5952, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.318, |
| "grad_norm": 2.16186261177063, |
| "learning_rate": 9.99913355769784e-06, |
| "loss": 0.6737, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 2.319474935531616, |
| "learning_rate": 9.998903417374228e-06, |
| "loss": 0.6113, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.322, |
| "grad_norm": 1.719564437866211, |
| "learning_rate": 9.99864620589731e-06, |
| "loss": 0.5909, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.324, |
| "grad_norm": 2.073621988296509, |
| "learning_rate": 9.998361924659989e-06, |
| "loss": 0.6285, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.326, |
| "grad_norm": 2.088226079940796, |
| "learning_rate": 9.998050575201772e-06, |
| "loss": 0.6524, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.328, |
| "grad_norm": 2.1609091758728027, |
| "learning_rate": 9.997712159208745e-06, |
| "loss": 0.6309, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 1.558556079864502, |
| "learning_rate": 9.99734667851357e-06, |
| "loss": 0.5617, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.332, |
| "grad_norm": 2.1843314170837402, |
| "learning_rate": 9.99695413509548e-06, |
| "loss": 0.6855, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.334, |
| "grad_norm": 1.97001051902771, |
| "learning_rate": 9.99653453108026e-06, |
| "loss": 0.5322, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.336, |
| "grad_norm": 2.2219862937927246, |
| "learning_rate": 9.996087868740244e-06, |
| "loss": 0.7239, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.338, |
| "grad_norm": 1.6730774641036987, |
| "learning_rate": 9.995614150494293e-06, |
| "loss": 0.5811, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 2.2813098430633545, |
| "learning_rate": 9.995113378907791e-06, |
| "loss": 0.65, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.342, |
| "grad_norm": 1.806111454963684, |
| "learning_rate": 9.994585556692624e-06, |
| "loss": 0.5377, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.344, |
| "grad_norm": 1.8797581195831299, |
| "learning_rate": 9.994030686707171e-06, |
| "loss": 0.6768, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.346, |
| "grad_norm": 2.2682533264160156, |
| "learning_rate": 9.993448771956285e-06, |
| "loss": 0.697, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.348, |
| "grad_norm": 2.145071029663086, |
| "learning_rate": 9.99283981559128e-06, |
| "loss": 0.6764, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 2.1207141876220703, |
| "learning_rate": 9.992203820909906e-06, |
| "loss": 0.6779, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 2.1582446098327637, |
| "learning_rate": 9.991540791356342e-06, |
| "loss": 0.5686, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.354, |
| "grad_norm": 2.3938088417053223, |
| "learning_rate": 9.99085073052117e-06, |
| "loss": 0.5409, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.356, |
| "grad_norm": 1.7042397260665894, |
| "learning_rate": 9.990133642141359e-06, |
| "loss": 0.5424, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.358, |
| "grad_norm": 1.9700382947921753, |
| "learning_rate": 9.989389530100242e-06, |
| "loss": 0.5174, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 1.8847728967666626, |
| "learning_rate": 9.988618398427495e-06, |
| "loss": 0.6533, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.362, |
| "grad_norm": 1.59050452709198, |
| "learning_rate": 9.987820251299121e-06, |
| "loss": 0.4797, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.364, |
| "grad_norm": 2.219714641571045, |
| "learning_rate": 9.986995093037422e-06, |
| "loss": 0.6777, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.366, |
| "grad_norm": 1.7240313291549683, |
| "learning_rate": 9.986142928110972e-06, |
| "loss": 0.5583, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.368, |
| "grad_norm": 1.6769351959228516, |
| "learning_rate": 9.985263761134602e-06, |
| "loss": 0.4817, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 1.8481873273849487, |
| "learning_rate": 9.984357596869369e-06, |
| "loss": 0.6243, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.372, |
| "grad_norm": 1.861271619796753, |
| "learning_rate": 9.98342444022253e-06, |
| "loss": 0.7299, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.374, |
| "grad_norm": 1.8269773721694946, |
| "learning_rate": 9.982464296247523e-06, |
| "loss": 0.5756, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.376, |
| "grad_norm": 1.9181562662124634, |
| "learning_rate": 9.981477170143924e-06, |
| "loss": 0.5722, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.378, |
| "grad_norm": 1.8730833530426025, |
| "learning_rate": 9.980463067257437e-06, |
| "loss": 0.6558, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 1.6602306365966797, |
| "learning_rate": 9.979421993079853e-06, |
| "loss": 0.5833, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.382, |
| "grad_norm": 2.296027421951294, |
| "learning_rate": 9.978353953249023e-06, |
| "loss": 0.6008, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 1.9284954071044922, |
| "learning_rate": 9.977258953548831e-06, |
| "loss": 0.5399, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.386, |
| "grad_norm": 1.9524036645889282, |
| "learning_rate": 9.976136999909156e-06, |
| "loss": 0.6628, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.388, |
| "grad_norm": 1.8157129287719727, |
| "learning_rate": 9.97498809840585e-06, |
| "loss": 0.551, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 2.375074625015259, |
| "learning_rate": 9.973812255260692e-06, |
| "loss": 0.6485, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.392, |
| "grad_norm": 1.9661322832107544, |
| "learning_rate": 9.972609476841368e-06, |
| "loss": 0.5929, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.394, |
| "grad_norm": 1.6226578950881958, |
| "learning_rate": 9.971379769661422e-06, |
| "loss": 0.482, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.396, |
| "grad_norm": 1.7707046270370483, |
| "learning_rate": 9.970123140380237e-06, |
| "loss": 0.5459, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.398, |
| "grad_norm": 1.897963047027588, |
| "learning_rate": 9.968839595802982e-06, |
| "loss": 0.543, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 2.0392866134643555, |
| "learning_rate": 9.967529142880592e-06, |
| "loss": 0.6598, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.402, |
| "grad_norm": 1.7297391891479492, |
| "learning_rate": 9.966191788709716e-06, |
| "loss": 0.5494, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.404, |
| "grad_norm": 1.7709267139434814, |
| "learning_rate": 9.964827540532685e-06, |
| "loss": 0.534, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.406, |
| "grad_norm": 1.688926339149475, |
| "learning_rate": 9.963436405737476e-06, |
| "loss": 0.4496, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.408, |
| "grad_norm": 1.7673691511154175, |
| "learning_rate": 9.962018391857665e-06, |
| "loss": 0.4809, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 1.7127712965011597, |
| "learning_rate": 9.960573506572391e-06, |
| "loss": 0.5793, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.412, |
| "grad_norm": 2.070152997970581, |
| "learning_rate": 9.959101757706308e-06, |
| "loss": 0.6257, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.414, |
| "grad_norm": 1.820627212524414, |
| "learning_rate": 9.957603153229559e-06, |
| "loss": 0.5028, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 2.0696165561676025, |
| "learning_rate": 9.95607770125771e-06, |
| "loss": 0.484, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.418, |
| "grad_norm": 2.1388514041900635, |
| "learning_rate": 9.95452541005172e-06, |
| "loss": 0.565, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 1.8014533519744873, |
| "learning_rate": 9.952946288017899e-06, |
| "loss": 0.5564, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.422, |
| "grad_norm": 1.9076919555664062, |
| "learning_rate": 9.951340343707852e-06, |
| "loss": 0.4663, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.424, |
| "grad_norm": 2.1348037719726562, |
| "learning_rate": 9.94970758581844e-06, |
| "loss": 0.5435, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.426, |
| "grad_norm": 1.8880589008331299, |
| "learning_rate": 9.948048023191728e-06, |
| "loss": 0.4695, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.428, |
| "grad_norm": 1.757129192352295, |
| "learning_rate": 9.946361664814942e-06, |
| "loss": 0.6256, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 2.1104235649108887, |
| "learning_rate": 9.94464851982042e-06, |
| "loss": 0.4596, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.432, |
| "grad_norm": 1.8526471853256226, |
| "learning_rate": 9.942908597485558e-06, |
| "loss": 0.5644, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.434, |
| "grad_norm": 1.7613459825515747, |
| "learning_rate": 9.941141907232766e-06, |
| "loss": 0.4666, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.436, |
| "grad_norm": 1.5490107536315918, |
| "learning_rate": 9.939348458629406e-06, |
| "loss": 0.4919, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.438, |
| "grad_norm": 1.9331456422805786, |
| "learning_rate": 9.937528261387753e-06, |
| "loss": 0.5037, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 2.422027826309204, |
| "learning_rate": 9.93568132536494e-06, |
| "loss": 0.5772, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.442, |
| "grad_norm": 1.615033745765686, |
| "learning_rate": 9.933807660562898e-06, |
| "loss": 0.4155, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.444, |
| "grad_norm": 1.6419297456741333, |
| "learning_rate": 9.9319072771283e-06, |
| "loss": 0.4746, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.446, |
| "grad_norm": 2.0711774826049805, |
| "learning_rate": 9.929980185352525e-06, |
| "loss": 0.5218, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 1.5442270040512085, |
| "learning_rate": 9.928026395671577e-06, |
| "loss": 0.4077, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.8072466850280762, |
| "learning_rate": 9.926045918666045e-06, |
| "loss": 0.5136, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.452, |
| "grad_norm": 2.0246639251708984, |
| "learning_rate": 9.924038765061042e-06, |
| "loss": 0.4485, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.454, |
| "grad_norm": 2.249467372894287, |
| "learning_rate": 9.92200494572614e-06, |
| "loss": 0.4849, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.456, |
| "grad_norm": 1.6570943593978882, |
| "learning_rate": 9.919944471675328e-06, |
| "loss": 0.4854, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.458, |
| "grad_norm": 1.7272253036499023, |
| "learning_rate": 9.91785735406693e-06, |
| "loss": 0.4429, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 1.8478769063949585, |
| "learning_rate": 9.915743604203563e-06, |
| "loss": 0.404, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.462, |
| "grad_norm": 1.7986191511154175, |
| "learning_rate": 9.913603233532067e-06, |
| "loss": 0.4054, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.464, |
| "grad_norm": 2.0111937522888184, |
| "learning_rate": 9.911436253643445e-06, |
| "loss": 0.4901, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.466, |
| "grad_norm": 1.6492829322814941, |
| "learning_rate": 9.909242676272797e-06, |
| "loss": 0.4451, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.468, |
| "grad_norm": 1.6549859046936035, |
| "learning_rate": 9.907022513299264e-06, |
| "loss": 0.4552, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 1.8157479763031006, |
| "learning_rate": 9.904775776745959e-06, |
| "loss": 0.5326, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.472, |
| "grad_norm": 1.8481768369674683, |
| "learning_rate": 9.902502478779897e-06, |
| "loss": 0.4379, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.474, |
| "grad_norm": 1.4958271980285645, |
| "learning_rate": 9.90020263171194e-06, |
| "loss": 0.3648, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.476, |
| "grad_norm": 1.71196711063385, |
| "learning_rate": 9.89787624799672e-06, |
| "loss": 0.462, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.478, |
| "grad_norm": 2.0301215648651123, |
| "learning_rate": 9.89552334023258e-06, |
| "loss": 0.4472, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 1.8186923265457153, |
| "learning_rate": 9.893143921161501e-06, |
| "loss": 0.495, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.482, |
| "grad_norm": 1.9236863851547241, |
| "learning_rate": 9.890738003669029e-06, |
| "loss": 0.4073, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.484, |
| "grad_norm": 2.024024486541748, |
| "learning_rate": 9.888305600784217e-06, |
| "loss": 0.4675, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.486, |
| "grad_norm": 1.7005952596664429, |
| "learning_rate": 9.88584672567954e-06, |
| "loss": 0.383, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.488, |
| "grad_norm": 1.9218895435333252, |
| "learning_rate": 9.883361391670841e-06, |
| "loss": 0.4722, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 1.7836483716964722, |
| "learning_rate": 9.880849612217238e-06, |
| "loss": 0.3517, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.492, |
| "grad_norm": 1.9461677074432373, |
| "learning_rate": 9.878311400921072e-06, |
| "loss": 0.5054, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.494, |
| "grad_norm": 1.580214500427246, |
| "learning_rate": 9.875746771527817e-06, |
| "loss": 0.4537, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.496, |
| "grad_norm": 1.7891846895217896, |
| "learning_rate": 9.873155737926014e-06, |
| "loss": 0.527, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.498, |
| "grad_norm": 1.7294811010360718, |
| "learning_rate": 9.870538314147194e-06, |
| "loss": 0.3914, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.6117743253707886, |
| "learning_rate": 9.867894514365802e-06, |
| "loss": 0.3725, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.502, |
| "grad_norm": 1.5580772161483765, |
| "learning_rate": 9.86522435289912e-06, |
| "loss": 0.3674, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.504, |
| "grad_norm": 2.0612754821777344, |
| "learning_rate": 9.862527844207189e-06, |
| "loss": 0.4836, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.506, |
| "grad_norm": 1.9967230558395386, |
| "learning_rate": 9.859805002892733e-06, |
| "loss": 0.4445, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.508, |
| "grad_norm": 1.7749967575073242, |
| "learning_rate": 9.857055843701073e-06, |
| "loss": 0.3683, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 1.5155335664749146, |
| "learning_rate": 9.85428038152006e-06, |
| "loss": 0.2863, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 2.0063834190368652, |
| "learning_rate": 9.851478631379982e-06, |
| "loss": 0.4551, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.514, |
| "grad_norm": 1.7783546447753906, |
| "learning_rate": 9.84865060845349e-06, |
| "loss": 0.3491, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.516, |
| "grad_norm": 2.130695343017578, |
| "learning_rate": 9.845796328055505e-06, |
| "loss": 0.3395, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.518, |
| "grad_norm": 1.6611360311508179, |
| "learning_rate": 9.842915805643156e-06, |
| "loss": 0.3728, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 1.8808437585830688, |
| "learning_rate": 9.840009056815674e-06, |
| "loss": 0.4014, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.522, |
| "grad_norm": 1.5398544073104858, |
| "learning_rate": 9.83707609731432e-06, |
| "loss": 0.3664, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.524, |
| "grad_norm": 1.8447811603546143, |
| "learning_rate": 9.834116943022299e-06, |
| "loss": 0.4879, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.526, |
| "grad_norm": 1.7809211015701294, |
| "learning_rate": 9.831131609964664e-06, |
| "loss": 0.3631, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.528, |
| "grad_norm": 1.5172531604766846, |
| "learning_rate": 9.828120114308248e-06, |
| "loss": 0.3715, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 1.573677659034729, |
| "learning_rate": 9.825082472361558e-06, |
| "loss": 0.2771, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.532, |
| "grad_norm": 1.915945291519165, |
| "learning_rate": 9.822018700574696e-06, |
| "loss": 0.4118, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.534, |
| "grad_norm": 1.861373782157898, |
| "learning_rate": 9.818928815539266e-06, |
| "loss": 0.3969, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.536, |
| "grad_norm": 1.3833045959472656, |
| "learning_rate": 9.815812833988292e-06, |
| "loss": 0.2481, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.538, |
| "grad_norm": 1.7370811700820923, |
| "learning_rate": 9.812670772796113e-06, |
| "loss": 0.4429, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.5217926502227783, |
| "learning_rate": 9.809502648978311e-06, |
| "loss": 0.3402, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.542, |
| "grad_norm": 1.653145432472229, |
| "learning_rate": 9.806308479691595e-06, |
| "loss": 0.3477, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 1.70926034450531, |
| "learning_rate": 9.803088282233733e-06, |
| "loss": 0.4009, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.546, |
| "grad_norm": 1.609721064567566, |
| "learning_rate": 9.799842074043438e-06, |
| "loss": 0.3397, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.548, |
| "grad_norm": 1.60042142868042, |
| "learning_rate": 9.796569872700287e-06, |
| "loss": 0.3678, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 1.543382167816162, |
| "learning_rate": 9.793271695924621e-06, |
| "loss": 0.3365, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.552, |
| "grad_norm": 2.711608409881592, |
| "learning_rate": 9.789947561577445e-06, |
| "loss": 0.4371, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.554, |
| "grad_norm": 1.8913193941116333, |
| "learning_rate": 9.786597487660336e-06, |
| "loss": 0.4662, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.556, |
| "grad_norm": 1.6751612424850464, |
| "learning_rate": 9.78322149231535e-06, |
| "loss": 0.4259, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.558, |
| "grad_norm": 1.5974494218826294, |
| "learning_rate": 9.779819593824909e-06, |
| "loss": 0.4305, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 1.4549661874771118, |
| "learning_rate": 9.776391810611719e-06, |
| "loss": 0.3568, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.562, |
| "grad_norm": 1.9823698997497559, |
| "learning_rate": 9.77293816123866e-06, |
| "loss": 0.4721, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.564, |
| "grad_norm": 2.018462657928467, |
| "learning_rate": 9.769458664408689e-06, |
| "loss": 0.4735, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.566, |
| "grad_norm": 1.8653925657272339, |
| "learning_rate": 9.765953338964736e-06, |
| "loss": 0.3503, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.568, |
| "grad_norm": 1.7118836641311646, |
| "learning_rate": 9.762422203889604e-06, |
| "loss": 0.3659, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 1.6338058710098267, |
| "learning_rate": 9.75886527830587e-06, |
| "loss": 0.2939, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.572, |
| "grad_norm": 1.5765326023101807, |
| "learning_rate": 9.755282581475769e-06, |
| "loss": 0.2898, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.574, |
| "grad_norm": 1.7563010454177856, |
| "learning_rate": 9.751674132801106e-06, |
| "loss": 0.305, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 1.5117406845092773, |
| "learning_rate": 9.748039951823141e-06, |
| "loss": 0.2965, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.578, |
| "grad_norm": 1.775891900062561, |
| "learning_rate": 9.744380058222483e-06, |
| "loss": 0.3359, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 1.7677249908447266, |
| "learning_rate": 9.740694471818988e-06, |
| "loss": 0.3332, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.582, |
| "grad_norm": 1.7065130472183228, |
| "learning_rate": 9.736983212571646e-06, |
| "loss": 0.3361, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.584, |
| "grad_norm": 2.0350592136383057, |
| "learning_rate": 9.733246300578482e-06, |
| "loss": 0.3061, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.586, |
| "grad_norm": 1.881553053855896, |
| "learning_rate": 9.729483756076436e-06, |
| "loss": 0.4417, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.588, |
| "grad_norm": 1.845169186592102, |
| "learning_rate": 9.72569559944126e-06, |
| "loss": 0.401, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 1.8087979555130005, |
| "learning_rate": 9.721881851187406e-06, |
| "loss": 0.4629, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.592, |
| "grad_norm": 1.7599945068359375, |
| "learning_rate": 9.718042531967918e-06, |
| "loss": 0.2951, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.594, |
| "grad_norm": 1.8704352378845215, |
| "learning_rate": 9.714177662574316e-06, |
| "loss": 0.4033, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.596, |
| "grad_norm": 1.8199055194854736, |
| "learning_rate": 9.710287263936485e-06, |
| "loss": 0.4836, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.598, |
| "grad_norm": 1.7064282894134521, |
| "learning_rate": 9.70637135712256e-06, |
| "loss": 0.3298, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 2.22114896774292, |
| "learning_rate": 9.702429963338812e-06, |
| "loss": 0.399, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.602, |
| "grad_norm": 1.8880242109298706, |
| "learning_rate": 9.698463103929542e-06, |
| "loss": 0.3465, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.604, |
| "grad_norm": 1.886757493019104, |
| "learning_rate": 9.694470800376951e-06, |
| "loss": 0.3468, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.606, |
| "grad_norm": 1.476881742477417, |
| "learning_rate": 9.690453074301035e-06, |
| "loss": 0.2446, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 1.8497904539108276, |
| "learning_rate": 9.68640994745946e-06, |
| "loss": 0.4195, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 1.8314309120178223, |
| "learning_rate": 9.682341441747446e-06, |
| "loss": 0.3153, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.612, |
| "grad_norm": 1.9367709159851074, |
| "learning_rate": 9.678247579197658e-06, |
| "loss": 0.3185, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.614, |
| "grad_norm": 1.4805539846420288, |
| "learning_rate": 9.674128381980073e-06, |
| "loss": 0.2507, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.616, |
| "grad_norm": 1.6094000339508057, |
| "learning_rate": 9.669983872401868e-06, |
| "loss": 0.2187, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.618, |
| "grad_norm": 1.5348446369171143, |
| "learning_rate": 9.665814072907293e-06, |
| "loss": 0.2326, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 1.8826684951782227, |
| "learning_rate": 9.661619006077562e-06, |
| "loss": 0.3359, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.622, |
| "grad_norm": 1.7773888111114502, |
| "learning_rate": 9.657398694630713e-06, |
| "loss": 0.2697, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.624, |
| "grad_norm": 1.7263822555541992, |
| "learning_rate": 9.653153161421497e-06, |
| "loss": 0.2422, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.626, |
| "grad_norm": 2.116281747817993, |
| "learning_rate": 9.648882429441258e-06, |
| "loss": 0.3718, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.628, |
| "grad_norm": 1.7101234197616577, |
| "learning_rate": 9.644586521817792e-06, |
| "loss": 0.3674, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 2.12149977684021, |
| "learning_rate": 9.640265461815235e-06, |
| "loss": 0.5202, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.632, |
| "grad_norm": 2.06618070602417, |
| "learning_rate": 9.635919272833938e-06, |
| "loss": 0.3975, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.634, |
| "grad_norm": 1.9198070764541626, |
| "learning_rate": 9.63154797841033e-06, |
| "loss": 0.343, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.636, |
| "grad_norm": 1.5635013580322266, |
| "learning_rate": 9.627151602216801e-06, |
| "loss": 0.3303, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.638, |
| "grad_norm": 1.5643494129180908, |
| "learning_rate": 9.622730168061568e-06, |
| "loss": 0.2927, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 1.8530386686325073, |
| "learning_rate": 9.618283699888543e-06, |
| "loss": 0.2539, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.642, |
| "grad_norm": 2.2413387298583984, |
| "learning_rate": 9.613812221777212e-06, |
| "loss": 0.3513, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.644, |
| "grad_norm": 2.0326476097106934, |
| "learning_rate": 9.609315757942504e-06, |
| "loss": 0.2588, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.646, |
| "grad_norm": 2.1528379917144775, |
| "learning_rate": 9.604794332734647e-06, |
| "loss": 0.2732, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.648, |
| "grad_norm": 1.8883870840072632, |
| "learning_rate": 9.600247970639053e-06, |
| "loss": 0.3172, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 1.8538719415664673, |
| "learning_rate": 9.595676696276173e-06, |
| "loss": 0.3115, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.652, |
| "grad_norm": 1.676763653755188, |
| "learning_rate": 9.591080534401371e-06, |
| "loss": 0.3002, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.654, |
| "grad_norm": 1.8477318286895752, |
| "learning_rate": 9.586459509904786e-06, |
| "loss": 0.3321, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.656, |
| "grad_norm": 1.7304288148880005, |
| "learning_rate": 9.581813647811199e-06, |
| "loss": 0.2766, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.658, |
| "grad_norm": 1.8328994512557983, |
| "learning_rate": 9.577142973279896e-06, |
| "loss": 0.3712, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 1.7225216627120972, |
| "learning_rate": 9.572447511604536e-06, |
| "loss": 0.3262, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.662, |
| "grad_norm": 1.5904541015625, |
| "learning_rate": 9.567727288213005e-06, |
| "loss": 0.2373, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.664, |
| "grad_norm": 1.7042733430862427, |
| "learning_rate": 9.56298232866729e-06, |
| "loss": 0.3051, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.666, |
| "grad_norm": 1.7410995960235596, |
| "learning_rate": 9.55821265866333e-06, |
| "loss": 0.3074, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.668, |
| "grad_norm": 1.7952817678451538, |
| "learning_rate": 9.553418304030886e-06, |
| "loss": 0.3572, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 1.4647603034973145, |
| "learning_rate": 9.548599290733393e-06, |
| "loss": 0.2161, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 2.135939359664917, |
| "learning_rate": 9.543755644867823e-06, |
| "loss": 0.4055, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.674, |
| "grad_norm": 1.717570424079895, |
| "learning_rate": 9.538887392664544e-06, |
| "loss": 0.2576, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.676, |
| "grad_norm": 1.7470098733901978, |
| "learning_rate": 9.53399456048718e-06, |
| "loss": 0.3408, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.678, |
| "grad_norm": 1.8334686756134033, |
| "learning_rate": 9.529077174832466e-06, |
| "loss": 0.3347, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 2.086667537689209, |
| "learning_rate": 9.524135262330098e-06, |
| "loss": 0.3498, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.682, |
| "grad_norm": 2.0446317195892334, |
| "learning_rate": 9.519168849742603e-06, |
| "loss": 0.4212, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.684, |
| "grad_norm": 1.5848861932754517, |
| "learning_rate": 9.514177963965181e-06, |
| "loss": 0.2083, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.686, |
| "grad_norm": 1.6067830324172974, |
| "learning_rate": 9.50916263202557e-06, |
| "loss": 0.3383, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.688, |
| "grad_norm": 1.7842918634414673, |
| "learning_rate": 9.504122881083886e-06, |
| "loss": 0.376, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.6721829175949097, |
| "learning_rate": 9.499058738432492e-06, |
| "loss": 0.2289, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.692, |
| "grad_norm": 1.4510334730148315, |
| "learning_rate": 9.493970231495836e-06, |
| "loss": 0.2032, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.694, |
| "grad_norm": 1.9582585096359253, |
| "learning_rate": 9.488857387830315e-06, |
| "loss": 0.2712, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.696, |
| "grad_norm": 1.8526630401611328, |
| "learning_rate": 9.483720235124113e-06, |
| "loss": 0.4167, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.698, |
| "grad_norm": 1.9199018478393555, |
| "learning_rate": 9.478558801197065e-06, |
| "loss": 0.2576, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.4618760347366333, |
| "learning_rate": 9.473373114000493e-06, |
| "loss": 0.1777, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.702, |
| "grad_norm": 2.0231404304504395, |
| "learning_rate": 9.468163201617063e-06, |
| "loss": 0.254, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 1.3274458646774292, |
| "learning_rate": 9.46292909226063e-06, |
| "loss": 0.1577, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.706, |
| "grad_norm": 2.0219509601593018, |
| "learning_rate": 9.457670814276083e-06, |
| "loss": 0.278, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.708, |
| "grad_norm": 1.5874745845794678, |
| "learning_rate": 9.452388396139202e-06, |
| "loss": 0.2046, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 1.2424635887145996, |
| "learning_rate": 9.44708186645649e-06, |
| "loss": 0.1533, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.712, |
| "grad_norm": 1.6024327278137207, |
| "learning_rate": 9.441751253965022e-06, |
| "loss": 0.2354, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.714, |
| "grad_norm": 1.4302263259887695, |
| "learning_rate": 9.436396587532297e-06, |
| "loss": 0.181, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.716, |
| "grad_norm": 1.3045355081558228, |
| "learning_rate": 9.431017896156074e-06, |
| "loss": 0.1141, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.718, |
| "grad_norm": 1.9596627950668335, |
| "learning_rate": 9.425615208964217e-06, |
| "loss": 0.248, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 1.5831594467163086, |
| "learning_rate": 9.420188555214537e-06, |
| "loss": 0.2521, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.722, |
| "grad_norm": 1.524751901626587, |
| "learning_rate": 9.414737964294636e-06, |
| "loss": 0.3191, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.724, |
| "grad_norm": 1.6688232421875, |
| "learning_rate": 9.40926346572174e-06, |
| "loss": 0.2146, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.726, |
| "grad_norm": 1.8539663553237915, |
| "learning_rate": 9.403765089142554e-06, |
| "loss": 0.3524, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.728, |
| "grad_norm": 1.5958819389343262, |
| "learning_rate": 9.398242864333084e-06, |
| "loss": 0.2913, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 1.3911813497543335, |
| "learning_rate": 9.392696821198488e-06, |
| "loss": 0.2529, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.732, |
| "grad_norm": 1.3639315366744995, |
| "learning_rate": 9.38712698977291e-06, |
| "loss": 0.1505, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.734, |
| "grad_norm": 1.8511894941329956, |
| "learning_rate": 9.381533400219319e-06, |
| "loss": 0.2977, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 1.9356679916381836, |
| "learning_rate": 9.375916082829341e-06, |
| "loss": 0.2784, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.738, |
| "grad_norm": 1.784163236618042, |
| "learning_rate": 9.370275068023097e-06, |
| "loss": 0.3302, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 1.436231255531311, |
| "learning_rate": 9.364610386349048e-06, |
| "loss": 0.1248, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.742, |
| "grad_norm": 2.2542660236358643, |
| "learning_rate": 9.358922068483813e-06, |
| "loss": 0.3149, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.744, |
| "grad_norm": 1.5600509643554688, |
| "learning_rate": 9.35321014523201e-06, |
| "loss": 0.1924, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.746, |
| "grad_norm": 1.7589770555496216, |
| "learning_rate": 9.347474647526095e-06, |
| "loss": 0.2127, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.748, |
| "grad_norm": 1.6427364349365234, |
| "learning_rate": 9.34171560642619e-06, |
| "loss": 0.1524, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 1.5470690727233887, |
| "learning_rate": 9.335933053119906e-06, |
| "loss": 0.2051, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.752, |
| "grad_norm": 1.5658862590789795, |
| "learning_rate": 9.330127018922195e-06, |
| "loss": 0.2415, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.754, |
| "grad_norm": 1.5468968152999878, |
| "learning_rate": 9.324297535275156e-06, |
| "loss": 0.1993, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.756, |
| "grad_norm": 1.397178053855896, |
| "learning_rate": 9.318444633747884e-06, |
| "loss": 0.1774, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.758, |
| "grad_norm": 1.6021531820297241, |
| "learning_rate": 9.312568346036288e-06, |
| "loss": 0.2997, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 1.580793023109436, |
| "learning_rate": 9.306668703962927e-06, |
| "loss": 0.196, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.762, |
| "grad_norm": 1.7833058834075928, |
| "learning_rate": 9.30074573947683e-06, |
| "loss": 0.2005, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.764, |
| "grad_norm": 1.6787939071655273, |
| "learning_rate": 9.294799484653323e-06, |
| "loss": 0.1089, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.766, |
| "grad_norm": 1.4611705541610718, |
| "learning_rate": 9.288829971693869e-06, |
| "loss": 0.2504, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 1.9019439220428467, |
| "learning_rate": 9.282837232925876e-06, |
| "loss": 0.1973, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 1.9683581590652466, |
| "learning_rate": 9.276821300802535e-06, |
| "loss": 0.241, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.772, |
| "grad_norm": 1.6254339218139648, |
| "learning_rate": 9.27078220790263e-06, |
| "loss": 0.2799, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.774, |
| "grad_norm": 1.6056698560714722, |
| "learning_rate": 9.264719986930376e-06, |
| "loss": 0.1804, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.776, |
| "grad_norm": 1.4303953647613525, |
| "learning_rate": 9.25863467071524e-06, |
| "loss": 0.2681, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.778, |
| "grad_norm": 1.707230806350708, |
| "learning_rate": 9.25252629221175e-06, |
| "loss": 0.2711, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 1.709281086921692, |
| "learning_rate": 9.246394884499334e-06, |
| "loss": 0.256, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.782, |
| "grad_norm": 1.6412250995635986, |
| "learning_rate": 9.24024048078213e-06, |
| "loss": 0.2271, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.784, |
| "grad_norm": 1.3375557661056519, |
| "learning_rate": 9.234063114388809e-06, |
| "loss": 0.2052, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.786, |
| "grad_norm": 1.449644923210144, |
| "learning_rate": 9.227862818772392e-06, |
| "loss": 0.2519, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.788, |
| "grad_norm": 1.5053601264953613, |
| "learning_rate": 9.221639627510076e-06, |
| "loss": 0.2186, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 1.5800226926803589, |
| "learning_rate": 9.215393574303043e-06, |
| "loss": 0.2031, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.792, |
| "grad_norm": 1.5477964878082275, |
| "learning_rate": 9.209124692976287e-06, |
| "loss": 0.1865, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.794, |
| "grad_norm": 1.7126036882400513, |
| "learning_rate": 9.202833017478421e-06, |
| "loss": 0.2517, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.796, |
| "grad_norm": 1.5103005170822144, |
| "learning_rate": 9.196518581881502e-06, |
| "loss": 0.2488, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.798, |
| "grad_norm": 1.8108140230178833, |
| "learning_rate": 9.190181420380838e-06, |
| "loss": 0.2877, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.5517218112945557, |
| "learning_rate": 9.18382156729481e-06, |
| "loss": 0.1755, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.802, |
| "grad_norm": 1.5021002292633057, |
| "learning_rate": 9.177439057064684e-06, |
| "loss": 0.1731, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.804, |
| "grad_norm": 1.3245618343353271, |
| "learning_rate": 9.171033924254421e-06, |
| "loss": 0.1414, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.806, |
| "grad_norm": 1.6147440671920776, |
| "learning_rate": 9.164606203550498e-06, |
| "loss": 0.1876, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.808, |
| "grad_norm": 1.5092687606811523, |
| "learning_rate": 9.15815592976171e-06, |
| "loss": 0.138, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 1.7393368482589722, |
| "learning_rate": 9.151683137818989e-06, |
| "loss": 0.2142, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.812, |
| "grad_norm": 1.7157940864562988, |
| "learning_rate": 9.145187862775208e-06, |
| "loss": 0.1615, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.814, |
| "grad_norm": 1.4408173561096191, |
| "learning_rate": 9.138670139805004e-06, |
| "loss": 0.139, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.816, |
| "grad_norm": 1.648302435874939, |
| "learning_rate": 9.132130004204569e-06, |
| "loss": 0.1163, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.818, |
| "grad_norm": 1.5025349855422974, |
| "learning_rate": 9.125567491391476e-06, |
| "loss": 0.1426, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 1.495948076248169, |
| "learning_rate": 9.118982636904476e-06, |
| "loss": 0.1804, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.822, |
| "grad_norm": 1.4459843635559082, |
| "learning_rate": 9.112375476403313e-06, |
| "loss": 0.1603, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.824, |
| "grad_norm": 1.8957642316818237, |
| "learning_rate": 9.10574604566852e-06, |
| "loss": 0.2707, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.826, |
| "grad_norm": 2.036060094833374, |
| "learning_rate": 9.099094380601244e-06, |
| "loss": 0.3142, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.828, |
| "grad_norm": 1.4369035959243774, |
| "learning_rate": 9.09242051722303e-06, |
| "loss": 0.2311, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 1.3601306676864624, |
| "learning_rate": 9.085724491675642e-06, |
| "loss": 0.1153, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 1.539316177368164, |
| "learning_rate": 9.079006340220862e-06, |
| "loss": 0.1519, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.834, |
| "grad_norm": 1.6601051092147827, |
| "learning_rate": 9.072266099240286e-06, |
| "loss": 0.1848, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.836, |
| "grad_norm": 1.8635293245315552, |
| "learning_rate": 9.065503805235139e-06, |
| "loss": 0.1731, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.838, |
| "grad_norm": 2.097959518432617, |
| "learning_rate": 9.058719494826076e-06, |
| "loss": 0.404, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 2.099665641784668, |
| "learning_rate": 9.051913204752972e-06, |
| "loss": 0.2034, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.842, |
| "grad_norm": 1.8327354192733765, |
| "learning_rate": 9.045084971874738e-06, |
| "loss": 0.2435, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.844, |
| "grad_norm": 1.6721452474594116, |
| "learning_rate": 9.03823483316911e-06, |
| "loss": 0.2248, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.846, |
| "grad_norm": 1.5078072547912598, |
| "learning_rate": 9.031362825732456e-06, |
| "loss": 0.1608, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.848, |
| "grad_norm": 1.7546452283859253, |
| "learning_rate": 9.02446898677957e-06, |
| "loss": 0.1649, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.5506815910339355, |
| "learning_rate": 9.017553353643479e-06, |
| "loss": 0.245, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.852, |
| "grad_norm": 1.6377143859863281, |
| "learning_rate": 9.01061596377522e-06, |
| "loss": 0.1609, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.854, |
| "grad_norm": 1.3670685291290283, |
| "learning_rate": 9.003656854743667e-06, |
| "loss": 0.1507, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.856, |
| "grad_norm": 1.7809122800827026, |
| "learning_rate": 8.996676064235308e-06, |
| "loss": 0.2064, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.858, |
| "grad_norm": 1.5835070610046387, |
| "learning_rate": 8.989673630054044e-06, |
| "loss": 0.1358, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 1.5268651247024536, |
| "learning_rate": 8.982649590120982e-06, |
| "loss": 0.1558, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.862, |
| "grad_norm": 2.022331714630127, |
| "learning_rate": 8.97560398247424e-06, |
| "loss": 0.1557, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 1.5841243267059326, |
| "learning_rate": 8.96853684526873e-06, |
| "loss": 0.0923, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.866, |
| "grad_norm": 1.6304723024368286, |
| "learning_rate": 8.961448216775955e-06, |
| "loss": 0.1563, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.868, |
| "grad_norm": 1.7919025421142578, |
| "learning_rate": 8.954338135383804e-06, |
| "loss": 0.1692, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 1.8628870248794556, |
| "learning_rate": 8.947206639596346e-06, |
| "loss": 0.2082, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.872, |
| "grad_norm": 1.5078816413879395, |
| "learning_rate": 8.94005376803361e-06, |
| "loss": 0.2606, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.874, |
| "grad_norm": 1.5765806436538696, |
| "learning_rate": 8.932879559431392e-06, |
| "loss": 0.1426, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.876, |
| "grad_norm": 1.343929648399353, |
| "learning_rate": 8.925684052641027e-06, |
| "loss": 0.1908, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.878, |
| "grad_norm": 1.702836513519287, |
| "learning_rate": 8.9184672866292e-06, |
| "loss": 0.2557, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 1.7433876991271973, |
| "learning_rate": 8.911229300477716e-06, |
| "loss": 0.2205, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.882, |
| "grad_norm": 1.3122010231018066, |
| "learning_rate": 8.903970133383297e-06, |
| "loss": 0.1327, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.884, |
| "grad_norm": 1.5010037422180176, |
| "learning_rate": 8.896689824657371e-06, |
| "loss": 0.1909, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.886, |
| "grad_norm": 1.507695198059082, |
| "learning_rate": 8.889388413725857e-06, |
| "loss": 0.1751, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.888, |
| "grad_norm": 1.7989376783370972, |
| "learning_rate": 8.882065940128946e-06, |
| "loss": 0.2674, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 1.7190501689910889, |
| "learning_rate": 8.874722443520898e-06, |
| "loss": 0.2409, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.892, |
| "grad_norm": 1.787239909172058, |
| "learning_rate": 8.867357963669821e-06, |
| "loss": 0.1394, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.894, |
| "grad_norm": 1.6416536569595337, |
| "learning_rate": 8.859972540457451e-06, |
| "loss": 0.1698, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 1.502375841140747, |
| "learning_rate": 8.852566213878947e-06, |
| "loss": 0.1149, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.898, |
| "grad_norm": 1.5897934436798096, |
| "learning_rate": 8.845139024042664e-06, |
| "loss": 0.1707, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.2941950559616089, |
| "learning_rate": 8.837691011169944e-06, |
| "loss": 0.104, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.902, |
| "grad_norm": 1.708298683166504, |
| "learning_rate": 8.83022221559489e-06, |
| "loss": 0.2151, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.904, |
| "grad_norm": 2.3875808715820312, |
| "learning_rate": 8.822732677764158e-06, |
| "loss": 0.2799, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.906, |
| "grad_norm": 1.5689988136291504, |
| "learning_rate": 8.815222438236726e-06, |
| "loss": 0.2035, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.908, |
| "grad_norm": 1.1103384494781494, |
| "learning_rate": 8.807691537683685e-06, |
| "loss": 0.0694, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 1.6364744901657104, |
| "learning_rate": 8.800140016888009e-06, |
| "loss": 0.1829, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.912, |
| "grad_norm": 1.566340446472168, |
| "learning_rate": 8.792567916744346e-06, |
| "loss": 0.2079, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.914, |
| "grad_norm": 1.5354089736938477, |
| "learning_rate": 8.784975278258783e-06, |
| "loss": 0.138, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.916, |
| "grad_norm": 1.6658852100372314, |
| "learning_rate": 8.777362142548636e-06, |
| "loss": 0.1845, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.918, |
| "grad_norm": 1.4948214292526245, |
| "learning_rate": 8.769728550842217e-06, |
| "loss": 0.1475, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 1.6014717817306519, |
| "learning_rate": 8.762074544478622e-06, |
| "loss": 0.1891, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.922, |
| "grad_norm": 1.6478428840637207, |
| "learning_rate": 8.754400164907496e-06, |
| "loss": 0.1808, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.924, |
| "grad_norm": 2.035034418106079, |
| "learning_rate": 8.746705453688815e-06, |
| "loss": 0.1773, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.926, |
| "grad_norm": 1.97378408908844, |
| "learning_rate": 8.73899045249266e-06, |
| "loss": 0.2585, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.928, |
| "grad_norm": 1.7488819360733032, |
| "learning_rate": 8.73125520309899e-06, |
| "loss": 0.1975, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 1.4043940305709839, |
| "learning_rate": 8.723499747397415e-06, |
| "loss": 0.1401, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.932, |
| "grad_norm": 1.358202576637268, |
| "learning_rate": 8.715724127386971e-06, |
| "loss": 0.1652, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.934, |
| "grad_norm": 1.294463038444519, |
| "learning_rate": 8.707928385175898e-06, |
| "loss": 0.1187, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.936, |
| "grad_norm": 1.3944119215011597, |
| "learning_rate": 8.700112562981398e-06, |
| "loss": 0.1893, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.938, |
| "grad_norm": 1.4225037097930908, |
| "learning_rate": 8.692276703129421e-06, |
| "loss": 0.1225, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 1.2248589992523193, |
| "learning_rate": 8.68442084805442e-06, |
| "loss": 0.1162, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.942, |
| "grad_norm": 1.177075982093811, |
| "learning_rate": 8.676545040299145e-06, |
| "loss": 0.1072, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.944, |
| "grad_norm": 2.0721774101257324, |
| "learning_rate": 8.668649322514382e-06, |
| "loss": 0.1961, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.946, |
| "grad_norm": 1.694319725036621, |
| "learning_rate": 8.660733737458751e-06, |
| "loss": 0.1608, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.948, |
| "grad_norm": 1.47614586353302, |
| "learning_rate": 8.652798327998458e-06, |
| "loss": 0.1236, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 1.8103671073913574, |
| "learning_rate": 8.644843137107058e-06, |
| "loss": 0.2321, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.952, |
| "grad_norm": 1.6554206609725952, |
| "learning_rate": 8.636868207865244e-06, |
| "loss": 0.2501, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.954, |
| "grad_norm": 2.072645664215088, |
| "learning_rate": 8.628873583460593e-06, |
| "loss": 0.2904, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.956, |
| "grad_norm": 1.444131851196289, |
| "learning_rate": 8.620859307187339e-06, |
| "loss": 0.1377, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.958, |
| "grad_norm": 1.2418184280395508, |
| "learning_rate": 8.61282542244614e-06, |
| "loss": 0.0772, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 1.3381738662719727, |
| "learning_rate": 8.604771972743848e-06, |
| "loss": 0.0934, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.962, |
| "grad_norm": 1.494141936302185, |
| "learning_rate": 8.596699001693257e-06, |
| "loss": 0.1627, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.964, |
| "grad_norm": 1.06540048122406, |
| "learning_rate": 8.588606553012884e-06, |
| "loss": 0.058, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.966, |
| "grad_norm": 1.520519733428955, |
| "learning_rate": 8.580494670526725e-06, |
| "loss": 0.1542, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.968, |
| "grad_norm": 1.3340709209442139, |
| "learning_rate": 8.572363398164017e-06, |
| "loss": 0.1246, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 1.5186916589736938, |
| "learning_rate": 8.564212779959003e-06, |
| "loss": 0.1466, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.972, |
| "grad_norm": 1.2340925931930542, |
| "learning_rate": 8.556042860050686e-06, |
| "loss": 0.0813, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.974, |
| "grad_norm": 1.3246345520019531, |
| "learning_rate": 8.547853682682605e-06, |
| "loss": 0.1439, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.976, |
| "grad_norm": 1.258293867111206, |
| "learning_rate": 8.539645292202579e-06, |
| "loss": 0.1046, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.978, |
| "grad_norm": 1.4723703861236572, |
| "learning_rate": 8.531417733062476e-06, |
| "loss": 0.1719, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 1.373752236366272, |
| "learning_rate": 8.523171049817974e-06, |
| "loss": 0.1234, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.982, |
| "grad_norm": 1.7229127883911133, |
| "learning_rate": 8.51490528712831e-06, |
| "loss": 0.1663, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.984, |
| "grad_norm": 1.5550212860107422, |
| "learning_rate": 8.506620489756045e-06, |
| "loss": 0.1655, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.986, |
| "grad_norm": 1.3695895671844482, |
| "learning_rate": 8.498316702566828e-06, |
| "loss": 0.1345, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.988, |
| "grad_norm": 1.432288646697998, |
| "learning_rate": 8.489993970529137e-06, |
| "loss": 0.111, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 1.3665354251861572, |
| "learning_rate": 8.481652338714048e-06, |
| "loss": 0.1865, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.992, |
| "grad_norm": 1.4727295637130737, |
| "learning_rate": 8.473291852294986e-06, |
| "loss": 0.196, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.994, |
| "grad_norm": 1.5306068658828735, |
| "learning_rate": 8.464912556547486e-06, |
| "loss": 0.196, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.996, |
| "grad_norm": 1.512803554534912, |
| "learning_rate": 8.456514496848938e-06, |
| "loss": 0.0902, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.998, |
| "grad_norm": 1.4982142448425293, |
| "learning_rate": 8.44809771867835e-06, |
| "loss": 0.1175, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.9177815914154053, |
| "learning_rate": 8.439662267616093e-06, |
| "loss": 0.2034, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.002, |
| "grad_norm": 1.4074664115905762, |
| "learning_rate": 8.43120818934367e-06, |
| "loss": 0.0809, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.004, |
| "grad_norm": 1.3982102870941162, |
| "learning_rate": 8.422735529643445e-06, |
| "loss": 0.0911, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.006, |
| "grad_norm": 1.4477176666259766, |
| "learning_rate": 8.414244334398418e-06, |
| "loss": 0.1385, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.008, |
| "grad_norm": 1.540793538093567, |
| "learning_rate": 8.405734649591964e-06, |
| "loss": 0.156, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.01, |
| "grad_norm": 1.245995283126831, |
| "learning_rate": 8.397206521307584e-06, |
| "loss": 0.0915, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.012, |
| "grad_norm": 1.2165762186050415, |
| "learning_rate": 8.388659995728662e-06, |
| "loss": 0.0792, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.014, |
| "grad_norm": 1.4472664594650269, |
| "learning_rate": 8.380095119138209e-06, |
| "loss": 0.0765, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.016, |
| "grad_norm": 1.1731630563735962, |
| "learning_rate": 8.371511937918616e-06, |
| "loss": 0.0772, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.018, |
| "grad_norm": 1.2897391319274902, |
| "learning_rate": 8.362910498551402e-06, |
| "loss": 0.0827, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.02, |
| "grad_norm": 1.1869486570358276, |
| "learning_rate": 8.354290847616954e-06, |
| "loss": 0.0553, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.022, |
| "grad_norm": 1.0010886192321777, |
| "learning_rate": 8.345653031794292e-06, |
| "loss": 0.0357, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.024, |
| "grad_norm": 1.442855715751648, |
| "learning_rate": 8.3369970978608e-06, |
| "loss": 0.1053, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.026, |
| "grad_norm": 1.6988791227340698, |
| "learning_rate": 8.328323092691985e-06, |
| "loss": 0.1412, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.028, |
| "grad_norm": 1.2314667701721191, |
| "learning_rate": 8.319631063261209e-06, |
| "loss": 0.0752, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.03, |
| "grad_norm": 1.2584779262542725, |
| "learning_rate": 8.310921056639451e-06, |
| "loss": 0.0802, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.032, |
| "grad_norm": 1.3586853742599487, |
| "learning_rate": 8.302193119995038e-06, |
| "loss": 0.0683, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.034, |
| "grad_norm": 1.622609257698059, |
| "learning_rate": 8.293447300593402e-06, |
| "loss": 0.0936, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.036, |
| "grad_norm": 1.5385489463806152, |
| "learning_rate": 8.284683645796814e-06, |
| "loss": 0.1009, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.038, |
| "grad_norm": 1.3496136665344238, |
| "learning_rate": 8.275902203064125e-06, |
| "loss": 0.117, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.04, |
| "grad_norm": 1.3850297927856445, |
| "learning_rate": 8.267103019950529e-06, |
| "loss": 0.0871, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.042, |
| "grad_norm": 1.3631035089492798, |
| "learning_rate": 8.258286144107277e-06, |
| "loss": 0.1119, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.044, |
| "grad_norm": 1.0882558822631836, |
| "learning_rate": 8.249451623281444e-06, |
| "loss": 0.0629, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.046, |
| "grad_norm": 1.2716962099075317, |
| "learning_rate": 8.240599505315656e-06, |
| "loss": 0.1426, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.048, |
| "grad_norm": 0.9287087321281433, |
| "learning_rate": 8.231729838147833e-06, |
| "loss": 0.0439, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.05, |
| "grad_norm": 1.3585246801376343, |
| "learning_rate": 8.222842669810936e-06, |
| "loss": 0.0997, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.052, |
| "grad_norm": 1.942495584487915, |
| "learning_rate": 8.213938048432697e-06, |
| "loss": 0.1591, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.054, |
| "grad_norm": 0.990554928779602, |
| "learning_rate": 8.205016022235368e-06, |
| "loss": 0.0452, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.056, |
| "grad_norm": 1.2105649709701538, |
| "learning_rate": 8.196076639535453e-06, |
| "loss": 0.0484, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.058, |
| "grad_norm": 1.478988766670227, |
| "learning_rate": 8.18711994874345e-06, |
| "loss": 0.1172, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.06, |
| "grad_norm": 1.5153695344924927, |
| "learning_rate": 8.178145998363585e-06, |
| "loss": 0.1165, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.062, |
| "grad_norm": 1.9413139820098877, |
| "learning_rate": 8.16915483699355e-06, |
| "loss": 0.1477, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.064, |
| "grad_norm": 1.581265926361084, |
| "learning_rate": 8.160146513324256e-06, |
| "loss": 0.125, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.066, |
| "grad_norm": 1.3135696649551392, |
| "learning_rate": 8.151121076139534e-06, |
| "loss": 0.0889, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.068, |
| "grad_norm": 1.047702670097351, |
| "learning_rate": 8.142078574315907e-06, |
| "loss": 0.0592, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.07, |
| "grad_norm": 1.2292808294296265, |
| "learning_rate": 8.133019056822303e-06, |
| "loss": 0.0882, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.072, |
| "grad_norm": 1.0881916284561157, |
| "learning_rate": 8.123942572719801e-06, |
| "loss": 0.0667, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.074, |
| "grad_norm": 1.3366835117340088, |
| "learning_rate": 8.11484917116136e-06, |
| "loss": 0.0785, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.076, |
| "grad_norm": 1.179837942123413, |
| "learning_rate": 8.105738901391553e-06, |
| "loss": 0.0914, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.078, |
| "grad_norm": 1.1984621286392212, |
| "learning_rate": 8.096611812746302e-06, |
| "loss": 0.0604, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.08, |
| "grad_norm": 1.167480230331421, |
| "learning_rate": 8.087467954652608e-06, |
| "loss": 0.0792, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.082, |
| "grad_norm": 1.8635348081588745, |
| "learning_rate": 8.078307376628292e-06, |
| "loss": 0.0813, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.084, |
| "grad_norm": 1.6677511930465698, |
| "learning_rate": 8.069130128281714e-06, |
| "loss": 0.0936, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.086, |
| "grad_norm": 1.1988495588302612, |
| "learning_rate": 8.059936259311514e-06, |
| "loss": 0.0647, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.088, |
| "grad_norm": 1.2146140336990356, |
| "learning_rate": 8.05072581950634e-06, |
| "loss": 0.1185, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.09, |
| "grad_norm": 1.4924564361572266, |
| "learning_rate": 8.041498858744572e-06, |
| "loss": 0.1183, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.092, |
| "grad_norm": 1.1881672143936157, |
| "learning_rate": 8.032255426994069e-06, |
| "loss": 0.0465, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.094, |
| "grad_norm": 1.723354697227478, |
| "learning_rate": 8.022995574311876e-06, |
| "loss": 0.1093, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.096, |
| "grad_norm": 1.3465012311935425, |
| "learning_rate": 8.013719350843969e-06, |
| "loss": 0.0956, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.098, |
| "grad_norm": 1.4414770603179932, |
| "learning_rate": 8.004426806824985e-06, |
| "loss": 0.1028, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 1.053121566772461, |
| "learning_rate": 7.99511799257793e-06, |
| "loss": 0.0597, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.102, |
| "grad_norm": 1.1670550107955933, |
| "learning_rate": 7.985792958513932e-06, |
| "loss": 0.0658, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.104, |
| "grad_norm": 1.426408052444458, |
| "learning_rate": 7.97645175513195e-06, |
| "loss": 0.0767, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.106, |
| "grad_norm": 1.1832488775253296, |
| "learning_rate": 7.967094433018508e-06, |
| "loss": 0.089, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.108, |
| "grad_norm": 1.3124687671661377, |
| "learning_rate": 7.95772104284742e-06, |
| "loss": 0.0674, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.11, |
| "grad_norm": 1.193248987197876, |
| "learning_rate": 7.948331635379517e-06, |
| "loss": 0.0602, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.112, |
| "grad_norm": 1.0218859910964966, |
| "learning_rate": 7.938926261462366e-06, |
| "loss": 0.0692, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.114, |
| "grad_norm": 1.4290659427642822, |
| "learning_rate": 7.929504972030003e-06, |
| "loss": 0.1171, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.116, |
| "grad_norm": 1.4135209321975708, |
| "learning_rate": 7.920067818102652e-06, |
| "loss": 0.0841, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.1179999999999999, |
| "grad_norm": 1.4746184349060059, |
| "learning_rate": 7.910614850786448e-06, |
| "loss": 0.0894, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 1.4937189817428589, |
| "learning_rate": 7.901146121273165e-06, |
| "loss": 0.1206, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.1219999999999999, |
| "grad_norm": 1.5291107892990112, |
| "learning_rate": 7.891661680839932e-06, |
| "loss": 0.131, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.124, |
| "grad_norm": 1.065200924873352, |
| "learning_rate": 7.882161580848966e-06, |
| "loss": 0.0466, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.126, |
| "grad_norm": 1.4422272443771362, |
| "learning_rate": 7.872645872747281e-06, |
| "loss": 0.0745, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.1280000000000001, |
| "grad_norm": 0.9669689536094666, |
| "learning_rate": 7.863114608066417e-06, |
| "loss": 0.0744, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.13, |
| "grad_norm": 0.9402322173118591, |
| "learning_rate": 7.85356783842216e-06, |
| "loss": 0.062, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.1320000000000001, |
| "grad_norm": 1.339349627494812, |
| "learning_rate": 7.84400561551426e-06, |
| "loss": 0.1108, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.134, |
| "grad_norm": 1.3024260997772217, |
| "learning_rate": 7.834427991126155e-06, |
| "loss": 0.0687, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.1360000000000001, |
| "grad_norm": 1.167820692062378, |
| "learning_rate": 7.82483501712469e-06, |
| "loss": 0.0736, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.138, |
| "grad_norm": 1.4007395505905151, |
| "learning_rate": 7.815226745459831e-06, |
| "loss": 0.079, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.1400000000000001, |
| "grad_norm": 1.4969840049743652, |
| "learning_rate": 7.80560322816439e-06, |
| "loss": 0.0921, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.142, |
| "grad_norm": 1.6289410591125488, |
| "learning_rate": 7.795964517353734e-06, |
| "loss": 0.1482, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.144, |
| "grad_norm": 1.1523656845092773, |
| "learning_rate": 7.786310665225522e-06, |
| "loss": 0.06, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.146, |
| "grad_norm": 1.2471307516098022, |
| "learning_rate": 7.776641724059398e-06, |
| "loss": 0.0959, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.148, |
| "grad_norm": 1.285406470298767, |
| "learning_rate": 7.76695774621672e-06, |
| "loss": 0.0868, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.15, |
| "grad_norm": 1.1705622673034668, |
| "learning_rate": 7.757258784140286e-06, |
| "loss": 0.0666, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.152, |
| "grad_norm": 1.426995873451233, |
| "learning_rate": 7.747544890354031e-06, |
| "loss": 0.1463, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.154, |
| "grad_norm": 1.361234188079834, |
| "learning_rate": 7.737816117462752e-06, |
| "loss": 0.1009, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.156, |
| "grad_norm": 1.0927761793136597, |
| "learning_rate": 7.728072518151826e-06, |
| "loss": 0.0795, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.158, |
| "grad_norm": 1.3462144136428833, |
| "learning_rate": 7.718314145186918e-06, |
| "loss": 0.0921, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.16, |
| "grad_norm": 1.0117051601409912, |
| "learning_rate": 7.7085410514137e-06, |
| "loss": 0.0425, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.162, |
| "grad_norm": 1.2194225788116455, |
| "learning_rate": 7.698753289757565e-06, |
| "loss": 0.0533, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.164, |
| "grad_norm": 1.6970041990280151, |
| "learning_rate": 7.688950913223336e-06, |
| "loss": 0.1019, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.166, |
| "grad_norm": 1.1927522420883179, |
| "learning_rate": 7.679133974894984e-06, |
| "loss": 0.0902, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.168, |
| "grad_norm": 1.0492830276489258, |
| "learning_rate": 7.669302527935334e-06, |
| "loss": 0.06, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.17, |
| "grad_norm": 1.0530539751052856, |
| "learning_rate": 7.65945662558579e-06, |
| "loss": 0.0676, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.172, |
| "grad_norm": 1.158579707145691, |
| "learning_rate": 7.649596321166024e-06, |
| "loss": 0.0676, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.174, |
| "grad_norm": 1.1233559846878052, |
| "learning_rate": 7.639721668073718e-06, |
| "loss": 0.045, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.176, |
| "grad_norm": 1.0013350248336792, |
| "learning_rate": 7.629832719784245e-06, |
| "loss": 0.0714, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.178, |
| "grad_norm": 1.3075058460235596, |
| "learning_rate": 7.619929529850397e-06, |
| "loss": 0.065, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.18, |
| "grad_norm": 1.1243176460266113, |
| "learning_rate": 7.610012151902091e-06, |
| "loss": 0.0575, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.182, |
| "grad_norm": 1.3878341913223267, |
| "learning_rate": 7.600080639646077e-06, |
| "loss": 0.0544, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.184, |
| "grad_norm": 1.202048420906067, |
| "learning_rate": 7.590135046865652e-06, |
| "loss": 0.1045, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.186, |
| "grad_norm": 1.0473711490631104, |
| "learning_rate": 7.580175427420358e-06, |
| "loss": 0.043, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.188, |
| "grad_norm": 1.1366267204284668, |
| "learning_rate": 7.570201835245703e-06, |
| "loss": 0.0529, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.19, |
| "grad_norm": 1.1529453992843628, |
| "learning_rate": 7.560214324352858e-06, |
| "loss": 0.0696, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.192, |
| "grad_norm": 1.0182459354400635, |
| "learning_rate": 7.550212948828377e-06, |
| "loss": 0.0387, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.194, |
| "grad_norm": 1.2447912693023682, |
| "learning_rate": 7.54019776283389e-06, |
| "loss": 0.0706, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.196, |
| "grad_norm": 1.0753467082977295, |
| "learning_rate": 7.530168820605819e-06, |
| "loss": 0.0521, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.198, |
| "grad_norm": 1.341948390007019, |
| "learning_rate": 7.520126176455084e-06, |
| "loss": 0.0926, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 1.0409612655639648, |
| "learning_rate": 7.510069884766802e-06, |
| "loss": 0.0802, |
| "step": 600 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1500, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 20, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.5412817212850831e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|