| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.078762306610408, |
| "eval_steps": 50, |
| "global_step": 1096, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0028129395218002813, |
| "grad_norm": 75.0997085571289, |
| "learning_rate": 0.0, |
| "loss": 6.7334, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.005625879043600563, |
| "grad_norm": 77.64845275878906, |
| "learning_rate": 1.8181818181818183e-07, |
| "loss": 7.1245, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.008438818565400843, |
| "grad_norm": 72.08865356445312, |
| "learning_rate": 3.6363636363636366e-07, |
| "loss": 7.1721, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.011251758087201125, |
| "grad_norm": 86.44847106933594, |
| "learning_rate": 5.454545454545455e-07, |
| "loss": 7.4339, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.014064697609001406, |
| "grad_norm": 75.51171875, |
| "learning_rate": 7.272727272727273e-07, |
| "loss": 6.9712, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.016877637130801686, |
| "grad_norm": 73.0139389038086, |
| "learning_rate": 9.090909090909091e-07, |
| "loss": 7.0641, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.01969057665260197, |
| "grad_norm": 78.8460464477539, |
| "learning_rate": 1.090909090909091e-06, |
| "loss": 7.3246, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.02250351617440225, |
| "grad_norm": 73.03649139404297, |
| "learning_rate": 1.2727272727272728e-06, |
| "loss": 6.5645, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.02531645569620253, |
| "grad_norm": 83.64058685302734, |
| "learning_rate": 1.4545454545454546e-06, |
| "loss": 7.0709, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.02812939521800281, |
| "grad_norm": 98.82685089111328, |
| "learning_rate": 1.6363636363636365e-06, |
| "loss": 7.9752, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.030942334739803096, |
| "grad_norm": 77.33248901367188, |
| "learning_rate": 1.8181818181818183e-06, |
| "loss": 6.7595, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.03375527426160337, |
| "grad_norm": 75.38268280029297, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 7.2824, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.03656821378340366, |
| "grad_norm": 84.62842559814453, |
| "learning_rate": 2.181818181818182e-06, |
| "loss": 7.3766, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.03938115330520394, |
| "grad_norm": 79.66527557373047, |
| "learning_rate": 2.363636363636364e-06, |
| "loss": 6.902, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.04219409282700422, |
| "grad_norm": 88.1312026977539, |
| "learning_rate": 2.5454545454545456e-06, |
| "loss": 7.5326, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0450070323488045, |
| "grad_norm": 67.1671371459961, |
| "learning_rate": 2.7272727272727272e-06, |
| "loss": 6.6224, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.04781997187060478, |
| "grad_norm": 78.15252685546875, |
| "learning_rate": 2.9090909090909093e-06, |
| "loss": 7.2991, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.05063291139240506, |
| "grad_norm": 89.14740753173828, |
| "learning_rate": 3.090909090909091e-06, |
| "loss": 7.316, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.053445850914205346, |
| "grad_norm": 82.18356323242188, |
| "learning_rate": 3.272727272727273e-06, |
| "loss": 7.2328, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.05625879043600562, |
| "grad_norm": 75.57778930664062, |
| "learning_rate": 3.454545454545455e-06, |
| "loss": 6.8493, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.05907172995780591, |
| "grad_norm": 91.4236068725586, |
| "learning_rate": 3.6363636363636366e-06, |
| "loss": 7.251, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.06188466947960619, |
| "grad_norm": 69.89977264404297, |
| "learning_rate": 3.818181818181819e-06, |
| "loss": 6.1767, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.06469760900140648, |
| "grad_norm": 79.23346710205078, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 6.5113, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.06751054852320675, |
| "grad_norm": 75.04463958740234, |
| "learning_rate": 4.181818181818182e-06, |
| "loss": 6.3854, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.07032348804500703, |
| "grad_norm": 90.92483520507812, |
| "learning_rate": 4.363636363636364e-06, |
| "loss": 7.383, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.07313642756680731, |
| "grad_norm": 69.97665405273438, |
| "learning_rate": 4.5454545454545455e-06, |
| "loss": 6.1346, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.0759493670886076, |
| "grad_norm": 89.80615234375, |
| "learning_rate": 4.727272727272728e-06, |
| "loss": 6.9564, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.07876230661040788, |
| "grad_norm": 81.48190307617188, |
| "learning_rate": 4.90909090909091e-06, |
| "loss": 6.7021, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.08157524613220815, |
| "grad_norm": 79.94600677490234, |
| "learning_rate": 5.090909090909091e-06, |
| "loss": 5.7781, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.08438818565400844, |
| "grad_norm": 80.19660949707031, |
| "learning_rate": 5.272727272727273e-06, |
| "loss": 5.7521, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.08720112517580872, |
| "grad_norm": 74.73751831054688, |
| "learning_rate": 5.4545454545454545e-06, |
| "loss": 5.2344, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.090014064697609, |
| "grad_norm": 84.3858871459961, |
| "learning_rate": 5.636363636363636e-06, |
| "loss": 5.6553, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.09282700421940929, |
| "grad_norm": 93.59162902832031, |
| "learning_rate": 5.8181818181818185e-06, |
| "loss": 5.0521, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.09563994374120956, |
| "grad_norm": 68.47096252441406, |
| "learning_rate": 6e-06, |
| "loss": 5.225, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.09845288326300984, |
| "grad_norm": 62.65687942504883, |
| "learning_rate": 6.181818181818182e-06, |
| "loss": 4.338, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.10126582278481013, |
| "grad_norm": 66.28219604492188, |
| "learning_rate": 6.363636363636364e-06, |
| "loss": 4.714, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.10407876230661041, |
| "grad_norm": 64.53064727783203, |
| "learning_rate": 6.545454545454546e-06, |
| "loss": 4.8437, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.10689170182841069, |
| "grad_norm": 40.499000549316406, |
| "learning_rate": 6.7272727272727275e-06, |
| "loss": 4.5761, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.10970464135021098, |
| "grad_norm": 40.0291633605957, |
| "learning_rate": 6.90909090909091e-06, |
| "loss": 4.1693, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.11251758087201125, |
| "grad_norm": 19.737794876098633, |
| "learning_rate": 7.0909090909090916e-06, |
| "loss": 3.7602, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.11533052039381153, |
| "grad_norm": 18.727174758911133, |
| "learning_rate": 7.272727272727273e-06, |
| "loss": 3.941, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.11814345991561181, |
| "grad_norm": 15.440817832946777, |
| "learning_rate": 7.454545454545456e-06, |
| "loss": 3.676, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.1209563994374121, |
| "grad_norm": 49.393409729003906, |
| "learning_rate": 7.636363636363638e-06, |
| "loss": 3.5502, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.12376933895921238, |
| "grad_norm": 29.366811752319336, |
| "learning_rate": 7.81818181818182e-06, |
| "loss": 3.174, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.12658227848101267, |
| "grad_norm": 26.623790740966797, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 3.1249, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.12939521800281295, |
| "grad_norm": 23.555133819580078, |
| "learning_rate": 8.181818181818183e-06, |
| "loss": 3.5312, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.13220815752461323, |
| "grad_norm": 33.85753631591797, |
| "learning_rate": 8.363636363636365e-06, |
| "loss": 3.4659, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.1350210970464135, |
| "grad_norm": 17.72439193725586, |
| "learning_rate": 8.545454545454546e-06, |
| "loss": 2.7741, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.13783403656821377, |
| "grad_norm": 17.903911590576172, |
| "learning_rate": 8.727272727272728e-06, |
| "loss": 3.334, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.14064697609001406, |
| "grad_norm": 15.8783597946167, |
| "learning_rate": 8.90909090909091e-06, |
| "loss": 2.7859, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.14064697609001406, |
| "eval_loss": 1.4928081035614014, |
| "eval_runtime": 2.8043, |
| "eval_samples_per_second": 9.271, |
| "eval_steps_per_second": 1.426, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.14064697609001406, |
| "eval_active_sample_count": 30, |
| "eval_avg_loss": 805.75, |
| "eval_avg_mem_token_accuracy": 0.25177304964539005, |
| "eval_avg_mem_token_gt_count": 9.4, |
| "eval_avg_mem_token_precision": 0.007424448394855171, |
| "eval_avg_mem_token_rate": 0.6361338388877802, |
| "eval_avg_mem_token_recall(Accuracy)": 0.25177304964539005, |
| "eval_avg_slot_norm_mean": 197.63333333333333, |
| "eval_avg_slot_sim_mean": 0.996875, |
| "eval_global_step": 50, |
| "eval_loss": 1.4928081035614014, |
| "eval_num_samples": 30, |
| "eval_runtime": 2.8043, |
| "eval_samples_per_second": 9.271, |
| "eval_sim_active_sample_count": 30, |
| "eval_steps_per_second": 1.426, |
| "eval_total_correct_count": 71, |
| "eval_total_gt_mem_token_count": 282, |
| "eval_total_positions": 15033, |
| "eval_total_pred_mem_token_count": 9563, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.14345991561181434, |
| "grad_norm": 9.468743324279785, |
| "learning_rate": 9.090909090909091e-06, |
| "loss": 2.8193, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.14627285513361463, |
| "grad_norm": 8.735793113708496, |
| "learning_rate": 9.272727272727273e-06, |
| "loss": 2.624, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.1490857946554149, |
| "grad_norm": 11.669722557067871, |
| "learning_rate": 9.454545454545456e-06, |
| "loss": 2.8725, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.1518987341772152, |
| "grad_norm": 16.81114959716797, |
| "learning_rate": 9.636363636363638e-06, |
| "loss": 2.4657, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.15471167369901548, |
| "grad_norm": 19.379348754882812, |
| "learning_rate": 9.81818181818182e-06, |
| "loss": 2.1279, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.15752461322081576, |
| "grad_norm": 13.823864936828613, |
| "learning_rate": 1e-05, |
| "loss": 2.3733, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.16033755274261605, |
| "grad_norm": 14.514190673828125, |
| "learning_rate": 9.999977231314128e-06, |
| "loss": 2.1855, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.1631504922644163, |
| "grad_norm": 24.133705139160156, |
| "learning_rate": 9.99990892546387e-06, |
| "loss": 2.2268, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.1659634317862166, |
| "grad_norm": 13.885165214538574, |
| "learning_rate": 9.999795083071328e-06, |
| "loss": 2.1062, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.16877637130801687, |
| "grad_norm": 6.023658752441406, |
| "learning_rate": 9.999635705173312e-06, |
| "loss": 1.9233, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.17158931082981715, |
| "grad_norm": 5.1499104499816895, |
| "learning_rate": 9.999430793221356e-06, |
| "loss": 2.117, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.17440225035161744, |
| "grad_norm": 5.638373851776123, |
| "learning_rate": 9.999180349081688e-06, |
| "loss": 2.2507, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.17721518987341772, |
| "grad_norm": 5.992455959320068, |
| "learning_rate": 9.998884375035221e-06, |
| "loss": 1.9682, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.180028129395218, |
| "grad_norm": 4.536100387573242, |
| "learning_rate": 9.998542873777534e-06, |
| "loss": 1.955, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.1828410689170183, |
| "grad_norm": 11.286314964294434, |
| "learning_rate": 9.99815584841884e-06, |
| "loss": 2.1629, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.18565400843881857, |
| "grad_norm": 9.133061408996582, |
| "learning_rate": 9.99772330248396e-06, |
| "loss": 1.4339, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.18846694796061886, |
| "grad_norm": 7.25726842880249, |
| "learning_rate": 9.997245239912299e-06, |
| "loss": 2.0025, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.19127988748241911, |
| "grad_norm": 5.315834045410156, |
| "learning_rate": 9.996721665057796e-06, |
| "loss": 1.7737, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.1940928270042194, |
| "grad_norm": 3.770214080810547, |
| "learning_rate": 9.996152582688899e-06, |
| "loss": 1.8984, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.19690576652601968, |
| "grad_norm": 4.797364711761475, |
| "learning_rate": 9.995537997988507e-06, |
| "loss": 2.0319, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.19971870604781997, |
| "grad_norm": 5.449586391448975, |
| "learning_rate": 9.994877916553937e-06, |
| "loss": 1.7875, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.20253164556962025, |
| "grad_norm": 3.06927227973938, |
| "learning_rate": 9.994172344396866e-06, |
| "loss": 1.5467, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.20534458509142053, |
| "grad_norm": 3.089805841445923, |
| "learning_rate": 9.99342128794327e-06, |
| "loss": 1.3562, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.20815752461322082, |
| "grad_norm": 3.4402778148651123, |
| "learning_rate": 9.992624754033377e-06, |
| "loss": 1.7436, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.2109704641350211, |
| "grad_norm": 2.948519706726074, |
| "learning_rate": 9.991782749921601e-06, |
| "loss": 1.5302, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.21378340365682139, |
| "grad_norm": 6.839716911315918, |
| "learning_rate": 9.990895283276472e-06, |
| "loss": 1.6953, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.21659634317862167, |
| "grad_norm": 4.01812219619751, |
| "learning_rate": 9.98996236218057e-06, |
| "loss": 1.7822, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.21940928270042195, |
| "grad_norm": 4.928662300109863, |
| "learning_rate": 9.98898399513045e-06, |
| "loss": 1.4248, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.2222222222222222, |
| "grad_norm": 3.146573305130005, |
| "learning_rate": 9.987960191036564e-06, |
| "loss": 1.6365, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.2250351617440225, |
| "grad_norm": 4.380753993988037, |
| "learning_rate": 9.986890959223181e-06, |
| "loss": 1.7186, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.22784810126582278, |
| "grad_norm": 2.831251621246338, |
| "learning_rate": 9.985776309428306e-06, |
| "loss": 1.4852, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.23066104078762306, |
| "grad_norm": 3.742809772491455, |
| "learning_rate": 9.984616251803577e-06, |
| "loss": 1.5631, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.23347398030942335, |
| "grad_norm": 3.9068987369537354, |
| "learning_rate": 9.983410796914197e-06, |
| "loss": 1.482, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.23628691983122363, |
| "grad_norm": 3.327174663543701, |
| "learning_rate": 9.982159955738808e-06, |
| "loss": 1.608, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.2390998593530239, |
| "grad_norm": 3.083757162094116, |
| "learning_rate": 9.980863739669419e-06, |
| "loss": 1.5167, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.2419127988748242, |
| "grad_norm": 2.9441981315612793, |
| "learning_rate": 9.979522160511282e-06, |
| "loss": 1.6137, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.24472573839662448, |
| "grad_norm": 2.8649449348449707, |
| "learning_rate": 9.978135230482797e-06, |
| "loss": 1.665, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.24753867791842477, |
| "grad_norm": 3.0601882934570312, |
| "learning_rate": 9.97670296221539e-06, |
| "loss": 1.5845, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.25035161744022505, |
| "grad_norm": 4.856632232666016, |
| "learning_rate": 9.975225368753412e-06, |
| "loss": 1.5959, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.25316455696202533, |
| "grad_norm": 3.0896317958831787, |
| "learning_rate": 9.973702463554004e-06, |
| "loss": 1.2724, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2559774964838256, |
| "grad_norm": 2.862079381942749, |
| "learning_rate": 9.972134260486989e-06, |
| "loss": 1.73, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.2587904360056259, |
| "grad_norm": 2.281548500061035, |
| "learning_rate": 9.970520773834734e-06, |
| "loss": 1.4366, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.2616033755274262, |
| "grad_norm": 2.9218814373016357, |
| "learning_rate": 9.968862018292025e-06, |
| "loss": 1.7853, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.26441631504922647, |
| "grad_norm": 3.361042022705078, |
| "learning_rate": 9.967158008965942e-06, |
| "loss": 1.5868, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.2672292545710267, |
| "grad_norm": 2.6090950965881348, |
| "learning_rate": 9.965408761375702e-06, |
| "loss": 1.6479, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.270042194092827, |
| "grad_norm": 2.4182980060577393, |
| "learning_rate": 9.963614291452532e-06, |
| "loss": 1.4854, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.27285513361462727, |
| "grad_norm": 2.7494289875030518, |
| "learning_rate": 9.961774615539523e-06, |
| "loss": 1.6097, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.27566807313642755, |
| "grad_norm": 3.082038402557373, |
| "learning_rate": 9.959889750391474e-06, |
| "loss": 1.3752, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.27848101265822783, |
| "grad_norm": 3.282862663269043, |
| "learning_rate": 9.957959713174748e-06, |
| "loss": 1.3782, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.2812939521800281, |
| "grad_norm": 2.0881476402282715, |
| "learning_rate": 9.955984521467108e-06, |
| "loss": 1.3952, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2812939521800281, |
| "eval_loss": 0.7734614014625549, |
| "eval_runtime": 2.8846, |
| "eval_samples_per_second": 9.013, |
| "eval_steps_per_second": 1.387, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2812939521800281, |
| "eval_active_sample_count": 30, |
| "eval_avg_loss": 638.5, |
| "eval_avg_mem_token_accuracy": 0.24822695035460993, |
| "eval_avg_mem_token_gt_count": 9.4, |
| "eval_avg_mem_token_precision": 0.008693492300049677, |
| "eval_avg_mem_token_rate": 0.5356216324087009, |
| "eval_avg_mem_token_recall(Accuracy)": 0.24822695035460993, |
| "eval_avg_slot_norm_mean": 197.63333333333333, |
| "eval_avg_slot_sim_mean": 0.996875, |
| "eval_global_step": 100, |
| "eval_loss": 0.7734614014625549, |
| "eval_num_samples": 30, |
| "eval_runtime": 2.8846, |
| "eval_samples_per_second": 9.013, |
| "eval_sim_active_sample_count": 30, |
| "eval_steps_per_second": 1.387, |
| "eval_total_correct_count": 70, |
| "eval_total_gt_mem_token_count": 282, |
| "eval_total_positions": 15033, |
| "eval_total_pred_mem_token_count": 8052, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2841068917018284, |
| "grad_norm": 2.6222341060638428, |
| "learning_rate": 9.953964193257563e-06, |
| "loss": 1.5721, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.2869198312236287, |
| "grad_norm": 2.3316454887390137, |
| "learning_rate": 9.951898746946201e-06, |
| "loss": 1.3596, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.28973277074542897, |
| "grad_norm": 2.174182176589966, |
| "learning_rate": 9.949788201344019e-06, |
| "loss": 1.2779, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.29254571026722925, |
| "grad_norm": 2.538205862045288, |
| "learning_rate": 9.947632575672758e-06, |
| "loss": 1.3406, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.29535864978902954, |
| "grad_norm": 1.902901291847229, |
| "learning_rate": 9.945431889564724e-06, |
| "loss": 1.1408, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.2981715893108298, |
| "grad_norm": 2.382870674133301, |
| "learning_rate": 9.943186163062607e-06, |
| "loss": 1.3498, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.3009845288326301, |
| "grad_norm": 2.490842342376709, |
| "learning_rate": 9.940895416619308e-06, |
| "loss": 1.401, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.3037974683544304, |
| "grad_norm": 2.9286532402038574, |
| "learning_rate": 9.938559671097739e-06, |
| "loss": 1.5762, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.3066104078762307, |
| "grad_norm": 2.838031530380249, |
| "learning_rate": 9.93617894777064e-06, |
| "loss": 1.5001, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.30942334739803096, |
| "grad_norm": 2.0874297618865967, |
| "learning_rate": 9.933753268320391e-06, |
| "loss": 1.3123, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.31223628691983124, |
| "grad_norm": 2.5237607955932617, |
| "learning_rate": 9.931282654838803e-06, |
| "loss": 1.2764, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.3150492264416315, |
| "grad_norm": 2.4033403396606445, |
| "learning_rate": 9.928767129826929e-06, |
| "loss": 1.3374, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.3178621659634318, |
| "grad_norm": 2.2955803871154785, |
| "learning_rate": 9.926206716194842e-06, |
| "loss": 1.3878, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.3206751054852321, |
| "grad_norm": 3.3657052516937256, |
| "learning_rate": 9.92360143726145e-06, |
| "loss": 1.288, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.3234880450070324, |
| "grad_norm": 3.1771109104156494, |
| "learning_rate": 9.920951316754259e-06, |
| "loss": 1.4854, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.3263009845288326, |
| "grad_norm": 2.6639983654022217, |
| "learning_rate": 9.918256378809178e-06, |
| "loss": 1.5049, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.3291139240506329, |
| "grad_norm": 2.107646942138672, |
| "learning_rate": 9.915516647970283e-06, |
| "loss": 1.2783, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.3319268635724332, |
| "grad_norm": 2.307697296142578, |
| "learning_rate": 9.9127321491896e-06, |
| "loss": 1.3444, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.33473980309423346, |
| "grad_norm": 2.160855293273926, |
| "learning_rate": 9.909902907826884e-06, |
| "loss": 1.112, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.33755274261603374, |
| "grad_norm": 2.300719976425171, |
| "learning_rate": 9.907028949649376e-06, |
| "loss": 1.3957, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.340365682137834, |
| "grad_norm": 2.3513684272766113, |
| "learning_rate": 9.904110300831577e-06, |
| "loss": 1.224, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.3431786216596343, |
| "grad_norm": 2.0586118698120117, |
| "learning_rate": 9.901146987955008e-06, |
| "loss": 1.1874, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.3459915611814346, |
| "grad_norm": 2.517422676086426, |
| "learning_rate": 9.898139038007962e-06, |
| "loss": 1.2165, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.3488045007032349, |
| "grad_norm": 2.1542768478393555, |
| "learning_rate": 9.895086478385267e-06, |
| "loss": 1.3451, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.35161744022503516, |
| "grad_norm": 2.022313356399536, |
| "learning_rate": 9.891989336888033e-06, |
| "loss": 1.2169, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.35443037974683544, |
| "grad_norm": 2.6460540294647217, |
| "learning_rate": 9.888847641723394e-06, |
| "loss": 1.4583, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.35724331926863573, |
| "grad_norm": 2.2727549076080322, |
| "learning_rate": 9.88566142150426e-06, |
| "loss": 1.2032, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.360056258790436, |
| "grad_norm": 2.1075050830841064, |
| "learning_rate": 9.88243070524905e-06, |
| "loss": 1.1943, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.3628691983122363, |
| "grad_norm": 2.352522611618042, |
| "learning_rate": 9.87915552238143e-06, |
| "loss": 1.3522, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.3656821378340366, |
| "grad_norm": 2.469947338104248, |
| "learning_rate": 9.87583590273004e-06, |
| "loss": 1.1493, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.36849507735583686, |
| "grad_norm": 2.1671838760375977, |
| "learning_rate": 9.872471876528235e-06, |
| "loss": 1.3792, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.37130801687763715, |
| "grad_norm": 2.235957622528076, |
| "learning_rate": 9.869063474413798e-06, |
| "loss": 1.3672, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.37412095639943743, |
| "grad_norm": 2.241083860397339, |
| "learning_rate": 9.865610727428661e-06, |
| "loss": 1.1784, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.3769338959212377, |
| "grad_norm": 2.1455912590026855, |
| "learning_rate": 9.862113667018628e-06, |
| "loss": 1.2497, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.379746835443038, |
| "grad_norm": 2.49971342086792, |
| "learning_rate": 9.858572325033089e-06, |
| "loss": 1.4471, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.38255977496483823, |
| "grad_norm": 2.6926071643829346, |
| "learning_rate": 9.854986733724724e-06, |
| "loss": 1.1595, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.3853727144866385, |
| "grad_norm": 2.2876596450805664, |
| "learning_rate": 9.851356925749218e-06, |
| "loss": 1.1668, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.3881856540084388, |
| "grad_norm": 2.018536329269409, |
| "learning_rate": 9.847682934164948e-06, |
| "loss": 1.1446, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.3909985935302391, |
| "grad_norm": 2.660203456878662, |
| "learning_rate": 9.843964792432701e-06, |
| "loss": 1.3112, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.39381153305203936, |
| "grad_norm": 2.4841043949127197, |
| "learning_rate": 9.840202534415358e-06, |
| "loss": 1.3684, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.39662447257383965, |
| "grad_norm": 2.1534616947174072, |
| "learning_rate": 9.836396194377587e-06, |
| "loss": 1.2795, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.39943741209563993, |
| "grad_norm": 2.2963688373565674, |
| "learning_rate": 9.832545806985532e-06, |
| "loss": 1.298, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.4022503516174402, |
| "grad_norm": 2.911456346511841, |
| "learning_rate": 9.828651407306495e-06, |
| "loss": 1.3186, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.4050632911392405, |
| "grad_norm": 3.0715761184692383, |
| "learning_rate": 9.824713030808626e-06, |
| "loss": 1.378, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.4078762306610408, |
| "grad_norm": 2.150747537612915, |
| "learning_rate": 9.820730713360585e-06, |
| "loss": 1.1809, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.41068917018284107, |
| "grad_norm": 2.1824264526367188, |
| "learning_rate": 9.816704491231226e-06, |
| "loss": 1.0561, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.41350210970464135, |
| "grad_norm": 2.2817230224609375, |
| "learning_rate": 9.812634401089265e-06, |
| "loss": 1.2782, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.41631504922644164, |
| "grad_norm": 2.196108341217041, |
| "learning_rate": 9.808520480002942e-06, |
| "loss": 1.1196, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.4191279887482419, |
| "grad_norm": 2.3351998329162598, |
| "learning_rate": 9.804362765439688e-06, |
| "loss": 1.4752, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.4219409282700422, |
| "grad_norm": 1.8851360082626343, |
| "learning_rate": 9.800161295265782e-06, |
| "loss": 1.1407, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.4219409282700422, |
| "eval_loss": 0.7094771862030029, |
| "eval_runtime": 2.855, |
| "eval_samples_per_second": 9.107, |
| "eval_steps_per_second": 1.401, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.4219409282700422, |
| "eval_active_sample_count": 30, |
| "eval_avg_loss": 615.25, |
| "eval_avg_mem_token_accuracy": 0.23404255319148937, |
| "eval_avg_mem_token_gt_count": 9.4, |
| "eval_avg_mem_token_precision": 0.007896625987078248, |
| "eval_avg_mem_token_rate": 0.5559768509279585, |
| "eval_avg_mem_token_recall(Accuracy)": 0.23404255319148937, |
| "eval_avg_slot_norm_mean": 197.63333333333333, |
| "eval_avg_slot_sim_mean": 0.996875, |
| "eval_global_step": 150, |
| "eval_loss": 0.7094771862030029, |
| "eval_num_samples": 30, |
| "eval_runtime": 2.855, |
| "eval_samples_per_second": 9.107, |
| "eval_sim_active_sample_count": 30, |
| "eval_steps_per_second": 1.401, |
| "eval_total_correct_count": 66, |
| "eval_total_gt_mem_token_count": 282, |
| "eval_total_positions": 15033, |
| "eval_total_pred_mem_token_count": 8358, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.4247538677918425, |
| "grad_norm": 2.1879961490631104, |
| "learning_rate": 9.795916107746009e-06, |
| "loss": 1.1632, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.42756680731364277, |
| "grad_norm": 2.7381277084350586, |
| "learning_rate": 9.7916272415433e-06, |
| "loss": 1.3305, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.43037974683544306, |
| "grad_norm": 2.1921334266662598, |
| "learning_rate": 9.787294735718397e-06, |
| "loss": 1.1759, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.43319268635724334, |
| "grad_norm": 2.2524077892303467, |
| "learning_rate": 9.782918629729486e-06, |
| "loss": 1.1278, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.4360056258790436, |
| "grad_norm": 2.3991479873657227, |
| "learning_rate": 9.778498963431838e-06, |
| "loss": 1.2304, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.4388185654008439, |
| "grad_norm": 2.4503281116485596, |
| "learning_rate": 9.774035777077452e-06, |
| "loss": 1.3168, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.44163150492264414, |
| "grad_norm": 2.1630754470825195, |
| "learning_rate": 9.769529111314683e-06, |
| "loss": 1.1698, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 2.1806483268737793, |
| "learning_rate": 9.764979007187874e-06, |
| "loss": 1.1485, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.4472573839662447, |
| "grad_norm": 2.1980652809143066, |
| "learning_rate": 9.760385506136982e-06, |
| "loss": 1.3419, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.450070323488045, |
| "grad_norm": 4.968358039855957, |
| "learning_rate": 9.755748649997197e-06, |
| "loss": 1.19, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.45288326300984527, |
| "grad_norm": 2.15004563331604, |
| "learning_rate": 9.751068480998572e-06, |
| "loss": 1.2162, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.45569620253164556, |
| "grad_norm": 2.2927024364471436, |
| "learning_rate": 9.746345041765624e-06, |
| "loss": 1.2539, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.45850914205344584, |
| "grad_norm": 2.2658493518829346, |
| "learning_rate": 9.741578375316953e-06, |
| "loss": 1.4352, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.4613220815752461, |
| "grad_norm": 2.3411777019500732, |
| "learning_rate": 9.736768525064852e-06, |
| "loss": 1.4317, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.4641350210970464, |
| "grad_norm": 2.0097508430480957, |
| "learning_rate": 9.731915534814912e-06, |
| "loss": 1.1761, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.4669479606188467, |
| "grad_norm": 2.312138080596924, |
| "learning_rate": 9.727019448765613e-06, |
| "loss": 1.2183, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.469760900140647, |
| "grad_norm": 2.3369953632354736, |
| "learning_rate": 9.722080311507938e-06, |
| "loss": 1.3209, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.47257383966244726, |
| "grad_norm": 2.1543290615081787, |
| "learning_rate": 9.717098168024948e-06, |
| "loss": 1.2806, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.47538677918424754, |
| "grad_norm": 2.3597400188446045, |
| "learning_rate": 9.712073063691388e-06, |
| "loss": 1.2461, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.4781997187060478, |
| "grad_norm": 2.410320520401001, |
| "learning_rate": 9.707005044273268e-06, |
| "loss": 1.3153, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.4810126582278481, |
| "grad_norm": 2.5447475910186768, |
| "learning_rate": 9.701894155927445e-06, |
| "loss": 1.3782, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.4838255977496484, |
| "grad_norm": 2.600811004638672, |
| "learning_rate": 9.696740445201202e-06, |
| "loss": 1.5061, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.4866385372714487, |
| "grad_norm": 2.225473642349243, |
| "learning_rate": 9.691543959031831e-06, |
| "loss": 1.3204, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.48945147679324896, |
| "grad_norm": 2.2354350090026855, |
| "learning_rate": 9.68630474474619e-06, |
| "loss": 1.3342, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.49226441631504925, |
| "grad_norm": 2.4795658588409424, |
| "learning_rate": 9.681022850060297e-06, |
| "loss": 1.2004, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.49507735583684953, |
| "grad_norm": 2.111879348754883, |
| "learning_rate": 9.675698323078865e-06, |
| "loss": 1.0086, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.4978902953586498, |
| "grad_norm": 2.0163023471832275, |
| "learning_rate": 9.67033121229489e-06, |
| "loss": 1.0946, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.5007032348804501, |
| "grad_norm": 2.2219393253326416, |
| "learning_rate": 9.664921566589195e-06, |
| "loss": 1.3935, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.5035161744022504, |
| "grad_norm": 2.128089189529419, |
| "learning_rate": 9.659469435229992e-06, |
| "loss": 1.1659, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.5063291139240507, |
| "grad_norm": 2.5307302474975586, |
| "learning_rate": 9.653974867872424e-06, |
| "loss": 1.1473, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.509142053445851, |
| "grad_norm": 2.2050728797912598, |
| "learning_rate": 9.648437914558126e-06, |
| "loss": 1.3126, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.5119549929676512, |
| "grad_norm": 2.1602675914764404, |
| "learning_rate": 9.642858625714753e-06, |
| "loss": 1.0508, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.5147679324894515, |
| "grad_norm": 2.3411359786987305, |
| "learning_rate": 9.637237052155541e-06, |
| "loss": 1.2805, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.5175808720112518, |
| "grad_norm": 2.3061892986297607, |
| "learning_rate": 9.631573245078823e-06, |
| "loss": 1.324, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.5203938115330521, |
| "grad_norm": 2.0462026596069336, |
| "learning_rate": 9.625867256067577e-06, |
| "loss": 1.2376, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.5232067510548524, |
| "grad_norm": 2.2104408740997314, |
| "learning_rate": 9.620119137088954e-06, |
| "loss": 1.2963, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.5260196905766527, |
| "grad_norm": 2.5065929889678955, |
| "learning_rate": 9.614328940493797e-06, |
| "loss": 1.3735, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.5288326300984529, |
| "grad_norm": 2.349320888519287, |
| "learning_rate": 9.608496719016176e-06, |
| "loss": 1.2742, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.5316455696202531, |
| "grad_norm": 2.519850730895996, |
| "learning_rate": 9.602622525772895e-06, |
| "loss": 1.4212, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.5344585091420534, |
| "grad_norm": 2.0543527603149414, |
| "learning_rate": 9.596706414263022e-06, |
| "loss": 1.1391, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.5372714486638537, |
| "grad_norm": 2.289496898651123, |
| "learning_rate": 9.59074843836739e-06, |
| "loss": 1.2401, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.540084388185654, |
| "grad_norm": 2.350924491882324, |
| "learning_rate": 9.584748652348107e-06, |
| "loss": 1.3712, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.5428973277074542, |
| "grad_norm": 2.23681640625, |
| "learning_rate": 9.578707110848077e-06, |
| "loss": 1.1505, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.5457102672292545, |
| "grad_norm": 2.008516788482666, |
| "learning_rate": 9.572623868890482e-06, |
| "loss": 1.0241, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.5485232067510548, |
| "grad_norm": 2.3972671031951904, |
| "learning_rate": 9.566498981878289e-06, |
| "loss": 1.4334, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.5513361462728551, |
| "grad_norm": 1.9378750324249268, |
| "learning_rate": 9.560332505593754e-06, |
| "loss": 1.0679, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.5541490857946554, |
| "grad_norm": 2.3928143978118896, |
| "learning_rate": 9.554124496197899e-06, |
| "loss": 1.0903, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.5569620253164557, |
| "grad_norm": 2.4164905548095703, |
| "learning_rate": 9.547875010230009e-06, |
| "loss": 1.3779, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.559774964838256, |
| "grad_norm": 2.0729787349700928, |
| "learning_rate": 9.54158410460712e-06, |
| "loss": 1.114, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.5625879043600562, |
| "grad_norm": 1.9305024147033691, |
| "learning_rate": 9.535251836623491e-06, |
| "loss": 1.1579, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5625879043600562, |
| "eval_loss": 0.6872708797454834, |
| "eval_runtime": 2.8553, |
| "eval_samples_per_second": 9.106, |
| "eval_steps_per_second": 1.401, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5625879043600562, |
| "eval_active_sample_count": 30, |
| "eval_avg_loss": 608.5, |
| "eval_avg_mem_token_accuracy": 0.2198581560283688, |
| "eval_avg_mem_token_gt_count": 9.4, |
| "eval_avg_mem_token_precision": 0.007418930238123729, |
| "eval_avg_mem_token_rate": 0.5559103306060001, |
| "eval_avg_mem_token_recall(Accuracy)": 0.2198581560283688, |
| "eval_avg_slot_norm_mean": 197.63333333333333, |
| "eval_avg_slot_sim_mean": 0.996875, |
| "eval_global_step": 200, |
| "eval_loss": 0.6872708797454834, |
| "eval_num_samples": 30, |
| "eval_runtime": 2.8553, |
| "eval_samples_per_second": 9.106, |
| "eval_sim_active_sample_count": 30, |
| "eval_steps_per_second": 1.401, |
| "eval_total_correct_count": 62, |
| "eval_total_gt_mem_token_count": 282, |
| "eval_total_positions": 15033, |
| "eval_total_pred_mem_token_count": 8357, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5654008438818565, |
| "grad_norm": 2.2860162258148193, |
| "learning_rate": 9.528878263950094e-06, |
| "loss": 1.2892, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.5682137834036568, |
| "grad_norm": 2.314282178878784, |
| "learning_rate": 9.522463444634075e-06, |
| "loss": 1.0782, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.5710267229254571, |
| "grad_norm": 15.191813468933105, |
| "learning_rate": 9.516007437098238e-06, |
| "loss": 1.2559, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.5738396624472574, |
| "grad_norm": 1.9443162679672241, |
| "learning_rate": 9.509510300140506e-06, |
| "loss": 0.8679, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.5766526019690577, |
| "grad_norm": 2.5310826301574707, |
| "learning_rate": 9.502972092933384e-06, |
| "loss": 1.2779, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.5794655414908579, |
| "grad_norm": 2.4394469261169434, |
| "learning_rate": 9.496392875023433e-06, |
| "loss": 1.1331, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.5822784810126582, |
| "grad_norm": 2.40698504447937, |
| "learning_rate": 9.489772706330707e-06, |
| "loss": 1.4669, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.5850914205344585, |
| "grad_norm": 2.0934903621673584, |
| "learning_rate": 9.483111647148223e-06, |
| "loss": 1.2372, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.5879043600562588, |
| "grad_norm": 2.2789113521575928, |
| "learning_rate": 9.476409758141404e-06, |
| "loss": 1.3838, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.5907172995780591, |
| "grad_norm": 2.0439610481262207, |
| "learning_rate": 9.469667100347539e-06, |
| "loss": 1.1897, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.5935302390998594, |
| "grad_norm": 2.5594871044158936, |
| "learning_rate": 9.462883735175205e-06, |
| "loss": 1.2361, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.5963431786216596, |
| "grad_norm": 2.417461395263672, |
| "learning_rate": 9.45605972440373e-06, |
| "loss": 1.3818, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.5991561181434599, |
| "grad_norm": 2.030989170074463, |
| "learning_rate": 9.449195130182614e-06, |
| "loss": 1.2072, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.6019690576652602, |
| "grad_norm": 1.9220385551452637, |
| "learning_rate": 9.442290015030974e-06, |
| "loss": 1.1057, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.6047819971870605, |
| "grad_norm": 2.4362001419067383, |
| "learning_rate": 9.43534444183697e-06, |
| "loss": 1.3472, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.6075949367088608, |
| "grad_norm": 1.9925367832183838, |
| "learning_rate": 9.42835847385723e-06, |
| "loss": 1.2851, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.6104078762306611, |
| "grad_norm": 2.3182199001312256, |
| "learning_rate": 9.42133217471628e-06, |
| "loss": 1.2026, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.6132208157524613, |
| "grad_norm": 2.7779831886291504, |
| "learning_rate": 9.414265608405956e-06, |
| "loss": 1.2488, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.6160337552742616, |
| "grad_norm": 2.6299376487731934, |
| "learning_rate": 9.407158839284836e-06, |
| "loss": 1.3019, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.6188466947960619, |
| "grad_norm": 3.4749839305877686, |
| "learning_rate": 9.40001193207763e-06, |
| "loss": 1.4892, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.6216596343178622, |
| "grad_norm": 2.2574360370635986, |
| "learning_rate": 9.392824951874618e-06, |
| "loss": 1.2897, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.6244725738396625, |
| "grad_norm": 2.16740083694458, |
| "learning_rate": 9.385597964131033e-06, |
| "loss": 1.2792, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.6272855133614628, |
| "grad_norm": 2.0155792236328125, |
| "learning_rate": 9.378331034666483e-06, |
| "loss": 1.2584, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.630098452883263, |
| "grad_norm": 2.4452121257781982, |
| "learning_rate": 9.371024229664342e-06, |
| "loss": 1.4524, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.6329113924050633, |
| "grad_norm": 2.295438766479492, |
| "learning_rate": 9.363677615671148e-06, |
| "loss": 1.2677, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.6357243319268636, |
| "grad_norm": 2.1375696659088135, |
| "learning_rate": 9.356291259596e-06, |
| "loss": 1.265, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.6385372714486639, |
| "grad_norm": 2.3946800231933594, |
| "learning_rate": 9.348865228709947e-06, |
| "loss": 1.3528, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.6413502109704642, |
| "grad_norm": 2.332805871963501, |
| "learning_rate": 9.341399590645373e-06, |
| "loss": 1.3119, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.6441631504922645, |
| "grad_norm": 2.3480770587921143, |
| "learning_rate": 9.333894413395388e-06, |
| "loss": 1.33, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.6469760900140648, |
| "grad_norm": 2.432349681854248, |
| "learning_rate": 9.326349765313199e-06, |
| "loss": 1.1957, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.6497890295358649, |
| "grad_norm": 2.0219781398773193, |
| "learning_rate": 9.318765715111497e-06, |
| "loss": 1.2202, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.6526019690576652, |
| "grad_norm": 2.8865296840667725, |
| "learning_rate": 9.311142331861821e-06, |
| "loss": 1.5149, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.6554149085794655, |
| "grad_norm": 2.1823160648345947, |
| "learning_rate": 9.303479684993943e-06, |
| "loss": 1.2677, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.6582278481012658, |
| "grad_norm": 2.011133909225464, |
| "learning_rate": 9.295777844295219e-06, |
| "loss": 1.0202, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.6610407876230661, |
| "grad_norm": 2.2680437564849854, |
| "learning_rate": 9.288036879909967e-06, |
| "loss": 1.2755, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.6638537271448663, |
| "grad_norm": 2.297574520111084, |
| "learning_rate": 9.280256862338822e-06, |
| "loss": 1.2567, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 2.2774109840393066, |
| "learning_rate": 9.272437862438095e-06, |
| "loss": 1.1645, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.6694796061884669, |
| "grad_norm": 2.4613051414489746, |
| "learning_rate": 9.264579951419126e-06, |
| "loss": 1.3841, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.6722925457102672, |
| "grad_norm": 2.2511165142059326, |
| "learning_rate": 9.256683200847638e-06, |
| "loss": 1.2692, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.6751054852320675, |
| "grad_norm": 2.209132432937622, |
| "learning_rate": 9.248747682643085e-06, |
| "loss": 1.2905, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.6779184247538678, |
| "grad_norm": 2.3346107006073, |
| "learning_rate": 9.240773469077994e-06, |
| "loss": 1.189, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.680731364275668, |
| "grad_norm": 2.3697586059570312, |
| "learning_rate": 9.232760632777311e-06, |
| "loss": 1.236, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.6835443037974683, |
| "grad_norm": 2.7163619995117188, |
| "learning_rate": 9.22470924671774e-06, |
| "loss": 1.3411, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.6863572433192686, |
| "grad_norm": 2.210554838180542, |
| "learning_rate": 9.216619384227068e-06, |
| "loss": 1.2791, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.6891701828410689, |
| "grad_norm": 2.2112317085266113, |
| "learning_rate": 9.208491118983515e-06, |
| "loss": 1.2984, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.6919831223628692, |
| "grad_norm": 2.247898817062378, |
| "learning_rate": 9.200324525015046e-06, |
| "loss": 1.2766, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.6947960618846695, |
| "grad_norm": 2.2993924617767334, |
| "learning_rate": 9.192119676698703e-06, |
| "loss": 1.1908, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.6976090014064698, |
| "grad_norm": 2.4729530811309814, |
| "learning_rate": 9.183876648759937e-06, |
| "loss": 1.364, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.70042194092827, |
| "grad_norm": 2.201533794403076, |
| "learning_rate": 9.175595516271911e-06, |
| "loss": 1.344, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.7032348804500703, |
| "grad_norm": 2.3106961250305176, |
| "learning_rate": 9.167276354654827e-06, |
| "loss": 1.313, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.7032348804500703, |
| "eval_loss": 0.6741299033164978, |
| "eval_runtime": 2.8499, |
| "eval_samples_per_second": 9.123, |
| "eval_steps_per_second": 1.404, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.7032348804500703, |
| "eval_active_sample_count": 30, |
| "eval_avg_loss": 590.625, |
| "eval_avg_mem_token_accuracy": 0.23049645390070922, |
| "eval_avg_mem_token_gt_count": 9.4, |
| "eval_avg_mem_token_precision": 0.007840772014475271, |
| "eval_avg_mem_token_rate": 0.5514534690347901, |
| "eval_avg_mem_token_recall(Accuracy)": 0.23049645390070922, |
| "eval_avg_slot_norm_mean": 197.63333333333333, |
| "eval_avg_slot_sim_mean": 0.996875, |
| "eval_global_step": 250, |
| "eval_loss": 0.6741299033164978, |
| "eval_num_samples": 30, |
| "eval_runtime": 2.8499, |
| "eval_samples_per_second": 9.123, |
| "eval_sim_active_sample_count": 30, |
| "eval_steps_per_second": 1.404, |
| "eval_total_correct_count": 65, |
| "eval_total_gt_mem_token_count": 282, |
| "eval_total_positions": 15033, |
| "eval_total_pred_mem_token_count": 8290, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.7060478199718706, |
| "grad_norm": 2.2657763957977295, |
| "learning_rate": 9.158919239675237e-06, |
| "loss": 0.9924, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.7088607594936709, |
| "grad_norm": 2.8294458389282227, |
| "learning_rate": 9.150524247445346e-06, |
| "loss": 1.5447, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.7116736990154712, |
| "grad_norm": 2.327502489089966, |
| "learning_rate": 9.14209145442234e-06, |
| "loss": 1.3784, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.7144866385372715, |
| "grad_norm": 2.2193102836608887, |
| "learning_rate": 9.133620937407656e-06, |
| "loss": 1.2874, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.7172995780590717, |
| "grad_norm": 2.400413990020752, |
| "learning_rate": 9.125112773546315e-06, |
| "loss": 1.2711, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.720112517580872, |
| "grad_norm": 2.1976544857025146, |
| "learning_rate": 9.1165670403262e-06, |
| "loss": 1.399, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.7229254571026723, |
| "grad_norm": 2.2996156215667725, |
| "learning_rate": 9.107983815577359e-06, |
| "loss": 1.4082, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.7257383966244726, |
| "grad_norm": 2.307288408279419, |
| "learning_rate": 9.09936317747129e-06, |
| "loss": 1.275, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.7285513361462729, |
| "grad_norm": 2.204585552215576, |
| "learning_rate": 9.090705204520231e-06, |
| "loss": 1.3542, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.7313642756680732, |
| "grad_norm": 2.3391809463500977, |
| "learning_rate": 9.082009975576452e-06, |
| "loss": 1.231, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.7341772151898734, |
| "grad_norm": 2.5154929161071777, |
| "learning_rate": 9.073277569831526e-06, |
| "loss": 1.3549, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.7369901547116737, |
| "grad_norm": 2.1306750774383545, |
| "learning_rate": 9.064508066815614e-06, |
| "loss": 1.1, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.739803094233474, |
| "grad_norm": 1.9493396282196045, |
| "learning_rate": 9.05570154639674e-06, |
| "loss": 1.0767, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.7426160337552743, |
| "grad_norm": 2.2229723930358887, |
| "learning_rate": 9.046858088780064e-06, |
| "loss": 1.1945, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.7454289732770746, |
| "grad_norm": 2.0410044193267822, |
| "learning_rate": 9.03797777450715e-06, |
| "loss": 1.2284, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.7482419127988749, |
| "grad_norm": 2.533954381942749, |
| "learning_rate": 9.02906068445523e-06, |
| "loss": 1.4345, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.7510548523206751, |
| "grad_norm": 2.324066162109375, |
| "learning_rate": 9.020106899836471e-06, |
| "loss": 1.2716, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.7538677918424754, |
| "grad_norm": 2.0535366535186768, |
| "learning_rate": 9.011116502197243e-06, |
| "loss": 1.1823, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.7566807313642757, |
| "grad_norm": 2.3328094482421875, |
| "learning_rate": 9.002089573417356e-06, |
| "loss": 1.2959, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.759493670886076, |
| "grad_norm": 2.3262429237365723, |
| "learning_rate": 8.993026195709337e-06, |
| "loss": 0.965, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.7623066104078763, |
| "grad_norm": 2.247913122177124, |
| "learning_rate": 8.983926451617664e-06, |
| "loss": 1.291, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.7651195499296765, |
| "grad_norm": 2.140726089477539, |
| "learning_rate": 8.974790424018022e-06, |
| "loss": 1.2708, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.7679324894514767, |
| "grad_norm": 2.0828731060028076, |
| "learning_rate": 8.96561819611655e-06, |
| "loss": 1.2937, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.770745428973277, |
| "grad_norm": 2.237555742263794, |
| "learning_rate": 8.956409851449076e-06, |
| "loss": 1.1241, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.7735583684950773, |
| "grad_norm": 1.906575083732605, |
| "learning_rate": 8.947165473880364e-06, |
| "loss": 1.0149, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.7763713080168776, |
| "grad_norm": 2.204448699951172, |
| "learning_rate": 8.937885147603345e-06, |
| "loss": 1.2036, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.7791842475386779, |
| "grad_norm": 2.151160717010498, |
| "learning_rate": 8.928568957138356e-06, |
| "loss": 1.2992, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.7819971870604782, |
| "grad_norm": 2.286642551422119, |
| "learning_rate": 8.919216987332357e-06, |
| "loss": 1.2701, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.7848101265822784, |
| "grad_norm": 3.3560984134674072, |
| "learning_rate": 8.909829323358177e-06, |
| "loss": 1.3486, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.7876230661040787, |
| "grad_norm": 1.9844144582748413, |
| "learning_rate": 8.900406050713723e-06, |
| "loss": 0.967, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.790436005625879, |
| "grad_norm": 2.1631999015808105, |
| "learning_rate": 8.89094725522121e-06, |
| "loss": 1.2139, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.7932489451476793, |
| "grad_norm": 2.1446194648742676, |
| "learning_rate": 8.881453023026373e-06, |
| "loss": 1.2743, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.7960618846694796, |
| "grad_norm": 1.9020416736602783, |
| "learning_rate": 8.871923440597694e-06, |
| "loss": 1.0834, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.7988748241912799, |
| "grad_norm": 2.1618247032165527, |
| "learning_rate": 8.862358594725595e-06, |
| "loss": 1.151, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.8016877637130801, |
| "grad_norm": 2.3456199169158936, |
| "learning_rate": 8.852758572521666e-06, |
| "loss": 1.206, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.8045007032348804, |
| "grad_norm": 2.2839531898498535, |
| "learning_rate": 8.843123461417864e-06, |
| "loss": 1.248, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.8073136427566807, |
| "grad_norm": 2.277515411376953, |
| "learning_rate": 8.833453349165713e-06, |
| "loss": 1.3061, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.810126582278481, |
| "grad_norm": 2.3145205974578857, |
| "learning_rate": 8.823748323835517e-06, |
| "loss": 1.4309, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.8129395218002813, |
| "grad_norm": 2.298470973968506, |
| "learning_rate": 8.814008473815542e-06, |
| "loss": 1.1581, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.8157524613220816, |
| "grad_norm": 2.4578652381896973, |
| "learning_rate": 8.804233887811224e-06, |
| "loss": 1.328, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.8185654008438819, |
| "grad_norm": 2.162040948867798, |
| "learning_rate": 8.794424654844352e-06, |
| "loss": 1.041, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.8213783403656821, |
| "grad_norm": 2.1940865516662598, |
| "learning_rate": 8.784580864252266e-06, |
| "loss": 1.2024, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.8241912798874824, |
| "grad_norm": 2.127418041229248, |
| "learning_rate": 8.774702605687036e-06, |
| "loss": 1.1357, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.8270042194092827, |
| "grad_norm": 2.259040355682373, |
| "learning_rate": 8.764789969114647e-06, |
| "loss": 1.2494, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.829817158931083, |
| "grad_norm": 2.398115634918213, |
| "learning_rate": 8.754843044814183e-06, |
| "loss": 1.3409, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.8326300984528833, |
| "grad_norm": 1.94135320186615, |
| "learning_rate": 8.744861923377e-06, |
| "loss": 1.0011, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.8354430379746836, |
| "grad_norm": 2.3360581398010254, |
| "learning_rate": 8.734846695705912e-06, |
| "loss": 1.3973, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.8382559774964838, |
| "grad_norm": 2.0555343627929688, |
| "learning_rate": 8.724797453014342e-06, |
| "loss": 1.0796, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.8410689170182841, |
| "grad_norm": 2.26999831199646, |
| "learning_rate": 8.714714286825512e-06, |
| "loss": 1.2569, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.8438818565400844, |
| "grad_norm": 2.004324197769165, |
| "learning_rate": 8.704597288971598e-06, |
| "loss": 1.1934, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.8438818565400844, |
| "eval_loss": 0.6666268110275269, |
| "eval_runtime": 2.761, |
| "eval_samples_per_second": 9.417, |
| "eval_steps_per_second": 1.449, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.8438818565400844, |
| "eval_active_sample_count": 30, |
| "eval_avg_loss": 590.375, |
| "eval_avg_mem_token_accuracy": 0.22340425531914893, |
| "eval_avg_mem_token_gt_count": 9.4, |
| "eval_avg_mem_token_precision": 0.007591276057356308, |
| "eval_avg_mem_token_rate": 0.5520521519324153, |
| "eval_avg_mem_token_recall(Accuracy)": 0.22340425531914893, |
| "eval_avg_slot_norm_mean": 197.63333333333333, |
| "eval_avg_slot_sim_mean": 0.996875, |
| "eval_global_step": 300, |
| "eval_loss": 0.6666268110275269, |
| "eval_num_samples": 30, |
| "eval_runtime": 2.761, |
| "eval_samples_per_second": 9.417, |
| "eval_sim_active_sample_count": 30, |
| "eval_steps_per_second": 1.449, |
| "eval_total_correct_count": 63, |
| "eval_total_gt_mem_token_count": 282, |
| "eval_total_positions": 15033, |
| "eval_total_pred_mem_token_count": 8299, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.8466947960618847, |
| "grad_norm": 2.1731441020965576, |
| "learning_rate": 8.6944465515929e-06, |
| "loss": 1.1642, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.849507735583685, |
| "grad_norm": 1.9805549383163452, |
| "learning_rate": 8.684262167136999e-06, |
| "loss": 1.1963, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.8523206751054853, |
| "grad_norm": 1.985160231590271, |
| "learning_rate": 8.674044228357915e-06, |
| "loss": 1.0271, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.8551336146272855, |
| "grad_norm": 2.233934164047241, |
| "learning_rate": 8.663792828315259e-06, |
| "loss": 1.3379, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.8579465541490858, |
| "grad_norm": 2.1742870807647705, |
| "learning_rate": 8.6535080603734e-06, |
| "loss": 1.2982, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.8607594936708861, |
| "grad_norm": 2.2393639087677, |
| "learning_rate": 8.643190018200595e-06, |
| "loss": 1.2925, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.8635724331926864, |
| "grad_norm": 2.395679473876953, |
| "learning_rate": 8.632838795768149e-06, |
| "loss": 1.3027, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.8663853727144867, |
| "grad_norm": 1.976331353187561, |
| "learning_rate": 8.622454487349556e-06, |
| "loss": 1.1242, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.869198312236287, |
| "grad_norm": 2.1286044120788574, |
| "learning_rate": 8.612037187519635e-06, |
| "loss": 1.1868, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.8720112517580872, |
| "grad_norm": 2.2224793434143066, |
| "learning_rate": 8.601586991153681e-06, |
| "loss": 1.2595, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.8748241912798875, |
| "grad_norm": 2.282410144805908, |
| "learning_rate": 8.591103993426588e-06, |
| "loss": 1.1068, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.8776371308016878, |
| "grad_norm": 2.000074625015259, |
| "learning_rate": 8.580588289811987e-06, |
| "loss": 1.1547, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.8804500703234881, |
| "grad_norm": 2.108109474182129, |
| "learning_rate": 8.570039976081382e-06, |
| "loss": 1.1654, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.8832630098452883, |
| "grad_norm": 2.2698593139648438, |
| "learning_rate": 8.559459148303268e-06, |
| "loss": 1.0082, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.8860759493670886, |
| "grad_norm": 2.04703426361084, |
| "learning_rate": 8.548845902842264e-06, |
| "loss": 1.2114, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 1.9669705629348755, |
| "learning_rate": 8.538200336358227e-06, |
| "loss": 1.0822, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.8917018284106891, |
| "grad_norm": 2.058732271194458, |
| "learning_rate": 8.527522545805386e-06, |
| "loss": 1.056, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.8945147679324894, |
| "grad_norm": 2.1475107669830322, |
| "learning_rate": 8.51681262843144e-06, |
| "loss": 1.2073, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.8973277074542897, |
| "grad_norm": 1.9537756443023682, |
| "learning_rate": 8.50607068177669e-06, |
| "loss": 1.026, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.90014064697609, |
| "grad_norm": 2.14225172996521, |
| "learning_rate": 8.495296803673138e-06, |
| "loss": 1.3038, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.9029535864978903, |
| "grad_norm": 2.2561981678009033, |
| "learning_rate": 8.484491092243603e-06, |
| "loss": 1.0576, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.9057665260196905, |
| "grad_norm": 1.9777567386627197, |
| "learning_rate": 8.473653645900825e-06, |
| "loss": 1.1675, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.9085794655414908, |
| "grad_norm": 2.2552154064178467, |
| "learning_rate": 8.462784563346567e-06, |
| "loss": 1.2568, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.9113924050632911, |
| "grad_norm": 2.19797945022583, |
| "learning_rate": 8.451883943570722e-06, |
| "loss": 1.1247, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.9142053445850914, |
| "grad_norm": 2.176769971847534, |
| "learning_rate": 8.440951885850402e-06, |
| "loss": 1.0333, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.9170182841068917, |
| "grad_norm": 2.011472463607788, |
| "learning_rate": 8.429988489749045e-06, |
| "loss": 1.2882, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.919831223628692, |
| "grad_norm": 2.276411294937134, |
| "learning_rate": 8.418993855115498e-06, |
| "loss": 1.2682, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.9226441631504922, |
| "grad_norm": 1.9374414682388306, |
| "learning_rate": 8.407968082083116e-06, |
| "loss": 1.198, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.9254571026722925, |
| "grad_norm": 2.0080978870391846, |
| "learning_rate": 8.396911271068842e-06, |
| "loss": 1.0495, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.9282700421940928, |
| "grad_norm": 2.410945415496826, |
| "learning_rate": 8.385823522772299e-06, |
| "loss": 1.3558, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.9310829817158931, |
| "grad_norm": 2.205632448196411, |
| "learning_rate": 8.37470493817487e-06, |
| "loss": 1.1552, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.9338959212376934, |
| "grad_norm": 1.9957945346832275, |
| "learning_rate": 8.36355561853878e-06, |
| "loss": 1.2074, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.9367088607594937, |
| "grad_norm": 1.889917254447937, |
| "learning_rate": 8.352375665406171e-06, |
| "loss": 0.8613, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.939521800281294, |
| "grad_norm": 2.4653337001800537, |
| "learning_rate": 8.341165180598182e-06, |
| "loss": 1.3945, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.9423347398030942, |
| "grad_norm": 2.15743088722229, |
| "learning_rate": 8.32992426621401e-06, |
| "loss": 1.1899, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.9451476793248945, |
| "grad_norm": 2.014369010925293, |
| "learning_rate": 8.318653024629999e-06, |
| "loss": 1.2004, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.9479606188466948, |
| "grad_norm": 2.475370168685913, |
| "learning_rate": 8.307351558498692e-06, |
| "loss": 1.0919, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.9507735583684951, |
| "grad_norm": 2.288590669631958, |
| "learning_rate": 8.296019970747901e-06, |
| "loss": 1.054, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.9535864978902954, |
| "grad_norm": 2.0414512157440186, |
| "learning_rate": 8.284658364579771e-06, |
| "loss": 1.2336, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.9563994374120957, |
| "grad_norm": 2.192631483078003, |
| "learning_rate": 8.27326684346984e-06, |
| "loss": 1.2078, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.9592123769338959, |
| "grad_norm": 2.109923839569092, |
| "learning_rate": 8.261845511166092e-06, |
| "loss": 1.2295, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.9620253164556962, |
| "grad_norm": 1.7825968265533447, |
| "learning_rate": 8.250394471688018e-06, |
| "loss": 1.1074, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.9648382559774965, |
| "grad_norm": 1.9041146039962769, |
| "learning_rate": 8.23891382932567e-06, |
| "loss": 1.1283, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.9676511954992968, |
| "grad_norm": 2.0874454975128174, |
| "learning_rate": 8.2274036886387e-06, |
| "loss": 1.1228, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.9704641350210971, |
| "grad_norm": 1.9520052671432495, |
| "learning_rate": 8.215864154455421e-06, |
| "loss": 1.2209, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.9732770745428974, |
| "grad_norm": 2.6171762943267822, |
| "learning_rate": 8.204295331871844e-06, |
| "loss": 1.6231, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.9760900140646976, |
| "grad_norm": 2.0320959091186523, |
| "learning_rate": 8.192697326250722e-06, |
| "loss": 1.153, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.9789029535864979, |
| "grad_norm": 1.8297227621078491, |
| "learning_rate": 8.1810702432206e-06, |
| "loss": 0.9717, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.9817158931082982, |
| "grad_norm": 2.077699661254883, |
| "learning_rate": 8.169414188674829e-06, |
| "loss": 0.9804, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.9845288326300985, |
| "grad_norm": 2.002263069152832, |
| "learning_rate": 8.157729268770636e-06, |
| "loss": 1.1233, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.9845288326300985, |
| "eval_loss": 0.6594013571739197, |
| "eval_runtime": 2.8213, |
| "eval_samples_per_second": 9.216, |
| "eval_steps_per_second": 1.418, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.9845288326300985, |
| "eval_active_sample_count": 30, |
| "eval_avg_loss": 591.0, |
| "eval_avg_mem_token_accuracy": 0.22340425531914893, |
| "eval_avg_mem_token_gt_count": 9.4, |
| "eval_avg_mem_token_precision": 0.007495538370017847, |
| "eval_avg_mem_token_rate": 0.5591033060600014, |
| "eval_avg_mem_token_recall(Accuracy)": 0.22340425531914893, |
| "eval_avg_slot_norm_mean": 197.63333333333333, |
| "eval_avg_slot_sim_mean": 0.996875, |
| "eval_global_step": 350, |
| "eval_loss": 0.6594013571739197, |
| "eval_num_samples": 30, |
| "eval_runtime": 2.8213, |
| "eval_samples_per_second": 9.216, |
| "eval_sim_active_sample_count": 30, |
| "eval_steps_per_second": 1.418, |
| "eval_total_correct_count": 63, |
| "eval_total_gt_mem_token_count": 282, |
| "eval_total_positions": 15033, |
| "eval_total_pred_mem_token_count": 8405, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.9873417721518988, |
| "grad_norm": 2.112032890319824, |
| "learning_rate": 8.146015589928123e-06, |
| "loss": 1.1559, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.9901547116736991, |
| "grad_norm": 2.227578639984131, |
| "learning_rate": 8.134273258829322e-06, |
| "loss": 1.2947, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.9929676511954993, |
| "grad_norm": 2.0214011669158936, |
| "learning_rate": 8.122502382417211e-06, |
| "loss": 1.3415, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.9957805907172996, |
| "grad_norm": 2.176740884780884, |
| "learning_rate": 8.110703067894747e-06, |
| "loss": 1.3129, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.9985935302390999, |
| "grad_norm": 1.9748849868774414, |
| "learning_rate": 8.098875422723884e-06, |
| "loss": 1.0268, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.5412670373916626, |
| "learning_rate": 8.087019554624595e-06, |
| "loss": 0.657, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.0028129395218002, |
| "grad_norm": 2.013446092605591, |
| "learning_rate": 8.075135571573898e-06, |
| "loss": 1.1009, |
| "step": 357 |
| }, |
| { |
| "epoch": 1.0056258790436006, |
| "grad_norm": 2.034468412399292, |
| "learning_rate": 8.06322358180486e-06, |
| "loss": 1.1514, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.0084388185654007, |
| "grad_norm": 2.1513798236846924, |
| "learning_rate": 8.051283693805624e-06, |
| "loss": 1.1312, |
| "step": 359 |
| }, |
| { |
| "epoch": 1.0112517580872011, |
| "grad_norm": 1.8825079202651978, |
| "learning_rate": 8.039316016318415e-06, |
| "loss": 0.9748, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.0140646976090013, |
| "grad_norm": 2.040106773376465, |
| "learning_rate": 8.027320658338547e-06, |
| "loss": 1.2061, |
| "step": 361 |
| }, |
| { |
| "epoch": 1.0168776371308017, |
| "grad_norm": 2.0149614810943604, |
| "learning_rate": 8.015297729113436e-06, |
| "loss": 1.0372, |
| "step": 362 |
| }, |
| { |
| "epoch": 1.0196905766526019, |
| "grad_norm": 1.8744758367538452, |
| "learning_rate": 8.0032473381416e-06, |
| "loss": 1.1538, |
| "step": 363 |
| }, |
| { |
| "epoch": 1.0225035161744023, |
| "grad_norm": 2.2196671962738037, |
| "learning_rate": 7.991169595171669e-06, |
| "loss": 1.1131, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.0253164556962024, |
| "grad_norm": 2.2530996799468994, |
| "learning_rate": 7.979064610201372e-06, |
| "loss": 1.3786, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.0281293952180028, |
| "grad_norm": 2.0854427814483643, |
| "learning_rate": 7.966932493476554e-06, |
| "loss": 1.0615, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.030942334739803, |
| "grad_norm": 2.3596975803375244, |
| "learning_rate": 7.954773355490155e-06, |
| "loss": 1.366, |
| "step": 367 |
| }, |
| { |
| "epoch": 1.0337552742616034, |
| "grad_norm": 1.9892560243606567, |
| "learning_rate": 7.942587306981213e-06, |
| "loss": 1.0439, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.0365682137834036, |
| "grad_norm": 1.8899530172348022, |
| "learning_rate": 7.930374458933852e-06, |
| "loss": 1.0212, |
| "step": 369 |
| }, |
| { |
| "epoch": 1.039381153305204, |
| "grad_norm": 2.1707684993743896, |
| "learning_rate": 7.918134922576271e-06, |
| "loss": 1.1767, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.0421940928270041, |
| "grad_norm": 2.041611671447754, |
| "learning_rate": 7.905868809379735e-06, |
| "loss": 1.2155, |
| "step": 371 |
| }, |
| { |
| "epoch": 1.0450070323488045, |
| "grad_norm": 1.939260482788086, |
| "learning_rate": 7.893576231057553e-06, |
| "loss": 1.0179, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.0478199718706047, |
| "grad_norm": 1.9848639965057373, |
| "learning_rate": 7.88125729956407e-06, |
| "loss": 1.0099, |
| "step": 373 |
| }, |
| { |
| "epoch": 1.0506329113924051, |
| "grad_norm": 2.0023953914642334, |
| "learning_rate": 7.868912127093638e-06, |
| "loss": 1.119, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.0534458509142053, |
| "grad_norm": 1.7961069345474243, |
| "learning_rate": 7.856540826079595e-06, |
| "loss": 0.7417, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.0562587904360057, |
| "grad_norm": 1.8289830684661865, |
| "learning_rate": 7.844143509193252e-06, |
| "loss": 1.0566, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.0590717299578059, |
| "grad_norm": 1.8681098222732544, |
| "learning_rate": 7.831720289342853e-06, |
| "loss": 0.9817, |
| "step": 377 |
| }, |
| { |
| "epoch": 1.0618846694796062, |
| "grad_norm": 1.9967904090881348, |
| "learning_rate": 7.819271279672553e-06, |
| "loss": 0.9361, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.0646976090014064, |
| "grad_norm": 1.7474114894866943, |
| "learning_rate": 7.806796593561389e-06, |
| "loss": 0.9923, |
| "step": 379 |
| }, |
| { |
| "epoch": 1.0675105485232068, |
| "grad_norm": 2.514089822769165, |
| "learning_rate": 7.794296344622246e-06, |
| "loss": 1.2647, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.070323488045007, |
| "grad_norm": 2.2486379146575928, |
| "learning_rate": 7.78177064670082e-06, |
| "loss": 1.1741, |
| "step": 381 |
| }, |
| { |
| "epoch": 1.0731364275668074, |
| "grad_norm": 2.0108935832977295, |
| "learning_rate": 7.769219613874581e-06, |
| "loss": 1.0724, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.0759493670886076, |
| "grad_norm": 2.316124677658081, |
| "learning_rate": 7.756643360451744e-06, |
| "loss": 1.2943, |
| "step": 383 |
| }, |
| { |
| "epoch": 1.078762306610408, |
| "grad_norm": 2.3428173065185547, |
| "learning_rate": 7.744042000970207e-06, |
| "loss": 1.2522, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.0815752461322081, |
| "grad_norm": 2.087315797805786, |
| "learning_rate": 7.731415650196535e-06, |
| "loss": 1.0241, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.0843881856540085, |
| "grad_norm": 2.1546409130096436, |
| "learning_rate": 7.718764423124892e-06, |
| "loss": 1.2256, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.0872011251758087, |
| "grad_norm": 2.559561252593994, |
| "learning_rate": 7.706088434976e-06, |
| "loss": 1.4538, |
| "step": 387 |
| }, |
| { |
| "epoch": 1.090014064697609, |
| "grad_norm": 2.023336410522461, |
| "learning_rate": 7.6933878011961e-06, |
| "loss": 1.1043, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.0928270042194093, |
| "grad_norm": 2.1914350986480713, |
| "learning_rate": 7.68066263745589e-06, |
| "loss": 1.1997, |
| "step": 389 |
| }, |
| { |
| "epoch": 1.0956399437412097, |
| "grad_norm": 1.8683468103408813, |
| "learning_rate": 7.667913059649468e-06, |
| "loss": 1.0576, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.0984528832630098, |
| "grad_norm": 2.2158288955688477, |
| "learning_rate": 7.65513918389329e-06, |
| "loss": 1.2133, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.1012658227848102, |
| "grad_norm": 2.4496500492095947, |
| "learning_rate": 7.6423411265251e-06, |
| "loss": 1.309, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.1040787623066104, |
| "grad_norm": 2.3594353199005127, |
| "learning_rate": 7.629519004102876e-06, |
| "loss": 1.2893, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.1068917018284108, |
| "grad_norm": 2.0072391033172607, |
| "learning_rate": 7.616672933403772e-06, |
| "loss": 0.9854, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.109704641350211, |
| "grad_norm": 2.1165082454681396, |
| "learning_rate": 7.603803031423046e-06, |
| "loss": 1.0648, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.1125175808720114, |
| "grad_norm": 2.136019229888916, |
| "learning_rate": 7.590909415373e-06, |
| "loss": 1.2763, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.1153305203938115, |
| "grad_norm": 2.089963912963867, |
| "learning_rate": 7.577992202681912e-06, |
| "loss": 1.1498, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.1181434599156117, |
| "grad_norm": 2.0347511768341064, |
| "learning_rate": 7.565051510992964e-06, |
| "loss": 1.0931, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.120956399437412, |
| "grad_norm": 1.902830958366394, |
| "learning_rate": 7.552087458163177e-06, |
| "loss": 1.0382, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.1237693389592125, |
| "grad_norm": 2.3222129344940186, |
| "learning_rate": 7.539100162262325e-06, |
| "loss": 1.3173, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.1237693389592125, |
| "eval_loss": 0.6541261672973633, |
| "eval_runtime": 2.7652, |
| "eval_samples_per_second": 9.403, |
| "eval_steps_per_second": 1.447, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.1237693389592125, |
| "eval_active_sample_count": 30, |
| "eval_avg_loss": 579.625, |
| "eval_avg_mem_token_accuracy": 0.23049645390070922, |
| "eval_avg_mem_token_gt_count": 9.4, |
| "eval_avg_mem_token_precision": 0.007830381881701, |
| "eval_avg_mem_token_rate": 0.552185192576332, |
| "eval_avg_mem_token_recall(Accuracy)": 0.23049645390070922, |
| "eval_avg_slot_norm_mean": 197.63333333333333, |
| "eval_avg_slot_sim_mean": 0.996875, |
| "eval_global_step": 400, |
| "eval_loss": 0.6541261672973633, |
| "eval_num_samples": 30, |
| "eval_runtime": 2.7652, |
| "eval_samples_per_second": 9.403, |
| "eval_sim_active_sample_count": 30, |
| "eval_steps_per_second": 1.447, |
| "eval_total_correct_count": 65, |
| "eval_total_gt_mem_token_count": 282, |
| "eval_total_positions": 15033, |
| "eval_total_pred_mem_token_count": 8301, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.1265822784810127, |
| "grad_norm": 2.1172144412994385, |
| "learning_rate": 7.526089741571876e-06, |
| "loss": 1.2135, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.1293952180028128, |
| "grad_norm": 2.117197036743164, |
| "learning_rate": 7.5130563145838994e-06, |
| "loss": 1.2903, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.1322081575246132, |
| "grad_norm": 1.8641384840011597, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 0.902, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.1350210970464134, |
| "grad_norm": 2.043870449066162, |
| "learning_rate": 7.486920916730228e-06, |
| "loss": 1.14, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.1378340365682138, |
| "grad_norm": 2.371406078338623, |
| "learning_rate": 7.473819183891997e-06, |
| "loss": 1.168, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.140646976090014, |
| "grad_norm": 2.017378807067871, |
| "learning_rate": 7.460694920809004e-06, |
| "loss": 1.2308, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.1434599156118144, |
| "grad_norm": 3.638538122177124, |
| "learning_rate": 7.447548247010137e-06, |
| "loss": 1.1636, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.1462728551336145, |
| "grad_norm": 1.9470067024230957, |
| "learning_rate": 7.434379282228393e-06, |
| "loss": 1.1502, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.149085794655415, |
| "grad_norm": 2.1175174713134766, |
| "learning_rate": 7.421188146399776e-06, |
| "loss": 1.0217, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.1518987341772151, |
| "grad_norm": 1.9489398002624512, |
| "learning_rate": 7.407974959662222e-06, |
| "loss": 1.223, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.1547116736990155, |
| "grad_norm": 2.227391242980957, |
| "learning_rate": 7.394739842354489e-06, |
| "loss": 1.1757, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.1575246132208157, |
| "grad_norm": 1.961480736732483, |
| "learning_rate": 7.381482915015068e-06, |
| "loss": 1.1204, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.160337552742616, |
| "grad_norm": 1.8854504823684692, |
| "learning_rate": 7.368204298381085e-06, |
| "loss": 1.0732, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.1631504922644162, |
| "grad_norm": 2.4665989875793457, |
| "learning_rate": 7.3549041133872004e-06, |
| "loss": 1.2208, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.1659634317862166, |
| "grad_norm": 2.293067216873169, |
| "learning_rate": 7.341582481164508e-06, |
| "loss": 1.0995, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.1687763713080168, |
| "grad_norm": 1.636135458946228, |
| "learning_rate": 7.328239523039431e-06, |
| "loss": 1.0113, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.1715893108298172, |
| "grad_norm": 2.080463171005249, |
| "learning_rate": 7.314875360532618e-06, |
| "loss": 1.2187, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.1744022503516174, |
| "grad_norm": 2.316681146621704, |
| "learning_rate": 7.301490115357837e-06, |
| "loss": 1.0254, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.1772151898734178, |
| "grad_norm": 1.9154740571975708, |
| "learning_rate": 7.288083909420866e-06, |
| "loss": 1.0994, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.180028129395218, |
| "grad_norm": 2.2701125144958496, |
| "learning_rate": 7.274656864818379e-06, |
| "loss": 1.193, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.1828410689170183, |
| "grad_norm": 2.259373188018799, |
| "learning_rate": 7.261209103836843e-06, |
| "loss": 1.2083, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.1856540084388185, |
| "grad_norm": 2.170278787612915, |
| "learning_rate": 7.247740748951394e-06, |
| "loss": 1.108, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.188466947960619, |
| "grad_norm": 2.3180534839630127, |
| "learning_rate": 7.234251922824731e-06, |
| "loss": 1.0838, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.191279887482419, |
| "grad_norm": 2.200087308883667, |
| "learning_rate": 7.220742748305989e-06, |
| "loss": 1.2188, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.1940928270042195, |
| "grad_norm": 2.148313045501709, |
| "learning_rate": 7.20721334842963e-06, |
| "loss": 1.1162, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.1969057665260197, |
| "grad_norm": 2.109539270401001, |
| "learning_rate": 7.193663846414318e-06, |
| "loss": 1.126, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.19971870604782, |
| "grad_norm": 2.3250086307525635, |
| "learning_rate": 7.180094365661793e-06, |
| "loss": 1.216, |
| "step": 427 |
| }, |
| { |
| "epoch": 1.2025316455696202, |
| "grad_norm": 2.1778461933135986, |
| "learning_rate": 7.166505029755753e-06, |
| "loss": 1.1582, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.2053445850914206, |
| "grad_norm": 2.0346758365631104, |
| "learning_rate": 7.152895962460727e-06, |
| "loss": 1.0597, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.2081575246132208, |
| "grad_norm": 2.2523462772369385, |
| "learning_rate": 7.139267287720945e-06, |
| "loss": 1.3096, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.2109704641350212, |
| "grad_norm": 2.1248557567596436, |
| "learning_rate": 7.125619129659215e-06, |
| "loss": 1.2255, |
| "step": 431 |
| }, |
| { |
| "epoch": 1.2137834036568214, |
| "grad_norm": 2.402777671813965, |
| "learning_rate": 7.111951612575783e-06, |
| "loss": 1.2178, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.2165963431786218, |
| "grad_norm": 2.1899073123931885, |
| "learning_rate": 7.0982648609472135e-06, |
| "loss": 1.1086, |
| "step": 433 |
| }, |
| { |
| "epoch": 1.219409282700422, |
| "grad_norm": 2.306647777557373, |
| "learning_rate": 7.084558999425245e-06, |
| "loss": 1.2791, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.2222222222222223, |
| "grad_norm": 2.1083829402923584, |
| "learning_rate": 7.0708341528356585e-06, |
| "loss": 1.2203, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.2250351617440225, |
| "grad_norm": 1.9246402978897095, |
| "learning_rate": 7.0570904461771426e-06, |
| "loss": 1.1293, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.2278481012658227, |
| "grad_norm": 2.0863969326019287, |
| "learning_rate": 7.043328004620154e-06, |
| "loss": 1.1112, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.230661040787623, |
| "grad_norm": 2.237459421157837, |
| "learning_rate": 7.029546953505776e-06, |
| "loss": 1.1374, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.2334739803094235, |
| "grad_norm": 1.9015916585922241, |
| "learning_rate": 7.015747418344578e-06, |
| "loss": 1.0886, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.2362869198312236, |
| "grad_norm": 2.1524229049682617, |
| "learning_rate": 7.0019295248154714e-06, |
| "loss": 1.1271, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.2390998593530238, |
| "grad_norm": 2.171227216720581, |
| "learning_rate": 6.98809339876457e-06, |
| "loss": 1.2677, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.2419127988748242, |
| "grad_norm": 2.0763444900512695, |
| "learning_rate": 6.974239166204034e-06, |
| "loss": 1.0989, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.2447257383966246, |
| "grad_norm": 2.1066906452178955, |
| "learning_rate": 6.960366953310931e-06, |
| "loss": 1.2027, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.2475386779184248, |
| "grad_norm": 2.748056650161743, |
| "learning_rate": 6.946476886426087e-06, |
| "loss": 1.0004, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.250351617440225, |
| "grad_norm": 1.90733003616333, |
| "learning_rate": 6.932569092052927e-06, |
| "loss": 0.9063, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.2531645569620253, |
| "grad_norm": 2.3296380043029785, |
| "learning_rate": 6.918643696856333e-06, |
| "loss": 1.2053, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.2559774964838257, |
| "grad_norm": 2.194408416748047, |
| "learning_rate": 6.904700827661484e-06, |
| "loss": 1.2663, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.258790436005626, |
| "grad_norm": 2.2270679473876953, |
| "learning_rate": 6.890740611452705e-06, |
| "loss": 1.1718, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.261603375527426, |
| "grad_norm": 1.8598543405532837, |
| "learning_rate": 6.876763175372306e-06, |
| "loss": 0.958, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.2644163150492265, |
| "grad_norm": 2.112734079360962, |
| "learning_rate": 6.862768646719425e-06, |
| "loss": 1.2674, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.2644163150492265, |
| "eval_loss": 0.6488014459609985, |
| "eval_runtime": 2.7966, |
| "eval_samples_per_second": 9.297, |
| "eval_steps_per_second": 1.43, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.2644163150492265, |
| "eval_active_sample_count": 30, |
| "eval_avg_loss": 588.0, |
| "eval_avg_mem_token_accuracy": 0.23404255319148937, |
| "eval_avg_mem_token_gt_count": 9.4, |
| "eval_avg_mem_token_precision": 0.007794048181388758, |
| "eval_avg_mem_token_rate": 0.5632940863433779, |
| "eval_avg_mem_token_recall(Accuracy)": 0.23404255319148937, |
| "eval_avg_slot_norm_mean": 197.63333333333333, |
| "eval_avg_slot_sim_mean": 0.996875, |
| "eval_global_step": 450, |
| "eval_loss": 0.6488014459609985, |
| "eval_num_samples": 30, |
| "eval_runtime": 2.7966, |
| "eval_samples_per_second": 9.297, |
| "eval_sim_active_sample_count": 30, |
| "eval_steps_per_second": 1.43, |
| "eval_total_correct_count": 66, |
| "eval_total_gt_mem_token_count": 282, |
| "eval_total_positions": 15033, |
| "eval_total_pred_mem_token_count": 8468, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.2672292545710266, |
| "grad_norm": 1.943136215209961, |
| "learning_rate": 6.848757152948876e-06, |
| "loss": 1.0877, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.270042194092827, |
| "grad_norm": 2.015427589416504, |
| "learning_rate": 6.834728821669978e-06, |
| "loss": 1.0226, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.2728551336146272, |
| "grad_norm": 2.0203545093536377, |
| "learning_rate": 6.820683780645397e-06, |
| "loss": 1.0537, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.2756680731364276, |
| "grad_norm": 1.9082456827163696, |
| "learning_rate": 6.806622157789989e-06, |
| "loss": 1.0811, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.2784810126582278, |
| "grad_norm": 2.0107004642486572, |
| "learning_rate": 6.7925440811696165e-06, |
| "loss": 1.1643, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.2812939521800282, |
| "grad_norm": 1.968511700630188, |
| "learning_rate": 6.778449679000006e-06, |
| "loss": 0.9849, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.2841068917018283, |
| "grad_norm": 2.0401535034179688, |
| "learning_rate": 6.764339079645561e-06, |
| "loss": 1.1488, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.2869198312236287, |
| "grad_norm": 1.788967251777649, |
| "learning_rate": 6.7502124116182066e-06, |
| "loss": 0.8775, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.289732770745429, |
| "grad_norm": 1.8958114385604858, |
| "learning_rate": 6.736069803576205e-06, |
| "loss": 1.1991, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.2925457102672293, |
| "grad_norm": 2.1174044609069824, |
| "learning_rate": 6.721911384323e-06, |
| "loss": 1.2373, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.2953586497890295, |
| "grad_norm": 2.2091267108917236, |
| "learning_rate": 6.7077372828060294e-06, |
| "loss": 1.1511, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.2981715893108299, |
| "grad_norm": 1.904528021812439, |
| "learning_rate": 6.693547628115561e-06, |
| "loss": 0.9815, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.30098452883263, |
| "grad_norm": 2.0216708183288574, |
| "learning_rate": 6.67934254948351e-06, |
| "loss": 1.0773, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.3037974683544304, |
| "grad_norm": 2.3458025455474854, |
| "learning_rate": 6.6651221762822635e-06, |
| "loss": 1.2122, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.3066104078762306, |
| "grad_norm": 2.210007905960083, |
| "learning_rate": 6.650886638023508e-06, |
| "loss": 1.2001, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.309423347398031, |
| "grad_norm": 2.168041229248047, |
| "learning_rate": 6.636636064357045e-06, |
| "loss": 1.1748, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.3122362869198312, |
| "grad_norm": 2.1177752017974854, |
| "learning_rate": 6.622370585069605e-06, |
| "loss": 1.1441, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.3150492264416316, |
| "grad_norm": 4.13400411605835, |
| "learning_rate": 6.608090330083677e-06, |
| "loss": 1.0154, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.3178621659634318, |
| "grad_norm": 1.8855236768722534, |
| "learning_rate": 6.593795429456317e-06, |
| "loss": 1.1638, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.3206751054852321, |
| "grad_norm": 2.1128952503204346, |
| "learning_rate": 6.579486013377963e-06, |
| "loss": 1.2435, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.3234880450070323, |
| "grad_norm": 2.091977119445801, |
| "learning_rate": 6.565162212171257e-06, |
| "loss": 1.1948, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.3263009845288325, |
| "grad_norm": 1.8725004196166992, |
| "learning_rate": 6.550824156289852e-06, |
| "loss": 0.9448, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.3291139240506329, |
| "grad_norm": 2.134361982345581, |
| "learning_rate": 6.536471976317223e-06, |
| "loss": 1.1985, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.3319268635724333, |
| "grad_norm": 2.0700531005859375, |
| "learning_rate": 6.5221058029654815e-06, |
| "loss": 1.1321, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.3347398030942335, |
| "grad_norm": 2.336487054824829, |
| "learning_rate": 6.507725767074181e-06, |
| "loss": 1.2447, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.3375527426160336, |
| "grad_norm": 2.1936490535736084, |
| "learning_rate": 6.493331999609132e-06, |
| "loss": 1.3264, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.340365682137834, |
| "grad_norm": 1.8957630395889282, |
| "learning_rate": 6.4789246316612e-06, |
| "loss": 1.0029, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.3431786216596344, |
| "grad_norm": 2.329432249069214, |
| "learning_rate": 6.464503794445121e-06, |
| "loss": 1.3139, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.3459915611814346, |
| "grad_norm": 2.2381882667541504, |
| "learning_rate": 6.450069619298299e-06, |
| "loss": 1.0446, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.3488045007032348, |
| "grad_norm": 2.235319137573242, |
| "learning_rate": 6.435622237679615e-06, |
| "loss": 1.1327, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.3516174402250352, |
| "grad_norm": 2.1776840686798096, |
| "learning_rate": 6.421161781168226e-06, |
| "loss": 1.0707, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.3544303797468356, |
| "grad_norm": 2.003654956817627, |
| "learning_rate": 6.4066883814623674e-06, |
| "loss": 1.0294, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.3572433192686357, |
| "grad_norm": 2.2653419971466064, |
| "learning_rate": 6.3922021703781574e-06, |
| "loss": 1.1558, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.360056258790436, |
| "grad_norm": 1.8952243328094482, |
| "learning_rate": 6.377703279848393e-06, |
| "loss": 1.1621, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.3628691983122363, |
| "grad_norm": 1.818117618560791, |
| "learning_rate": 6.363191841921345e-06, |
| "loss": 1.1758, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.3656821378340367, |
| "grad_norm": 2.188119411468506, |
| "learning_rate": 6.3486679887595635e-06, |
| "loss": 1.4035, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.3684950773558369, |
| "grad_norm": 2.2680625915527344, |
| "learning_rate": 6.334131852638669e-06, |
| "loss": 1.3802, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.371308016877637, |
| "grad_norm": 2.239824056625366, |
| "learning_rate": 6.319583565946147e-06, |
| "loss": 0.978, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.3741209563994374, |
| "grad_norm": 2.084578275680542, |
| "learning_rate": 6.305023261180146e-06, |
| "loss": 1.1592, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.3769338959212378, |
| "grad_norm": 2.074716329574585, |
| "learning_rate": 6.290451070948269e-06, |
| "loss": 1.1417, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.379746835443038, |
| "grad_norm": 2.2187070846557617, |
| "learning_rate": 6.275867127966364e-06, |
| "loss": 1.3134, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.3825597749648382, |
| "grad_norm": 1.9704614877700806, |
| "learning_rate": 6.261271565057318e-06, |
| "loss": 1.2947, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.3853727144866386, |
| "grad_norm": 2.0791146755218506, |
| "learning_rate": 6.246664515149845e-06, |
| "loss": 1.1796, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.3881856540084387, |
| "grad_norm": 2.070108413696289, |
| "learning_rate": 6.232046111277277e-06, |
| "loss": 1.016, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.3909985935302391, |
| "grad_norm": 2.40295147895813, |
| "learning_rate": 6.217416486576354e-06, |
| "loss": 1.247, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.3938115330520393, |
| "grad_norm": 1.9346283674240112, |
| "learning_rate": 6.202775774286007e-06, |
| "loss": 1.0943, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.3966244725738397, |
| "grad_norm": 1.88413667678833, |
| "learning_rate": 6.188124107746148e-06, |
| "loss": 1.0378, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.3994374120956399, |
| "grad_norm": 2.3754115104675293, |
| "learning_rate": 6.173461620396453e-06, |
| "loss": 1.1976, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.4022503516174403, |
| "grad_norm": 2.2472076416015625, |
| "learning_rate": 6.158788445775151e-06, |
| "loss": 1.348, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.4050632911392404, |
| "grad_norm": 2.299577474594116, |
| "learning_rate": 6.1441047175178025e-06, |
| "loss": 1.3543, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.4050632911392404, |
| "eval_loss": 0.6480849385261536, |
| "eval_runtime": 2.7664, |
| "eval_samples_per_second": 9.398, |
| "eval_steps_per_second": 1.446, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.4050632911392404, |
| "eval_active_sample_count": 30, |
| "eval_avg_loss": 593.75, |
| "eval_avg_mem_token_accuracy": 0.2375886524822695, |
| "eval_avg_mem_token_gt_count": 9.4, |
| "eval_avg_mem_token_precision": 0.007852789498359119, |
| "eval_avg_mem_token_rate": 0.5675513869487129, |
| "eval_avg_mem_token_recall(Accuracy)": 0.2375886524822695, |
| "eval_avg_slot_norm_mean": 197.63333333333333, |
| "eval_avg_slot_sim_mean": 0.996875, |
| "eval_global_step": 500, |
| "eval_loss": 0.6480849385261536, |
| "eval_num_samples": 30, |
| "eval_runtime": 2.7664, |
| "eval_samples_per_second": 9.398, |
| "eval_sim_active_sample_count": 30, |
| "eval_steps_per_second": 1.446, |
| "eval_total_correct_count": 67, |
| "eval_total_gt_mem_token_count": 282, |
| "eval_total_positions": 15033, |
| "eval_total_pred_mem_token_count": 8532, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.4078762306610408, |
| "grad_norm": 2.4926252365112305, |
| "learning_rate": 6.129410569356086e-06, |
| "loss": 1.1548, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.410689170182841, |
| "grad_norm": 1.9530552625656128, |
| "learning_rate": 6.11470613511658e-06, |
| "loss": 0.9438, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.4135021097046414, |
| "grad_norm": 2.046297788619995, |
| "learning_rate": 6.0999915487195395e-06, |
| "loss": 1.0105, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.4163150492264416, |
| "grad_norm": 2.359480619430542, |
| "learning_rate": 6.085266944177686e-06, |
| "loss": 1.2237, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.419127988748242, |
| "grad_norm": 2.0814826488494873, |
| "learning_rate": 6.070532455594974e-06, |
| "loss": 1.3641, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.4219409282700421, |
| "grad_norm": 2.5021960735321045, |
| "learning_rate": 6.055788217165384e-06, |
| "loss": 1.1271, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.4247538677918425, |
| "grad_norm": 2.1782703399658203, |
| "learning_rate": 6.0410343631716865e-06, |
| "loss": 1.1237, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.4275668073136427, |
| "grad_norm": 1.9032992124557495, |
| "learning_rate": 6.0262710279842305e-06, |
| "loss": 1.2318, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.4303797468354431, |
| "grad_norm": 1.969860315322876, |
| "learning_rate": 6.011498346059712e-06, |
| "loss": 1.0196, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.4331926863572433, |
| "grad_norm": 2.1782121658325195, |
| "learning_rate": 5.99671645193995e-06, |
| "loss": 1.1725, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.4360056258790437, |
| "grad_norm": 2.0659401416778564, |
| "learning_rate": 5.98192548025067e-06, |
| "loss": 1.1655, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.4388185654008439, |
| "grad_norm": 2.1270692348480225, |
| "learning_rate": 5.967125565700266e-06, |
| "loss": 0.9583, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.4416315049226442, |
| "grad_norm": 2.146409034729004, |
| "learning_rate": 5.952316843078579e-06, |
| "loss": 1.1295, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.4444444444444444, |
| "grad_norm": 2.323197364807129, |
| "learning_rate": 5.9374994472556715e-06, |
| "loss": 1.1557, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.4472573839662446, |
| "grad_norm": 2.1008739471435547, |
| "learning_rate": 5.922673513180596e-06, |
| "loss": 1.24, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.450070323488045, |
| "grad_norm": 2.4466872215270996, |
| "learning_rate": 5.9078391758801646e-06, |
| "loss": 1.2434, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.4528832630098454, |
| "grad_norm": 2.210320234298706, |
| "learning_rate": 5.8929965704577275e-06, |
| "loss": 1.136, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.4556962025316456, |
| "grad_norm": 2.259718894958496, |
| "learning_rate": 5.878145832091929e-06, |
| "loss": 1.3789, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.4585091420534457, |
| "grad_norm": 2.305795431137085, |
| "learning_rate": 5.863287096035491e-06, |
| "loss": 1.0189, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.4613220815752461, |
| "grad_norm": 2.283437967300415, |
| "learning_rate": 5.848420497613969e-06, |
| "loss": 1.1944, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.4641350210970465, |
| "grad_norm": 2.0504446029663086, |
| "learning_rate": 5.833546172224527e-06, |
| "loss": 1.22, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.4669479606188467, |
| "grad_norm": 2.018839120864868, |
| "learning_rate": 5.818664255334702e-06, |
| "loss": 1.0634, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.4697609001406469, |
| "grad_norm": 2.3706552982330322, |
| "learning_rate": 5.803774882481171e-06, |
| "loss": 1.1355, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.4725738396624473, |
| "grad_norm": 2.355933427810669, |
| "learning_rate": 5.788878189268516e-06, |
| "loss": 1.2492, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.4753867791842477, |
| "grad_norm": 2.439201831817627, |
| "learning_rate": 5.773974311367987e-06, |
| "loss": 1.3196, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.4781997187060478, |
| "grad_norm": 2.0663866996765137, |
| "learning_rate": 5.759063384516271e-06, |
| "loss": 1.1885, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.481012658227848, |
| "grad_norm": 2.264146327972412, |
| "learning_rate": 5.7441455445142505e-06, |
| "loss": 1.2146, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.4838255977496484, |
| "grad_norm": 1.8687844276428223, |
| "learning_rate": 5.729220927225769e-06, |
| "loss": 0.9485, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.4866385372714488, |
| "grad_norm": 2.1123878955841064, |
| "learning_rate": 5.714289668576401e-06, |
| "loss": 1.0617, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.489451476793249, |
| "grad_norm": 2.460676670074463, |
| "learning_rate": 5.699351904552196e-06, |
| "loss": 1.5609, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.4922644163150491, |
| "grad_norm": 2.3636927604675293, |
| "learning_rate": 5.68440777119846e-06, |
| "loss": 1.2612, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.4950773558368495, |
| "grad_norm": 1.9600480794906616, |
| "learning_rate": 5.669457404618502e-06, |
| "loss": 0.9536, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.49789029535865, |
| "grad_norm": 1.95573091506958, |
| "learning_rate": 5.654500940972405e-06, |
| "loss": 1.0379, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.50070323488045, |
| "grad_norm": 1.8376390933990479, |
| "learning_rate": 5.639538516475775e-06, |
| "loss": 1.1431, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.5035161744022503, |
| "grad_norm": 1.8683063983917236, |
| "learning_rate": 5.624570267398511e-06, |
| "loss": 1.0917, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.5063291139240507, |
| "grad_norm": 2.060288906097412, |
| "learning_rate": 5.6095963300635585e-06, |
| "loss": 1.0954, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.509142053445851, |
| "grad_norm": 2.148991107940674, |
| "learning_rate": 5.594616840845666e-06, |
| "loss": 1.0198, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.5119549929676512, |
| "grad_norm": 2.234832286834717, |
| "learning_rate": 5.579631936170147e-06, |
| "loss": 1.1007, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.5147679324894514, |
| "grad_norm": 2.1892640590667725, |
| "learning_rate": 5.564641752511637e-06, |
| "loss": 1.0431, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.5175808720112518, |
| "grad_norm": 2.029608726501465, |
| "learning_rate": 5.54964642639285e-06, |
| "loss": 1.0874, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.5203938115330522, |
| "grad_norm": 2.019705057144165, |
| "learning_rate": 5.534646094383333e-06, |
| "loss": 1.0566, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.5232067510548524, |
| "grad_norm": 2.067397117614746, |
| "learning_rate": 5.519640893098227e-06, |
| "loss": 1.1467, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.5260196905766525, |
| "grad_norm": 2.2218313217163086, |
| "learning_rate": 5.504630959197014e-06, |
| "loss": 1.2784, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.528832630098453, |
| "grad_norm": 2.1426005363464355, |
| "learning_rate": 5.489616429382285e-06, |
| "loss": 1.217, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.5316455696202531, |
| "grad_norm": 2.0496666431427, |
| "learning_rate": 5.474597440398483e-06, |
| "loss": 1.1561, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.5344585091420533, |
| "grad_norm": 1.9886417388916016, |
| "learning_rate": 5.459574129030669e-06, |
| "loss": 1.2286, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.5372714486638537, |
| "grad_norm": 1.9588450193405151, |
| "learning_rate": 5.444546632103262e-06, |
| "loss": 1.1474, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.540084388185654, |
| "grad_norm": 2.0006983280181885, |
| "learning_rate": 5.429515086478805e-06, |
| "loss": 1.1519, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.5428973277074542, |
| "grad_norm": 2.1134023666381836, |
| "learning_rate": 5.414479629056717e-06, |
| "loss": 1.1426, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.5457102672292544, |
| "grad_norm": 2.110901355743408, |
| "learning_rate": 5.3994403967720366e-06, |
| "loss": 1.0726, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.5457102672292544, |
| "eval_loss": 0.6454769372940063, |
| "eval_runtime": 2.82, |
| "eval_samples_per_second": 9.22, |
| "eval_steps_per_second": 1.418, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.5457102672292544, |
| "eval_active_sample_count": 30, |
| "eval_avg_loss": 591.125, |
| "eval_avg_mem_token_accuracy": 0.24113475177304963, |
| "eval_avg_mem_token_gt_count": 9.4, |
| "eval_avg_mem_token_precision": 0.008006593665371483, |
| "eval_avg_mem_token_rate": 0.5649570943923369, |
| "eval_avg_mem_token_recall(Accuracy)": 0.24113475177304963, |
| "eval_avg_slot_norm_mean": 197.63333333333333, |
| "eval_avg_slot_sim_mean": 0.996875, |
| "eval_global_step": 550, |
| "eval_loss": 0.6454769372940063, |
| "eval_num_samples": 30, |
| "eval_runtime": 2.82, |
| "eval_samples_per_second": 9.22, |
| "eval_sim_active_sample_count": 30, |
| "eval_steps_per_second": 1.418, |
| "eval_total_correct_count": 68, |
| "eval_total_gt_mem_token_count": 282, |
| "eval_total_positions": 15033, |
| "eval_total_pred_mem_token_count": 8493, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.5485232067510548, |
| "grad_norm": 1.9120993614196777, |
| "learning_rate": 5.3843975265941896e-06, |
| "loss": 1.1199, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.5513361462728552, |
| "grad_norm": 2.0266835689544678, |
| "learning_rate": 5.369351155525729e-06, |
| "loss": 1.1231, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.5541490857946554, |
| "grad_norm": 2.3950095176696777, |
| "learning_rate": 5.354301420601095e-06, |
| "loss": 1.2016, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.5569620253164556, |
| "grad_norm": 2.245199680328369, |
| "learning_rate": 5.33924845888536e-06, |
| "loss": 1.1973, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.559774964838256, |
| "grad_norm": 2.302870988845825, |
| "learning_rate": 5.3241924074729865e-06, |
| "loss": 1.1057, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.5625879043600563, |
| "grad_norm": 2.439229726791382, |
| "learning_rate": 5.30913340348658e-06, |
| "loss": 1.0278, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.5654008438818565, |
| "grad_norm": 2.243025779724121, |
| "learning_rate": 5.294071584075628e-06, |
| "loss": 1.2353, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.5682137834036567, |
| "grad_norm": 2.1339046955108643, |
| "learning_rate": 5.279007086415268e-06, |
| "loss": 1.2753, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.571026722925457, |
| "grad_norm": 2.055248260498047, |
| "learning_rate": 5.263940047705026e-06, |
| "loss": 1.0207, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.5738396624472575, |
| "grad_norm": 2.2932729721069336, |
| "learning_rate": 5.24887060516757e-06, |
| "loss": 1.0904, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.5766526019690577, |
| "grad_norm": 2.3540918827056885, |
| "learning_rate": 5.233798896047461e-06, |
| "loss": 1.045, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.5794655414908578, |
| "grad_norm": 1.941489338874817, |
| "learning_rate": 5.218725057609901e-06, |
| "loss": 0.9543, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.5822784810126582, |
| "grad_norm": 1.9541575908660889, |
| "learning_rate": 5.2036492271394915e-06, |
| "loss": 0.9803, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.5850914205344586, |
| "grad_norm": 2.066892147064209, |
| "learning_rate": 5.188571541938968e-06, |
| "loss": 1.1598, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.5879043600562588, |
| "grad_norm": 2.207688093185425, |
| "learning_rate": 5.1734921393279644e-06, |
| "loss": 1.14, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.590717299578059, |
| "grad_norm": 2.2512924671173096, |
| "learning_rate": 5.158411156641752e-06, |
| "loss": 1.2269, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.5935302390998594, |
| "grad_norm": 1.9499599933624268, |
| "learning_rate": 5.143328731229994e-06, |
| "loss": 0.9949, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.5963431786216598, |
| "grad_norm": 2.176727056503296, |
| "learning_rate": 5.128245000455493e-06, |
| "loss": 1.1866, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.59915611814346, |
| "grad_norm": 2.0169143676757812, |
| "learning_rate": 5.113160101692939e-06, |
| "loss": 1.1554, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.60196905766526, |
| "grad_norm": 2.1123158931732178, |
| "learning_rate": 5.098074172327661e-06, |
| "loss": 0.9758, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.6047819971870605, |
| "grad_norm": 1.8653483390808105, |
| "learning_rate": 5.082987349754376e-06, |
| "loss": 1.009, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.6075949367088609, |
| "grad_norm": 2.3386378288269043, |
| "learning_rate": 5.0678997713759305e-06, |
| "loss": 1.1193, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.610407876230661, |
| "grad_norm": 2.200810432434082, |
| "learning_rate": 5.052811574602059e-06, |
| "loss": 1.2255, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.6132208157524612, |
| "grad_norm": 2.702786922454834, |
| "learning_rate": 5.0377228968481274e-06, |
| "loss": 1.2351, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.6160337552742616, |
| "grad_norm": 2.252342462539673, |
| "learning_rate": 5.022633875533879e-06, |
| "loss": 1.095, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.618846694796062, |
| "grad_norm": 2.326218605041504, |
| "learning_rate": 5.00754464808219e-06, |
| "loss": 1.1578, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.6216596343178622, |
| "grad_norm": 2.0061216354370117, |
| "learning_rate": 4.992455351917812e-06, |
| "loss": 0.974, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.6244725738396624, |
| "grad_norm": 2.0241732597351074, |
| "learning_rate": 4.977366124466122e-06, |
| "loss": 1.0518, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.6272855133614628, |
| "grad_norm": 2.2035324573516846, |
| "learning_rate": 4.962277103151876e-06, |
| "loss": 1.0806, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.6300984528832632, |
| "grad_norm": 1.9597488641738892, |
| "learning_rate": 4.947188425397942e-06, |
| "loss": 0.9929, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.6329113924050633, |
| "grad_norm": 1.8797650337219238, |
| "learning_rate": 4.932100228624072e-06, |
| "loss": 1.0142, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.6357243319268635, |
| "grad_norm": 2.195955514907837, |
| "learning_rate": 4.917012650245626e-06, |
| "loss": 1.2481, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.638537271448664, |
| "grad_norm": 2.0398526191711426, |
| "learning_rate": 4.901925827672341e-06, |
| "loss": 0.9249, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.6413502109704643, |
| "grad_norm": 2.003324508666992, |
| "learning_rate": 4.886839898307062e-06, |
| "loss": 1.0438, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.6441631504922645, |
| "grad_norm": 1.6683696508407593, |
| "learning_rate": 4.8717549995445105e-06, |
| "loss": 0.8833, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.6469760900140646, |
| "grad_norm": 2.1678078174591064, |
| "learning_rate": 4.856671268770007e-06, |
| "loss": 1.1291, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.649789029535865, |
| "grad_norm": 1.9070981740951538, |
| "learning_rate": 4.841588843358251e-06, |
| "loss": 0.9658, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.6526019690576652, |
| "grad_norm": 1.897820234298706, |
| "learning_rate": 4.826507860672036e-06, |
| "loss": 0.9903, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.6554149085794654, |
| "grad_norm": 2.141012668609619, |
| "learning_rate": 4.811428458061033e-06, |
| "loss": 1.3183, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.6582278481012658, |
| "grad_norm": 1.9511604309082031, |
| "learning_rate": 4.796350772860511e-06, |
| "loss": 1.2011, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.6610407876230662, |
| "grad_norm": 2.517437696456909, |
| "learning_rate": 4.7812749423901e-06, |
| "loss": 1.1229, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.6638537271448663, |
| "grad_norm": 1.9676152467727661, |
| "learning_rate": 4.7662011039525416e-06, |
| "loss": 1.1357, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 1.9041470289230347, |
| "learning_rate": 4.7511293948324325e-06, |
| "loss": 1.0166, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.669479606188467, |
| "grad_norm": 2.15259051322937, |
| "learning_rate": 4.736059952294975e-06, |
| "loss": 1.011, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.6722925457102673, |
| "grad_norm": 2.361236333847046, |
| "learning_rate": 4.720992913584732e-06, |
| "loss": 1.3296, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.6751054852320675, |
| "grad_norm": 2.3137876987457275, |
| "learning_rate": 4.7059284159243725e-06, |
| "loss": 1.3602, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.6779184247538677, |
| "grad_norm": 2.085984230041504, |
| "learning_rate": 4.690866596513421e-06, |
| "loss": 1.247, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.680731364275668, |
| "grad_norm": 2.2906124591827393, |
| "learning_rate": 4.675807592527014e-06, |
| "loss": 1.2777, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.6835443037974684, |
| "grad_norm": 2.461681842803955, |
| "learning_rate": 4.660751541114641e-06, |
| "loss": 1.3176, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.6863572433192686, |
| "grad_norm": 2.259167194366455, |
| "learning_rate": 4.645698579398907e-06, |
| "loss": 1.145, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.6863572433192686, |
| "eval_loss": 0.6439154744148254, |
| "eval_runtime": 2.7846, |
| "eval_samples_per_second": 9.337, |
| "eval_steps_per_second": 1.436, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.6863572433192686, |
| "eval_active_sample_count": 30, |
| "eval_avg_loss": 596.375, |
| "eval_avg_mem_token_accuracy": 0.24822695035460993, |
| "eval_avg_mem_token_gt_count": 9.4, |
| "eval_avg_mem_token_precision": 0.008178525528683258, |
| "eval_avg_mem_token_rate": 0.5693474356415885, |
| "eval_avg_mem_token_recall(Accuracy)": 0.24822695035460993, |
| "eval_avg_slot_norm_mean": 197.63333333333333, |
| "eval_avg_slot_sim_mean": 0.996875, |
| "eval_global_step": 600, |
| "eval_loss": 0.6439154744148254, |
| "eval_num_samples": 30, |
| "eval_runtime": 2.7846, |
| "eval_samples_per_second": 9.337, |
| "eval_sim_active_sample_count": 30, |
| "eval_steps_per_second": 1.436, |
| "eval_total_correct_count": 70, |
| "eval_total_gt_mem_token_count": 282, |
| "eval_total_positions": 15033, |
| "eval_total_pred_mem_token_count": 8559, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.6891701828410688, |
| "grad_norm": 2.261350154876709, |
| "learning_rate": 4.630648844474271e-06, |
| "loss": 1.3461, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.6919831223628692, |
| "grad_norm": 2.463414192199707, |
| "learning_rate": 4.615602473405812e-06, |
| "loss": 1.1112, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.6947960618846696, |
| "grad_norm": 2.262482166290283, |
| "learning_rate": 4.600559603227963e-06, |
| "loss": 1.208, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.6976090014064698, |
| "grad_norm": 2.235854387283325, |
| "learning_rate": 4.585520370943285e-06, |
| "loss": 0.8357, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.70042194092827, |
| "grad_norm": 2.0354301929473877, |
| "learning_rate": 4.570484913521196e-06, |
| "loss": 0.9843, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.7032348804500703, |
| "grad_norm": 2.3465640544891357, |
| "learning_rate": 4.55545336789674e-06, |
| "loss": 1.2206, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.7060478199718707, |
| "grad_norm": 1.846433162689209, |
| "learning_rate": 4.540425870969332e-06, |
| "loss": 0.9545, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.7088607594936709, |
| "grad_norm": 2.3210694789886475, |
| "learning_rate": 4.5254025596015175e-06, |
| "loss": 1.2733, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.711673699015471, |
| "grad_norm": 2.5384347438812256, |
| "learning_rate": 4.510383570617716e-06, |
| "loss": 1.2064, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.7144866385372715, |
| "grad_norm": 2.0778439044952393, |
| "learning_rate": 4.495369040802988e-06, |
| "loss": 1.1119, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.7172995780590719, |
| "grad_norm": 2.212078332901001, |
| "learning_rate": 4.480359106901775e-06, |
| "loss": 1.1948, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.720112517580872, |
| "grad_norm": 2.3751208782196045, |
| "learning_rate": 4.465353905616668e-06, |
| "loss": 1.2253, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.7229254571026722, |
| "grad_norm": 2.196316957473755, |
| "learning_rate": 4.4503535736071505e-06, |
| "loss": 1.159, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.7257383966244726, |
| "grad_norm": 2.1474740505218506, |
| "learning_rate": 4.435358247488365e-06, |
| "loss": 1.143, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.728551336146273, |
| "grad_norm": 2.5476577281951904, |
| "learning_rate": 4.420368063829854e-06, |
| "loss": 1.157, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.7313642756680732, |
| "grad_norm": 2.186852216720581, |
| "learning_rate": 4.405383159154337e-06, |
| "loss": 1.1052, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.7341772151898733, |
| "grad_norm": 2.162107467651367, |
| "learning_rate": 4.390403669936443e-06, |
| "loss": 1.1342, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.7369901547116737, |
| "grad_norm": 2.093745470046997, |
| "learning_rate": 4.37542973260149e-06, |
| "loss": 0.9557, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.7398030942334741, |
| "grad_norm": 1.8521722555160522, |
| "learning_rate": 4.3604614835242255e-06, |
| "loss": 1.0542, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.7426160337552743, |
| "grad_norm": 2.1983838081359863, |
| "learning_rate": 4.3454990590275966e-06, |
| "loss": 0.7818, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.7454289732770745, |
| "grad_norm": 2.261500597000122, |
| "learning_rate": 4.3305425953814985e-06, |
| "loss": 1.1948, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.7482419127988749, |
| "grad_norm": 2.4740712642669678, |
| "learning_rate": 4.315592228801543e-06, |
| "loss": 1.3438, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.7510548523206753, |
| "grad_norm": 2.277127981185913, |
| "learning_rate": 4.300648095447806e-06, |
| "loss": 1.2477, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.7538677918424754, |
| "grad_norm": 2.1069774627685547, |
| "learning_rate": 4.285710331423603e-06, |
| "loss": 1.208, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.7566807313642756, |
| "grad_norm": 2.1714632511138916, |
| "learning_rate": 4.2707790727742315e-06, |
| "loss": 1.2219, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.759493670886076, |
| "grad_norm": 2.2100682258605957, |
| "learning_rate": 4.255854455485753e-06, |
| "loss": 1.284, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.7623066104078764, |
| "grad_norm": 2.0882930755615234, |
| "learning_rate": 4.24093661548373e-06, |
| "loss": 1.1695, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.7651195499296763, |
| "grad_norm": 2.3131346702575684, |
| "learning_rate": 4.226025688632013e-06, |
| "loss": 1.1353, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.7679324894514767, |
| "grad_norm": 2.0631368160247803, |
| "learning_rate": 4.211121810731485e-06, |
| "loss": 1.175, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.7707454289732771, |
| "grad_norm": 2.4987428188323975, |
| "learning_rate": 4.196225117518828e-06, |
| "loss": 1.2522, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.7735583684950773, |
| "grad_norm": 1.8051552772521973, |
| "learning_rate": 4.181335744665299e-06, |
| "loss": 1.0842, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.7763713080168775, |
| "grad_norm": 2.0841329097747803, |
| "learning_rate": 4.166453827775474e-06, |
| "loss": 1.331, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.7791842475386779, |
| "grad_norm": 2.309027910232544, |
| "learning_rate": 4.1515795023860325e-06, |
| "loss": 1.2727, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.7819971870604783, |
| "grad_norm": 2.1550230979919434, |
| "learning_rate": 4.136712903964511e-06, |
| "loss": 1.2984, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.7848101265822784, |
| "grad_norm": 1.9745640754699707, |
| "learning_rate": 4.121854167908072e-06, |
| "loss": 0.8655, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.7876230661040786, |
| "grad_norm": 1.838762879371643, |
| "learning_rate": 4.107003429542273e-06, |
| "loss": 0.8657, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.790436005625879, |
| "grad_norm": 3.8649277687072754, |
| "learning_rate": 4.092160824119836e-06, |
| "loss": 1.0927, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.7932489451476794, |
| "grad_norm": 1.946352481842041, |
| "learning_rate": 4.077326486819405e-06, |
| "loss": 0.922, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.7960618846694796, |
| "grad_norm": 1.9564697742462158, |
| "learning_rate": 4.06250055274433e-06, |
| "loss": 1.1767, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.7988748241912798, |
| "grad_norm": 2.0671567916870117, |
| "learning_rate": 4.047683156921422e-06, |
| "loss": 1.1347, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.8016877637130801, |
| "grad_norm": 2.086289167404175, |
| "learning_rate": 4.0328744342997355e-06, |
| "loss": 1.2172, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.8045007032348805, |
| "grad_norm": 1.74513578414917, |
| "learning_rate": 4.0180745197493295e-06, |
| "loss": 1.1084, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.8073136427566807, |
| "grad_norm": 2.2042808532714844, |
| "learning_rate": 4.0032835480600516e-06, |
| "loss": 1.3802, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.810126582278481, |
| "grad_norm": 2.1729772090911865, |
| "learning_rate": 3.9885016539402896e-06, |
| "loss": 1.1866, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.8129395218002813, |
| "grad_norm": 2.0441439151763916, |
| "learning_rate": 3.973728972015771e-06, |
| "loss": 1.1282, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.8157524613220817, |
| "grad_norm": 2.044088125228882, |
| "learning_rate": 3.958965636828314e-06, |
| "loss": 1.0972, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.8185654008438819, |
| "grad_norm": 2.1738321781158447, |
| "learning_rate": 3.944211782834618e-06, |
| "loss": 1.1018, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.821378340365682, |
| "grad_norm": 2.4498589038848877, |
| "learning_rate": 3.929467544405027e-06, |
| "loss": 1.1727, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.8241912798874824, |
| "grad_norm": 2.110391616821289, |
| "learning_rate": 3.9147330558223175e-06, |
| "loss": 1.2465, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.8270042194092828, |
| "grad_norm": 2.273608684539795, |
| "learning_rate": 3.900008451280462e-06, |
| "loss": 1.1749, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.8270042194092828, |
| "eval_loss": 0.6407925486564636, |
| "eval_runtime": 2.8075, |
| "eval_samples_per_second": 9.261, |
| "eval_steps_per_second": 1.425, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.8270042194092828, |
| "eval_active_sample_count": 30, |
| "eval_avg_loss": 601.5, |
| "eval_avg_mem_token_accuracy": 0.25177304964539005, |
| "eval_avg_mem_token_gt_count": 9.4, |
| "eval_avg_mem_token_precision": 0.0082385704339754, |
| "eval_avg_mem_token_rate": 0.5732721346371317, |
| "eval_avg_mem_token_recall(Accuracy)": 0.25177304964539005, |
| "eval_avg_slot_norm_mean": 197.63333333333333, |
| "eval_avg_slot_sim_mean": 0.996875, |
| "eval_global_step": 650, |
| "eval_loss": 0.6407925486564636, |
| "eval_num_samples": 30, |
| "eval_runtime": 2.8075, |
| "eval_samples_per_second": 9.261, |
| "eval_sim_active_sample_count": 30, |
| "eval_steps_per_second": 1.425, |
| "eval_total_correct_count": 71, |
| "eval_total_gt_mem_token_count": 282, |
| "eval_total_positions": 15033, |
| "eval_total_pred_mem_token_count": 8618, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.829817158931083, |
| "grad_norm": 2.3315672874450684, |
| "learning_rate": 3.885293864883423e-06, |
| "loss": 1.1839, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.8326300984528832, |
| "grad_norm": 2.203946828842163, |
| "learning_rate": 3.870589430643915e-06, |
| "loss": 1.1069, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.8354430379746836, |
| "grad_norm": 2.159895896911621, |
| "learning_rate": 3.8558952824822e-06, |
| "loss": 1.147, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.838255977496484, |
| "grad_norm": 2.023045301437378, |
| "learning_rate": 3.84121155422485e-06, |
| "loss": 0.888, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.8410689170182841, |
| "grad_norm": 2.383005380630493, |
| "learning_rate": 3.826538379603549e-06, |
| "loss": 1.4156, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.8438818565400843, |
| "grad_norm": 2.3636224269866943, |
| "learning_rate": 3.8118758922538533e-06, |
| "loss": 1.0916, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.8466947960618847, |
| "grad_norm": 2.039092779159546, |
| "learning_rate": 3.7972242257139953e-06, |
| "loss": 1.2214, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.849507735583685, |
| "grad_norm": 2.0451226234436035, |
| "learning_rate": 3.782583513423647e-06, |
| "loss": 1.3025, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.8523206751054853, |
| "grad_norm": 2.2477307319641113, |
| "learning_rate": 3.7679538887227247e-06, |
| "loss": 1.3284, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.8551336146272854, |
| "grad_norm": 2.366098165512085, |
| "learning_rate": 3.753335484850157e-06, |
| "loss": 1.2683, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.8579465541490858, |
| "grad_norm": 2.1643450260162354, |
| "learning_rate": 3.738728434942684e-06, |
| "loss": 1.1879, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.8607594936708862, |
| "grad_norm": 2.3253345489501953, |
| "learning_rate": 3.7241328720336377e-06, |
| "loss": 1.2502, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.8635724331926864, |
| "grad_norm": 1.8580361604690552, |
| "learning_rate": 3.709548929051732e-06, |
| "loss": 0.9708, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.8663853727144866, |
| "grad_norm": 2.173644542694092, |
| "learning_rate": 3.6949767388198554e-06, |
| "loss": 1.2449, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.869198312236287, |
| "grad_norm": 1.964975357055664, |
| "learning_rate": 3.680416434053854e-06, |
| "loss": 1.1799, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.8720112517580874, |
| "grad_norm": 2.169707775115967, |
| "learning_rate": 3.6658681473613333e-06, |
| "loss": 1.2694, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.8748241912798875, |
| "grad_norm": 1.9698622226715088, |
| "learning_rate": 3.651332011240437e-06, |
| "loss": 1.1431, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.8776371308016877, |
| "grad_norm": 2.4650795459747314, |
| "learning_rate": 3.636808158078656e-06, |
| "loss": 1.3374, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.880450070323488, |
| "grad_norm": 1.978132724761963, |
| "learning_rate": 3.622296720151608e-06, |
| "loss": 0.9086, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.8832630098452883, |
| "grad_norm": 1.8494510650634766, |
| "learning_rate": 3.607797829621843e-06, |
| "loss": 1.0412, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.8860759493670884, |
| "grad_norm": 2.31000018119812, |
| "learning_rate": 3.5933116185376325e-06, |
| "loss": 1.2616, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.8888888888888888, |
| "grad_norm": 2.1177399158477783, |
| "learning_rate": 3.578838218831776e-06, |
| "loss": 1.0584, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.8917018284106892, |
| "grad_norm": 2.711202621459961, |
| "learning_rate": 3.5643777623203857e-06, |
| "loss": 1.4235, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.8945147679324894, |
| "grad_norm": 2.0394771099090576, |
| "learning_rate": 3.5499303807017018e-06, |
| "loss": 1.0978, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.8973277074542896, |
| "grad_norm": 1.9236093759536743, |
| "learning_rate": 3.5354962055548802e-06, |
| "loss": 1.0943, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.90014064697609, |
| "grad_norm": 2.159970283508301, |
| "learning_rate": 3.5210753683388014e-06, |
| "loss": 1.1188, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.9029535864978904, |
| "grad_norm": 2.201075315475464, |
| "learning_rate": 3.5066680003908695e-06, |
| "loss": 1.0096, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.9057665260196905, |
| "grad_norm": 2.2006876468658447, |
| "learning_rate": 3.4922742329258207e-06, |
| "loss": 1.2433, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.9085794655414907, |
| "grad_norm": 2.1321656703948975, |
| "learning_rate": 3.47789419703452e-06, |
| "loss": 1.2714, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.9113924050632911, |
| "grad_norm": 2.141841173171997, |
| "learning_rate": 3.463528023682779e-06, |
| "loss": 1.0148, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.9142053445850915, |
| "grad_norm": 2.4476535320281982, |
| "learning_rate": 3.4491758437101487e-06, |
| "loss": 1.2952, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.9170182841068917, |
| "grad_norm": 2.855252742767334, |
| "learning_rate": 3.4348377878287443e-06, |
| "loss": 1.0821, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.9198312236286919, |
| "grad_norm": 2.2479875087738037, |
| "learning_rate": 3.4205139866220384e-06, |
| "loss": 0.9025, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.9226441631504922, |
| "grad_norm": 1.734316349029541, |
| "learning_rate": 3.4062045705436863e-06, |
| "loss": 0.9917, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.9254571026722926, |
| "grad_norm": 1.7392464876174927, |
| "learning_rate": 3.391909669916324e-06, |
| "loss": 0.6617, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.9282700421940928, |
| "grad_norm": 2.1003048419952393, |
| "learning_rate": 3.3776294149303956e-06, |
| "loss": 1.2154, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.931082981715893, |
| "grad_norm": 2.3303074836730957, |
| "learning_rate": 3.3633639356429564e-06, |
| "loss": 1.2461, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.9338959212376934, |
| "grad_norm": 2.2976810932159424, |
| "learning_rate": 3.3491133619764925e-06, |
| "loss": 1.3707, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.9367088607594938, |
| "grad_norm": 1.9439120292663574, |
| "learning_rate": 3.334877823717737e-06, |
| "loss": 0.9291, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.939521800281294, |
| "grad_norm": 2.5753273963928223, |
| "learning_rate": 3.3206574505164934e-06, |
| "loss": 1.0634, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.9423347398030941, |
| "grad_norm": 2.2259931564331055, |
| "learning_rate": 3.306452371884441e-06, |
| "loss": 1.1333, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.9451476793248945, |
| "grad_norm": 2.0289406776428223, |
| "learning_rate": 3.2922627171939726e-06, |
| "loss": 1.138, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.947960618846695, |
| "grad_norm": 2.4240784645080566, |
| "learning_rate": 3.2780886156770016e-06, |
| "loss": 1.1418, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.950773558368495, |
| "grad_norm": 2.215083122253418, |
| "learning_rate": 3.263930196423797e-06, |
| "loss": 1.42, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.9535864978902953, |
| "grad_norm": 2.2829818725585938, |
| "learning_rate": 3.2497875883817955e-06, |
| "loss": 1.1413, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.9563994374120957, |
| "grad_norm": 2.153489828109741, |
| "learning_rate": 3.2356609203544387e-06, |
| "loss": 1.2167, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.959212376933896, |
| "grad_norm": 1.974264144897461, |
| "learning_rate": 3.2215503209999952e-06, |
| "loss": 1.1241, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.9620253164556962, |
| "grad_norm": 1.9400849342346191, |
| "learning_rate": 3.207455918830384e-06, |
| "loss": 1.036, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.9648382559774964, |
| "grad_norm": 2.141404628753662, |
| "learning_rate": 3.193377842210014e-06, |
| "loss": 1.1286, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.9676511954992968, |
| "grad_norm": 2.2581005096435547, |
| "learning_rate": 3.179316219354602e-06, |
| "loss": 1.385, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.9676511954992968, |
| "eval_loss": 0.6409177184104919, |
| "eval_runtime": 2.8079, |
| "eval_samples_per_second": 9.26, |
| "eval_steps_per_second": 1.425, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.9676511954992968, |
| "eval_active_sample_count": 30, |
| "eval_avg_loss": 603.5, |
| "eval_avg_mem_token_accuracy": 0.25177304964539005, |
| "eval_avg_mem_token_gt_count": 9.4, |
| "eval_avg_mem_token_precision": 0.008195775135634306, |
| "eval_avg_mem_token_rate": 0.5762655491252577, |
| "eval_avg_mem_token_recall(Accuracy)": 0.25177304964539005, |
| "eval_avg_slot_norm_mean": 197.63333333333333, |
| "eval_avg_slot_sim_mean": 0.996875, |
| "eval_global_step": 700, |
| "eval_loss": 0.6409177184104919, |
| "eval_num_samples": 30, |
| "eval_runtime": 2.8079, |
| "eval_samples_per_second": 9.26, |
| "eval_sim_active_sample_count": 30, |
| "eval_steps_per_second": 1.425, |
| "eval_total_correct_count": 71, |
| "eval_total_gt_mem_token_count": 282, |
| "eval_total_positions": 15033, |
| "eval_total_pred_mem_token_count": 8663, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.9704641350210972, |
| "grad_norm": 2.2288899421691895, |
| "learning_rate": 3.1652711783300234e-06, |
| "loss": 1.3147, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.9732770745428974, |
| "grad_norm": 2.327530860900879, |
| "learning_rate": 3.1512428470511257e-06, |
| "loss": 1.2538, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.9760900140646975, |
| "grad_norm": 1.8935436010360718, |
| "learning_rate": 3.1372313532805766e-06, |
| "loss": 0.8867, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.978902953586498, |
| "grad_norm": 2.1964917182922363, |
| "learning_rate": 3.1232368246276956e-06, |
| "loss": 1.1226, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.9817158931082983, |
| "grad_norm": 2.11517333984375, |
| "learning_rate": 3.1092593885472965e-06, |
| "loss": 1.1076, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.9845288326300985, |
| "grad_norm": 2.36454439163208, |
| "learning_rate": 3.0952991723385152e-06, |
| "loss": 1.1308, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.9873417721518987, |
| "grad_norm": 4.362302780151367, |
| "learning_rate": 3.0813563031436676e-06, |
| "loss": 1.3241, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.990154711673699, |
| "grad_norm": 2.1657958030700684, |
| "learning_rate": 3.067430907947073e-06, |
| "loss": 1.1269, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.9929676511954995, |
| "grad_norm": 1.7424006462097168, |
| "learning_rate": 3.053523113573914e-06, |
| "loss": 0.9743, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.9957805907172996, |
| "grad_norm": 2.1487817764282227, |
| "learning_rate": 3.039633046689069e-06, |
| "loss": 1.0117, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.9985935302390998, |
| "grad_norm": 2.059786319732666, |
| "learning_rate": 3.0257608337959683e-06, |
| "loss": 1.0671, |
| "step": 711 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 1.65206778049469, |
| "learning_rate": 3.0119066012354316e-06, |
| "loss": 0.5849, |
| "step": 712 |
| }, |
| { |
| "epoch": 2.0028129395218004, |
| "grad_norm": 1.849442720413208, |
| "learning_rate": 2.9980704751845302e-06, |
| "loss": 1.0254, |
| "step": 713 |
| }, |
| { |
| "epoch": 2.0056258790436003, |
| "grad_norm": 2.224947690963745, |
| "learning_rate": 2.9842525816554237e-06, |
| "loss": 1.3435, |
| "step": 714 |
| }, |
| { |
| "epoch": 2.0084388185654007, |
| "grad_norm": 2.0207643508911133, |
| "learning_rate": 2.9704530464942254e-06, |
| "loss": 1.1889, |
| "step": 715 |
| }, |
| { |
| "epoch": 2.011251758087201, |
| "grad_norm": 1.9327627420425415, |
| "learning_rate": 2.9566719953798474e-06, |
| "loss": 0.9725, |
| "step": 716 |
| }, |
| { |
| "epoch": 2.0140646976090015, |
| "grad_norm": 2.2062811851501465, |
| "learning_rate": 2.942909553822859e-06, |
| "loss": 1.1318, |
| "step": 717 |
| }, |
| { |
| "epoch": 2.0168776371308015, |
| "grad_norm": 1.9610023498535156, |
| "learning_rate": 2.929165847164343e-06, |
| "loss": 1.02, |
| "step": 718 |
| }, |
| { |
| "epoch": 2.019690576652602, |
| "grad_norm": 2.012442111968994, |
| "learning_rate": 2.9154410005747586e-06, |
| "loss": 1.073, |
| "step": 719 |
| }, |
| { |
| "epoch": 2.0225035161744023, |
| "grad_norm": 1.9642077684402466, |
| "learning_rate": 2.901735139052787e-06, |
| "loss": 1.0427, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.0253164556962027, |
| "grad_norm": 2.19358491897583, |
| "learning_rate": 2.888048387424218e-06, |
| "loss": 1.1162, |
| "step": 721 |
| }, |
| { |
| "epoch": 2.0281293952180026, |
| "grad_norm": 1.9871453046798706, |
| "learning_rate": 2.8743808703407866e-06, |
| "loss": 1.1066, |
| "step": 722 |
| }, |
| { |
| "epoch": 2.030942334739803, |
| "grad_norm": 2.278085947036743, |
| "learning_rate": 2.8607327122790555e-06, |
| "loss": 1.1253, |
| "step": 723 |
| }, |
| { |
| "epoch": 2.0337552742616034, |
| "grad_norm": 1.7093780040740967, |
| "learning_rate": 2.8471040375392745e-06, |
| "loss": 1.0754, |
| "step": 724 |
| }, |
| { |
| "epoch": 2.036568213783404, |
| "grad_norm": 2.088590621948242, |
| "learning_rate": 2.833494970244248e-06, |
| "loss": 1.2312, |
| "step": 725 |
| }, |
| { |
| "epoch": 2.0393811533052038, |
| "grad_norm": 1.8987199068069458, |
| "learning_rate": 2.819905634338208e-06, |
| "loss": 0.9913, |
| "step": 726 |
| }, |
| { |
| "epoch": 2.042194092827004, |
| "grad_norm": 2.069563627243042, |
| "learning_rate": 2.8063361535856838e-06, |
| "loss": 1.1635, |
| "step": 727 |
| }, |
| { |
| "epoch": 2.0450070323488045, |
| "grad_norm": 2.440237522125244, |
| "learning_rate": 2.7927866515703705e-06, |
| "loss": 1.2113, |
| "step": 728 |
| }, |
| { |
| "epoch": 2.047819971870605, |
| "grad_norm": 2.0094406604766846, |
| "learning_rate": 2.7792572516940108e-06, |
| "loss": 0.9271, |
| "step": 729 |
| }, |
| { |
| "epoch": 2.050632911392405, |
| "grad_norm": 2.2327640056610107, |
| "learning_rate": 2.765748077175272e-06, |
| "loss": 1.1026, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.0534458509142053, |
| "grad_norm": 2.1008453369140625, |
| "learning_rate": 2.752259251048606e-06, |
| "loss": 1.1666, |
| "step": 731 |
| }, |
| { |
| "epoch": 2.0562587904360057, |
| "grad_norm": 1.8837400674819946, |
| "learning_rate": 2.7387908961631597e-06, |
| "loss": 0.8817, |
| "step": 732 |
| }, |
| { |
| "epoch": 2.059071729957806, |
| "grad_norm": 1.993558645248413, |
| "learning_rate": 2.725343135181622e-06, |
| "loss": 1.0745, |
| "step": 733 |
| }, |
| { |
| "epoch": 2.061884669479606, |
| "grad_norm": 2.122399091720581, |
| "learning_rate": 2.711916090579137e-06, |
| "loss": 1.1435, |
| "step": 734 |
| }, |
| { |
| "epoch": 2.0646976090014064, |
| "grad_norm": 2.0384397506713867, |
| "learning_rate": 2.698509884642163e-06, |
| "loss": 1.181, |
| "step": 735 |
| }, |
| { |
| "epoch": 2.067510548523207, |
| "grad_norm": 2.315969944000244, |
| "learning_rate": 2.6851246394673822e-06, |
| "loss": 0.9172, |
| "step": 736 |
| }, |
| { |
| "epoch": 2.070323488045007, |
| "grad_norm": 1.8696023225784302, |
| "learning_rate": 2.67176047696057e-06, |
| "loss": 0.9634, |
| "step": 737 |
| }, |
| { |
| "epoch": 2.073136427566807, |
| "grad_norm": 2.3400771617889404, |
| "learning_rate": 2.6584175188354934e-06, |
| "loss": 1.1388, |
| "step": 738 |
| }, |
| { |
| "epoch": 2.0759493670886076, |
| "grad_norm": 2.0902152061462402, |
| "learning_rate": 2.6450958866128e-06, |
| "loss": 0.9649, |
| "step": 739 |
| }, |
| { |
| "epoch": 2.078762306610408, |
| "grad_norm": 1.8135625123977661, |
| "learning_rate": 2.6317957016189155e-06, |
| "loss": 1.1267, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.0815752461322083, |
| "grad_norm": 1.949086308479309, |
| "learning_rate": 2.618517084984933e-06, |
| "loss": 1.056, |
| "step": 741 |
| }, |
| { |
| "epoch": 2.0843881856540083, |
| "grad_norm": 2.1474437713623047, |
| "learning_rate": 2.6052601576455116e-06, |
| "loss": 1.1126, |
| "step": 742 |
| }, |
| { |
| "epoch": 2.0872011251758087, |
| "grad_norm": 2.2054314613342285, |
| "learning_rate": 2.592025040337779e-06, |
| "loss": 1.1921, |
| "step": 743 |
| }, |
| { |
| "epoch": 2.090014064697609, |
| "grad_norm": 1.9321085214614868, |
| "learning_rate": 2.578811853600226e-06, |
| "loss": 0.9129, |
| "step": 744 |
| }, |
| { |
| "epoch": 2.0928270042194095, |
| "grad_norm": 2.050908327102661, |
| "learning_rate": 2.5656207177716107e-06, |
| "loss": 1.0466, |
| "step": 745 |
| }, |
| { |
| "epoch": 2.0956399437412094, |
| "grad_norm": 2.335043430328369, |
| "learning_rate": 2.552451752989865e-06, |
| "loss": 0.9907, |
| "step": 746 |
| }, |
| { |
| "epoch": 2.09845288326301, |
| "grad_norm": 2.1719613075256348, |
| "learning_rate": 2.539305079190999e-06, |
| "loss": 1.1855, |
| "step": 747 |
| }, |
| { |
| "epoch": 2.1012658227848102, |
| "grad_norm": 2.2501490116119385, |
| "learning_rate": 2.5261808161080047e-06, |
| "loss": 1.1693, |
| "step": 748 |
| }, |
| { |
| "epoch": 2.1040787623066106, |
| "grad_norm": 2.1329755783081055, |
| "learning_rate": 2.513079083269774e-06, |
| "loss": 1.1507, |
| "step": 749 |
| }, |
| { |
| "epoch": 2.1068917018284106, |
| "grad_norm": 1.9924427270889282, |
| "learning_rate": 2.5000000000000015e-06, |
| "loss": 1.035, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.1068917018284106, |
| "eval_loss": 0.6396089792251587, |
| "eval_runtime": 2.846, |
| "eval_samples_per_second": 9.136, |
| "eval_steps_per_second": 1.405, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.1068917018284106, |
| "eval_active_sample_count": 30, |
| "eval_avg_loss": 596.5, |
| "eval_avg_mem_token_accuracy": 0.2553191489361702, |
| "eval_avg_mem_token_gt_count": 9.4, |
| "eval_avg_mem_token_precision": 0.00837404047452896, |
| "eval_avg_mem_token_rate": 0.5719417281979645, |
| "eval_avg_mem_token_recall(Accuracy)": 0.2553191489361702, |
| "eval_avg_slot_norm_mean": 197.63333333333333, |
| "eval_avg_slot_sim_mean": 0.996875, |
| "eval_global_step": 750, |
| "eval_loss": 0.6396089792251587, |
| "eval_num_samples": 30, |
| "eval_runtime": 2.846, |
| "eval_samples_per_second": 9.136, |
| "eval_sim_active_sample_count": 30, |
| "eval_steps_per_second": 1.405, |
| "eval_total_correct_count": 72, |
| "eval_total_gt_mem_token_count": 282, |
| "eval_total_positions": 15033, |
| "eval_total_pred_mem_token_count": 8598, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.109704641350211, |
| "grad_norm": 1.8336857557296753, |
| "learning_rate": 2.4869436854161e-06, |
| "loss": 0.9486, |
| "step": 751 |
| }, |
| { |
| "epoch": 2.1125175808720114, |
| "grad_norm": 2.0312447547912598, |
| "learning_rate": 2.4739102584281268e-06, |
| "loss": 1.2113, |
| "step": 752 |
| }, |
| { |
| "epoch": 2.1153305203938118, |
| "grad_norm": 2.0355682373046875, |
| "learning_rate": 2.4608998377376752e-06, |
| "loss": 1.1002, |
| "step": 753 |
| }, |
| { |
| "epoch": 2.1181434599156117, |
| "grad_norm": 2.6331751346588135, |
| "learning_rate": 2.447912541836826e-06, |
| "loss": 1.1891, |
| "step": 754 |
| }, |
| { |
| "epoch": 2.120956399437412, |
| "grad_norm": 2.6497652530670166, |
| "learning_rate": 2.4349484890070357e-06, |
| "loss": 1.0924, |
| "step": 755 |
| }, |
| { |
| "epoch": 2.1237693389592125, |
| "grad_norm": 1.9082729816436768, |
| "learning_rate": 2.4220077973180906e-06, |
| "loss": 1.0445, |
| "step": 756 |
| }, |
| { |
| "epoch": 2.1265822784810124, |
| "grad_norm": 1.8643864393234253, |
| "learning_rate": 2.4090905846270006e-06, |
| "loss": 1.0385, |
| "step": 757 |
| }, |
| { |
| "epoch": 2.129395218002813, |
| "grad_norm": 2.0666754245758057, |
| "learning_rate": 2.396196968576957e-06, |
| "loss": 1.2737, |
| "step": 758 |
| }, |
| { |
| "epoch": 2.1322081575246132, |
| "grad_norm": 2.5806944370269775, |
| "learning_rate": 2.3833270665962293e-06, |
| "loss": 0.9353, |
| "step": 759 |
| }, |
| { |
| "epoch": 2.1350210970464136, |
| "grad_norm": 2.0371792316436768, |
| "learning_rate": 2.370480995897127e-06, |
| "loss": 1.1003, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.1378340365682136, |
| "grad_norm": 1.9753756523132324, |
| "learning_rate": 2.3576588734749022e-06, |
| "loss": 0.9872, |
| "step": 761 |
| }, |
| { |
| "epoch": 2.140646976090014, |
| "grad_norm": 2.2429325580596924, |
| "learning_rate": 2.3448608161067117e-06, |
| "loss": 1.0195, |
| "step": 762 |
| }, |
| { |
| "epoch": 2.1434599156118144, |
| "grad_norm": 1.8056210279464722, |
| "learning_rate": 2.3320869403505324e-06, |
| "loss": 0.9248, |
| "step": 763 |
| }, |
| { |
| "epoch": 2.1462728551336148, |
| "grad_norm": 1.9145182371139526, |
| "learning_rate": 2.3193373625441113e-06, |
| "loss": 0.9601, |
| "step": 764 |
| }, |
| { |
| "epoch": 2.1490857946554147, |
| "grad_norm": 2.0845413208007812, |
| "learning_rate": 2.3066121988038996e-06, |
| "loss": 1.1699, |
| "step": 765 |
| }, |
| { |
| "epoch": 2.151898734177215, |
| "grad_norm": 1.9216276407241821, |
| "learning_rate": 2.2939115650240008e-06, |
| "loss": 1.0108, |
| "step": 766 |
| }, |
| { |
| "epoch": 2.1547116736990155, |
| "grad_norm": 2.0462570190429688, |
| "learning_rate": 2.2812355768751106e-06, |
| "loss": 0.8837, |
| "step": 767 |
| }, |
| { |
| "epoch": 2.157524613220816, |
| "grad_norm": 2.385082721710205, |
| "learning_rate": 2.268584349803464e-06, |
| "loss": 1.1446, |
| "step": 768 |
| }, |
| { |
| "epoch": 2.160337552742616, |
| "grad_norm": 2.243379592895508, |
| "learning_rate": 2.2559579990297943e-06, |
| "loss": 1.2207, |
| "step": 769 |
| }, |
| { |
| "epoch": 2.1631504922644162, |
| "grad_norm": 2.170370101928711, |
| "learning_rate": 2.2433566395482577e-06, |
| "loss": 1.3006, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.1659634317862166, |
| "grad_norm": 2.1776270866394043, |
| "learning_rate": 2.2307803861254207e-06, |
| "loss": 1.1889, |
| "step": 771 |
| }, |
| { |
| "epoch": 2.168776371308017, |
| "grad_norm": 2.114034652709961, |
| "learning_rate": 2.218229353299181e-06, |
| "loss": 1.2131, |
| "step": 772 |
| }, |
| { |
| "epoch": 2.171589310829817, |
| "grad_norm": 2.2640528678894043, |
| "learning_rate": 2.2057036553777565e-06, |
| "loss": 1.3633, |
| "step": 773 |
| }, |
| { |
| "epoch": 2.1744022503516174, |
| "grad_norm": 1.7782313823699951, |
| "learning_rate": 2.1932034064386113e-06, |
| "loss": 0.9327, |
| "step": 774 |
| }, |
| { |
| "epoch": 2.1772151898734178, |
| "grad_norm": 2.046961545944214, |
| "learning_rate": 2.1807287203274504e-06, |
| "loss": 1.2086, |
| "step": 775 |
| }, |
| { |
| "epoch": 2.180028129395218, |
| "grad_norm": 2.103487491607666, |
| "learning_rate": 2.168279710657149e-06, |
| "loss": 1.0986, |
| "step": 776 |
| }, |
| { |
| "epoch": 2.182841068917018, |
| "grad_norm": 2.1570355892181396, |
| "learning_rate": 2.1558564908067497e-06, |
| "loss": 1.0043, |
| "step": 777 |
| }, |
| { |
| "epoch": 2.1856540084388185, |
| "grad_norm": 1.9457972049713135, |
| "learning_rate": 2.1434591739204062e-06, |
| "loss": 1.067, |
| "step": 778 |
| }, |
| { |
| "epoch": 2.188466947960619, |
| "grad_norm": 2.141794204711914, |
| "learning_rate": 2.1310878729063645e-06, |
| "loss": 1.1144, |
| "step": 779 |
| }, |
| { |
| "epoch": 2.1912798874824193, |
| "grad_norm": 1.9879792928695679, |
| "learning_rate": 2.118742700435931e-06, |
| "loss": 1.0625, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.1940928270042193, |
| "grad_norm": 2.3529539108276367, |
| "learning_rate": 2.1064237689424483e-06, |
| "loss": 1.2867, |
| "step": 781 |
| }, |
| { |
| "epoch": 2.1969057665260197, |
| "grad_norm": 2.0593795776367188, |
| "learning_rate": 2.0941311906202672e-06, |
| "loss": 1.3383, |
| "step": 782 |
| }, |
| { |
| "epoch": 2.19971870604782, |
| "grad_norm": 2.1530141830444336, |
| "learning_rate": 2.081865077423731e-06, |
| "loss": 1.2258, |
| "step": 783 |
| }, |
| { |
| "epoch": 2.2025316455696204, |
| "grad_norm": 1.9634898900985718, |
| "learning_rate": 2.06962554106615e-06, |
| "loss": 1.1629, |
| "step": 784 |
| }, |
| { |
| "epoch": 2.2053445850914204, |
| "grad_norm": 2.2565033435821533, |
| "learning_rate": 2.0574126930187882e-06, |
| "loss": 1.3058, |
| "step": 785 |
| }, |
| { |
| "epoch": 2.208157524613221, |
| "grad_norm": 2.420267105102539, |
| "learning_rate": 2.0452266445098457e-06, |
| "loss": 1.2447, |
| "step": 786 |
| }, |
| { |
| "epoch": 2.210970464135021, |
| "grad_norm": 2.2069785594940186, |
| "learning_rate": 2.0330675065234466e-06, |
| "loss": 1.1835, |
| "step": 787 |
| }, |
| { |
| "epoch": 2.2137834036568216, |
| "grad_norm": 2.1070237159729004, |
| "learning_rate": 2.0209353897986288e-06, |
| "loss": 1.1873, |
| "step": 788 |
| }, |
| { |
| "epoch": 2.2165963431786215, |
| "grad_norm": 1.9886164665222168, |
| "learning_rate": 2.0088304048283337e-06, |
| "loss": 1.0022, |
| "step": 789 |
| }, |
| { |
| "epoch": 2.219409282700422, |
| "grad_norm": 2.1714046001434326, |
| "learning_rate": 1.9967526618584016e-06, |
| "loss": 1.1458, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.2222222222222223, |
| "grad_norm": 2.1026611328125, |
| "learning_rate": 1.984702270886566e-06, |
| "loss": 1.1671, |
| "step": 791 |
| }, |
| { |
| "epoch": 2.2250351617440227, |
| "grad_norm": 2.3853933811187744, |
| "learning_rate": 1.9726793416614532e-06, |
| "loss": 1.2162, |
| "step": 792 |
| }, |
| { |
| "epoch": 2.2278481012658227, |
| "grad_norm": 2.1531338691711426, |
| "learning_rate": 1.9606839836815872e-06, |
| "loss": 1.2844, |
| "step": 793 |
| }, |
| { |
| "epoch": 2.230661040787623, |
| "grad_norm": 2.198315143585205, |
| "learning_rate": 1.948716306194376e-06, |
| "loss": 1.1015, |
| "step": 794 |
| }, |
| { |
| "epoch": 2.2334739803094235, |
| "grad_norm": 1.9941608905792236, |
| "learning_rate": 1.9367764181951403e-06, |
| "loss": 0.9099, |
| "step": 795 |
| }, |
| { |
| "epoch": 2.2362869198312234, |
| "grad_norm": 2.1348161697387695, |
| "learning_rate": 1.924864428426103e-06, |
| "loss": 0.9096, |
| "step": 796 |
| }, |
| { |
| "epoch": 2.239099859353024, |
| "grad_norm": 2.182652235031128, |
| "learning_rate": 1.9129804453754053e-06, |
| "loss": 1.2748, |
| "step": 797 |
| }, |
| { |
| "epoch": 2.241912798874824, |
| "grad_norm": 2.1464662551879883, |
| "learning_rate": 1.9011245772761173e-06, |
| "loss": 1.2931, |
| "step": 798 |
| }, |
| { |
| "epoch": 2.2447257383966246, |
| "grad_norm": 2.15000319480896, |
| "learning_rate": 1.889296932105254e-06, |
| "loss": 0.9775, |
| "step": 799 |
| }, |
| { |
| "epoch": 2.247538677918425, |
| "grad_norm": 2.1729373931884766, |
| "learning_rate": 1.8774976175827898e-06, |
| "loss": 1.182, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.247538677918425, |
| "eval_loss": 0.6389347910881042, |
| "eval_runtime": 2.7883, |
| "eval_samples_per_second": 9.325, |
| "eval_steps_per_second": 1.435, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.247538677918425, |
| "eval_active_sample_count": 30, |
| "eval_avg_loss": 600.75, |
| "eval_avg_mem_token_accuracy": 0.24822695035460993, |
| "eval_avg_mem_token_gt_count": 9.4, |
| "eval_avg_mem_token_precision": 0.008117824423054622, |
| "eval_avg_mem_token_rate": 0.5736047362469234, |
| "eval_avg_mem_token_recall(Accuracy)": 0.24822695035460993, |
| "eval_avg_slot_norm_mean": 197.63333333333333, |
| "eval_avg_slot_sim_mean": 0.996875, |
| "eval_global_step": 800, |
| "eval_loss": 0.6389347910881042, |
| "eval_num_samples": 30, |
| "eval_runtime": 2.7883, |
| "eval_samples_per_second": 9.325, |
| "eval_sim_active_sample_count": 30, |
| "eval_steps_per_second": 1.435, |
| "eval_total_correct_count": 70, |
| "eval_total_gt_mem_token_count": 282, |
| "eval_total_positions": 15033, |
| "eval_total_pred_mem_token_count": 8623, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.250351617440225, |
| "grad_norm": 2.0819458961486816, |
| "learning_rate": 1.8657267411706802e-06, |
| "loss": 1.0104, |
| "step": 801 |
| }, |
| { |
| "epoch": 2.2531645569620253, |
| "grad_norm": 2.394252061843872, |
| "learning_rate": 1.853984410071879e-06, |
| "loss": 1.249, |
| "step": 802 |
| }, |
| { |
| "epoch": 2.2559774964838257, |
| "grad_norm": 2.0108907222747803, |
| "learning_rate": 1.8422707312293663e-06, |
| "loss": 1.0054, |
| "step": 803 |
| }, |
| { |
| "epoch": 2.2587904360056257, |
| "grad_norm": 2.035367488861084, |
| "learning_rate": 1.8305858113251717e-06, |
| "loss": 1.052, |
| "step": 804 |
| }, |
| { |
| "epoch": 2.261603375527426, |
| "grad_norm": 2.199094772338867, |
| "learning_rate": 1.8189297567794029e-06, |
| "loss": 1.2031, |
| "step": 805 |
| }, |
| { |
| "epoch": 2.2644163150492265, |
| "grad_norm": 2.0634264945983887, |
| "learning_rate": 1.8073026737492783e-06, |
| "loss": 1.1867, |
| "step": 806 |
| }, |
| { |
| "epoch": 2.267229254571027, |
| "grad_norm": 2.314810037612915, |
| "learning_rate": 1.7957046681281582e-06, |
| "loss": 1.2492, |
| "step": 807 |
| }, |
| { |
| "epoch": 2.270042194092827, |
| "grad_norm": 2.0201666355133057, |
| "learning_rate": 1.7841358455445807e-06, |
| "loss": 1.1079, |
| "step": 808 |
| }, |
| { |
| "epoch": 2.272855133614627, |
| "grad_norm": 2.239051342010498, |
| "learning_rate": 1.7725963113612998e-06, |
| "loss": 1.1677, |
| "step": 809 |
| }, |
| { |
| "epoch": 2.2756680731364276, |
| "grad_norm": 2.3143956661224365, |
| "learning_rate": 1.7610861706743316e-06, |
| "loss": 1.1724, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.278481012658228, |
| "grad_norm": 2.3886356353759766, |
| "learning_rate": 1.7496055283119812e-06, |
| "loss": 1.2109, |
| "step": 811 |
| }, |
| { |
| "epoch": 2.281293952180028, |
| "grad_norm": 2.2909440994262695, |
| "learning_rate": 1.7381544888339103e-06, |
| "loss": 1.0614, |
| "step": 812 |
| }, |
| { |
| "epoch": 2.2841068917018283, |
| "grad_norm": 2.069227695465088, |
| "learning_rate": 1.726733156530161e-06, |
| "loss": 1.0202, |
| "step": 813 |
| }, |
| { |
| "epoch": 2.2869198312236287, |
| "grad_norm": 2.242708683013916, |
| "learning_rate": 1.7153416354202307e-06, |
| "loss": 1.0972, |
| "step": 814 |
| }, |
| { |
| "epoch": 2.289732770745429, |
| "grad_norm": 2.0846173763275146, |
| "learning_rate": 1.7039800292520997e-06, |
| "loss": 1.1095, |
| "step": 815 |
| }, |
| { |
| "epoch": 2.292545710267229, |
| "grad_norm": 1.924421787261963, |
| "learning_rate": 1.69264844150131e-06, |
| "loss": 1.1585, |
| "step": 816 |
| }, |
| { |
| "epoch": 2.2953586497890295, |
| "grad_norm": 1.7929229736328125, |
| "learning_rate": 1.6813469753700013e-06, |
| "loss": 0.9856, |
| "step": 817 |
| }, |
| { |
| "epoch": 2.29817158931083, |
| "grad_norm": 1.9918988943099976, |
| "learning_rate": 1.6700757337859907e-06, |
| "loss": 1.0617, |
| "step": 818 |
| }, |
| { |
| "epoch": 2.3009845288326303, |
| "grad_norm": 2.357882499694824, |
| "learning_rate": 1.6588348194018205e-06, |
| "loss": 1.0826, |
| "step": 819 |
| }, |
| { |
| "epoch": 2.3037974683544302, |
| "grad_norm": 2.163602828979492, |
| "learning_rate": 1.6476243345938293e-06, |
| "loss": 1.342, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.3066104078762306, |
| "grad_norm": 1.7069376707077026, |
| "learning_rate": 1.6364443814612207e-06, |
| "loss": 0.933, |
| "step": 821 |
| }, |
| { |
| "epoch": 2.309423347398031, |
| "grad_norm": 2.1436493396759033, |
| "learning_rate": 1.6252950618251311e-06, |
| "loss": 1.2028, |
| "step": 822 |
| }, |
| { |
| "epoch": 2.3122362869198314, |
| "grad_norm": 2.0016818046569824, |
| "learning_rate": 1.614176477227703e-06, |
| "loss": 1.1039, |
| "step": 823 |
| }, |
| { |
| "epoch": 2.3150492264416314, |
| "grad_norm": 2.098785400390625, |
| "learning_rate": 1.6030887289311604e-06, |
| "loss": 1.0678, |
| "step": 824 |
| }, |
| { |
| "epoch": 2.3178621659634318, |
| "grad_norm": 2.156809091567993, |
| "learning_rate": 1.5920319179168859e-06, |
| "loss": 1.2103, |
| "step": 825 |
| }, |
| { |
| "epoch": 2.320675105485232, |
| "grad_norm": 2.111753463745117, |
| "learning_rate": 1.5810061448845028e-06, |
| "loss": 1.1346, |
| "step": 826 |
| }, |
| { |
| "epoch": 2.3234880450070325, |
| "grad_norm": 2.18839693069458, |
| "learning_rate": 1.5700115102509562e-06, |
| "loss": 1.1966, |
| "step": 827 |
| }, |
| { |
| "epoch": 2.3263009845288325, |
| "grad_norm": 2.2580389976501465, |
| "learning_rate": 1.5590481141495988e-06, |
| "loss": 1.2102, |
| "step": 828 |
| }, |
| { |
| "epoch": 2.329113924050633, |
| "grad_norm": 2.530665874481201, |
| "learning_rate": 1.5481160564292802e-06, |
| "loss": 1.3096, |
| "step": 829 |
| }, |
| { |
| "epoch": 2.3319268635724333, |
| "grad_norm": 2.008321523666382, |
| "learning_rate": 1.5372154366534325e-06, |
| "loss": 1.0493, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.3347398030942337, |
| "grad_norm": 1.8788542747497559, |
| "learning_rate": 1.5263463540991769e-06, |
| "loss": 1.1453, |
| "step": 831 |
| }, |
| { |
| "epoch": 2.3375527426160336, |
| "grad_norm": 2.1390604972839355, |
| "learning_rate": 1.5155089077563968e-06, |
| "loss": 0.9813, |
| "step": 832 |
| }, |
| { |
| "epoch": 2.340365682137834, |
| "grad_norm": 2.1308085918426514, |
| "learning_rate": 1.5047031963268617e-06, |
| "loss": 1.3274, |
| "step": 833 |
| }, |
| { |
| "epoch": 2.3431786216596344, |
| "grad_norm": 2.2323601245880127, |
| "learning_rate": 1.49392931822331e-06, |
| "loss": 1.1762, |
| "step": 834 |
| }, |
| { |
| "epoch": 2.3459915611814344, |
| "grad_norm": 2.2134149074554443, |
| "learning_rate": 1.4831873715685597e-06, |
| "loss": 1.1039, |
| "step": 835 |
| }, |
| { |
| "epoch": 2.3488045007032348, |
| "grad_norm": 1.834775686264038, |
| "learning_rate": 1.4724774541946145e-06, |
| "loss": 0.9826, |
| "step": 836 |
| }, |
| { |
| "epoch": 2.351617440225035, |
| "grad_norm": 1.9355462789535522, |
| "learning_rate": 1.461799663641773e-06, |
| "loss": 1.0111, |
| "step": 837 |
| }, |
| { |
| "epoch": 2.3544303797468356, |
| "grad_norm": 2.2236545085906982, |
| "learning_rate": 1.4511540971577377e-06, |
| "loss": 1.1159, |
| "step": 838 |
| }, |
| { |
| "epoch": 2.357243319268636, |
| "grad_norm": 2.29103946685791, |
| "learning_rate": 1.440540851696733e-06, |
| "loss": 1.3618, |
| "step": 839 |
| }, |
| { |
| "epoch": 2.360056258790436, |
| "grad_norm": 2.335484743118286, |
| "learning_rate": 1.429960023918619e-06, |
| "loss": 1.165, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.3628691983122363, |
| "grad_norm": 2.207131862640381, |
| "learning_rate": 1.4194117101880134e-06, |
| "loss": 1.11, |
| "step": 841 |
| }, |
| { |
| "epoch": 2.3656821378340367, |
| "grad_norm": 1.7570301294326782, |
| "learning_rate": 1.4088960065734137e-06, |
| "loss": 0.9707, |
| "step": 842 |
| }, |
| { |
| "epoch": 2.3684950773558366, |
| "grad_norm": 2.027989149093628, |
| "learning_rate": 1.3984130088463204e-06, |
| "loss": 1.1416, |
| "step": 843 |
| }, |
| { |
| "epoch": 2.371308016877637, |
| "grad_norm": 2.0788614749908447, |
| "learning_rate": 1.3879628124803662e-06, |
| "loss": 1.0461, |
| "step": 844 |
| }, |
| { |
| "epoch": 2.3741209563994374, |
| "grad_norm": 1.9784637689590454, |
| "learning_rate": 1.3775455126504466e-06, |
| "loss": 1.0517, |
| "step": 845 |
| }, |
| { |
| "epoch": 2.376933895921238, |
| "grad_norm": 1.6520678997039795, |
| "learning_rate": 1.3671612042318527e-06, |
| "loss": 0.8804, |
| "step": 846 |
| }, |
| { |
| "epoch": 2.379746835443038, |
| "grad_norm": 2.11843204498291, |
| "learning_rate": 1.3568099817994068e-06, |
| "loss": 1.0982, |
| "step": 847 |
| }, |
| { |
| "epoch": 2.382559774964838, |
| "grad_norm": 2.0866153240203857, |
| "learning_rate": 1.3464919396266018e-06, |
| "loss": 1.1652, |
| "step": 848 |
| }, |
| { |
| "epoch": 2.3853727144866386, |
| "grad_norm": 2.224863052368164, |
| "learning_rate": 1.3362071716847424e-06, |
| "loss": 1.2356, |
| "step": 849 |
| }, |
| { |
| "epoch": 2.388185654008439, |
| "grad_norm": 2.0457394123077393, |
| "learning_rate": 1.3259557716420868e-06, |
| "loss": 1.2145, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.388185654008439, |
| "eval_loss": 0.6384085416793823, |
| "eval_runtime": 2.8184, |
| "eval_samples_per_second": 9.225, |
| "eval_steps_per_second": 1.419, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.388185654008439, |
| "eval_active_sample_count": 30, |
| "eval_avg_loss": 600.875, |
| "eval_avg_mem_token_accuracy": 0.25177304964539005, |
| "eval_avg_mem_token_gt_count": 9.4, |
| "eval_avg_mem_token_precision": 0.008220446914437884, |
| "eval_avg_mem_token_rate": 0.5745360207543404, |
| "eval_avg_mem_token_recall(Accuracy)": 0.25177304964539005, |
| "eval_avg_slot_norm_mean": 197.63333333333333, |
| "eval_avg_slot_sim_mean": 0.996875, |
| "eval_global_step": 850, |
| "eval_loss": 0.6384085416793823, |
| "eval_num_samples": 30, |
| "eval_runtime": 2.8184, |
| "eval_samples_per_second": 9.225, |
| "eval_sim_active_sample_count": 30, |
| "eval_steps_per_second": 1.419, |
| "eval_total_correct_count": 71, |
| "eval_total_gt_mem_token_count": 282, |
| "eval_total_positions": 15033, |
| "eval_total_pred_mem_token_count": 8637, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.390998593530239, |
| "grad_norm": 2.302175760269165, |
| "learning_rate": 1.3157378328630027e-06, |
| "loss": 1.2569, |
| "step": 851 |
| }, |
| { |
| "epoch": 2.3938115330520393, |
| "grad_norm": 2.0771360397338867, |
| "learning_rate": 1.3055534484070997e-06, |
| "loss": 1.0361, |
| "step": 852 |
| }, |
| { |
| "epoch": 2.3966244725738397, |
| "grad_norm": 2.1782445907592773, |
| "learning_rate": 1.2954027110284035e-06, |
| "loss": 1.1286, |
| "step": 853 |
| }, |
| { |
| "epoch": 2.39943741209564, |
| "grad_norm": 2.210466146469116, |
| "learning_rate": 1.285285713174489e-06, |
| "loss": 1.0967, |
| "step": 854 |
| }, |
| { |
| "epoch": 2.40225035161744, |
| "grad_norm": 2.1318819522857666, |
| "learning_rate": 1.2752025469856598e-06, |
| "loss": 1.1318, |
| "step": 855 |
| }, |
| { |
| "epoch": 2.4050632911392404, |
| "grad_norm": 2.405397653579712, |
| "learning_rate": 1.2651533042940883e-06, |
| "loss": 1.1057, |
| "step": 856 |
| }, |
| { |
| "epoch": 2.407876230661041, |
| "grad_norm": 1.8459330797195435, |
| "learning_rate": 1.2551380766230003e-06, |
| "loss": 0.9308, |
| "step": 857 |
| }, |
| { |
| "epoch": 2.4106891701828412, |
| "grad_norm": 2.1533725261688232, |
| "learning_rate": 1.2451569551858183e-06, |
| "loss": 1.1996, |
| "step": 858 |
| }, |
| { |
| "epoch": 2.413502109704641, |
| "grad_norm": 2.2185754776000977, |
| "learning_rate": 1.2352100308853548e-06, |
| "loss": 1.3325, |
| "step": 859 |
| }, |
| { |
| "epoch": 2.4163150492264416, |
| "grad_norm": 1.8294565677642822, |
| "learning_rate": 1.225297394312966e-06, |
| "loss": 0.8245, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.419127988748242, |
| "grad_norm": 2.1881840229034424, |
| "learning_rate": 1.2154191357477352e-06, |
| "loss": 1.1655, |
| "step": 861 |
| }, |
| { |
| "epoch": 2.4219409282700424, |
| "grad_norm": 1.8707904815673828, |
| "learning_rate": 1.205575345155649e-06, |
| "loss": 0.9647, |
| "step": 862 |
| }, |
| { |
| "epoch": 2.4247538677918423, |
| "grad_norm": 1.8865529298782349, |
| "learning_rate": 1.1957661121887782e-06, |
| "loss": 0.972, |
| "step": 863 |
| }, |
| { |
| "epoch": 2.4275668073136427, |
| "grad_norm": 2.1275415420532227, |
| "learning_rate": 1.1859915261844596e-06, |
| "loss": 0.9982, |
| "step": 864 |
| }, |
| { |
| "epoch": 2.430379746835443, |
| "grad_norm": 2.7815465927124023, |
| "learning_rate": 1.1762516761644831e-06, |
| "loss": 0.9779, |
| "step": 865 |
| }, |
| { |
| "epoch": 2.4331926863572435, |
| "grad_norm": 2.201364517211914, |
| "learning_rate": 1.1665466508342876e-06, |
| "loss": 1.1864, |
| "step": 866 |
| }, |
| { |
| "epoch": 2.4360056258790435, |
| "grad_norm": 1.9111566543579102, |
| "learning_rate": 1.1568765385821373e-06, |
| "loss": 1.1079, |
| "step": 867 |
| }, |
| { |
| "epoch": 2.438818565400844, |
| "grad_norm": 2.0928750038146973, |
| "learning_rate": 1.147241427478336e-06, |
| "loss": 0.8893, |
| "step": 868 |
| }, |
| { |
| "epoch": 2.4416315049226442, |
| "grad_norm": 2.2094082832336426, |
| "learning_rate": 1.1376414052744055e-06, |
| "loss": 1.1135, |
| "step": 869 |
| }, |
| { |
| "epoch": 2.4444444444444446, |
| "grad_norm": 2.2001736164093018, |
| "learning_rate": 1.128076559402308e-06, |
| "loss": 1.0784, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.4472573839662446, |
| "grad_norm": 2.1906962394714355, |
| "learning_rate": 1.1185469769736262e-06, |
| "loss": 1.0625, |
| "step": 871 |
| }, |
| { |
| "epoch": 2.450070323488045, |
| "grad_norm": 2.111055612564087, |
| "learning_rate": 1.1090527447787924e-06, |
| "loss": 1.0759, |
| "step": 872 |
| }, |
| { |
| "epoch": 2.4528832630098454, |
| "grad_norm": 2.1977760791778564, |
| "learning_rate": 1.0995939492862783e-06, |
| "loss": 1.156, |
| "step": 873 |
| }, |
| { |
| "epoch": 2.4556962025316453, |
| "grad_norm": 2.4149186611175537, |
| "learning_rate": 1.0901706766418247e-06, |
| "loss": 1.0938, |
| "step": 874 |
| }, |
| { |
| "epoch": 2.4585091420534457, |
| "grad_norm": 1.9314627647399902, |
| "learning_rate": 1.0807830126676444e-06, |
| "loss": 0.8718, |
| "step": 875 |
| }, |
| { |
| "epoch": 2.461322081575246, |
| "grad_norm": 2.219050168991089, |
| "learning_rate": 1.0714310428616464e-06, |
| "loss": 0.9997, |
| "step": 876 |
| }, |
| { |
| "epoch": 2.4641350210970465, |
| "grad_norm": 1.7131034135818481, |
| "learning_rate": 1.0621148523966552e-06, |
| "loss": 0.8264, |
| "step": 877 |
| }, |
| { |
| "epoch": 2.466947960618847, |
| "grad_norm": 2.0101089477539062, |
| "learning_rate": 1.052834526119637e-06, |
| "loss": 1.0334, |
| "step": 878 |
| }, |
| { |
| "epoch": 2.469760900140647, |
| "grad_norm": 2.2573459148406982, |
| "learning_rate": 1.0435901485509254e-06, |
| "loss": 1.2282, |
| "step": 879 |
| }, |
| { |
| "epoch": 2.4725738396624473, |
| "grad_norm": 2.196690797805786, |
| "learning_rate": 1.0343818038834513e-06, |
| "loss": 1.013, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.4753867791842477, |
| "grad_norm": 2.2671730518341064, |
| "learning_rate": 1.0252095759819785e-06, |
| "loss": 1.1514, |
| "step": 881 |
| }, |
| { |
| "epoch": 2.4781997187060476, |
| "grad_norm": 2.392235279083252, |
| "learning_rate": 1.016073548382337e-06, |
| "loss": 1.2227, |
| "step": 882 |
| }, |
| { |
| "epoch": 2.481012658227848, |
| "grad_norm": 2.245374917984009, |
| "learning_rate": 1.0069738042906635e-06, |
| "loss": 1.2656, |
| "step": 883 |
| }, |
| { |
| "epoch": 2.4838255977496484, |
| "grad_norm": 1.7064595222473145, |
| "learning_rate": 9.979104265826438e-07, |
| "loss": 0.9954, |
| "step": 884 |
| }, |
| { |
| "epoch": 2.486638537271449, |
| "grad_norm": 1.9993723630905151, |
| "learning_rate": 9.888834978027589e-07, |
| "loss": 1.0137, |
| "step": 885 |
| }, |
| { |
| "epoch": 2.489451476793249, |
| "grad_norm": 2.405082941055298, |
| "learning_rate": 9.798931001635298e-07, |
| "loss": 1.1, |
| "step": 886 |
| }, |
| { |
| "epoch": 2.492264416315049, |
| "grad_norm": 2.263054132461548, |
| "learning_rate": 9.709393155447734e-07, |
| "loss": 1.1043, |
| "step": 887 |
| }, |
| { |
| "epoch": 2.4950773558368495, |
| "grad_norm": 2.4851043224334717, |
| "learning_rate": 9.62022225492853e-07, |
| "loss": 1.4185, |
| "step": 888 |
| }, |
| { |
| "epoch": 2.49789029535865, |
| "grad_norm": 2.131120443344116, |
| "learning_rate": 9.531419112199375e-07, |
| "loss": 1.0574, |
| "step": 889 |
| }, |
| { |
| "epoch": 2.50070323488045, |
| "grad_norm": 2.3178141117095947, |
| "learning_rate": 9.442984536032612e-07, |
| "loss": 1.1726, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.5035161744022503, |
| "grad_norm": 2.0481185913085938, |
| "learning_rate": 9.354919331843865e-07, |
| "loss": 1.1169, |
| "step": 891 |
| }, |
| { |
| "epoch": 2.5063291139240507, |
| "grad_norm": 2.4421157836914062, |
| "learning_rate": 9.267224301684763e-07, |
| "loss": 1.27, |
| "step": 892 |
| }, |
| { |
| "epoch": 2.509142053445851, |
| "grad_norm": 2.13606333732605, |
| "learning_rate": 9.17990024423549e-07, |
| "loss": 1.2005, |
| "step": 893 |
| }, |
| { |
| "epoch": 2.5119549929676515, |
| "grad_norm": 2.085256576538086, |
| "learning_rate": 9.09294795479771e-07, |
| "loss": 0.9328, |
| "step": 894 |
| }, |
| { |
| "epoch": 2.5147679324894514, |
| "grad_norm": 1.9264284372329712, |
| "learning_rate": 9.006368225287116e-07, |
| "loss": 0.8267, |
| "step": 895 |
| }, |
| { |
| "epoch": 2.517580872011252, |
| "grad_norm": 1.8938343524932861, |
| "learning_rate": 8.920161844226416e-07, |
| "loss": 0.9883, |
| "step": 896 |
| }, |
| { |
| "epoch": 2.520393811533052, |
| "grad_norm": 2.379265308380127, |
| "learning_rate": 8.834329596737995e-07, |
| "loss": 1.2038, |
| "step": 897 |
| }, |
| { |
| "epoch": 2.523206751054852, |
| "grad_norm": 1.9732309579849243, |
| "learning_rate": 8.748872264536856e-07, |
| "loss": 1.0939, |
| "step": 898 |
| }, |
| { |
| "epoch": 2.5260196905766525, |
| "grad_norm": 1.9441081285476685, |
| "learning_rate": 8.663790625923451e-07, |
| "loss": 1.1116, |
| "step": 899 |
| }, |
| { |
| "epoch": 2.528832630098453, |
| "grad_norm": 1.8037775754928589, |
| "learning_rate": 8.57908545577662e-07, |
| "loss": 0.9497, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.528832630098453, |
| "eval_loss": 0.6382944583892822, |
| "eval_runtime": 2.8103, |
| "eval_samples_per_second": 9.252, |
| "eval_steps_per_second": 1.423, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.528832630098453, |
| "eval_active_sample_count": 30, |
| "eval_avg_loss": 600.5, |
| "eval_avg_mem_token_accuracy": 0.24822695035460993, |
| "eval_avg_mem_token_gt_count": 9.4, |
| "eval_avg_mem_token_precision": 0.008113120074177098, |
| "eval_avg_mem_token_rate": 0.5739373378567152, |
| "eval_avg_mem_token_recall(Accuracy)": 0.24822695035460993, |
| "eval_avg_slot_norm_mean": 197.63333333333333, |
| "eval_avg_slot_sim_mean": 0.996875, |
| "eval_global_step": 900, |
| "eval_loss": 0.6382944583892822, |
| "eval_num_samples": 30, |
| "eval_runtime": 2.8103, |
| "eval_samples_per_second": 9.252, |
| "eval_sim_active_sample_count": 30, |
| "eval_steps_per_second": 1.423, |
| "eval_total_correct_count": 70, |
| "eval_total_gt_mem_token_count": 282, |
| "eval_total_positions": 15033, |
| "eval_total_pred_mem_token_count": 8628, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.5316455696202533, |
| "grad_norm": 2.0587706565856934, |
| "learning_rate": 8.494757525546538e-07, |
| "loss": 0.989, |
| "step": 901 |
| }, |
| { |
| "epoch": 2.5344585091420533, |
| "grad_norm": 2.0397393703460693, |
| "learning_rate": 8.410807603247656e-07, |
| "loss": 0.9581, |
| "step": 902 |
| }, |
| { |
| "epoch": 2.5372714486638537, |
| "grad_norm": 1.872904896736145, |
| "learning_rate": 8.327236453451743e-07, |
| "loss": 0.9432, |
| "step": 903 |
| }, |
| { |
| "epoch": 2.540084388185654, |
| "grad_norm": 2.3130741119384766, |
| "learning_rate": 8.244044837280901e-07, |
| "loss": 1.2045, |
| "step": 904 |
| }, |
| { |
| "epoch": 2.542897327707454, |
| "grad_norm": 2.1820616722106934, |
| "learning_rate": 8.161233512400641e-07, |
| "loss": 1.1755, |
| "step": 905 |
| }, |
| { |
| "epoch": 2.5457102672292544, |
| "grad_norm": 1.8425172567367554, |
| "learning_rate": 8.078803233012966e-07, |
| "loss": 0.8806, |
| "step": 906 |
| }, |
| { |
| "epoch": 2.548523206751055, |
| "grad_norm": 2.0481603145599365, |
| "learning_rate": 7.996754749849567e-07, |
| "loss": 1.0307, |
| "step": 907 |
| }, |
| { |
| "epoch": 2.551336146272855, |
| "grad_norm": 1.9898444414138794, |
| "learning_rate": 7.915088810164856e-07, |
| "loss": 0.9326, |
| "step": 908 |
| }, |
| { |
| "epoch": 2.5541490857946556, |
| "grad_norm": 2.014399766921997, |
| "learning_rate": 7.833806157729329e-07, |
| "loss": 1.0494, |
| "step": 909 |
| }, |
| { |
| "epoch": 2.5569620253164556, |
| "grad_norm": 1.9588618278503418, |
| "learning_rate": 7.752907532822613e-07, |
| "loss": 1.0299, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.559774964838256, |
| "grad_norm": 2.05118465423584, |
| "learning_rate": 7.672393672226902e-07, |
| "loss": 1.2032, |
| "step": 911 |
| }, |
| { |
| "epoch": 2.5625879043600563, |
| "grad_norm": 2.032313585281372, |
| "learning_rate": 7.592265309220071e-07, |
| "loss": 1.2313, |
| "step": 912 |
| }, |
| { |
| "epoch": 2.5654008438818563, |
| "grad_norm": 2.2414844036102295, |
| "learning_rate": 7.512523173569175e-07, |
| "loss": 1.1436, |
| "step": 913 |
| }, |
| { |
| "epoch": 2.5682137834036567, |
| "grad_norm": 2.186063289642334, |
| "learning_rate": 7.433167991523632e-07, |
| "loss": 1.1053, |
| "step": 914 |
| }, |
| { |
| "epoch": 2.571026722925457, |
| "grad_norm": 2.098294734954834, |
| "learning_rate": 7.354200485808749e-07, |
| "loss": 1.1406, |
| "step": 915 |
| }, |
| { |
| "epoch": 2.5738396624472575, |
| "grad_norm": 2.103463888168335, |
| "learning_rate": 7.275621375619058e-07, |
| "loss": 1.2908, |
| "step": 916 |
| }, |
| { |
| "epoch": 2.576652601969058, |
| "grad_norm": 2.070359706878662, |
| "learning_rate": 7.197431376611785e-07, |
| "loss": 0.9896, |
| "step": 917 |
| }, |
| { |
| "epoch": 2.579465541490858, |
| "grad_norm": 1.8880215883255005, |
| "learning_rate": 7.11963120090034e-07, |
| "loss": 0.9669, |
| "step": 918 |
| }, |
| { |
| "epoch": 2.5822784810126582, |
| "grad_norm": 1.9502841234207153, |
| "learning_rate": 7.042221557047823e-07, |
| "loss": 0.9554, |
| "step": 919 |
| }, |
| { |
| "epoch": 2.5850914205344586, |
| "grad_norm": 2.4192519187927246, |
| "learning_rate": 6.96520315006059e-07, |
| "loss": 1.4215, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.5879043600562586, |
| "grad_norm": 2.0227794647216797, |
| "learning_rate": 6.888576681381798e-07, |
| "loss": 1.0162, |
| "step": 921 |
| }, |
| { |
| "epoch": 2.590717299578059, |
| "grad_norm": 2.049302101135254, |
| "learning_rate": 6.81234284888505e-07, |
| "loss": 1.1344, |
| "step": 922 |
| }, |
| { |
| "epoch": 2.5935302390998594, |
| "grad_norm": 2.3195278644561768, |
| "learning_rate": 6.736502346868018e-07, |
| "loss": 1.1883, |
| "step": 923 |
| }, |
| { |
| "epoch": 2.5963431786216598, |
| "grad_norm": 1.9605528116226196, |
| "learning_rate": 6.661055866046134e-07, |
| "loss": 0.9725, |
| "step": 924 |
| }, |
| { |
| "epoch": 2.59915611814346, |
| "grad_norm": 2.021388530731201, |
| "learning_rate": 6.586004093546277e-07, |
| "loss": 1.1272, |
| "step": 925 |
| }, |
| { |
| "epoch": 2.60196905766526, |
| "grad_norm": 1.7564787864685059, |
| "learning_rate": 6.511347712900545e-07, |
| "loss": 0.9292, |
| "step": 926 |
| }, |
| { |
| "epoch": 2.6047819971870605, |
| "grad_norm": 1.886629581451416, |
| "learning_rate": 6.437087404040016e-07, |
| "loss": 1.027, |
| "step": 927 |
| }, |
| { |
| "epoch": 2.607594936708861, |
| "grad_norm": 2.0022552013397217, |
| "learning_rate": 6.363223843288535e-07, |
| "loss": 1.0797, |
| "step": 928 |
| }, |
| { |
| "epoch": 2.610407876230661, |
| "grad_norm": 2.084672451019287, |
| "learning_rate": 6.289757703356597e-07, |
| "loss": 1.164, |
| "step": 929 |
| }, |
| { |
| "epoch": 2.6132208157524612, |
| "grad_norm": 2.0323879718780518, |
| "learning_rate": 6.216689653335184e-07, |
| "loss": 1.172, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.6160337552742616, |
| "grad_norm": 1.9796019792556763, |
| "learning_rate": 6.144020358689679e-07, |
| "loss": 1.1588, |
| "step": 931 |
| }, |
| { |
| "epoch": 2.618846694796062, |
| "grad_norm": 2.1912734508514404, |
| "learning_rate": 6.071750481253835e-07, |
| "loss": 1.0916, |
| "step": 932 |
| }, |
| { |
| "epoch": 2.6216596343178624, |
| "grad_norm": 2.242549419403076, |
| "learning_rate": 5.999880679223702e-07, |
| "loss": 1.1584, |
| "step": 933 |
| }, |
| { |
| "epoch": 2.6244725738396624, |
| "grad_norm": 2.412274122238159, |
| "learning_rate": 5.928411607151651e-07, |
| "loss": 1.2867, |
| "step": 934 |
| }, |
| { |
| "epoch": 2.6272855133614628, |
| "grad_norm": 2.416025161743164, |
| "learning_rate": 5.857343915940434e-07, |
| "loss": 1.2418, |
| "step": 935 |
| }, |
| { |
| "epoch": 2.630098452883263, |
| "grad_norm": 2.027195453643799, |
| "learning_rate": 5.786678252837213e-07, |
| "loss": 1.1176, |
| "step": 936 |
| }, |
| { |
| "epoch": 2.632911392405063, |
| "grad_norm": 1.915125846862793, |
| "learning_rate": 5.71641526142771e-07, |
| "loss": 1.0964, |
| "step": 937 |
| }, |
| { |
| "epoch": 2.6357243319268635, |
| "grad_norm": 1.882155179977417, |
| "learning_rate": 5.646555581630319e-07, |
| "loss": 0.9061, |
| "step": 938 |
| }, |
| { |
| "epoch": 2.638537271448664, |
| "grad_norm": 2.08971905708313, |
| "learning_rate": 5.577099849690276e-07, |
| "loss": 1.0459, |
| "step": 939 |
| }, |
| { |
| "epoch": 2.6413502109704643, |
| "grad_norm": 2.2240920066833496, |
| "learning_rate": 5.508048698173879e-07, |
| "loss": 1.283, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.6441631504922647, |
| "grad_norm": 2.1256864070892334, |
| "learning_rate": 5.439402755962719e-07, |
| "loss": 0.9836, |
| "step": 941 |
| }, |
| { |
| "epoch": 2.6469760900140646, |
| "grad_norm": 2.5735840797424316, |
| "learning_rate": 5.371162648247957e-07, |
| "loss": 1.3213, |
| "step": 942 |
| }, |
| { |
| "epoch": 2.649789029535865, |
| "grad_norm": 2.2286038398742676, |
| "learning_rate": 5.303328996524626e-07, |
| "loss": 1.2165, |
| "step": 943 |
| }, |
| { |
| "epoch": 2.652601969057665, |
| "grad_norm": 1.9804893732070923, |
| "learning_rate": 5.235902418585958e-07, |
| "loss": 1.0179, |
| "step": 944 |
| }, |
| { |
| "epoch": 2.6554149085794654, |
| "grad_norm": 2.038052797317505, |
| "learning_rate": 5.168883528517793e-07, |
| "loss": 1.0582, |
| "step": 945 |
| }, |
| { |
| "epoch": 2.6582278481012658, |
| "grad_norm": 2.0677716732025146, |
| "learning_rate": 5.102272936692948e-07, |
| "loss": 1.2318, |
| "step": 946 |
| }, |
| { |
| "epoch": 2.661040787623066, |
| "grad_norm": 2.240928888320923, |
| "learning_rate": 5.036071249765673e-07, |
| "loss": 0.9381, |
| "step": 947 |
| }, |
| { |
| "epoch": 2.6638537271448666, |
| "grad_norm": 2.2003684043884277, |
| "learning_rate": 4.970279070666162e-07, |
| "loss": 1.1822, |
| "step": 948 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 2.238095998764038, |
| "learning_rate": 4.904896998594955e-07, |
| "loss": 1.2912, |
| "step": 949 |
| }, |
| { |
| "epoch": 2.669479606188467, |
| "grad_norm": 2.166447639465332, |
| "learning_rate": 4.839925629017638e-07, |
| "loss": 1.1712, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.669479606188467, |
| "eval_loss": 0.6378054022789001, |
| "eval_runtime": 2.8903, |
| "eval_samples_per_second": 8.996, |
| "eval_steps_per_second": 1.384, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.669479606188467, |
| "eval_active_sample_count": 30, |
| "eval_avg_loss": 600.375, |
| "eval_avg_mem_token_accuracy": 0.2553191489361702, |
| "eval_avg_mem_token_gt_count": 9.4, |
| "eval_avg_mem_token_precision": 0.008346858335265477, |
| "eval_avg_mem_token_rate": 0.5738042972127985, |
| "eval_avg_mem_token_recall(Accuracy)": 0.2553191489361702, |
| "eval_avg_slot_norm_mean": 197.63333333333333, |
| "eval_avg_slot_sim_mean": 0.996875, |
| "eval_global_step": 950, |
| "eval_loss": 0.6378054022789001, |
| "eval_num_samples": 30, |
| "eval_runtime": 2.8903, |
| "eval_samples_per_second": 8.996, |
| "eval_sim_active_sample_count": 30, |
| "eval_steps_per_second": 1.384, |
| "eval_total_correct_count": 72, |
| "eval_total_gt_mem_token_count": 282, |
| "eval_total_positions": 15033, |
| "eval_total_pred_mem_token_count": 8626, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.6722925457102673, |
| "grad_norm": 2.2033162117004395, |
| "learning_rate": 4.775365553659256e-07, |
| "loss": 1.0523, |
| "step": 951 |
| }, |
| { |
| "epoch": 2.6751054852320673, |
| "grad_norm": 2.277907133102417, |
| "learning_rate": 4.711217360499082e-07, |
| "loss": 1.0803, |
| "step": 952 |
| }, |
| { |
| "epoch": 2.6779184247538677, |
| "grad_norm": 1.9675984382629395, |
| "learning_rate": 4.6474816337650883e-07, |
| "loss": 1.258, |
| "step": 953 |
| }, |
| { |
| "epoch": 2.680731364275668, |
| "grad_norm": 2.1231744289398193, |
| "learning_rate": 4.5841589539288187e-07, |
| "loss": 1.0332, |
| "step": 954 |
| }, |
| { |
| "epoch": 2.6835443037974684, |
| "grad_norm": 2.0946061611175537, |
| "learning_rate": 4.5212498976999196e-07, |
| "loss": 1.1456, |
| "step": 955 |
| }, |
| { |
| "epoch": 2.686357243319269, |
| "grad_norm": 2.3890576362609863, |
| "learning_rate": 4.458755038021029e-07, |
| "loss": 1.2698, |
| "step": 956 |
| }, |
| { |
| "epoch": 2.689170182841069, |
| "grad_norm": 1.8794134855270386, |
| "learning_rate": 4.3966749440624736e-07, |
| "loss": 0.9727, |
| "step": 957 |
| }, |
| { |
| "epoch": 2.691983122362869, |
| "grad_norm": 2.3660783767700195, |
| "learning_rate": 4.3350101812171143e-07, |
| "loss": 1.1163, |
| "step": 958 |
| }, |
| { |
| "epoch": 2.6947960618846696, |
| "grad_norm": 2.015714168548584, |
| "learning_rate": 4.2737613110951924e-07, |
| "loss": 1.1079, |
| "step": 959 |
| }, |
| { |
| "epoch": 2.6976090014064695, |
| "grad_norm": 2.051121234893799, |
| "learning_rate": 4.2129288915192355e-07, |
| "loss": 1.1844, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.70042194092827, |
| "grad_norm": 2.295501708984375, |
| "learning_rate": 4.152513476518927e-07, |
| "loss": 1.2118, |
| "step": 961 |
| }, |
| { |
| "epoch": 2.7032348804500703, |
| "grad_norm": 1.991119623184204, |
| "learning_rate": 4.092515616326126e-07, |
| "loss": 1.1834, |
| "step": 962 |
| }, |
| { |
| "epoch": 2.7060478199718707, |
| "grad_norm": 1.856577754020691, |
| "learning_rate": 4.0329358573697906e-07, |
| "loss": 0.972, |
| "step": 963 |
| }, |
| { |
| "epoch": 2.708860759493671, |
| "grad_norm": 2.042525291442871, |
| "learning_rate": 3.973774742271047e-07, |
| "loss": 1.1083, |
| "step": 964 |
| }, |
| { |
| "epoch": 2.711673699015471, |
| "grad_norm": 1.8524376153945923, |
| "learning_rate": 3.9150328098382593e-07, |
| "loss": 0.9043, |
| "step": 965 |
| }, |
| { |
| "epoch": 2.7144866385372715, |
| "grad_norm": 2.0273165702819824, |
| "learning_rate": 3.8567105950620353e-07, |
| "loss": 0.9573, |
| "step": 966 |
| }, |
| { |
| "epoch": 2.717299578059072, |
| "grad_norm": 2.551295757293701, |
| "learning_rate": 3.798808629110479e-07, |
| "loss": 1.0811, |
| "step": 967 |
| }, |
| { |
| "epoch": 2.720112517580872, |
| "grad_norm": 2.2737653255462646, |
| "learning_rate": 3.7413274393242327e-07, |
| "loss": 1.1984, |
| "step": 968 |
| }, |
| { |
| "epoch": 2.722925457102672, |
| "grad_norm": 2.330913543701172, |
| "learning_rate": 3.68426754921179e-07, |
| "loss": 1.223, |
| "step": 969 |
| }, |
| { |
| "epoch": 2.7257383966244726, |
| "grad_norm": 2.24187970161438, |
| "learning_rate": 3.6276294784446e-07, |
| "loss": 1.0989, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.728551336146273, |
| "grad_norm": 2.3575563430786133, |
| "learning_rate": 3.5714137428524754e-07, |
| "loss": 1.2727, |
| "step": 971 |
| }, |
| { |
| "epoch": 2.7313642756680734, |
| "grad_norm": 2.3462178707122803, |
| "learning_rate": 3.5156208544187554e-07, |
| "loss": 1.2697, |
| "step": 972 |
| }, |
| { |
| "epoch": 2.7341772151898733, |
| "grad_norm": 2.2106142044067383, |
| "learning_rate": 3.460251321275759e-07, |
| "loss": 0.9519, |
| "step": 973 |
| }, |
| { |
| "epoch": 2.7369901547116737, |
| "grad_norm": 1.885840654373169, |
| "learning_rate": 3.4053056477000856e-07, |
| "loss": 0.8887, |
| "step": 974 |
| }, |
| { |
| "epoch": 2.739803094233474, |
| "grad_norm": 1.8733952045440674, |
| "learning_rate": 3.350784334108048e-07, |
| "loss": 1.1189, |
| "step": 975 |
| }, |
| { |
| "epoch": 2.742616033755274, |
| "grad_norm": 2.0802693367004395, |
| "learning_rate": 3.2966878770511025e-07, |
| "loss": 1.0736, |
| "step": 976 |
| }, |
| { |
| "epoch": 2.7454289732770745, |
| "grad_norm": 2.003995656967163, |
| "learning_rate": 3.24301676921136e-07, |
| "loss": 0.9954, |
| "step": 977 |
| }, |
| { |
| "epoch": 2.748241912798875, |
| "grad_norm": 1.968119740486145, |
| "learning_rate": 3.189771499397043e-07, |
| "loss": 1.0114, |
| "step": 978 |
| }, |
| { |
| "epoch": 2.7510548523206753, |
| "grad_norm": 2.2957983016967773, |
| "learning_rate": 3.136952552538092e-07, |
| "loss": 1.1369, |
| "step": 979 |
| }, |
| { |
| "epoch": 2.7538677918424757, |
| "grad_norm": 2.131643772125244, |
| "learning_rate": 3.084560409681703e-07, |
| "loss": 1.2212, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.7566807313642756, |
| "grad_norm": 1.8769854307174683, |
| "learning_rate": 3.0325955479879765e-07, |
| "loss": 0.94, |
| "step": 981 |
| }, |
| { |
| "epoch": 2.759493670886076, |
| "grad_norm": 1.8766363859176636, |
| "learning_rate": 2.981058440725559e-07, |
| "loss": 0.9704, |
| "step": 982 |
| }, |
| { |
| "epoch": 2.7623066104078764, |
| "grad_norm": 2.0633304119110107, |
| "learning_rate": 2.929949557267331e-07, |
| "loss": 0.9554, |
| "step": 983 |
| }, |
| { |
| "epoch": 2.7651195499296763, |
| "grad_norm": 2.1459577083587646, |
| "learning_rate": 2.8792693630861345e-07, |
| "loss": 1.0209, |
| "step": 984 |
| }, |
| { |
| "epoch": 2.7679324894514767, |
| "grad_norm": 2.0213375091552734, |
| "learning_rate": 2.829018319750543e-07, |
| "loss": 1.0121, |
| "step": 985 |
| }, |
| { |
| "epoch": 2.770745428973277, |
| "grad_norm": 2.148283004760742, |
| "learning_rate": 2.779196884920643e-07, |
| "loss": 1.1324, |
| "step": 986 |
| }, |
| { |
| "epoch": 2.7735583684950775, |
| "grad_norm": 2.2942779064178467, |
| "learning_rate": 2.729805512343875e-07, |
| "loss": 1.3349, |
| "step": 987 |
| }, |
| { |
| "epoch": 2.7763713080168775, |
| "grad_norm": 1.860045075416565, |
| "learning_rate": 2.6808446518508835e-07, |
| "loss": 0.9753, |
| "step": 988 |
| }, |
| { |
| "epoch": 2.779184247538678, |
| "grad_norm": 2.135307550430298, |
| "learning_rate": 2.632314749351483e-07, |
| "loss": 1.2426, |
| "step": 989 |
| }, |
| { |
| "epoch": 2.7819971870604783, |
| "grad_norm": 2.502941131591797, |
| "learning_rate": 2.5842162468304845e-07, |
| "loss": 1.3143, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.7848101265822782, |
| "grad_norm": 1.8326023817062378, |
| "learning_rate": 2.5365495823437834e-07, |
| "loss": 1.0144, |
| "step": 991 |
| }, |
| { |
| "epoch": 2.7876230661040786, |
| "grad_norm": 2.351020574569702, |
| "learning_rate": 2.489315190014291e-07, |
| "loss": 1.2042, |
| "step": 992 |
| }, |
| { |
| "epoch": 2.790436005625879, |
| "grad_norm": 1.9044114351272583, |
| "learning_rate": 2.4425135000280374e-07, |
| "loss": 1.0554, |
| "step": 993 |
| }, |
| { |
| "epoch": 2.7932489451476794, |
| "grad_norm": 2.1605467796325684, |
| "learning_rate": 2.3961449386302017e-07, |
| "loss": 1.1091, |
| "step": 994 |
| }, |
| { |
| "epoch": 2.79606188466948, |
| "grad_norm": 1.9160940647125244, |
| "learning_rate": 2.3502099281212775e-07, |
| "loss": 0.9543, |
| "step": 995 |
| }, |
| { |
| "epoch": 2.7988748241912798, |
| "grad_norm": 2.0379810333251953, |
| "learning_rate": 2.3047088868531796e-07, |
| "loss": 1.0654, |
| "step": 996 |
| }, |
| { |
| "epoch": 2.80168776371308, |
| "grad_norm": 2.0998106002807617, |
| "learning_rate": 2.2596422292254893e-07, |
| "loss": 1.1908, |
| "step": 997 |
| }, |
| { |
| "epoch": 2.8045007032348805, |
| "grad_norm": 2.1208677291870117, |
| "learning_rate": 2.2150103656816357e-07, |
| "loss": 1.0795, |
| "step": 998 |
| }, |
| { |
| "epoch": 2.8073136427566805, |
| "grad_norm": 2.2069194316864014, |
| "learning_rate": 2.1708137027051601e-07, |
| "loss": 1.1354, |
| "step": 999 |
| }, |
| { |
| "epoch": 2.810126582278481, |
| "grad_norm": 2.2347195148468018, |
| "learning_rate": 2.1270526428160466e-07, |
| "loss": 1.3928, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.810126582278481, |
| "eval_loss": 0.63798987865448, |
| "eval_runtime": 2.8525, |
| "eval_samples_per_second": 9.115, |
| "eval_steps_per_second": 1.402, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.810126582278481, |
| "eval_active_sample_count": 30, |
| "eval_avg_loss": 599.75, |
| "eval_avg_mem_token_accuracy": 0.24113475177304963, |
| "eval_avg_mem_token_gt_count": 9.4, |
| "eval_avg_mem_token_precision": 0.00790054606715464, |
| "eval_avg_mem_token_rate": 0.5725404110955897, |
| "eval_avg_mem_token_recall(Accuracy)": 0.24113475177304963, |
| "eval_avg_slot_norm_mean": 197.63333333333333, |
| "eval_avg_slot_sim_mean": 0.996875, |
| "eval_global_step": 1000, |
| "eval_loss": 0.63798987865448, |
| "eval_num_samples": 30, |
| "eval_runtime": 2.8525, |
| "eval_samples_per_second": 9.115, |
| "eval_sim_active_sample_count": 30, |
| "eval_steps_per_second": 1.402, |
| "eval_total_correct_count": 68, |
| "eval_total_gt_mem_token_count": 282, |
| "eval_total_positions": 15033, |
| "eval_total_pred_mem_token_count": 8607, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.8129395218002813, |
| "grad_norm": 2.0307729244232178, |
| "learning_rate": 2.0837275845670135e-07, |
| "loss": 1.2427, |
| "step": 1001 |
| }, |
| { |
| "epoch": 2.8157524613220817, |
| "grad_norm": 2.4855947494506836, |
| "learning_rate": 2.0408389225399339e-07, |
| "loss": 1.1572, |
| "step": 1002 |
| }, |
| { |
| "epoch": 2.818565400843882, |
| "grad_norm": 2.137430429458618, |
| "learning_rate": 1.9983870473421761e-07, |
| "loss": 1.1247, |
| "step": 1003 |
| }, |
| { |
| "epoch": 2.821378340365682, |
| "grad_norm": 1.7523655891418457, |
| "learning_rate": 1.9563723456031303e-07, |
| "loss": 1.1162, |
| "step": 1004 |
| }, |
| { |
| "epoch": 2.8241912798874824, |
| "grad_norm": 2.1431448459625244, |
| "learning_rate": 1.9147951999705928e-07, |
| "loss": 1.2084, |
| "step": 1005 |
| }, |
| { |
| "epoch": 2.827004219409283, |
| "grad_norm": 2.178713798522949, |
| "learning_rate": 1.8736559891073703e-07, |
| "loss": 1.2073, |
| "step": 1006 |
| }, |
| { |
| "epoch": 2.8298171589310828, |
| "grad_norm": 2.0820088386535645, |
| "learning_rate": 1.8329550876877488e-07, |
| "loss": 1.1191, |
| "step": 1007 |
| }, |
| { |
| "epoch": 2.832630098452883, |
| "grad_norm": 2.0419578552246094, |
| "learning_rate": 1.7926928663941635e-07, |
| "loss": 1.0641, |
| "step": 1008 |
| }, |
| { |
| "epoch": 2.8354430379746836, |
| "grad_norm": 2.2004177570343018, |
| "learning_rate": 1.7528696919137444e-07, |
| "loss": 1.3558, |
| "step": 1009 |
| }, |
| { |
| "epoch": 2.838255977496484, |
| "grad_norm": 2.3024518489837646, |
| "learning_rate": 1.7134859269350546e-07, |
| "loss": 1.2914, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.8410689170182843, |
| "grad_norm": 2.0407681465148926, |
| "learning_rate": 1.6745419301446962e-07, |
| "loss": 1.0491, |
| "step": 1011 |
| }, |
| { |
| "epoch": 2.8438818565400843, |
| "grad_norm": 2.028738498687744, |
| "learning_rate": 1.6360380562241428e-07, |
| "loss": 1.2034, |
| "step": 1012 |
| }, |
| { |
| "epoch": 2.8466947960618847, |
| "grad_norm": 2.436655044555664, |
| "learning_rate": 1.5979746558464237e-07, |
| "loss": 1.4506, |
| "step": 1013 |
| }, |
| { |
| "epoch": 2.849507735583685, |
| "grad_norm": 2.0717296600341797, |
| "learning_rate": 1.5603520756729885e-07, |
| "loss": 1.1103, |
| "step": 1014 |
| }, |
| { |
| "epoch": 2.852320675105485, |
| "grad_norm": 2.195970058441162, |
| "learning_rate": 1.5231706583505256e-07, |
| "loss": 1.2775, |
| "step": 1015 |
| }, |
| { |
| "epoch": 2.8551336146272854, |
| "grad_norm": 2.2911033630371094, |
| "learning_rate": 1.486430742507833e-07, |
| "loss": 1.1482, |
| "step": 1016 |
| }, |
| { |
| "epoch": 2.857946554149086, |
| "grad_norm": 2.503101348876953, |
| "learning_rate": 1.4501326627527513e-07, |
| "loss": 1.4186, |
| "step": 1017 |
| }, |
| { |
| "epoch": 2.8607594936708862, |
| "grad_norm": 1.9371217489242554, |
| "learning_rate": 1.4142767496691135e-07, |
| "loss": 0.9705, |
| "step": 1018 |
| }, |
| { |
| "epoch": 2.8635724331926866, |
| "grad_norm": 2.0493252277374268, |
| "learning_rate": 1.3788633298137288e-07, |
| "loss": 0.9959, |
| "step": 1019 |
| }, |
| { |
| "epoch": 2.8663853727144866, |
| "grad_norm": 1.987891674041748, |
| "learning_rate": 1.3438927257134083e-07, |
| "loss": 0.9549, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.869198312236287, |
| "grad_norm": 2.177379608154297, |
| "learning_rate": 1.3093652558620384e-07, |
| "loss": 1.1057, |
| "step": 1021 |
| }, |
| { |
| "epoch": 2.8720112517580874, |
| "grad_norm": 1.7878172397613525, |
| "learning_rate": 1.2752812347176514e-07, |
| "loss": 0.865, |
| "step": 1022 |
| }, |
| { |
| "epoch": 2.8748241912798873, |
| "grad_norm": 2.258223295211792, |
| "learning_rate": 1.2416409726996037e-07, |
| "loss": 1.1227, |
| "step": 1023 |
| }, |
| { |
| "epoch": 2.8776371308016877, |
| "grad_norm": 2.103666067123413, |
| "learning_rate": 1.2084447761857244e-07, |
| "loss": 1.1573, |
| "step": 1024 |
| }, |
| { |
| "epoch": 2.880450070323488, |
| "grad_norm": 1.982913851737976, |
| "learning_rate": 1.1756929475095103e-07, |
| "loss": 1.0078, |
| "step": 1025 |
| }, |
| { |
| "epoch": 2.8832630098452885, |
| "grad_norm": 1.9436091184616089, |
| "learning_rate": 1.143385784957407e-07, |
| "loss": 1.0486, |
| "step": 1026 |
| }, |
| { |
| "epoch": 2.8860759493670884, |
| "grad_norm": 2.438931465148926, |
| "learning_rate": 1.111523582766072e-07, |
| "loss": 1.2295, |
| "step": 1027 |
| }, |
| { |
| "epoch": 2.888888888888889, |
| "grad_norm": 1.8638874292373657, |
| "learning_rate": 1.0801066311196872e-07, |
| "loss": 1.06, |
| "step": 1028 |
| }, |
| { |
| "epoch": 2.8917018284106892, |
| "grad_norm": 1.9490095376968384, |
| "learning_rate": 1.0491352161473345e-07, |
| "loss": 1.0883, |
| "step": 1029 |
| }, |
| { |
| "epoch": 2.894514767932489, |
| "grad_norm": 2.201900005340576, |
| "learning_rate": 1.018609619920391e-07, |
| "loss": 0.9764, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.8973277074542896, |
| "grad_norm": 2.4178552627563477, |
| "learning_rate": 9.885301204499321e-08, |
| "loss": 1.2852, |
| "step": 1031 |
| }, |
| { |
| "epoch": 2.90014064697609, |
| "grad_norm": 2.231503486633301, |
| "learning_rate": 9.588969916842272e-08, |
| "loss": 1.1528, |
| "step": 1032 |
| }, |
| { |
| "epoch": 2.9029535864978904, |
| "grad_norm": 1.870887041091919, |
| "learning_rate": 9.297105035062426e-08, |
| "loss": 1.0726, |
| "step": 1033 |
| }, |
| { |
| "epoch": 2.9057665260196908, |
| "grad_norm": 2.3219852447509766, |
| "learning_rate": 9.009709217311702e-08, |
| "loss": 1.1784, |
| "step": 1034 |
| }, |
| { |
| "epoch": 2.9085794655414907, |
| "grad_norm": 2.1292107105255127, |
| "learning_rate": 8.72678508104008e-08, |
| "loss": 1.2251, |
| "step": 1035 |
| }, |
| { |
| "epoch": 2.911392405063291, |
| "grad_norm": 2.016449451446533, |
| "learning_rate": 8.448335202971891e-08, |
| "loss": 0.9478, |
| "step": 1036 |
| }, |
| { |
| "epoch": 2.9142053445850915, |
| "grad_norm": 2.08313250541687, |
| "learning_rate": 8.174362119082291e-08, |
| "loss": 1.0649, |
| "step": 1037 |
| }, |
| { |
| "epoch": 2.9170182841068915, |
| "grad_norm": 2.0640265941619873, |
| "learning_rate": 7.9048683245741e-08, |
| "loss": 1.1765, |
| "step": 1038 |
| }, |
| { |
| "epoch": 2.919831223628692, |
| "grad_norm": 2.1048390865325928, |
| "learning_rate": 7.639856273855106e-08, |
| "loss": 1.0642, |
| "step": 1039 |
| }, |
| { |
| "epoch": 2.9226441631504922, |
| "grad_norm": 2.1916463375091553, |
| "learning_rate": 7.379328380515805e-08, |
| "loss": 1.2419, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.9254571026722926, |
| "grad_norm": 2.252420425415039, |
| "learning_rate": 7.123287017307302e-08, |
| "loss": 1.3343, |
| "step": 1041 |
| }, |
| { |
| "epoch": 2.928270042194093, |
| "grad_norm": 2.1169185638427734, |
| "learning_rate": 6.871734516119721e-08, |
| "loss": 1.129, |
| "step": 1042 |
| }, |
| { |
| "epoch": 2.931082981715893, |
| "grad_norm": 2.2315621376037598, |
| "learning_rate": 6.624673167961004e-08, |
| "loss": 1.1125, |
| "step": 1043 |
| }, |
| { |
| "epoch": 2.9338959212376934, |
| "grad_norm": 1.8748716115951538, |
| "learning_rate": 6.382105222936085e-08, |
| "loss": 1.049, |
| "step": 1044 |
| }, |
| { |
| "epoch": 2.9367088607594938, |
| "grad_norm": 1.9676600694656372, |
| "learning_rate": 6.144032890226304e-08, |
| "loss": 1.1791, |
| "step": 1045 |
| }, |
| { |
| "epoch": 2.9395218002812937, |
| "grad_norm": 1.765437126159668, |
| "learning_rate": 5.910458338069192e-08, |
| "loss": 0.9795, |
| "step": 1046 |
| }, |
| { |
| "epoch": 2.942334739803094, |
| "grad_norm": 2.3168399333953857, |
| "learning_rate": 5.6813836937392175e-08, |
| "loss": 1.1186, |
| "step": 1047 |
| }, |
| { |
| "epoch": 2.9451476793248945, |
| "grad_norm": 2.183238983154297, |
| "learning_rate": 5.456811043527632e-08, |
| "loss": 1.1833, |
| "step": 1048 |
| }, |
| { |
| "epoch": 2.947960618846695, |
| "grad_norm": 1.8787195682525635, |
| "learning_rate": 5.236742432724262e-08, |
| "loss": 0.9953, |
| "step": 1049 |
| }, |
| { |
| "epoch": 2.9507735583684953, |
| "grad_norm": 2.0316836833953857, |
| "learning_rate": 5.021179865598136e-08, |
| "loss": 1.0088, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.9507735583684953, |
| "eval_loss": 0.6373986005783081, |
| "eval_runtime": 2.8523, |
| "eval_samples_per_second": 9.115, |
| "eval_steps_per_second": 1.402, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.9507735583684953, |
| "eval_active_sample_count": 30, |
| "eval_avg_loss": 599.625, |
| "eval_avg_mem_token_accuracy": 0.2553191489361702, |
| "eval_avg_mem_token_gt_count": 9.4, |
| "eval_avg_mem_token_precision": 0.008363340689975607, |
| "eval_avg_mem_token_rate": 0.5726734517395065, |
| "eval_avg_mem_token_recall(Accuracy)": 0.2553191489361702, |
| "eval_avg_slot_norm_mean": 197.63333333333333, |
| "eval_avg_slot_sim_mean": 0.996875, |
| "eval_global_step": 1050, |
| "eval_loss": 0.6373986005783081, |
| "eval_num_samples": 30, |
| "eval_runtime": 2.8523, |
| "eval_samples_per_second": 9.115, |
| "eval_sim_active_sample_count": 30, |
| "eval_steps_per_second": 1.402, |
| "eval_total_correct_count": 72, |
| "eval_total_gt_mem_token_count": 282, |
| "eval_total_positions": 15033, |
| "eval_total_pred_mem_token_count": 8609, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.9535864978902953, |
| "grad_norm": 2.193411111831665, |
| "learning_rate": 4.810125305379998e-08, |
| "loss": 1.086, |
| "step": 1051 |
| }, |
| { |
| "epoch": 2.9563994374120957, |
| "grad_norm": 1.7261470556259155, |
| "learning_rate": 4.6035806742436575e-08, |
| "loss": 1.004, |
| "step": 1052 |
| }, |
| { |
| "epoch": 2.959212376933896, |
| "grad_norm": 1.943182110786438, |
| "learning_rate": 4.4015478532891675e-08, |
| "loss": 1.1523, |
| "step": 1053 |
| }, |
| { |
| "epoch": 2.962025316455696, |
| "grad_norm": 2.992014169692993, |
| "learning_rate": 4.20402868252523e-08, |
| "loss": 1.1195, |
| "step": 1054 |
| }, |
| { |
| "epoch": 2.9648382559774964, |
| "grad_norm": 2.0633037090301514, |
| "learning_rate": 4.01102496085265e-08, |
| "loss": 1.1554, |
| "step": 1055 |
| }, |
| { |
| "epoch": 2.967651195499297, |
| "grad_norm": 5.867424964904785, |
| "learning_rate": 3.822538446047852e-08, |
| "loss": 1.1499, |
| "step": 1056 |
| }, |
| { |
| "epoch": 2.970464135021097, |
| "grad_norm": 2.3555386066436768, |
| "learning_rate": 3.6385708547468925e-08, |
| "loss": 1.296, |
| "step": 1057 |
| }, |
| { |
| "epoch": 2.9732770745428976, |
| "grad_norm": 2.298612594604492, |
| "learning_rate": 3.4591238624299696e-08, |
| "loss": 1.1622, |
| "step": 1058 |
| }, |
| { |
| "epoch": 2.9760900140646975, |
| "grad_norm": 2.095074415206909, |
| "learning_rate": 3.284199103405883e-08, |
| "loss": 1.0392, |
| "step": 1059 |
| }, |
| { |
| "epoch": 2.978902953586498, |
| "grad_norm": 1.7967655658721924, |
| "learning_rate": 3.113798170797489e-08, |
| "loss": 0.8557, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.9817158931082983, |
| "grad_norm": 2.187788963317871, |
| "learning_rate": 2.9479226165268216e-08, |
| "loss": 1.2315, |
| "step": 1061 |
| }, |
| { |
| "epoch": 2.9845288326300983, |
| "grad_norm": 2.0555531978607178, |
| "learning_rate": 2.7865739513012746e-08, |
| "loss": 1.0719, |
| "step": 1062 |
| }, |
| { |
| "epoch": 2.9873417721518987, |
| "grad_norm": 2.1727023124694824, |
| "learning_rate": 2.629753644599664e-08, |
| "loss": 1.0655, |
| "step": 1063 |
| }, |
| { |
| "epoch": 2.990154711673699, |
| "grad_norm": 2.1658568382263184, |
| "learning_rate": 2.4774631246589075e-08, |
| "loss": 1.0773, |
| "step": 1064 |
| }, |
| { |
| "epoch": 2.9929676511954995, |
| "grad_norm": 2.12109112739563, |
| "learning_rate": 2.3297037784609787e-08, |
| "loss": 1.1639, |
| "step": 1065 |
| }, |
| { |
| "epoch": 2.9957805907173, |
| "grad_norm": 2.118447780609131, |
| "learning_rate": 2.1864769517204177e-08, |
| "loss": 1.1426, |
| "step": 1066 |
| }, |
| { |
| "epoch": 2.9985935302391, |
| "grad_norm": 1.9243059158325195, |
| "learning_rate": 2.0477839488718398e-08, |
| "loss": 0.9786, |
| "step": 1067 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 1.6388542652130127, |
| "learning_rate": 1.913626033058169e-08, |
| "loss": 0.5664, |
| "step": 1068 |
| }, |
| { |
| "epoch": 3.0028129395218004, |
| "grad_norm": 1.8314422369003296, |
| "learning_rate": 1.784004426119257e-08, |
| "loss": 1.0312, |
| "step": 1069 |
| }, |
| { |
| "epoch": 3.0056258790436003, |
| "grad_norm": 2.122387170791626, |
| "learning_rate": 1.6589203085804473e-08, |
| "loss": 1.0936, |
| "step": 1070 |
| }, |
| { |
| "epoch": 3.0084388185654007, |
| "grad_norm": 2.0820372104644775, |
| "learning_rate": 1.538374819642252e-08, |
| "loss": 1.0541, |
| "step": 1071 |
| }, |
| { |
| "epoch": 3.011251758087201, |
| "grad_norm": 1.9248408079147339, |
| "learning_rate": 1.4223690571695815e-08, |
| "loss": 0.9005, |
| "step": 1072 |
| }, |
| { |
| "epoch": 3.0140646976090015, |
| "grad_norm": 1.9669166803359985, |
| "learning_rate": 1.3109040776819181e-08, |
| "loss": 1.1376, |
| "step": 1073 |
| }, |
| { |
| "epoch": 3.0168776371308015, |
| "grad_norm": 1.9701210260391235, |
| "learning_rate": 1.2039808963437705e-08, |
| "loss": 1.0197, |
| "step": 1074 |
| }, |
| { |
| "epoch": 3.019690576652602, |
| "grad_norm": 2.451758623123169, |
| "learning_rate": 1.1016004869551788e-08, |
| "loss": 1.2066, |
| "step": 1075 |
| }, |
| { |
| "epoch": 3.0225035161744023, |
| "grad_norm": 1.9009047746658325, |
| "learning_rate": 1.0037637819431123e-08, |
| "loss": 1.0529, |
| "step": 1076 |
| }, |
| { |
| "epoch": 3.0253164556962027, |
| "grad_norm": 2.054837465286255, |
| "learning_rate": 9.10471672352864e-09, |
| "loss": 1.1907, |
| "step": 1077 |
| }, |
| { |
| "epoch": 3.0281293952180026, |
| "grad_norm": 2.181744337081909, |
| "learning_rate": 8.217250078400018e-09, |
| "loss": 1.1479, |
| "step": 1078 |
| }, |
| { |
| "epoch": 3.030942334739803, |
| "grad_norm": 2.066051721572876, |
| "learning_rate": 7.375245966623757e-09, |
| "loss": 1.2419, |
| "step": 1079 |
| }, |
| { |
| "epoch": 3.0337552742616034, |
| "grad_norm": 2.2346465587615967, |
| "learning_rate": 6.5787120567317734e-09, |
| "loss": 0.9984, |
| "step": 1080 |
| }, |
| { |
| "epoch": 3.036568213783404, |
| "grad_norm": 1.9933655261993408, |
| "learning_rate": 5.827655603135585e-09, |
| "loss": 1.0698, |
| "step": 1081 |
| }, |
| { |
| "epoch": 3.0393811533052038, |
| "grad_norm": 2.1959750652313232, |
| "learning_rate": 5.122083446062464e-09, |
| "loss": 1.1049, |
| "step": 1082 |
| }, |
| { |
| "epoch": 3.042194092827004, |
| "grad_norm": 2.2590200901031494, |
| "learning_rate": 4.462002011493271e-09, |
| "loss": 1.1198, |
| "step": 1083 |
| }, |
| { |
| "epoch": 3.0450070323488045, |
| "grad_norm": 2.1988589763641357, |
| "learning_rate": 3.847417311102497e-09, |
| "loss": 1.1142, |
| "step": 1084 |
| }, |
| { |
| "epoch": 3.047819971870605, |
| "grad_norm": 2.254117727279663, |
| "learning_rate": 3.2783349422044197e-09, |
| "loss": 1.199, |
| "step": 1085 |
| }, |
| { |
| "epoch": 3.050632911392405, |
| "grad_norm": 1.9562636613845825, |
| "learning_rate": 2.7547600877020355e-09, |
| "loss": 1.0887, |
| "step": 1086 |
| }, |
| { |
| "epoch": 3.0534458509142053, |
| "grad_norm": 1.9559649229049683, |
| "learning_rate": 2.276697516039872e-09, |
| "loss": 1.0819, |
| "step": 1087 |
| }, |
| { |
| "epoch": 3.0562587904360057, |
| "grad_norm": 2.017869472503662, |
| "learning_rate": 1.8441515811612465e-09, |
| "loss": 0.9884, |
| "step": 1088 |
| }, |
| { |
| "epoch": 3.059071729957806, |
| "grad_norm": 1.8643865585327148, |
| "learning_rate": 1.4571262224666315e-09, |
| "loss": 0.9771, |
| "step": 1089 |
| }, |
| { |
| "epoch": 3.061884669479606, |
| "grad_norm": 2.1424920558929443, |
| "learning_rate": 1.1156249647797934e-09, |
| "loss": 1.2107, |
| "step": 1090 |
| }, |
| { |
| "epoch": 3.0646976090014064, |
| "grad_norm": 2.071485757827759, |
| "learning_rate": 8.196509183139301e-10, |
| "loss": 0.8257, |
| "step": 1091 |
| }, |
| { |
| "epoch": 3.067510548523207, |
| "grad_norm": 1.8392572402954102, |
| "learning_rate": 5.692067786455813e-10, |
| "loss": 1.119, |
| "step": 1092 |
| }, |
| { |
| "epoch": 3.070323488045007, |
| "grad_norm": 2.0427193641662598, |
| "learning_rate": 3.6429482668853824e-10, |
| "loss": 1.0698, |
| "step": 1093 |
| }, |
| { |
| "epoch": 3.073136427566807, |
| "grad_norm": 2.2885656356811523, |
| "learning_rate": 2.0491692867330438e-10, |
| "loss": 1.4175, |
| "step": 1094 |
| }, |
| { |
| "epoch": 3.0759493670886076, |
| "grad_norm": 2.181267499923706, |
| "learning_rate": 9.107453612933192e-11, |
| "loss": 1.0596, |
| "step": 1095 |
| }, |
| { |
| "epoch": 3.078762306610408, |
| "grad_norm": 2.340491533279419, |
| "learning_rate": 2.2768685873364448e-11, |
| "loss": 1.1616, |
| "step": 1096 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1096, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 2000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|