Upload folder using huggingface_hub

Browse files

Files changed (7) hide show

20250501-1429-grad-clip/checkpoint-111/optimizer.pt +3 -0
20250501-1429-grad-clip/checkpoint-111/pytorch_model.bin +3 -0
20250501-1429-grad-clip/checkpoint-111/rng_state.pth +3 -0
20250501-1429-grad-clip/checkpoint-111/scheduler.pt +3 -0
20250501-1429-grad-clip/checkpoint-111/trainer_state.json +811 -0
20250501-1429-grad-clip/checkpoint-111/training_args.bin +3 -0
20250501-1429-grad-clip/runs/May01_14-29-25_d55355693cce/events.out.tfevents.1746109765.d55355693cce.160.0 +3 -0

20250501-1429-grad-clip/checkpoint-111/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0c9b9dbd8bd53597d72aa78886601609670feed96370f5a4237999080f8dde5e
+size 33824762

20250501-1429-grad-clip/checkpoint-111/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:21523c962f6a9ac67e63e72f2338fb57be8b90b65f45bec6058074fbc7114542
+size 368596590

20250501-1429-grad-clip/checkpoint-111/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:238523785dc8a6bd63d8f51ae72844d04988b9978d2050f234191a39fe7b1141
+size 14244

20250501-1429-grad-clip/checkpoint-111/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0e42e66dcf545600cbaa1c353643c7beb3f3d693939da1e7ad003b79b8ca066d
+size 1064

20250501-1429-grad-clip/checkpoint-111/trainer_state.json ADDED Viewed

	@@ -0,0 +1,811 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.9955156950672646,
+  "eval_steps": 200,
+  "global_step": 111,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.008968609865470852,
+      "grad_norm": 2.960822105407715,
+      "learning_rate": 0.0001,
+      "loss": 2.8706,
+      "step": 1
+    },
+    {
+      "epoch": 0.017937219730941704,
+      "grad_norm": 0.7307469248771667,
+      "learning_rate": 9.99799753559161e-05,
+      "loss": 1.3,
+      "step": 2
+    },
+    {
+      "epoch": 0.026905829596412557,
+      "grad_norm": 0.3418230414390564,
+      "learning_rate": 9.991991746311917e-05,
+      "loss": 1.2554,
+      "step": 3
+    },
+    {
+      "epoch": 0.03587443946188341,
+      "grad_norm": 3.3377888202667236,
+      "learning_rate": 9.981987442712633e-05,
+      "loss": 5.976,
+      "step": 4
+    },
+    {
+      "epoch": 0.04484304932735426,
+      "grad_norm": 0.4093039929866791,
+      "learning_rate": 9.967992638098515e-05,
+      "loss": 0.9023,
+      "step": 5
+    },
+    {
+      "epoch": 0.053811659192825115,
+      "grad_norm": 0.518921434879303,
+      "learning_rate": 9.950018542108818e-05,
+      "loss": 2.2163,
+      "step": 6
+    },
+    {
+      "epoch": 0.06278026905829596,
+      "grad_norm": 1.4019224643707275,
+      "learning_rate": 9.928079551738543e-05,
+      "loss": 1.0835,
+      "step": 7
+    },
+    {
+      "epoch": 0.07174887892376682,
+      "grad_norm": 0.4738399386405945,
+      "learning_rate": 9.902193239806635e-05,
+      "loss": 2.6919,
+      "step": 8
+    },
+    {
+      "epoch": 0.08071748878923767,
+      "grad_norm": 0.7166749835014343,
+      "learning_rate": 9.872380340880416e-05,
+      "loss": 1.0091,
+      "step": 9
+    },
+    {
+      "epoch": 0.08968609865470852,
+      "grad_norm": 0.5193161368370056,
+      "learning_rate": 9.838664734667495e-05,
+      "loss": 1.3009,
+      "step": 10
+    },
+    {
+      "epoch": 0.09865470852017937,
+      "grad_norm": 2.732475996017456,
+      "learning_rate": 9.801073426888447e-05,
+      "loss": 6.0912,
+      "step": 11
+    },
+    {
+      "epoch": 0.10762331838565023,
+      "grad_norm": 1.2672487497329712,
+      "learning_rate": 9.759636527645633e-05,
+      "loss": 1.7488,
+      "step": 12
+    },
+    {
+      "epoch": 0.11659192825112108,
+      "grad_norm": 2.43237566947937,
+      "learning_rate": 9.714387227305422e-05,
+      "loss": 0.7325,
+      "step": 13
+    },
+    {
+      "epoch": 0.12556053811659193,
+      "grad_norm": 2.542262315750122,
+      "learning_rate": 9.665361769913187e-05,
+      "loss": 5.6541,
+      "step": 14
+    },
+    {
+      "epoch": 0.13452914798206278,
+      "grad_norm": 1.3107805252075195,
+      "learning_rate": 9.612599424162344e-05,
+      "loss": 1.9974,
+      "step": 15
+    },
+    {
+      "epoch": 0.14349775784753363,
+      "grad_norm": 1.3028833866119385,
+      "learning_rate": 9.55614245194068e-05,
+      "loss": 2.6046,
+      "step": 16
+    },
+    {
+      "epoch": 0.15246636771300448,
+      "grad_norm": 0.9606136083602905,
+      "learning_rate": 9.496036074479184e-05,
+      "loss": 0.9119,
+      "step": 17
+    },
+    {
+      "epoch": 0.16143497757847533,
+      "grad_norm": 11.634020805358887,
+      "learning_rate": 9.432328436130493e-05,
+      "loss": 18.7628,
+      "step": 18
+    },
+    {
+      "epoch": 0.17040358744394618,
+      "grad_norm": 1.5843065977096558,
+      "learning_rate": 9.365070565805941e-05,
+      "loss": 0.667,
+      "step": 19
+    },
+    {
+      "epoch": 0.17937219730941703,
+      "grad_norm": 1.8860232830047607,
+      "learning_rate": 9.294316336102132e-05,
+      "loss": 0.9718,
+      "step": 20
+    },
+    {
+      "epoch": 0.18834080717488788,
+      "grad_norm": 4.539754390716553,
+      "learning_rate": 9.220122420149753e-05,
+      "loss": 1.02,
+      "step": 21
+    },
+    {
+      "epoch": 0.19730941704035873,
+      "grad_norm": 11.95474624633789,
+      "learning_rate": 9.142548246219212e-05,
+      "loss": 11.9615,
+      "step": 22
+    },
+    {
+      "epoch": 0.2062780269058296,
+      "grad_norm": 2.1707379817962646,
+      "learning_rate": 9.06165595011943e-05,
+      "loss": 2.3992,
+      "step": 23
+    },
+    {
+      "epoch": 0.21524663677130046,
+      "grad_norm": 0.7615045309066772,
+      "learning_rate": 8.97751032542795e-05,
+      "loss": 1.0077,
+      "step": 24
+    },
+    {
+      "epoch": 0.2242152466367713,
+      "grad_norm": 2.006991147994995,
+      "learning_rate": 8.890178771592199e-05,
+      "loss": 0.933,
+      "step": 25
+    },
+    {
+      "epoch": 0.23318385650224216,
+      "grad_norm": 1.6505995988845825,
+      "learning_rate": 8.799731239943487e-05,
+      "loss": 2.3094,
+      "step": 26
+    },
+    {
+      "epoch": 0.242152466367713,
+      "grad_norm": 1.3269753456115723,
+      "learning_rate": 8.706240177667003e-05,
+      "loss": 0.5488,
+      "step": 27
+    },
+    {
+      "epoch": 0.25112107623318386,
+      "grad_norm": 3.6518466472625732,
+      "learning_rate": 8.609780469772623e-05,
+      "loss": 2.4007,
+      "step": 28
+    },
+    {
+      "epoch": 0.2600896860986547,
+      "grad_norm": 2.108593463897705,
+      "learning_rate": 8.510429379113114e-05,
+      "loss": 0.9,
+      "step": 29
+    },
+    {
+      "epoch": 0.26905829596412556,
+      "grad_norm": 5.371153831481934,
+      "learning_rate": 8.408266484497664e-05,
+      "loss": 5.6924,
+      "step": 30
+    },
+    {
+      "epoch": 0.27802690582959644,
+      "grad_norm": 3.1271610260009766,
+      "learning_rate": 8.303373616950408e-05,
+      "loss": 0.5757,
+      "step": 31
+    },
+    {
+      "epoch": 0.28699551569506726,
+      "grad_norm": 4.09198522567749,
+      "learning_rate": 8.195834794164925e-05,
+      "loss": 0.7701,
+      "step": 32
+    },
+    {
+      "epoch": 0.29596412556053814,
+      "grad_norm": 6.343031883239746,
+      "learning_rate": 8.085736153207277e-05,
+      "loss": 2.7134,
+      "step": 33
+    },
+    {
+      "epoch": 0.30493273542600896,
+      "grad_norm": 3.070613145828247,
+      "learning_rate": 7.973165881521434e-05,
+      "loss": 1.1801,
+      "step": 34
+    },
+    {
+      "epoch": 0.31390134529147984,
+      "grad_norm": 8.84461498260498,
+      "learning_rate": 7.858214146292394e-05,
+      "loss": 5.7997,
+      "step": 35
+    },
+    {
+      "epoch": 0.32286995515695066,
+      "grad_norm": 2.05316424369812,
+      "learning_rate": 7.740973022223549e-05,
+      "loss": 0.8196,
+      "step": 36
+    },
+    {
+      "epoch": 0.33183856502242154,
+      "grad_norm": 1.3113634586334229,
+      "learning_rate": 7.621536417786159e-05,
+      "loss": 1.7346,
+      "step": 37
+    },
+    {
+      "epoch": 0.34080717488789236,
+      "grad_norm": 3.6481339931488037,
+      "learning_rate": 7.500000000000001e-05,
+      "loss": 1.5097,
+      "step": 38
+    },
+    {
+      "epoch": 0.34977578475336324,
+      "grad_norm": 9.386982917785645,
+      "learning_rate": 7.37646111780545e-05,
+      "loss": 9.1972,
+      "step": 39
+    },
+    {
+      "epoch": 0.35874439461883406,
+      "grad_norm": 2.849909782409668,
+      "learning_rate": 7.251018724088367e-05,
+      "loss": 3.8456,
+      "step": 40
+    },
+    {
+      "epoch": 0.36771300448430494,
+      "grad_norm": 14.061447143554688,
+      "learning_rate": 7.12377329642024e-05,
+      "loss": 11.3528,
+      "step": 41
+    },
+    {
+      "epoch": 0.37668161434977576,
+      "grad_norm": 5.636099338531494,
+      "learning_rate": 6.994826756577082e-05,
+      "loss": 1.2004,
+      "step": 42
+    },
+    {
+      "epoch": 0.38565022421524664,
+      "grad_norm": 3.9606025218963623,
+      "learning_rate": 6.864282388901544e-05,
+      "loss": 3.019,
+      "step": 43
+    },
+    {
+      "epoch": 0.39461883408071746,
+      "grad_norm": 1.7586909532546997,
+      "learning_rate": 6.732244757573619e-05,
+      "loss": 4.6122,
+      "step": 44
+    },
+    {
+      "epoch": 0.40358744394618834,
+      "grad_norm": 2.1864688396453857,
+      "learning_rate": 6.598819622856227e-05,
+      "loss": 1.1626,
+      "step": 45
+    },
+    {
+      "epoch": 0.4125560538116592,
+      "grad_norm": 2.680082321166992,
+      "learning_rate": 6.464113856382752e-05,
+      "loss": 1.5712,
+      "step": 46
+    },
+    {
+      "epoch": 0.42152466367713004,
+      "grad_norm": 10.624690055847168,
+      "learning_rate": 6.328235355554382e-05,
+      "loss": 15.0875,
+      "step": 47
+    },
+    {
+      "epoch": 0.4304932735426009,
+      "grad_norm": 4.587795257568359,
+      "learning_rate": 6.191292957115825e-05,
+      "loss": 1.6446,
+      "step": 48
+    },
+    {
+      "epoch": 0.43946188340807174,
+      "grad_norm": 3.1477649211883545,
+      "learning_rate": 6.0533963499786314e-05,
+      "loss": 0.8853,
+      "step": 49
+    },
+    {
+      "epoch": 0.4484304932735426,
+      "grad_norm": 7.956603050231934,
+      "learning_rate": 5.9146559873619335e-05,
+      "loss": 1.4764,
+      "step": 50
+    },
+    {
+      "epoch": 0.45739910313901344,
+      "grad_norm": 7.392563343048096,
+      "learning_rate": 5.7751829983209896e-05,
+      "loss": 6.6863,
+      "step": 51
+    },
+    {
+      "epoch": 0.4663677130044843,
+      "grad_norm": 9.421327590942383,
+      "learning_rate": 5.6350890987343944e-05,
+      "loss": 1.2458,
+      "step": 52
+    },
+    {
+      "epoch": 0.47533632286995514,
+      "grad_norm": 13.373278617858887,
+      "learning_rate": 5.4944865018212497e-05,
+      "loss": 17.7216,
+      "step": 53
+    },
+    {
+      "epoch": 0.484304932735426,
+      "grad_norm": 2.546264171600342,
+      "learning_rate": 5.353487828259973e-05,
+      "loss": 0.7374,
+      "step": 54
+    },
+    {
+      "epoch": 0.49327354260089684,
+      "grad_norm": 19.44002914428711,
+      "learning_rate": 5.212206015980742e-05,
+      "loss": 17.9206,
+      "step": 55
+    },
+    {
+      "epoch": 0.5022421524663677,
+      "grad_norm": 3.014998197555542,
+      "learning_rate": 5.0707542297038114e-05,
+      "loss": 2.4286,
+      "step": 56
+    },
+    {
+      "epoch": 0.5112107623318386,
+      "grad_norm": 8.958091735839844,
+      "learning_rate": 4.929245770296191e-05,
+      "loss": 1.0844,
+      "step": 57
+    },
+    {
+      "epoch": 0.5201793721973094,
+      "grad_norm": 12.889779090881348,
+      "learning_rate": 4.78779398401926e-05,
+      "loss": 1.3162,
+      "step": 58
+    },
+    {
+      "epoch": 0.5291479820627802,
+      "grad_norm": 4.120257377624512,
+      "learning_rate": 4.6465121717400275e-05,
+      "loss": 1.7695,
+      "step": 59
+    },
+    {
+      "epoch": 0.5381165919282511,
+      "grad_norm": 10.660250663757324,
+      "learning_rate": 4.5055134981787515e-05,
+      "loss": 0.8245,
+      "step": 60
+    },
+    {
+      "epoch": 0.547085201793722,
+      "grad_norm": 12.857951164245605,
+      "learning_rate": 4.364910901265606e-05,
+      "loss": 0.9344,
+      "step": 61
+    },
+    {
+      "epoch": 0.5560538116591929,
+      "grad_norm": 32.71133804321289,
+      "learning_rate": 4.224817001679011e-05,
+      "loss": 12.3719,
+      "step": 62
+    },
+    {
+      "epoch": 0.5650224215246636,
+      "grad_norm": 4.448777198791504,
+      "learning_rate": 4.085344012638067e-05,
+      "loss": 0.652,
+      "step": 63
+    },
+    {
+      "epoch": 0.5739910313901345,
+      "grad_norm": 31.312528610229492,
+      "learning_rate": 3.94660365002137e-05,
+      "loss": 5.3895,
+      "step": 64
+    },
+    {
+      "epoch": 0.5829596412556054,
+      "grad_norm": 11.717623710632324,
+      "learning_rate": 3.808707042884176e-05,
+      "loss": 0.5366,
+      "step": 65
+    },
+    {
+      "epoch": 0.5919282511210763,
+      "grad_norm": 26.004444122314453,
+      "learning_rate": 3.6717646444456193e-05,
+      "loss": 6.3204,
+      "step": 66
+    },
+    {
+      "epoch": 0.600896860986547,
+      "grad_norm": 2.2269480228424072,
+      "learning_rate": 3.5358861436172485e-05,
+      "loss": 1.2421,
+      "step": 67
+    },
+    {
+      "epoch": 0.6098654708520179,
+      "grad_norm": 35.62107849121094,
+      "learning_rate": 3.401180377143774e-05,
+      "loss": 7.713,
+      "step": 68
+    },
+    {
+      "epoch": 0.6188340807174888,
+      "grad_norm": 24.226354598999023,
+      "learning_rate": 3.267755242426384e-05,
+      "loss": 4.7619,
+      "step": 69
+    },
+    {
+      "epoch": 0.6278026905829597,
+      "grad_norm": 8.82026195526123,
+      "learning_rate": 3.135717611098458e-05,
+      "loss": 0.7543,
+      "step": 70
+    },
+    {
+      "epoch": 0.6367713004484304,
+      "grad_norm": 6.430093765258789,
+      "learning_rate": 3.0051732434229184e-05,
+      "loss": 0.664,
+      "step": 71
+    },
+    {
+      "epoch": 0.6457399103139013,
+      "grad_norm": 12.058977127075195,
+      "learning_rate": 2.876226703579761e-05,
+      "loss": 0.9159,
+      "step": 72
+    },
+    {
+      "epoch": 0.6547085201793722,
+      "grad_norm": 1.5619999170303345,
+      "learning_rate": 2.748981275911633e-05,
+      "loss": 0.385,
+      "step": 73
+    },
+    {
+      "epoch": 0.6636771300448431,
+      "grad_norm": 7.307992935180664,
+      "learning_rate": 2.6235388821945495e-05,
+      "loss": 3.3863,
+      "step": 74
+    },
+    {
+      "epoch": 0.672645739910314,
+      "grad_norm": 13.682785987854004,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 1.3127,
+      "step": 75
+    },
+    {
+      "epoch": 0.6816143497757847,
+      "grad_norm": 5.196952819824219,
+      "learning_rate": 2.3784635822138424e-05,
+      "loss": 0.5807,
+      "step": 76
+    },
+    {
+      "epoch": 0.6905829596412556,
+      "grad_norm": 15.000142097473145,
+      "learning_rate": 2.2590269777764515e-05,
+      "loss": 7.6098,
+      "step": 77
+    },
+    {
+      "epoch": 0.6995515695067265,
+      "grad_norm": 15.166838645935059,
+      "learning_rate": 2.141785853707607e-05,
+      "loss": 3.2455,
+      "step": 78
+    },
+    {
+      "epoch": 0.7085201793721974,
+      "grad_norm": 51.42879867553711,
+      "learning_rate": 2.026834118478567e-05,
+      "loss": 17.0577,
+      "step": 79
+    },
+    {
+      "epoch": 0.7174887892376681,
+      "grad_norm": 9.79825210571289,
+      "learning_rate": 1.9142638467927254e-05,
+      "loss": 2.9278,
+      "step": 80
+    },
+    {
+      "epoch": 0.726457399103139,
+      "grad_norm": 11.85526180267334,
+      "learning_rate": 1.8041652058350767e-05,
+      "loss": 0.7338,
+      "step": 81
+    },
+    {
+      "epoch": 0.7354260089686099,
+      "grad_norm": 9.030694961547852,
+      "learning_rate": 1.6966263830495936e-05,
+      "loss": 0.5654,
+      "step": 82
+    },
+    {
+      "epoch": 0.7443946188340808,
+      "grad_norm": 2.9595985412597656,
+      "learning_rate": 1.5917335155023367e-05,
+      "loss": 0.4755,
+      "step": 83
+    },
+    {
+      "epoch": 0.7533632286995515,
+      "grad_norm": 22.492107391357422,
+      "learning_rate": 1.4895706208868875e-05,
+      "loss": 5.8251,
+      "step": 84
+    },
+    {
+      "epoch": 0.7623318385650224,
+      "grad_norm": 4.64907693862915,
+      "learning_rate": 1.3902195302273779e-05,
+      "loss": 0.8443,
+      "step": 85
+    },
+    {
+      "epoch": 0.7713004484304933,
+      "grad_norm": 14.93572998046875,
+      "learning_rate": 1.2937598223330005e-05,
+      "loss": 4.5949,
+      "step": 86
+    },
+    {
+      "epoch": 0.7802690582959642,
+      "grad_norm": 15.04963493347168,
+      "learning_rate": 1.2002687600565137e-05,
+      "loss": 0.9518,
+      "step": 87
+    },
+    {
+      "epoch": 0.7892376681614349,
+      "grad_norm": 5.2687273025512695,
+      "learning_rate": 1.1098212284078036e-05,
+      "loss": 0.5544,
+      "step": 88
+    },
+    {
+      "epoch": 0.7982062780269058,
+      "grad_norm": 18.53102684020996,
+      "learning_rate": 1.0224896745720514e-05,
+      "loss": 4.8409,
+      "step": 89
+    },
+    {
+      "epoch": 0.8071748878923767,
+      "grad_norm": 7.883419513702393,
+      "learning_rate": 9.383440498805712e-06,
+      "loss": 1.0861,
+      "step": 90
+    },
+    {
+      "epoch": 0.8161434977578476,
+      "grad_norm": 4.33174991607666,
+      "learning_rate": 8.574517537807897e-06,
+      "loss": 1.8334,
+      "step": 91
+    },
+    {
+      "epoch": 0.8251121076233184,
+      "grad_norm": 25.279464721679688,
+      "learning_rate": 7.798775798502483e-06,
+      "loss": 13.6025,
+      "step": 92
+    },
+    {
+      "epoch": 0.8340807174887892,
+      "grad_norm": 30.552305221557617,
+      "learning_rate": 7.0568366389786975e-06,
+      "loss": 6.8453,
+      "step": 93
+    },
+    {
+      "epoch": 0.8430493273542601,
+      "grad_norm": 26.603317260742188,
+      "learning_rate": 6.349294341940593e-06,
+      "loss": 9.7103,
+      "step": 94
+    },
+    {
+      "epoch": 0.852017937219731,
+      "grad_norm": 21.3187313079834,
+      "learning_rate": 5.676715638695063e-06,
+      "loss": 8.2781,
+      "step": 95
+    },
+    {
+      "epoch": 0.8609865470852018,
+      "grad_norm": 10.021405220031738,
+      "learning_rate": 5.0396392552081564e-06,
+      "loss": 3.2354,
+      "step": 96
+    },
+    {
+      "epoch": 0.8699551569506726,
+      "grad_norm": 7.05618143081665,
+      "learning_rate": 4.43857548059321e-06,
+      "loss": 0.8512,
+      "step": 97
+    },
+    {
+      "epoch": 0.8789237668161435,
+      "grad_norm": 4.646176815032959,
+      "learning_rate": 3.87400575837657e-06,
+      "loss": 1.6577,
+      "step": 98
+    },
+    {
+      "epoch": 0.8878923766816144,
+      "grad_norm": 20.824520111083984,
+      "learning_rate": 3.346382300868134e-06,
+      "loss": 6.9646,
+      "step": 99
+    },
+    {
+      "epoch": 0.8968609865470852,
+      "grad_norm": 9.020377159118652,
+      "learning_rate": 2.85612772694579e-06,
+      "loss": 2.5206,
+      "step": 100
+    },
+    {
+      "epoch": 0.905829596412556,
+      "grad_norm": 9.824996948242188,
+      "learning_rate": 2.403634723543674e-06,
+      "loss": 2.3949,
+      "step": 101
+    },
+    {
+      "epoch": 0.9147982062780269,
+      "grad_norm": 8.309864044189453,
+      "learning_rate": 1.9892657311155248e-06,
+      "loss": 0.721,
+      "step": 102
+    },
+    {
+      "epoch": 0.9237668161434978,
+      "grad_norm": 3.164247989654541,
+      "learning_rate": 1.6133526533250565e-06,
+      "loss": 0.7017,
+      "step": 103
+    },
+    {
+      "epoch": 0.9327354260089686,
+      "grad_norm": 28.464841842651367,
+      "learning_rate": 1.2761965911958384e-06,
+      "loss": 16.1661,
+      "step": 104
+    },
+    {
+      "epoch": 0.9417040358744395,
+      "grad_norm": 8.207584381103516,
+      "learning_rate": 9.780676019336631e-07,
+      "loss": 1.0466,
+      "step": 105
+    },
+    {
+      "epoch": 0.9506726457399103,
+      "grad_norm": 14.752134323120117,
+      "learning_rate": 7.192044826145771e-07,
+      "loss": 4.8826,
+      "step": 106
+    },
+    {
+      "epoch": 0.9596412556053812,
+      "grad_norm": 17.996065139770508,
+      "learning_rate": 4.998145789118114e-07,
+      "loss": 1.0732,
+      "step": 107
+    },
+    {
+      "epoch": 0.968609865470852,
+      "grad_norm": 5.737391948699951,
+      "learning_rate": 3.2007361901485455e-07,
+      "loss": 0.5421,
+      "step": 108
+    },
+    {
+      "epoch": 0.9775784753363229,
+      "grad_norm": 8.844368934631348,
+      "learning_rate": 1.8012557287367392e-07,
+      "loss": 4.7103,
+      "step": 109
+    },
+    {
+      "epoch": 0.9865470852017937,
+      "grad_norm": 3.355065107345581,
+      "learning_rate": 8.008253688084889e-08,
+      "loss": 2.5895,
+      "step": 110
+    },
+    {
+      "epoch": 0.9955156950672646,
+      "grad_norm": 8.227714538574219,
+      "learning_rate": 2.0024644083921352e-08,
+      "loss": 6.171,
+      "step": 111
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 111,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 200,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 24,
+  "trial_name": null,
+  "trial_params": null
+}

20250501-1429-grad-clip/checkpoint-111/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8e3ef61e15dd2a9fddcef71c93f973a0d24de7fc1ce7fad91e9d00c977252df2
+size 5304

20250501-1429-grad-clip/runs/May01_14-29-25_d55355693cce/events.out.tfevents.1746109765.d55355693cce.160.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6fd67c67ebcd78af433967dfb59c7c2bf53379f2bc49e9f4d17084461d252bac
+size 27569