Upload folder using huggingface_hub

Browse files

Files changed (7) hide show

20250501-1421/checkpoint-111/optimizer.pt +3 -0
20250501-1421/checkpoint-111/pytorch_model.bin +3 -0
20250501-1421/checkpoint-111/rng_state.pth +3 -0
20250501-1421/checkpoint-111/scheduler.pt +3 -0
20250501-1421/checkpoint-111/trainer_state.json +811 -0
20250501-1421/checkpoint-111/training_args.bin +3 -0
20250501-1421/runs/May01_14-21-16_d55355693cce/events.out.tfevents.1746109277.d55355693cce.100.0 +3 -0

20250501-1421/checkpoint-111/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5711efa0fa29e3b31af8605d63f8159b42b8a498d41a4d4bd1c5b1367a42a3e1
+size 33824762

20250501-1421/checkpoint-111/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4fb6ff2c422748c278df1a02d53926882dc259ab62a1aafcdaed9b41b69dcb17
+size 368596590

20250501-1421/checkpoint-111/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:238523785dc8a6bd63d8f51ae72844d04988b9978d2050f234191a39fe7b1141
+size 14244

20250501-1421/checkpoint-111/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0e42e66dcf545600cbaa1c353643c7beb3f3d693939da1e7ad003b79b8ca066d
+size 1064

20250501-1421/checkpoint-111/trainer_state.json ADDED Viewed

	@@ -0,0 +1,811 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.9955156950672646,
+  "eval_steps": 200,
+  "global_step": 111,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.008968609865470852,
+      "grad_norm": 2.960822105407715,
+      "learning_rate": 0.0001,
+      "loss": 2.8706,
+      "step": 1
+    },
+    {
+      "epoch": 0.017937219730941704,
+      "grad_norm": 0.7307156324386597,
+      "learning_rate": 9.90990990990991e-05,
+      "loss": 1.3,
+      "step": 2
+    },
+    {
+      "epoch": 0.026905829596412557,
+      "grad_norm": 0.3430079519748688,
+      "learning_rate": 9.81981981981982e-05,
+      "loss": 1.2556,
+      "step": 3
+    },
+    {
+      "epoch": 0.03587443946188341,
+      "grad_norm": 3.3454463481903076,
+      "learning_rate": 9.729729729729731e-05,
+      "loss": 5.9789,
+      "step": 4
+    },
+    {
+      "epoch": 0.04484304932735426,
+      "grad_norm": 0.40624433755874634,
+      "learning_rate": 9.639639639639641e-05,
+      "loss": 0.9036,
+      "step": 5
+    },
+    {
+      "epoch": 0.053811659192825115,
+      "grad_norm": 0.52606600522995,
+      "learning_rate": 9.54954954954955e-05,
+      "loss": 2.2173,
+      "step": 6
+    },
+    {
+      "epoch": 0.06278026905829596,
+      "grad_norm": 1.4110300540924072,
+      "learning_rate": 9.45945945945946e-05,
+      "loss": 1.0893,
+      "step": 7
+    },
+    {
+      "epoch": 0.07174887892376682,
+      "grad_norm": 0.46715906262397766,
+      "learning_rate": 9.36936936936937e-05,
+      "loss": 2.6939,
+      "step": 8
+    },
+    {
+      "epoch": 0.08071748878923767,
+      "grad_norm": 0.6949467658996582,
+      "learning_rate": 9.279279279279279e-05,
+      "loss": 1.0149,
+      "step": 9
+    },
+    {
+      "epoch": 0.08968609865470852,
+      "grad_norm": 0.488537460565567,
+      "learning_rate": 9.18918918918919e-05,
+      "loss": 1.3068,
+      "step": 10
+    },
+    {
+      "epoch": 0.09865470852017937,
+      "grad_norm": 2.622659206390381,
+      "learning_rate": 9.0990990990991e-05,
+      "loss": 6.1029,
+      "step": 11
+    },
+    {
+      "epoch": 0.10762331838565023,
+      "grad_norm": 1.1801209449768066,
+      "learning_rate": 9.009009009009009e-05,
+      "loss": 1.7577,
+      "step": 12
+    },
+    {
+      "epoch": 0.11659192825112108,
+      "grad_norm": 2.3658924102783203,
+      "learning_rate": 8.918918918918919e-05,
+      "loss": 0.7523,
+      "step": 13
+    },
+    {
+      "epoch": 0.12556053811659193,
+      "grad_norm": 2.378786325454712,
+      "learning_rate": 8.82882882882883e-05,
+      "loss": 5.655,
+      "step": 14
+    },
+    {
+      "epoch": 0.13452914798206278,
+      "grad_norm": 1.1980235576629639,
+      "learning_rate": 8.738738738738738e-05,
+      "loss": 1.9926,
+      "step": 15
+    },
+    {
+      "epoch": 0.14349775784753363,
+      "grad_norm": 1.1755034923553467,
+      "learning_rate": 8.64864864864865e-05,
+      "loss": 2.6233,
+      "step": 16
+    },
+    {
+      "epoch": 0.15246636771300448,
+      "grad_norm": 0.9538990259170532,
+      "learning_rate": 8.55855855855856e-05,
+      "loss": 0.936,
+      "step": 17
+    },
+    {
+      "epoch": 0.16143497757847533,
+      "grad_norm": 10.638568878173828,
+      "learning_rate": 8.468468468468469e-05,
+      "loss": 18.6878,
+      "step": 18
+    },
+    {
+      "epoch": 0.17040358744394618,
+      "grad_norm": 1.5819883346557617,
+      "learning_rate": 8.378378378378379e-05,
+      "loss": 0.6895,
+      "step": 19
+    },
+    {
+      "epoch": 0.17937219730941703,
+      "grad_norm": 1.8580043315887451,
+      "learning_rate": 8.288288288288289e-05,
+      "loss": 0.9961,
+      "step": 20
+    },
+    {
+      "epoch": 0.18834080717488788,
+      "grad_norm": 4.262594223022461,
+      "learning_rate": 8.198198198198198e-05,
+      "loss": 1.0639,
+      "step": 21
+    },
+    {
+      "epoch": 0.19730941704035873,
+      "grad_norm": 10.581626892089844,
+      "learning_rate": 8.108108108108109e-05,
+      "loss": 11.8574,
+      "step": 22
+    },
+    {
+      "epoch": 0.2062780269058296,
+      "grad_norm": 1.8223183155059814,
+      "learning_rate": 8.018018018018019e-05,
+      "loss": 2.4031,
+      "step": 23
+    },
+    {
+      "epoch": 0.21524663677130046,
+      "grad_norm": 0.7646092772483826,
+      "learning_rate": 7.927927927927928e-05,
+      "loss": 1.0303,
+      "step": 24
+    },
+    {
+      "epoch": 0.2242152466367713,
+      "grad_norm": 1.9194566011428833,
+      "learning_rate": 7.837837837837838e-05,
+      "loss": 0.9733,
+      "step": 25
+    },
+    {
+      "epoch": 0.23318385650224216,
+      "grad_norm": 1.6878315210342407,
+      "learning_rate": 7.747747747747748e-05,
+      "loss": 2.3391,
+      "step": 26
+    },
+    {
+      "epoch": 0.242152466367713,
+      "grad_norm": 1.3996132612228394,
+      "learning_rate": 7.657657657657657e-05,
+      "loss": 0.5793,
+      "step": 27
+    },
+    {
+      "epoch": 0.25112107623318386,
+      "grad_norm": 2.891868829727173,
+      "learning_rate": 7.567567567567568e-05,
+      "loss": 2.4065,
+      "step": 28
+    },
+    {
+      "epoch": 0.2600896860986547,
+      "grad_norm": 1.5177550315856934,
+      "learning_rate": 7.477477477477479e-05,
+      "loss": 0.8909,
+      "step": 29
+    },
+    {
+      "epoch": 0.26905829596412556,
+      "grad_norm": 4.505558967590332,
+      "learning_rate": 7.387387387387387e-05,
+      "loss": 5.6894,
+      "step": 30
+    },
+    {
+      "epoch": 0.27802690582959644,
+      "grad_norm": 3.0119218826293945,
+      "learning_rate": 7.297297297297297e-05,
+      "loss": 0.6294,
+      "step": 31
+    },
+    {
+      "epoch": 0.28699551569506726,
+      "grad_norm": 3.9027931690216064,
+      "learning_rate": 7.207207207207208e-05,
+      "loss": 0.8345,
+      "step": 32
+    },
+    {
+      "epoch": 0.29596412556053814,
+      "grad_norm": 5.136641502380371,
+      "learning_rate": 7.117117117117116e-05,
+      "loss": 2.685,
+      "step": 33
+    },
+    {
+      "epoch": 0.30493273542600896,
+      "grad_norm": 2.472689390182495,
+      "learning_rate": 7.027027027027028e-05,
+      "loss": 1.1706,
+      "step": 34
+    },
+    {
+      "epoch": 0.31390134529147984,
+      "grad_norm": 7.271293640136719,
+      "learning_rate": 6.936936936936938e-05,
+      "loss": 5.7874,
+      "step": 35
+    },
+    {
+      "epoch": 0.32286995515695066,
+      "grad_norm": 1.4728788137435913,
+      "learning_rate": 6.846846846846847e-05,
+      "loss": 0.832,
+      "step": 36
+    },
+    {
+      "epoch": 0.33183856502242154,
+      "grad_norm": 1.3111985921859741,
+      "learning_rate": 6.756756756756757e-05,
+      "loss": 1.7645,
+      "step": 37
+    },
+    {
+      "epoch": 0.34080717488789236,
+      "grad_norm": 3.0567102432250977,
+      "learning_rate": 6.666666666666667e-05,
+      "loss": 1.5445,
+      "step": 38
+    },
+    {
+      "epoch": 0.34977578475336324,
+      "grad_norm": 8.974732398986816,
+      "learning_rate": 6.576576576576577e-05,
+      "loss": 9.2399,
+      "step": 39
+    },
+    {
+      "epoch": 0.35874439461883406,
+      "grad_norm": 2.831979990005493,
+      "learning_rate": 6.486486486486487e-05,
+      "loss": 3.8836,
+      "step": 40
+    },
+    {
+      "epoch": 0.36771300448430494,
+      "grad_norm": 14.11927604675293,
+      "learning_rate": 6.396396396396397e-05,
+      "loss": 11.535,
+      "step": 41
+    },
+    {
+      "epoch": 0.37668161434977576,
+      "grad_norm": 4.956366062164307,
+      "learning_rate": 6.306306306306306e-05,
+      "loss": 1.1701,
+      "step": 42
+    },
+    {
+      "epoch": 0.38565022421524664,
+      "grad_norm": 3.545992136001587,
+      "learning_rate": 6.216216216216216e-05,
+      "loss": 3.0184,
+      "step": 43
+    },
+    {
+      "epoch": 0.39461883408071746,
+      "grad_norm": 2.012129068374634,
+      "learning_rate": 6.126126126126126e-05,
+      "loss": 4.641,
+      "step": 44
+    },
+    {
+      "epoch": 0.40358744394618834,
+      "grad_norm": 1.8412423133850098,
+      "learning_rate": 6.0360360360360365e-05,
+      "loss": 1.1749,
+      "step": 45
+    },
+    {
+      "epoch": 0.4125560538116592,
+      "grad_norm": 2.2313199043273926,
+      "learning_rate": 5.9459459459459466e-05,
+      "loss": 1.5577,
+      "step": 46
+    },
+    {
+      "epoch": 0.42152466367713004,
+      "grad_norm": 10.823288917541504,
+      "learning_rate": 5.855855855855856e-05,
+      "loss": 15.2545,
+      "step": 47
+    },
+    {
+      "epoch": 0.4304932735426009,
+      "grad_norm": 4.433194637298584,
+      "learning_rate": 5.765765765765766e-05,
+      "loss": 1.6109,
+      "step": 48
+    },
+    {
+      "epoch": 0.43946188340807174,
+      "grad_norm": 2.7911524772644043,
+      "learning_rate": 5.6756756756756757e-05,
+      "loss": 0.8831,
+      "step": 49
+    },
+    {
+      "epoch": 0.4484304932735426,
+      "grad_norm": 7.617527008056641,
+      "learning_rate": 5.585585585585585e-05,
+      "loss": 1.4399,
+      "step": 50
+    },
+    {
+      "epoch": 0.45739910313901344,
+      "grad_norm": 7.5390777587890625,
+      "learning_rate": 5.4954954954954966e-05,
+      "loss": 6.7722,
+      "step": 51
+    },
+    {
+      "epoch": 0.4663677130044843,
+      "grad_norm": 9.198465347290039,
+      "learning_rate": 5.405405405405406e-05,
+      "loss": 1.1899,
+      "step": 52
+    },
+    {
+      "epoch": 0.47533632286995514,
+      "grad_norm": 13.445169448852539,
+      "learning_rate": 5.3153153153153155e-05,
+      "loss": 17.8221,
+      "step": 53
+    },
+    {
+      "epoch": 0.484304932735426,
+      "grad_norm": 2.501037359237671,
+      "learning_rate": 5.2252252252252256e-05,
+      "loss": 0.7443,
+      "step": 54
+    },
+    {
+      "epoch": 0.49327354260089684,
+      "grad_norm": 20.265993118286133,
+      "learning_rate": 5.135135135135135e-05,
+      "loss": 18.0624,
+      "step": 55
+    },
+    {
+      "epoch": 0.5022421524663677,
+      "grad_norm": 2.8558311462402344,
+      "learning_rate": 5.0450450450450445e-05,
+      "loss": 2.4351,
+      "step": 56
+    },
+    {
+      "epoch": 0.5112107623318386,
+      "grad_norm": 9.033570289611816,
+      "learning_rate": 4.954954954954955e-05,
+      "loss": 1.05,
+      "step": 57
+    },
+    {
+      "epoch": 0.5201793721973094,
+      "grad_norm": 12.937838554382324,
+      "learning_rate": 4.8648648648648654e-05,
+      "loss": 1.2733,
+      "step": 58
+    },
+    {
+      "epoch": 0.5291479820627802,
+      "grad_norm": 4.5761942863464355,
+      "learning_rate": 4.774774774774775e-05,
+      "loss": 1.7739,
+      "step": 59
+    },
+    {
+      "epoch": 0.5381165919282511,
+      "grad_norm": 10.550862312316895,
+      "learning_rate": 4.684684684684685e-05,
+      "loss": 0.7909,
+      "step": 60
+    },
+    {
+      "epoch": 0.547085201793722,
+      "grad_norm": 13.009169578552246,
+      "learning_rate": 4.594594594594595e-05,
+      "loss": 0.9064,
+      "step": 61
+    },
+    {
+      "epoch": 0.5560538116591929,
+      "grad_norm": 35.53451919555664,
+      "learning_rate": 4.5045045045045046e-05,
+      "loss": 12.5267,
+      "step": 62
+    },
+    {
+      "epoch": 0.5650224215246636,
+      "grad_norm": 3.889129161834717,
+      "learning_rate": 4.414414414414415e-05,
+      "loss": 0.6209,
+      "step": 63
+    },
+    {
+      "epoch": 0.5739910313901345,
+      "grad_norm": 33.15275573730469,
+      "learning_rate": 4.324324324324325e-05,
+      "loss": 5.5689,
+      "step": 64
+    },
+    {
+      "epoch": 0.5829596412556054,
+      "grad_norm": 10.819781303405762,
+      "learning_rate": 4.234234234234234e-05,
+      "loss": 0.4751,
+      "step": 65
+    },
+    {
+      "epoch": 0.5919282511210763,
+      "grad_norm": 27.67494773864746,
+      "learning_rate": 4.1441441441441444e-05,
+      "loss": 6.5307,
+      "step": 66
+    },
+    {
+      "epoch": 0.600896860986547,
+      "grad_norm": 2.997436285018921,
+      "learning_rate": 4.0540540540540545e-05,
+      "loss": 1.2605,
+      "step": 67
+    },
+    {
+      "epoch": 0.6098654708520179,
+      "grad_norm": 36.24047088623047,
+      "learning_rate": 3.963963963963964e-05,
+      "loss": 7.8812,
+      "step": 68
+    },
+    {
+      "epoch": 0.6188340807174888,
+      "grad_norm": 24.230558395385742,
+      "learning_rate": 3.873873873873874e-05,
+      "loss": 4.867,
+      "step": 69
+    },
+    {
+      "epoch": 0.6278026905829597,
+      "grad_norm": 8.829228401184082,
+      "learning_rate": 3.783783783783784e-05,
+      "loss": 0.7367,
+      "step": 70
+    },
+    {
+      "epoch": 0.6367713004484304,
+      "grad_norm": 6.5498199462890625,
+      "learning_rate": 3.693693693693694e-05,
+      "loss": 0.6523,
+      "step": 71
+    },
+    {
+      "epoch": 0.6457399103139013,
+      "grad_norm": 12.350178718566895,
+      "learning_rate": 3.603603603603604e-05,
+      "loss": 0.9404,
+      "step": 72
+    },
+    {
+      "epoch": 0.6547085201793722,
+      "grad_norm": 1.8611844778060913,
+      "learning_rate": 3.513513513513514e-05,
+      "loss": 0.3853,
+      "step": 73
+    },
+    {
+      "epoch": 0.6636771300448431,
+      "grad_norm": 7.657431125640869,
+      "learning_rate": 3.4234234234234234e-05,
+      "loss": 3.3681,
+      "step": 74
+    },
+    {
+      "epoch": 0.672645739910314,
+      "grad_norm": 13.772859573364258,
+      "learning_rate": 3.3333333333333335e-05,
+      "loss": 1.3095,
+      "step": 75
+    },
+    {
+      "epoch": 0.6816143497757847,
+      "grad_norm": 5.472783088684082,
+      "learning_rate": 3.2432432432432436e-05,
+      "loss": 0.6071,
+      "step": 76
+    },
+    {
+      "epoch": 0.6905829596412556,
+      "grad_norm": 14.440363883972168,
+      "learning_rate": 3.153153153153153e-05,
+      "loss": 7.5165,
+      "step": 77
+    },
+    {
+      "epoch": 0.6995515695067265,
+      "grad_norm": 13.61552906036377,
+      "learning_rate": 3.063063063063063e-05,
+      "loss": 3.1452,
+      "step": 78
+    },
+    {
+      "epoch": 0.7085201793721974,
+      "grad_norm": 48.33591842651367,
+      "learning_rate": 2.9729729729729733e-05,
+      "loss": 16.6477,
+      "step": 79
+    },
+    {
+      "epoch": 0.7174887892376681,
+      "grad_norm": 7.3193464279174805,
+      "learning_rate": 2.882882882882883e-05,
+      "loss": 2.8526,
+      "step": 80
+    },
+    {
+      "epoch": 0.726457399103139,
+      "grad_norm": 13.097620010375977,
+      "learning_rate": 2.7927927927927926e-05,
+      "loss": 0.87,
+      "step": 81
+    },
+    {
+      "epoch": 0.7354260089686099,
+      "grad_norm": 11.424261093139648,
+      "learning_rate": 2.702702702702703e-05,
+      "loss": 0.7039,
+      "step": 82
+    },
+    {
+      "epoch": 0.7443946188340808,
+      "grad_norm": 4.838938236236572,
+      "learning_rate": 2.6126126126126128e-05,
+      "loss": 0.5254,
+      "step": 83
+    },
+    {
+      "epoch": 0.7533632286995515,
+      "grad_norm": 17.895553588867188,
+      "learning_rate": 2.5225225225225222e-05,
+      "loss": 5.5567,
+      "step": 84
+    },
+    {
+      "epoch": 0.7623318385650224,
+      "grad_norm": 5.642916202545166,
+      "learning_rate": 2.4324324324324327e-05,
+      "loss": 0.8884,
+      "step": 85
+    },
+    {
+      "epoch": 0.7713004484304933,
+      "grad_norm": 11.742246627807617,
+      "learning_rate": 2.3423423423423425e-05,
+      "loss": 4.378,
+      "step": 86
+    },
+    {
+      "epoch": 0.7802690582959642,
+      "grad_norm": 15.400406837463379,
+      "learning_rate": 2.2522522522522523e-05,
+      "loss": 1.1752,
+      "step": 87
+    },
+    {
+      "epoch": 0.7892376681614349,
+      "grad_norm": 6.748379707336426,
+      "learning_rate": 2.1621621621621624e-05,
+      "loss": 0.6377,
+      "step": 88
+    },
+    {
+      "epoch": 0.7982062780269058,
+      "grad_norm": 13.948418617248535,
+      "learning_rate": 2.0720720720720722e-05,
+      "loss": 4.6146,
+      "step": 89
+    },
+    {
+      "epoch": 0.8071748878923767,
+      "grad_norm": 5.0134687423706055,
+      "learning_rate": 1.981981981981982e-05,
+      "loss": 0.9851,
+      "step": 90
+    },
+    {
+      "epoch": 0.8161434977578476,
+      "grad_norm": 2.1885986328125,
+      "learning_rate": 1.891891891891892e-05,
+      "loss": 1.7911,
+      "step": 91
+    },
+    {
+      "epoch": 0.8251121076233184,
+      "grad_norm": 19.37745475769043,
+      "learning_rate": 1.801801801801802e-05,
+      "loss": 13.3176,
+      "step": 92
+    },
+    {
+      "epoch": 0.8340807174887892,
+      "grad_norm": 25.039756774902344,
+      "learning_rate": 1.7117117117117117e-05,
+      "loss": 6.4343,
+      "step": 93
+    },
+    {
+      "epoch": 0.8430493273542601,
+      "grad_norm": 20.313587188720703,
+      "learning_rate": 1.6216216216216218e-05,
+      "loss": 9.2506,
+      "step": 94
+    },
+    {
+      "epoch": 0.852017937219731,
+      "grad_norm": 17.610029220581055,
+      "learning_rate": 1.5315315315315316e-05,
+      "loss": 7.9287,
+      "step": 95
+    },
+    {
+      "epoch": 0.8609865470852018,
+      "grad_norm": 6.322761058807373,
+      "learning_rate": 1.4414414414414416e-05,
+      "loss": 3.0756,
+      "step": 96
+    },
+    {
+      "epoch": 0.8699551569506726,
+      "grad_norm": 8.840217590332031,
+      "learning_rate": 1.3513513513513515e-05,
+      "loss": 1.0194,
+      "step": 97
+    },
+    {
+      "epoch": 0.8789237668161435,
+      "grad_norm": 1.8822249174118042,
+      "learning_rate": 1.2612612612612611e-05,
+      "loss": 1.5888,
+      "step": 98
+    },
+    {
+      "epoch": 0.8878923766816144,
+      "grad_norm": 15.456230163574219,
+      "learning_rate": 1.1711711711711713e-05,
+      "loss": 6.5939,
+      "step": 99
+    },
+    {
+      "epoch": 0.8968609865470852,
+      "grad_norm": 4.190560817718506,
+      "learning_rate": 1.0810810810810812e-05,
+      "loss": 2.3652,
+      "step": 100
+    },
+    {
+      "epoch": 0.905829596412556,
+      "grad_norm": 4.274927139282227,
+      "learning_rate": 9.90990990990991e-06,
+      "loss": 2.2314,
+      "step": 101
+    },
+    {
+      "epoch": 0.9147982062780269,
+      "grad_norm": 10.192256927490234,
+      "learning_rate": 9.00900900900901e-06,
+      "loss": 0.9849,
+      "step": 102
+    },
+    {
+      "epoch": 0.9237668161434978,
+      "grad_norm": 6.9923095703125,
+      "learning_rate": 8.108108108108109e-06,
+      "loss": 0.8398,
+      "step": 103
+    },
+    {
+      "epoch": 0.9327354260089686,
+      "grad_norm": 18.946592330932617,
+      "learning_rate": 7.207207207207208e-06,
+      "loss": 15.561,
+      "step": 104
+    },
+    {
+      "epoch": 0.9417040358744395,
+      "grad_norm": 10.394827842712402,
+      "learning_rate": 6.306306306306306e-06,
+      "loss": 1.3364,
+      "step": 105
+    },
+    {
+      "epoch": 0.9506726457399103,
+      "grad_norm": 7.969135284423828,
+      "learning_rate": 5.405405405405406e-06,
+      "loss": 4.5584,
+      "step": 106
+    },
+    {
+      "epoch": 0.9596412556053812,
+      "grad_norm": 18.159500122070312,
+      "learning_rate": 4.504504504504505e-06,
+      "loss": 1.6239,
+      "step": 107
+    },
+    {
+      "epoch": 0.968609865470852,
+      "grad_norm": 8.934094429016113,
+      "learning_rate": 3.603603603603604e-06,
+      "loss": 0.7876,
+      "step": 108
+    },
+    {
+      "epoch": 0.9775784753363229,
+      "grad_norm": 3.407855272293091,
+      "learning_rate": 2.702702702702703e-06,
+      "loss": 4.5206,
+      "step": 109
+    },
+    {
+      "epoch": 0.9865470852017937,
+      "grad_norm": 4.0875701904296875,
+      "learning_rate": 1.801801801801802e-06,
+      "loss": 2.6009,
+      "step": 110
+    },
+    {
+      "epoch": 0.9955156950672646,
+      "grad_norm": 3.929051637649536,
+      "learning_rate": 9.00900900900901e-07,
+      "loss": 6.0557,
+      "step": 111
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 111,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 200,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 24,
+  "trial_name": null,
+  "trial_params": null
+}

20250501-1421/checkpoint-111/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5af9beeda97f152c735237547cc7b18512cf2624167015790c50fa65590e3a8b
+size 5304

20250501-1421/runs/May01_14-21-16_d55355693cce/events.out.tfevents.1746109277.d55355693cce.100.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:733af7f3581ca84c24b5a6088a4614469a0ce481382b094ca7333e15a4280109
+size 27539