Upload folder using huggingface_hub

Browse files

Files changed (7) hide show

20250501-1443/checkpoint-111/optimizer.pt +3 -0
20250501-1443/checkpoint-111/pytorch_model.bin +3 -0
20250501-1443/checkpoint-111/rng_state.pth +3 -0
20250501-1443/checkpoint-111/scheduler.pt +3 -0
20250501-1443/checkpoint-111/trainer_state.json +811 -0
20250501-1443/checkpoint-111/training_args.bin +3 -0
20250501-1443/runs/May01_14-43-04_d55355693cce/events.out.tfevents.1746110584.d55355693cce.190.0 +3 -0

20250501-1443/checkpoint-111/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a4e8ddbb7db75c7753a0072a9d206bc3fcfe6cca185cde5e346cb5fa101933ba
+size 33824762

20250501-1443/checkpoint-111/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:85d26d8feb40fa988797a0da10f54092b69129fc55181e59693208bd7fa9a889
+size 368596590

20250501-1443/checkpoint-111/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:238523785dc8a6bd63d8f51ae72844d04988b9978d2050f234191a39fe7b1141
+size 14244

20250501-1443/checkpoint-111/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5aaf933d4fb1ea8eea938475ce05bd1a1bf3e571898db00f4d1ce0efa30350bb
+size 1064

20250501-1443/checkpoint-111/trainer_state.json ADDED Viewed

	@@ -0,0 +1,811 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.9955156950672646,
+  "eval_steps": 200,
+  "global_step": 111,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.008968609865470852,
+      "grad_norm": 2.960822105407715,
+      "learning_rate": 1e-05,
+      "loss": 2.8706,
+      "step": 1
+    },
+    {
+      "epoch": 0.017937219730941704,
+      "grad_norm": 0.8830159306526184,
+      "learning_rate": 9.99799753559161e-06,
+      "loss": 1.3321,
+      "step": 2
+    },
+    {
+      "epoch": 0.026905829596412557,
+      "grad_norm": 0.6235600113868713,
+      "learning_rate": 9.991991746311916e-06,
+      "loss": 1.286,
+      "step": 3
+    },
+    {
+      "epoch": 0.03587443946188341,
+      "grad_norm": 3.6786465644836426,
+      "learning_rate": 9.981987442712634e-06,
+      "loss": 6.3481,
+      "step": 4
+    },
+    {
+      "epoch": 0.04484304932735426,
+      "grad_norm": 0.9499123096466064,
+      "learning_rate": 9.967992638098517e-06,
+      "loss": 0.9874,
+      "step": 5
+    },
+    {
+      "epoch": 0.053811659192825115,
+      "grad_norm": 0.5928323268890381,
+      "learning_rate": 9.950018542108818e-06,
+      "loss": 2.2348,
+      "step": 6
+    },
+    {
+      "epoch": 0.06278026905829596,
+      "grad_norm": 0.5137901306152344,
+      "learning_rate": 9.928079551738542e-06,
+      "loss": 0.9355,
+      "step": 7
+    },
+    {
+      "epoch": 0.07174887892376682,
+      "grad_norm": 0.9037736654281616,
+      "learning_rate": 9.902193239806634e-06,
+      "loss": 2.7805,
+      "step": 8
+    },
+    {
+      "epoch": 0.08071748878923767,
+      "grad_norm": 0.44555988907814026,
+      "learning_rate": 9.872380340880416e-06,
+      "loss": 1.0094,
+      "step": 9
+    },
+    {
+      "epoch": 0.08968609865470852,
+      "grad_norm": 0.5814350843429565,
+      "learning_rate": 9.838664734667496e-06,
+      "loss": 1.3459,
+      "step": 10
+    },
+    {
+      "epoch": 0.09865470852017937,
+      "grad_norm": 3.1227192878723145,
+      "learning_rate": 9.801073426888447e-06,
+      "loss": 6.6014,
+      "step": 11
+    },
+    {
+      "epoch": 0.10762331838565023,
+      "grad_norm": 1.7620548009872437,
+      "learning_rate": 9.759636527645633e-06,
+      "loss": 2.0171,
+      "step": 12
+    },
+    {
+      "epoch": 0.11659192825112108,
+      "grad_norm": 1.4160568714141846,
+      "learning_rate": 9.714387227305422e-06,
+      "loss": 0.4961,
+      "step": 13
+    },
+    {
+      "epoch": 0.12556053811659193,
+      "grad_norm": 2.736999750137329,
+      "learning_rate": 9.665361769913187e-06,
+      "loss": 6.0883,
+      "step": 14
+    },
+    {
+      "epoch": 0.13452914798206278,
+      "grad_norm": 1.6954115629196167,
+      "learning_rate": 9.612599424162344e-06,
+      "loss": 2.2329,
+      "step": 15
+    },
+    {
+      "epoch": 0.14349775784753363,
+      "grad_norm": 1.6500178575515747,
+      "learning_rate": 9.55614245194068e-06,
+      "loss": 2.9183,
+      "step": 16
+    },
+    {
+      "epoch": 0.15246636771300448,
+      "grad_norm": 0.3353758454322815,
+      "learning_rate": 9.496036074479184e-06,
+      "loss": 0.9336,
+      "step": 17
+    },
+    {
+      "epoch": 0.16143497757847533,
+      "grad_norm": 8.906839370727539,
+      "learning_rate": 9.432328436130493e-06,
+      "loss": 20.6856,
+      "step": 18
+    },
+    {
+      "epoch": 0.17040358744394618,
+      "grad_norm": 0.25995615124702454,
+      "learning_rate": 9.365070565805941e-06,
+      "loss": 0.5657,
+      "step": 19
+    },
+    {
+      "epoch": 0.17937219730941703,
+      "grad_norm": 0.4160774350166321,
+      "learning_rate": 9.294316336102132e-06,
+      "loss": 0.8398,
+      "step": 20
+    },
+    {
+      "epoch": 0.18834080717488788,
+      "grad_norm": 1.9057613611221313,
+      "learning_rate": 9.220122420149753e-06,
+      "loss": 0.5355,
+      "step": 21
+    },
+    {
+      "epoch": 0.19730941704035873,
+      "grad_norm": 7.734179496765137,
+      "learning_rate": 9.142548246219212e-06,
+      "loss": 13.5005,
+      "step": 22
+    },
+    {
+      "epoch": 0.2062780269058296,
+      "grad_norm": 2.181833505630493,
+      "learning_rate": 9.06165595011943e-06,
+      "loss": 2.8525,
+      "step": 23
+    },
+    {
+      "epoch": 0.21524663677130046,
+      "grad_norm": 0.7631401419639587,
+      "learning_rate": 8.97751032542795e-06,
+      "loss": 1.1342,
+      "step": 24
+    },
+    {
+      "epoch": 0.2242152466367713,
+      "grad_norm": 0.3742104470729828,
+      "learning_rate": 8.890178771592198e-06,
+      "loss": 0.9673,
+      "step": 25
+    },
+    {
+      "epoch": 0.23318385650224216,
+      "grad_norm": 0.39411431550979614,
+      "learning_rate": 8.799731239943488e-06,
+      "loss": 2.3545,
+      "step": 26
+    },
+    {
+      "epoch": 0.242152466367713,
+      "grad_norm": 0.22178885340690613,
+      "learning_rate": 8.706240177667003e-06,
+      "loss": 0.5843,
+      "step": 27
+    },
+    {
+      "epoch": 0.25112107623318386,
+      "grad_norm": 1.9868074655532837,
+      "learning_rate": 8.609780469772623e-06,
+      "loss": 2.832,
+      "step": 28
+    },
+    {
+      "epoch": 0.2600896860986547,
+      "grad_norm": 1.152679443359375,
+      "learning_rate": 8.510429379113114e-06,
+      "loss": 1.091,
+      "step": 29
+    },
+    {
+      "epoch": 0.26905829596412556,
+      "grad_norm": 3.0194895267486572,
+      "learning_rate": 8.408266484497664e-06,
+      "loss": 6.3736,
+      "step": 30
+    },
+    {
+      "epoch": 0.27802690582959644,
+      "grad_norm": 0.9038723707199097,
+      "learning_rate": 8.303373616950408e-06,
+      "loss": 0.5556,
+      "step": 31
+    },
+    {
+      "epoch": 0.28699551569506726,
+      "grad_norm": 1.5291337966918945,
+      "learning_rate": 8.195834794164925e-06,
+      "loss": 0.7135,
+      "step": 32
+    },
+    {
+      "epoch": 0.29596412556053814,
+      "grad_norm": 2.6512932777404785,
+      "learning_rate": 8.085736153207277e-06,
+      "loss": 3.171,
+      "step": 33
+    },
+    {
+      "epoch": 0.30493273542600896,
+      "grad_norm": 1.5600694417953491,
+      "learning_rate": 7.973165881521435e-06,
+      "loss": 1.4656,
+      "step": 34
+    },
+    {
+      "epoch": 0.31390134529147984,
+      "grad_norm": 3.086857557296753,
+      "learning_rate": 7.858214146292394e-06,
+      "loss": 6.4496,
+      "step": 35
+    },
+    {
+      "epoch": 0.32286995515695066,
+      "grad_norm": 0.5738726258277893,
+      "learning_rate": 7.74097302222355e-06,
+      "loss": 1.0392,
+      "step": 36
+    },
+    {
+      "epoch": 0.33183856502242154,
+      "grad_norm": 0.6252602934837341,
+      "learning_rate": 7.621536417786159e-06,
+      "loss": 2.0603,
+      "step": 37
+    },
+    {
+      "epoch": 0.34080717488789236,
+      "grad_norm": 1.3429893255233765,
+      "learning_rate": 7.500000000000001e-06,
+      "loss": 1.9293,
+      "step": 38
+    },
+    {
+      "epoch": 0.34977578475336324,
+      "grad_norm": 4.64713716506958,
+      "learning_rate": 7.37646111780545e-06,
+      "loss": 10.205,
+      "step": 39
+    },
+    {
+      "epoch": 0.35874439461883406,
+      "grad_norm": 1.8087923526763916,
+      "learning_rate": 7.251018724088367e-06,
+      "loss": 4.4365,
+      "step": 40
+    },
+    {
+      "epoch": 0.36771300448430494,
+      "grad_norm": 7.087998390197754,
+      "learning_rate": 7.12377329642024e-06,
+      "loss": 13.9087,
+      "step": 41
+    },
+    {
+      "epoch": 0.37668161434977576,
+      "grad_norm": 0.43495556712150574,
+      "learning_rate": 6.994826756577082e-06,
+      "loss": 1.1252,
+      "step": 42
+    },
+    {
+      "epoch": 0.38565022421524664,
+      "grad_norm": 0.846789538860321,
+      "learning_rate": 6.864282388901544e-06,
+      "loss": 3.3436,
+      "step": 43
+    },
+    {
+      "epoch": 0.39461883408071746,
+      "grad_norm": 2.1694114208221436,
+      "learning_rate": 6.732244757573619e-06,
+      "loss": 5.1797,
+      "step": 44
+    },
+    {
+      "epoch": 0.40358744394618834,
+      "grad_norm": 1.503968358039856,
+      "learning_rate": 6.598819622856227e-06,
+      "loss": 1.6628,
+      "step": 45
+    },
+    {
+      "epoch": 0.4125560538116592,
+      "grad_norm": 0.7890214323997498,
+      "learning_rate": 6.464113856382752e-06,
+      "loss": 1.7293,
+      "step": 46
+    },
+    {
+      "epoch": 0.42152466367713004,
+      "grad_norm": 7.634031772613525,
+      "learning_rate": 6.328235355554382e-06,
+      "loss": 18.4625,
+      "step": 47
+    },
+    {
+      "epoch": 0.4304932735426009,
+      "grad_norm": 0.4779037535190582,
+      "learning_rate": 6.191292957115825e-06,
+      "loss": 1.503,
+      "step": 48
+    },
+    {
+      "epoch": 0.43946188340807174,
+      "grad_norm": 1.601840615272522,
+      "learning_rate": 6.053396349978632e-06,
+      "loss": 1.3566,
+      "step": 49
+    },
+    {
+      "epoch": 0.4484304932735426,
+      "grad_norm": 0.35992446541786194,
+      "learning_rate": 5.914655987361934e-06,
+      "loss": 1.2768,
+      "step": 50
+    },
+    {
+      "epoch": 0.45739910313901344,
+      "grad_norm": 5.177908420562744,
+      "learning_rate": 5.77518299832099e-06,
+      "loss": 8.8561,
+      "step": 51
+    },
+    {
+      "epoch": 0.4663677130044843,
+      "grad_norm": 1.3006104230880737,
+      "learning_rate": 5.635089098734394e-06,
+      "loss": 0.5305,
+      "step": 52
+    },
+    {
+      "epoch": 0.47533632286995514,
+      "grad_norm": 7.080929279327393,
+      "learning_rate": 5.49448650182125e-06,
+      "loss": 20.9515,
+      "step": 53
+    },
+    {
+      "epoch": 0.484304932735426,
+      "grad_norm": 0.9084333777427673,
+      "learning_rate": 5.353487828259973e-06,
+      "loss": 1.2171,
+      "step": 54
+    },
+    {
+      "epoch": 0.49327354260089684,
+      "grad_norm": 9.15694808959961,
+      "learning_rate": 5.212206015980742e-06,
+      "loss": 21.9408,
+      "step": 55
+    },
+    {
+      "epoch": 0.5022421524663677,
+      "grad_norm": 1.3177266120910645,
+      "learning_rate": 5.070754229703811e-06,
+      "loss": 2.9937,
+      "step": 56
+    },
+    {
+      "epoch": 0.5112107623318386,
+      "grad_norm": 1.0787602663040161,
+      "learning_rate": 4.929245770296191e-06,
+      "loss": 0.6544,
+      "step": 57
+    },
+    {
+      "epoch": 0.5201793721973094,
+      "grad_norm": 1.7759263515472412,
+      "learning_rate": 4.78779398401926e-06,
+      "loss": 0.5922,
+      "step": 58
+    },
+    {
+      "epoch": 0.5291479820627802,
+      "grad_norm": 1.2032181024551392,
+      "learning_rate": 4.646512171740028e-06,
+      "loss": 2.3146,
+      "step": 59
+    },
+    {
+      "epoch": 0.5381165919282511,
+      "grad_norm": 1.228383183479309,
+      "learning_rate": 4.505513498178752e-06,
+      "loss": 0.5068,
+      "step": 60
+    },
+    {
+      "epoch": 0.547085201793722,
+      "grad_norm": 2.082943916320801,
+      "learning_rate": 4.364910901265607e-06,
+      "loss": 0.6455,
+      "step": 61
+    },
+    {
+      "epoch": 0.5560538116591929,
+      "grad_norm": 5.729760646820068,
+      "learning_rate": 4.224817001679011e-06,
+      "loss": 13.7731,
+      "step": 62
+    },
+    {
+      "epoch": 0.5650224215246636,
+      "grad_norm": 0.9803327918052673,
+      "learning_rate": 4.085344012638067e-06,
+      "loss": 0.6381,
+      "step": 63
+    },
+    {
+      "epoch": 0.5739910313901345,
+      "grad_norm": 5.145959377288818,
+      "learning_rate": 3.94660365002137e-06,
+      "loss": 6.1626,
+      "step": 64
+    },
+    {
+      "epoch": 0.5829596412556054,
+      "grad_norm": 1.8556267023086548,
+      "learning_rate": 3.808707042884176e-06,
+      "loss": 0.507,
+      "step": 65
+    },
+    {
+      "epoch": 0.5919282511210763,
+      "grad_norm": 4.164766788482666,
+      "learning_rate": 3.6717646444456196e-06,
+      "loss": 7.3431,
+      "step": 66
+    },
+    {
+      "epoch": 0.600896860986547,
+      "grad_norm": 0.5073394179344177,
+      "learning_rate": 3.5358861436172487e-06,
+      "loss": 1.4834,
+      "step": 67
+    },
+    {
+      "epoch": 0.6098654708520179,
+      "grad_norm": 5.046154975891113,
+      "learning_rate": 3.401180377143774e-06,
+      "loss": 8.1017,
+      "step": 68
+    },
+    {
+      "epoch": 0.6188340807174888,
+      "grad_norm": 2.9860904216766357,
+      "learning_rate": 3.2677552424263836e-06,
+      "loss": 4.9051,
+      "step": 69
+    },
+    {
+      "epoch": 0.6278026905829597,
+      "grad_norm": 1.433781385421753,
+      "learning_rate": 3.1357176110984578e-06,
+      "loss": 0.752,
+      "step": 70
+    },
+    {
+      "epoch": 0.6367713004484304,
+      "grad_norm": 1.624589443206787,
+      "learning_rate": 3.0051732434229185e-06,
+      "loss": 0.7641,
+      "step": 71
+    },
+    {
+      "epoch": 0.6457399103139013,
+      "grad_norm": 0.9126272797584534,
+      "learning_rate": 2.8762267035797607e-06,
+      "loss": 1.248,
+      "step": 72
+    },
+    {
+      "epoch": 0.6547085201793722,
+      "grad_norm": 0.49890244007110596,
+      "learning_rate": 2.748981275911633e-06,
+      "loss": 0.6581,
+      "step": 73
+    },
+    {
+      "epoch": 0.6636771300448431,
+      "grad_norm": 1.4649814367294312,
+      "learning_rate": 2.6235388821945497e-06,
+      "loss": 3.489,
+      "step": 74
+    },
+    {
+      "epoch": 0.672645739910314,
+      "grad_norm": 1.5615613460540771,
+      "learning_rate": 2.5000000000000015e-06,
+      "loss": 1.8816,
+      "step": 75
+    },
+    {
+      "epoch": 0.6816143497757847,
+      "grad_norm": 1.2162355184555054,
+      "learning_rate": 2.3784635822138424e-06,
+      "loss": 0.7183,
+      "step": 76
+    },
+    {
+      "epoch": 0.6905829596412556,
+      "grad_norm": 3.3642117977142334,
+      "learning_rate": 2.2590269777764516e-06,
+      "loss": 8.284,
+      "step": 77
+    },
+    {
+      "epoch": 0.6995515695067265,
+      "grad_norm": 2.1293866634368896,
+      "learning_rate": 2.141785853707607e-06,
+      "loss": 3.4749,
+      "step": 78
+    },
+    {
+      "epoch": 0.7085201793721974,
+      "grad_norm": 8.28476619720459,
+      "learning_rate": 2.0268341184785674e-06,
+      "loss": 18.1382,
+      "step": 79
+    },
+    {
+      "epoch": 0.7174887892376681,
+      "grad_norm": 1.8235303163528442,
+      "learning_rate": 1.9142638467927254e-06,
+      "loss": 3.4227,
+      "step": 80
+    },
+    {
+      "epoch": 0.726457399103139,
+      "grad_norm": 2.0121238231658936,
+      "learning_rate": 1.8041652058350768e-06,
+      "loss": 0.7364,
+      "step": 81
+    },
+    {
+      "epoch": 0.7354260089686099,
+      "grad_norm": 1.3227595090866089,
+      "learning_rate": 1.6966263830495939e-06,
+      "loss": 0.6529,
+      "step": 82
+    },
+    {
+      "epoch": 0.7443946188340808,
+      "grad_norm": 0.40294215083122253,
+      "learning_rate": 1.5917335155023368e-06,
+      "loss": 0.5894,
+      "step": 83
+    },
+    {
+      "epoch": 0.7533632286995515,
+      "grad_norm": 4.5333099365234375,
+      "learning_rate": 1.4895706208868876e-06,
+      "loss": 6.954,
+      "step": 84
+    },
+    {
+      "epoch": 0.7623318385650224,
+      "grad_norm": 0.9095823764801025,
+      "learning_rate": 1.390219530227378e-06,
+      "loss": 0.8511,
+      "step": 85
+    },
+    {
+      "epoch": 0.7713004484304933,
+      "grad_norm": 2.197908401489258,
+      "learning_rate": 1.2937598223330006e-06,
+      "loss": 5.0122,
+      "step": 86
+    },
+    {
+      "epoch": 0.7802690582959642,
+      "grad_norm": 2.3578813076019287,
+      "learning_rate": 1.2002687600565138e-06,
+      "loss": 0.6863,
+      "step": 87
+    },
+    {
+      "epoch": 0.7892376681614349,
+      "grad_norm": 0.6472300291061401,
+      "learning_rate": 1.1098212284078037e-06,
+      "loss": 0.6623,
+      "step": 88
+    },
+    {
+      "epoch": 0.7982062780269058,
+      "grad_norm": 3.915199041366577,
+      "learning_rate": 1.0224896745720513e-06,
+      "loss": 5.9304,
+      "step": 89
+    },
+    {
+      "epoch": 0.8071748878923767,
+      "grad_norm": 1.4207231998443604,
+      "learning_rate": 9.383440498805712e-07,
+      "loss": 1.4954,
+      "step": 90
+    },
+    {
+      "epoch": 0.8161434977578476,
+      "grad_norm": 0.8446305990219116,
+      "learning_rate": 8.574517537807897e-07,
+      "loss": 2.0417,
+      "step": 91
+    },
+    {
+      "epoch": 0.8251121076233184,
+      "grad_norm": 5.90231990814209,
+      "learning_rate": 7.798775798502484e-07,
+      "loss": 15.1191,
+      "step": 92
+    },
+    {
+      "epoch": 0.8340807174887892,
+      "grad_norm": 5.121821403503418,
+      "learning_rate": 7.056836638978698e-07,
+      "loss": 7.9447,
+      "step": 93
+    },
+    {
+      "epoch": 0.8430493273542601,
+      "grad_norm": 4.37883186340332,
+      "learning_rate": 6.349294341940593e-07,
+      "loss": 10.4594,
+      "step": 94
+    },
+    {
+      "epoch": 0.852017937219731,
+      "grad_norm": 4.789362907409668,
+      "learning_rate": 5.676715638695063e-07,
+      "loss": 9.3101,
+      "step": 95
+    },
+    {
+      "epoch": 0.8609865470852018,
+      "grad_norm": 2.2993826866149902,
+      "learning_rate": 5.039639255208156e-07,
+      "loss": 3.6573,
+      "step": 96
+    },
+    {
+      "epoch": 0.8699551569506726,
+      "grad_norm": 0.9291459918022156,
+      "learning_rate": 4.43857548059321e-07,
+      "loss": 0.7949,
+      "step": 97
+    },
+    {
+      "epoch": 0.8789237668161435,
+      "grad_norm": 1.2445647716522217,
+      "learning_rate": 3.87400575837657e-07,
+      "loss": 2.0625,
+      "step": 98
+    },
+    {
+      "epoch": 0.8878923766816144,
+      "grad_norm": 4.5968337059021,
+      "learning_rate": 3.346382300868134e-07,
+      "loss": 8.252,
+      "step": 99
+    },
+    {
+      "epoch": 0.8968609865470852,
+      "grad_norm": 2.6838462352752686,
+      "learning_rate": 2.85612772694579e-07,
+      "loss": 3.5332,
+      "step": 100
+    },
+    {
+      "epoch": 0.905829596412556,
+      "grad_norm": 2.564300298690796,
+      "learning_rate": 2.403634723543674e-07,
+      "loss": 3.3899,
+      "step": 101
+    },
+    {
+      "epoch": 0.9147982062780269,
+      "grad_norm": 1.2007478475570679,
+      "learning_rate": 1.989265731115525e-07,
+      "loss": 0.6646,
+      "step": 102
+    },
+    {
+      "epoch": 0.9237668161434978,
+      "grad_norm": 0.49647316336631775,
+      "learning_rate": 1.6133526533250566e-07,
+      "loss": 0.8976,
+      "step": 103
+    },
+    {
+      "epoch": 0.9327354260089686,
+      "grad_norm": 7.291123867034912,
+      "learning_rate": 1.2761965911958385e-07,
+      "loss": 18.4972,
+      "step": 104
+    },
+    {
+      "epoch": 0.9417040358744395,
+      "grad_norm": 0.898957371711731,
+      "learning_rate": 9.780676019336632e-08,
+      "loss": 1.1393,
+      "step": 105
+    },
+    {
+      "epoch": 0.9506726457399103,
+      "grad_norm": 3.6352906227111816,
+      "learning_rate": 7.192044826145772e-08,
+      "loss": 5.9557,
+      "step": 106
+    },
+    {
+      "epoch": 0.9596412556053812,
+      "grad_norm": 2.3766579627990723,
+      "learning_rate": 4.998145789118114e-08,
+      "loss": 0.6812,
+      "step": 107
+    },
+    {
+      "epoch": 0.968609865470852,
+      "grad_norm": 0.5174874067306519,
+      "learning_rate": 3.2007361901485455e-08,
+      "loss": 0.7247,
+      "step": 108
+    },
+    {
+      "epoch": 0.9775784753363229,
+      "grad_norm": 2.757059335708618,
+      "learning_rate": 1.8012557287367394e-08,
+      "loss": 5.8355,
+      "step": 109
+    },
+    {
+      "epoch": 0.9865470852017937,
+      "grad_norm": 0.954919159412384,
+      "learning_rate": 8.008253688084888e-09,
+      "loss": 3.1317,
+      "step": 110
+    },
+    {
+      "epoch": 0.9955156950672646,
+      "grad_norm": 3.348649740219116,
+      "learning_rate": 2.002464408392135e-09,
+      "loss": 7.5343,
+      "step": 111
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 111,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 200,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 24,
+  "trial_name": null,
+  "trial_params": null
+}

20250501-1443/checkpoint-111/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4f557942d59ec98655c05adb4d5496eb4bbb1842077eb2c1f22a9459b51d268e
+size 5304

20250501-1443/runs/May01_14-43-04_d55355693cce/events.out.tfevents.1746110584.d55355693cce.190.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5cbc3140f84a0db89e905b1f64a8f712492d091d7ad5ed4140c57af7cddf232e
+size 27538