Kendamarron committed on Mar 25, 2024

Commit

817bdc0

verified ·

1 Parent(s): ebb77eb

Upload folder using huggingface_hub

Browse files

Files changed (32) hide show

adapter_model.safetensors +2 -2
checkpoint-100/adapter_model.safetensors +2 -2
checkpoint-100/optimizer.pt +1 -1
checkpoint-100/rng_state.pth +1 -1
checkpoint-100/scheduler.pt +1 -1
checkpoint-100/trainer_state.json +61 -61
checkpoint-100/training_args.bin +1 -1
checkpoint-20/adapter_model.safetensors +2 -2
checkpoint-20/optimizer.pt +1 -1
checkpoint-20/rng_state.pth +1 -1
checkpoint-20/scheduler.pt +1 -1
checkpoint-20/trainer_state.json +13 -13
checkpoint-20/training_args.bin +1 -1
checkpoint-40/adapter_model.safetensors +2 -2
checkpoint-40/optimizer.pt +1 -1
checkpoint-40/rng_state.pth +1 -1
checkpoint-40/scheduler.pt +1 -1
checkpoint-40/trainer_state.json +25 -25
checkpoint-40/training_args.bin +1 -1
checkpoint-60/adapter_model.safetensors +2 -2
checkpoint-60/optimizer.pt +1 -1
checkpoint-60/rng_state.pth +1 -1
checkpoint-60/scheduler.pt +1 -1
checkpoint-60/trainer_state.json +37 -37
checkpoint-60/training_args.bin +1 -1
checkpoint-80/adapter_model.safetensors +2 -2
checkpoint-80/optimizer.pt +1 -1
checkpoint-80/rng_state.pth +1 -1
checkpoint-80/scheduler.pt +1 -1
checkpoint-80/trainer_state.json +49 -49
checkpoint-80/training_args.bin +1 -1
training_args.bin +1 -1

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a53bcbc68dec82e2ce8cf2d2d439a8dbf38126c02cc88a567925a6781f6a782f
-size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:e39308711dbd127efe41850085dc7f2f63d54a44aad2dd2a502891474924b787
+size 83946192

checkpoint-100/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a53bcbc68dec82e2ce8cf2d2d439a8dbf38126c02cc88a567925a6781f6a782f
-size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:e39308711dbd127efe41850085dc7f2f63d54a44aad2dd2a502891474924b787
+size 83946192

checkpoint-100/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c62536abee8b78adb6147453bd71c877d7d2b4d5a8f8d034d44a921275e11ab
 size 335810482

 version https://git-lfs.github.com/spec/v1
+oid sha256:dacc62b0f0a2216d58e92701202dd5456a079db701e9e321345271c623dd7c6a
 size 335810482

checkpoint-100/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:808521b8055ae579535313bc1b5f324216971386eaeef0a87693a238c17a92b3
 size 14168

 version https://git-lfs.github.com/spec/v1
+oid sha256:2975b104fc6f24da571473b9b64f8d64d931a14d44726583f5951b3fe5be12b9
 size 14168

checkpoint-100/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:76b4fa9c11af5ffcd754861813fd859eeac70d14b38222bd674da5b3ddfefcfe
 size 1056

 version https://git-lfs.github.com/spec/v1
+oid sha256:dddbc5bcee87f33f86ec113accc5c003fca74582bc423aa05d433c41c6d3cf4e
 size 1056

checkpoint-100/trainer_state.json CHANGED Viewed

@@ -10,142 +10,142 @@
   "log_history": [
     {
       "epoch": 0.2,
-      "grad_norm": 5.041278839111328,
       "learning_rate": 2.9968542393565676e-06,
-      "loss": 1.9371,
       "step": 5
     },
     {
       "epoch": 0.41,
-      "grad_norm": NaN,
-      "learning_rate": 2.9717672653473587e-06,
-      "loss": 1.806,
       "step": 10
     },
     {
       "epoch": 0.61,
-      "grad_norm": 3.5015058517456055,
-      "learning_rate": 2.905810057509516e-06,
-      "loss": 1.7373,
       "step": 15
     },
     {
       "epoch": 0.82,
-      "grad_norm": 3.583218574523926,
-      "learning_rate": 2.803067604777227e-06,
-      "loss": 1.7471,
       "step": 20
     },
     {
       "epoch": 1.02,
-      "grad_norm": 4.409696102142334,
-      "learning_rate": 2.666228326019474e-06,
-      "loss": 1.6622,
       "step": 25
     },
     {
       "epoch": 1.22,
-      "grad_norm": 3.5655429363250732,
-      "learning_rate": 2.498872837517522e-06,
-      "loss": 1.6483,
       "step": 30
     },
     {
       "epoch": 1.43,
-      "grad_norm": 4.161169528961182,
-      "learning_rate": 2.305380260473476e-06,
-      "loss": 1.674,
       "step": 35
     },
     {
       "epoch": 1.63,
-      "grad_norm": 3.5801784992218018,
-      "learning_rate": 2.090813634373931e-06,
-      "loss": 1.6635,
       "step": 40
     },
     {
       "epoch": 1.84,
-      "grad_norm": 3.7053236961364746,
-      "learning_rate": 1.8607874345493807e-06,
-      "loss": 1.6857,
       "step": 45
     },
     {
       "epoch": 2.04,
-      "grad_norm": 3.145810127258301,
-      "learning_rate": 1.6213206605421064e-06,
-      "loss": 1.6165,
       "step": 50
     },
     {
       "epoch": 2.24,
-      "grad_norm": 3.514664649963379,
-      "learning_rate": 1.3786793394578939e-06,
-      "loss": 1.6025,
       "step": 55
     },
     {
       "epoch": 2.45,
-      "grad_norm": 3.9232802391052246,
-      "learning_rate": 1.13921256545062e-06,
-      "loss": 1.5615,
       "step": 60
     },
     {
       "epoch": 2.65,
-      "grad_norm": 3.0508198738098145,
-      "learning_rate": 9.091863656260696e-07,
-      "loss": 1.6076,
       "step": 65
     },
     {
       "epoch": 2.86,
-      "grad_norm": 3.7754147052764893,
-      "learning_rate": 6.946197395265243e-07,
-      "loss": 1.664,
       "step": 70
     },
     {
       "epoch": 3.06,
-      "grad_norm": 3.5631840229034424,
-      "learning_rate": 5.011271624824787e-07,
-      "loss": 1.5976,
       "step": 75
     },
     {
       "epoch": 3.27,
-      "grad_norm": 3.4716637134552,
-      "learning_rate": 3.337716739805264e-07,
-      "loss": 1.5761,
       "step": 80
     },
     {
       "epoch": 3.47,
-      "grad_norm": 3.624776601791382,
-      "learning_rate": 1.9693239522277327e-07,
-      "loss": 1.5924,
       "step": 85
     },
     {
       "epoch": 3.67,
-      "grad_norm": 3.4213175773620605,
-      "learning_rate": 9.418994249048474e-08,
-      "loss": 1.6305,
       "step": 90
     },
     {
       "epoch": 3.88,
-      "grad_norm": 3.8403515815734863,
-      "learning_rate": 2.8232734652641424e-08,
-      "loss": 1.6219,
       "step": 95
     },
     {
       "epoch": 4.08,
-      "grad_norm": 3.393942356109619,
-      "learning_rate": 7.866464317276001e-10,
-      "loss": 1.5561,
       "step": 100
     }
   ],
@@ -154,8 +154,8 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 20,
-  "total_flos": 8673113877921792.0,
-  "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
 }

   "log_history": [
     {
       "epoch": 0.2,
+      "grad_norm": 5.09375,
       "learning_rate": 2.9968542393565676e-06,
+      "loss": 1.9388,
       "step": 5
     },
     {
       "epoch": 0.41,
+      "grad_norm": 4.4375,
+      "learning_rate": 2.9616157869703894e-06,
+      "loss": 1.8455,
       "step": 10
     },
     {
       "epoch": 0.61,
+      "grad_norm": 3.59375,
+      "learning_rate": 2.8881318444640566e-06,
+      "loss": 1.7796,
       "step": 15
     },
     {
       "epoch": 0.82,
+      "grad_norm": 3.796875,
+      "learning_rate": 2.778325235483954e-06,
+      "loss": 1.8091,
       "step": 20
     },
     {
       "epoch": 1.02,
+      "grad_norm": 4.625,
+      "learning_rate": 2.6350692237265428e-06,
+      "loss": 1.7224,
       "step": 25
     },
     {
       "epoch": 1.22,
+      "grad_norm": 3.953125,
+      "learning_rate": 2.4621123294467098e-06,
+      "loss": 1.7108,
       "step": 30
     },
     {
       "epoch": 1.43,
+      "grad_norm": 4.125,
+      "learning_rate": 2.2639802434931445e-06,
+      "loss": 1.7299,
       "step": 35
     },
     {
       "epoch": 1.63,
+      "grad_norm": 3.625,
+      "learning_rate": 2.0458574054452316e-06,
+      "loss": 1.7111,
       "step": 40
     },
     {
       "epoch": 1.84,
+      "grad_norm": 3.53125,
+      "learning_rate": 1.813451344546913e-06,
+      "loss": 1.7364,
       "step": 45
     },
     {
       "epoch": 2.04,
+      "grad_norm": 3.1875,
+      "learning_rate": 1.5728433331716726e-06,
+      "loss": 1.6664,
       "step": 50
     },
     {
       "epoch": 2.24,
+      "grad_norm": 3.125,
+      "learning_rate": 1.3303292607070737e-06,
+      "loss": 1.6673,
       "step": 55
     },
     {
       "epoch": 2.45,
+      "grad_norm": 3.65625,
+      "learning_rate": 1.0922548916454855e-06,
+      "loss": 1.6219,
       "step": 60
     },
     {
       "epoch": 2.65,
+      "grad_norm": 2.8125,
+      "learning_rate": 8.648498186137653e-07,
+      "loss": 1.6648,
       "step": 65
     },
     {
       "epoch": 2.86,
+      "grad_norm": 3.53125,
+      "learning_rate": 6.540644552236401e-07,
+      "loss": 1.699,
       "step": 70
     },
     {
       "epoch": 3.06,
+      "grad_norm": 4.15625,
+      "learning_rate": 4.6541433408284356e-07,
+      "loss": 1.6821,
       "step": 75
     },
     {
       "epoch": 3.27,
+      "grad_norm": 3.546875,
+      "learning_rate": 3.0383578415591913e-07,
+      "loss": 1.6633,
       "step": 80
     },
     {
       "epoch": 3.47,
+      "grad_norm": 3.4375,
+      "learning_rate": 1.7355676390496482e-07,
+      "loss": 1.6522,
       "step": 85
     },
     {
       "epoch": 3.67,
+      "grad_norm": 3.328125,
+      "learning_rate": 7.798623006559436e-08,
+      "loss": 1.6788,
       "step": 90
     },
     {
       "epoch": 3.88,
+      "grad_norm": 3.828125,
+      "learning_rate": 1.962493689916395e-08,
+      "loss": 1.6806,
       "step": 95
     },
     {
       "epoch": 4.08,
+      "grad_norm": 3.25,
+      "learning_rate": 0.0,
+      "loss": 1.6213,
       "step": 100
     }
   ],
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 20,
+  "total_flos": 8016985030459392.0,
+  "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
 }

checkpoint-100/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dcd55fa32c983512f289bcc506b75cd6687379a244a95f246ddb3cda8a97ea11
 size 4960

 version https://git-lfs.github.com/spec/v1
+oid sha256:23cfefdd62756fac4437632539fdfbb741029e6fb943cafeffe397c21a403a5d
 size 4960

checkpoint-20/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:81169049628740146f07ac7e1f2c59f6475467aa39b48ea79092281b1cf4f31f
-size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:36fe3540199ef008a5a14901c68ed9f51bd9b4d479e2c02c0a2a74bf9ab9c08d
+size 83946192

checkpoint-20/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1b5c5a5f5f27d9ba6aebc06e9fb489f05042a6881eeeddbd130172ad2723e6c5
 size 335810482

 version https://git-lfs.github.com/spec/v1
+oid sha256:ba79b0978d0a6742535ddd213b0cf9c7209903a3c55809669cb1c9d19ccf9abc
 size 335810482

checkpoint-20/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c38429496778665cedc2e268e56dc0476144498310916d1f0cfff08c093b6b5c
 size 14168

 version https://git-lfs.github.com/spec/v1
+oid sha256:596bb0c43f17f8cd0971123a502b06f192d7a434146d9d5e3e84fb081424cc46
 size 14168

checkpoint-20/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ce642ef0996d1e3d6618dd62493c8bbb08bd03811c25a654671f6952e68b2cd2
 size 1056

 version https://git-lfs.github.com/spec/v1
+oid sha256:2256134ffb225a6c790a5cebe9b44be1002bae7b80db44f96adfc6030072a13c
 size 1056

checkpoint-20/trainer_state.json CHANGED Viewed

@@ -10,30 +10,30 @@
   "log_history": [
     {
       "epoch": 0.2,
-      "grad_norm": 5.041278839111328,
       "learning_rate": 2.9968542393565676e-06,
-      "loss": 1.9371,
       "step": 5
     },
     {
       "epoch": 0.41,
-      "grad_norm": NaN,
-      "learning_rate": 2.9717672653473587e-06,
-      "loss": 1.806,
       "step": 10
     },
     {
       "epoch": 0.61,
-      "grad_norm": 3.5015058517456055,
-      "learning_rate": 2.905810057509516e-06,
-      "loss": 1.7373,
       "step": 15
     },
     {
       "epoch": 0.82,
-      "grad_norm": 3.583218574523926,
-      "learning_rate": 2.803067604777227e-06,
-      "loss": 1.7471,
       "step": 20
     }
   ],
@@ -42,8 +42,8 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 20,
-  "total_flos": 1729235156533248.0,
-  "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
 }

   "log_history": [
     {
       "epoch": 0.2,
+      "grad_norm": 5.09375,
       "learning_rate": 2.9968542393565676e-06,
+      "loss": 1.9388,
       "step": 5
     },
     {
       "epoch": 0.41,
+      "grad_norm": 4.4375,
+      "learning_rate": 2.9616157869703894e-06,
+      "loss": 1.8455,
       "step": 10
     },
     {
       "epoch": 0.61,
+      "grad_norm": 3.59375,
+      "learning_rate": 2.8881318444640566e-06,
+      "loss": 1.7796,
       "step": 15
     },
     {
       "epoch": 0.82,
+      "grad_norm": 3.796875,
+      "learning_rate": 2.778325235483954e-06,
+      "loss": 1.8091,
       "step": 20
     }
   ],
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 20,
+  "total_flos": 1604332476923904.0,
+  "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
 }

checkpoint-20/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dcd55fa32c983512f289bcc506b75cd6687379a244a95f246ddb3cda8a97ea11
 size 4960

 version https://git-lfs.github.com/spec/v1
+oid sha256:23cfefdd62756fac4437632539fdfbb741029e6fb943cafeffe397c21a403a5d
 size 4960

checkpoint-40/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:201e57d1be1961bebbcd1ff8eda8c6677e4a69421937df43b308c971c3108aea
-size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:ab1d62447cec6baff4b897987488e96f3e155ae7a2d789c9bfd44df3352413bd
+size 83946192

checkpoint-40/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:079e8a89a0226983e6e8ae22966abf6aed4e5ebf6d41cb8dab776e531a5362f7
 size 335810482

 version https://git-lfs.github.com/spec/v1
+oid sha256:bb9a12580e8a534dab4a9273a9183c0f3caabe7de1e89240f077919754dc8398
 size 335810482

checkpoint-40/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e0230bfed5d2d4ba948095836587d23305d972aac689f5cdcf2fefea079cce46
 size 14168

 version https://git-lfs.github.com/spec/v1
+oid sha256:076780903adeff97a16d6f25588ee658c45903a68edf79adba5d800f18428061
 size 14168

checkpoint-40/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:701d4e651a2909e8f790218a3f92bf4c4a7e6e82039f18b093a26a63dfa65d6a
 size 1056

 version https://git-lfs.github.com/spec/v1
+oid sha256:aaf7ad3bbb37486a5e7658ddf05bfee6df67222659155f89c9b8f68d5f791717
 size 1056

checkpoint-40/trainer_state.json CHANGED Viewed

@@ -10,58 +10,58 @@
   "log_history": [
     {
       "epoch": 0.2,
-      "grad_norm": 5.041278839111328,
       "learning_rate": 2.9968542393565676e-06,
-      "loss": 1.9371,
       "step": 5
     },
     {
       "epoch": 0.41,
-      "grad_norm": NaN,
-      "learning_rate": 2.9717672653473587e-06,
-      "loss": 1.806,
       "step": 10
     },
     {
       "epoch": 0.61,
-      "grad_norm": 3.5015058517456055,
-      "learning_rate": 2.905810057509516e-06,
-      "loss": 1.7373,
       "step": 15
     },
     {
       "epoch": 0.82,
-      "grad_norm": 3.583218574523926,
-      "learning_rate": 2.803067604777227e-06,
-      "loss": 1.7471,
       "step": 20
     },
     {
       "epoch": 1.02,
-      "grad_norm": 4.409696102142334,
-      "learning_rate": 2.666228326019474e-06,
-      "loss": 1.6622,
       "step": 25
     },
     {
       "epoch": 1.22,
-      "grad_norm": 3.5655429363250732,
-      "learning_rate": 2.498872837517522e-06,
-      "loss": 1.6483,
       "step": 30
     },
     {
       "epoch": 1.43,
-      "grad_norm": 4.161169528961182,
-      "learning_rate": 2.305380260473476e-06,
-      "loss": 1.674,
       "step": 35
     },
     {
       "epoch": 1.63,
-      "grad_norm": 3.5801784992218018,
-      "learning_rate": 2.090813634373931e-06,
-      "loss": 1.6635,
       "step": 40
     }
   ],
@@ -70,8 +70,8 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 20,
-  "total_flos": 3471636198850560.0,
-  "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
 }

   "log_history": [
     {
       "epoch": 0.2,
+      "grad_norm": 5.09375,
       "learning_rate": 2.9968542393565676e-06,
+      "loss": 1.9388,
       "step": 5
     },
     {
       "epoch": 0.41,
+      "grad_norm": 4.4375,
+      "learning_rate": 2.9616157869703894e-06,
+      "loss": 1.8455,
       "step": 10
     },
     {
       "epoch": 0.61,
+      "grad_norm": 3.59375,
+      "learning_rate": 2.8881318444640566e-06,
+      "loss": 1.7796,
       "step": 15
     },
     {
       "epoch": 0.82,
+      "grad_norm": 3.796875,
+      "learning_rate": 2.778325235483954e-06,
+      "loss": 1.8091,
       "step": 20
     },
     {
       "epoch": 1.02,
+      "grad_norm": 4.625,
+      "learning_rate": 2.6350692237265428e-06,
+      "loss": 1.7224,
       "step": 25
     },
     {
       "epoch": 1.22,
+      "grad_norm": 3.953125,
+      "learning_rate": 2.4621123294467098e-06,
+      "loss": 1.7108,
       "step": 30
     },
     {
       "epoch": 1.43,
+      "grad_norm": 4.125,
+      "learning_rate": 2.2639802434931445e-06,
+      "loss": 1.7299,
       "step": 35
     },
     {
       "epoch": 1.63,
+      "grad_norm": 3.625,
+      "learning_rate": 2.0458574054452316e-06,
+      "loss": 1.7111,
       "step": 40
     }
   ],
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 20,
+  "total_flos": 3199050392518656.0,
+  "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
 }

checkpoint-40/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dcd55fa32c983512f289bcc506b75cd6687379a244a95f246ddb3cda8a97ea11
 size 4960

 version https://git-lfs.github.com/spec/v1
+oid sha256:23cfefdd62756fac4437632539fdfbb741029e6fb943cafeffe397c21a403a5d
 size 4960

checkpoint-60/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ffdddd99b7e2dac2fbe6f1527394f3bd35c8f4cec7f824d6a680b150eab29752
-size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:d0b479837a2f06b820e0f0f448720fe1bd18ae1237586386c794257aec6a6aef
+size 83946192

checkpoint-60/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a2ca205f6859d421a936cd43599ebb3a9e6b4746a4617aec3eb1e4451973a6ab
 size 335810482

 version https://git-lfs.github.com/spec/v1
+oid sha256:bc5a078967486289c7bb2846375d3eb6741292205506fe5dde1c1a54fd203bf7
 size 335810482

checkpoint-60/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e6a26fff3a3d4ce3ae0cf2b12423f9b87839511db578ab2145a0af9abb15ae98
 size 14168

 version https://git-lfs.github.com/spec/v1
+oid sha256:6fe60fa4f0a2e1e83d88e7fff8399bc59bb2f8ab03ac19802ae736a7dbddb571
 size 14168

checkpoint-60/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:66ff6bbcef17af5c104327b1fe8909f5fc08bf691eb358ca5fd99210a287b128
 size 1056

 version https://git-lfs.github.com/spec/v1
+oid sha256:d151571d18043fd71e8a555e30e88db47c2b573382d14ae55c5df3ad38604822
 size 1056

checkpoint-60/trainer_state.json CHANGED Viewed

@@ -10,86 +10,86 @@
   "log_history": [
     {
       "epoch": 0.2,
-      "grad_norm": 5.041278839111328,
       "learning_rate": 2.9968542393565676e-06,
-      "loss": 1.9371,
       "step": 5
     },
     {
       "epoch": 0.41,
-      "grad_norm": NaN,
-      "learning_rate": 2.9717672653473587e-06,
-      "loss": 1.806,
       "step": 10
     },
     {
       "epoch": 0.61,
-      "grad_norm": 3.5015058517456055,
-      "learning_rate": 2.905810057509516e-06,
-      "loss": 1.7373,
       "step": 15
     },
     {
       "epoch": 0.82,
-      "grad_norm": 3.583218574523926,
-      "learning_rate": 2.803067604777227e-06,
-      "loss": 1.7471,
       "step": 20
     },
     {
       "epoch": 1.02,
-      "grad_norm": 4.409696102142334,
-      "learning_rate": 2.666228326019474e-06,
-      "loss": 1.6622,
       "step": 25
     },
     {
       "epoch": 1.22,
-      "grad_norm": 3.5655429363250732,
-      "learning_rate": 2.498872837517522e-06,
-      "loss": 1.6483,
       "step": 30
     },
     {
       "epoch": 1.43,
-      "grad_norm": 4.161169528961182,
-      "learning_rate": 2.305380260473476e-06,
-      "loss": 1.674,
       "step": 35
     },
     {
       "epoch": 1.63,
-      "grad_norm": 3.5801784992218018,
-      "learning_rate": 2.090813634373931e-06,
-      "loss": 1.6635,
       "step": 40
     },
     {
       "epoch": 1.84,
-      "grad_norm": 3.7053236961364746,
-      "learning_rate": 1.8607874345493807e-06,
-      "loss": 1.6857,
       "step": 45
     },
     {
       "epoch": 2.04,
-      "grad_norm": 3.145810127258301,
-      "learning_rate": 1.6213206605421064e-06,
-      "loss": 1.6165,
       "step": 50
     },
     {
       "epoch": 2.24,
-      "grad_norm": 3.514664649963379,
-      "learning_rate": 1.3786793394578939e-06,
-      "loss": 1.6025,
       "step": 55
     },
     {
       "epoch": 2.45,
-      "grad_norm": 3.9232802391052246,
-      "learning_rate": 1.13921256545062e-06,
-      "loss": 1.5615,
       "step": 60
     }
   ],
@@ -98,8 +98,8 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 20,
-  "total_flos": 5264188871884800.0,
-  "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
 }

   "log_history": [
     {
       "epoch": 0.2,
+      "grad_norm": 5.09375,
       "learning_rate": 2.9968542393565676e-06,
+      "loss": 1.9388,
       "step": 5
     },
     {
       "epoch": 0.41,
+      "grad_norm": 4.4375,
+      "learning_rate": 2.9616157869703894e-06,
+      "loss": 1.8455,
       "step": 10
     },
     {
       "epoch": 0.61,
+      "grad_norm": 3.59375,
+      "learning_rate": 2.8881318444640566e-06,
+      "loss": 1.7796,
       "step": 15
     },
     {
       "epoch": 0.82,
+      "grad_norm": 3.796875,
+      "learning_rate": 2.778325235483954e-06,
+      "loss": 1.8091,
       "step": 20
     },
     {
       "epoch": 1.02,
+      "grad_norm": 4.625,
+      "learning_rate": 2.6350692237265428e-06,
+      "loss": 1.7224,
       "step": 25
     },
     {
       "epoch": 1.22,
+      "grad_norm": 3.953125,
+      "learning_rate": 2.4621123294467098e-06,
+      "loss": 1.7108,
       "step": 30
     },
     {
       "epoch": 1.43,
+      "grad_norm": 4.125,
+      "learning_rate": 2.2639802434931445e-06,
+      "loss": 1.7299,
       "step": 35
     },
     {
       "epoch": 1.63,
+      "grad_norm": 3.625,
+      "learning_rate": 2.0458574054452316e-06,
+      "loss": 1.7111,
       "step": 40
     },
     {
       "epoch": 1.84,
+      "grad_norm": 3.53125,
+      "learning_rate": 1.813451344546913e-06,
+      "loss": 1.7364,
       "step": 45
     },
     {
       "epoch": 2.04,
+      "grad_norm": 3.1875,
+      "learning_rate": 1.5728433331716726e-06,
+      "loss": 1.6664,
       "step": 50
     },
     {
       "epoch": 2.24,
+      "grad_norm": 3.125,
+      "learning_rate": 1.3303292607070737e-06,
+      "loss": 1.6673,
       "step": 55
     },
     {
       "epoch": 2.45,
+      "grad_norm": 3.65625,
+      "learning_rate": 1.0922548916454855e-06,
+      "loss": 1.6219,
       "step": 60
     }
   ],
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 20,
+  "total_flos": 4847990969303040.0,
+  "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
 }

checkpoint-60/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dcd55fa32c983512f289bcc506b75cd6687379a244a95f246ddb3cda8a97ea11
 size 4960

 version https://git-lfs.github.com/spec/v1
+oid sha256:23cfefdd62756fac4437632539fdfbb741029e6fb943cafeffe397c21a403a5d
 size 4960

checkpoint-80/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c4be7974723249f007017e295554c31f7a342db52a8eba5f7a93c2a7a155e020
-size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2072d0773bb7a3b3ac9c1b62ccf0be9fb811367ea38d87012e0fe0cd0fa4c99
+size 83946192

checkpoint-80/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3bc3157b40e30f2fda6a7e7023a977eaccfd60bedd3515639ec519017b2ed43f
 size 335810482

 version https://git-lfs.github.com/spec/v1
+oid sha256:6a3436433c51ee5552f4562b6706190e1b0b15ef3f5a6d76cc552fa0d29e0c6f
 size 335810482

checkpoint-80/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:abd6efaf95891e0f58baa8477c4de71a0b71c10ed67610f5cabdc07e838de207
 size 14168

 version https://git-lfs.github.com/spec/v1
+oid sha256:39b06804188097f3c1bab9da775567b42104978432eb2ab2e415e5e56cb71c34
 size 14168

checkpoint-80/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bee8a0b8f96a0f78af76b5c0179274ad985c4e5aff73d9a192518417414c17d8
 size 1056

 version https://git-lfs.github.com/spec/v1
+oid sha256:f9a8053fc960722f2783f69c7ab9c51a9ad21ea164a693d17b94cded06192073
 size 1056

checkpoint-80/trainer_state.json CHANGED Viewed

@@ -10,114 +10,114 @@
   "log_history": [
     {
       "epoch": 0.2,
-      "grad_norm": 5.041278839111328,
       "learning_rate": 2.9968542393565676e-06,
-      "loss": 1.9371,
       "step": 5
     },
     {
       "epoch": 0.41,
-      "grad_norm": NaN,
-      "learning_rate": 2.9717672653473587e-06,
-      "loss": 1.806,
       "step": 10
     },
     {
       "epoch": 0.61,
-      "grad_norm": 3.5015058517456055,
-      "learning_rate": 2.905810057509516e-06,
-      "loss": 1.7373,
       "step": 15
     },
     {
       "epoch": 0.82,
-      "grad_norm": 3.583218574523926,
-      "learning_rate": 2.803067604777227e-06,
-      "loss": 1.7471,
       "step": 20
     },
     {
       "epoch": 1.02,
-      "grad_norm": 4.409696102142334,
-      "learning_rate": 2.666228326019474e-06,
-      "loss": 1.6622,
       "step": 25
     },
     {
       "epoch": 1.22,
-      "grad_norm": 3.5655429363250732,
-      "learning_rate": 2.498872837517522e-06,
-      "loss": 1.6483,
       "step": 30
     },
     {
       "epoch": 1.43,
-      "grad_norm": 4.161169528961182,
-      "learning_rate": 2.305380260473476e-06,
-      "loss": 1.674,
       "step": 35
     },
     {
       "epoch": 1.63,
-      "grad_norm": 3.5801784992218018,
-      "learning_rate": 2.090813634373931e-06,
-      "loss": 1.6635,
       "step": 40
     },
     {
       "epoch": 1.84,
-      "grad_norm": 3.7053236961364746,
-      "learning_rate": 1.8607874345493807e-06,
-      "loss": 1.6857,
       "step": 45
     },
     {
       "epoch": 2.04,
-      "grad_norm": 3.145810127258301,
-      "learning_rate": 1.6213206605421064e-06,
-      "loss": 1.6165,
       "step": 50
     },
     {
       "epoch": 2.24,
-      "grad_norm": 3.514664649963379,
-      "learning_rate": 1.3786793394578939e-06,
-      "loss": 1.6025,
       "step": 55
     },
     {
       "epoch": 2.45,
-      "grad_norm": 3.9232802391052246,
-      "learning_rate": 1.13921256545062e-06,
-      "loss": 1.5615,
       "step": 60
     },
     {
       "epoch": 2.65,
-      "grad_norm": 3.0508198738098145,
-      "learning_rate": 9.091863656260696e-07,
-      "loss": 1.6076,
       "step": 65
     },
     {
       "epoch": 2.86,
-      "grad_norm": 3.7754147052764893,
-      "learning_rate": 6.946197395265243e-07,
-      "loss": 1.664,
       "step": 70
     },
     {
       "epoch": 3.06,
-      "grad_norm": 3.5631840229034424,
-      "learning_rate": 5.011271624824787e-07,
-      "loss": 1.5976,
       "step": 75
     },
     {
       "epoch": 3.27,
-      "grad_norm": 3.4716637134552,
-      "learning_rate": 3.337716739805264e-07,
-      "loss": 1.5761,
       "step": 80
     }
   ],
@@ -126,8 +126,8 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 20,
-  "total_flos": 6971682993340416.0,
-  "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
 }

   "log_history": [
     {
       "epoch": 0.2,
+      "grad_norm": 5.09375,
       "learning_rate": 2.9968542393565676e-06,
+      "loss": 1.9388,
       "step": 5
     },
     {
       "epoch": 0.41,
+      "grad_norm": 4.4375,
+      "learning_rate": 2.9616157869703894e-06,
+      "loss": 1.8455,
       "step": 10
     },
     {
       "epoch": 0.61,
+      "grad_norm": 3.59375,
+      "learning_rate": 2.8881318444640566e-06,
+      "loss": 1.7796,
       "step": 15
     },
     {
       "epoch": 0.82,
+      "grad_norm": 3.796875,
+      "learning_rate": 2.778325235483954e-06,
+      "loss": 1.8091,
       "step": 20
     },
     {
       "epoch": 1.02,
+      "grad_norm": 4.625,
+      "learning_rate": 2.6350692237265428e-06,
+      "loss": 1.7224,
       "step": 25
     },
     {
       "epoch": 1.22,
+      "grad_norm": 3.953125,
+      "learning_rate": 2.4621123294467098e-06,
+      "loss": 1.7108,
       "step": 30
     },
     {
       "epoch": 1.43,
+      "grad_norm": 4.125,
+      "learning_rate": 2.2639802434931445e-06,
+      "loss": 1.7299,
       "step": 35
     },
     {
       "epoch": 1.63,
+      "grad_norm": 3.625,
+      "learning_rate": 2.0458574054452316e-06,
+      "loss": 1.7111,
       "step": 40
     },
     {
       "epoch": 1.84,
+      "grad_norm": 3.53125,
+      "learning_rate": 1.813451344546913e-06,
+      "loss": 1.7364,
       "step": 45
     },
     {
       "epoch": 2.04,
+      "grad_norm": 3.1875,
+      "learning_rate": 1.5728433331716726e-06,
+      "loss": 1.6664,
       "step": 50
     },
     {
       "epoch": 2.24,
+      "grad_norm": 3.125,
+      "learning_rate": 1.3303292607070737e-06,
+      "loss": 1.6673,
       "step": 55
     },
     {
       "epoch": 2.45,
+      "grad_norm": 3.65625,
+      "learning_rate": 1.0922548916454855e-06,
+      "loss": 1.6219,
       "step": 60
     },
     {
       "epoch": 2.65,
+      "grad_norm": 2.8125,
+      "learning_rate": 8.648498186137653e-07,
+      "loss": 1.6648,
       "step": 65
     },
     {
       "epoch": 2.86,
+      "grad_norm": 3.53125,
+      "learning_rate": 6.540644552236401e-07,
+      "loss": 1.699,
       "step": 70
     },
     {
       "epoch": 3.06,
+      "grad_norm": 4.15625,
+      "learning_rate": 4.6541433408284356e-07,
+      "loss": 1.6821,
       "step": 75
     },
     {
       "epoch": 3.27,
+      "grad_norm": 3.546875,
+      "learning_rate": 3.0383578415591913e-07,
+      "loss": 1.6633,
       "step": 80
     }
   ],
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 20,
+  "total_flos": 6434740059291648.0,
+  "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
 }

checkpoint-80/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dcd55fa32c983512f289bcc506b75cd6687379a244a95f246ddb3cda8a97ea11
 size 4960

 version https://git-lfs.github.com/spec/v1
+oid sha256:23cfefdd62756fac4437632539fdfbb741029e6fb943cafeffe397c21a403a5d
 size 4960

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dcd55fa32c983512f289bcc506b75cd6687379a244a95f246ddb3cda8a97ea11
 size 4960

 version https://git-lfs.github.com/spec/v1
+oid sha256:23cfefdd62756fac4437632539fdfbb741029e6fb943cafeffe397c21a403a5d
 size 4960