PuxAI
/

ACSD_ZeroShot_Benchmark

Model card Files and versions

xet

Community

PuxAI committed on Mar 9

Commit

35d35d0

verified ·

1 Parent(s): e45cb15

Upload logs/train_log_Llama-3.1-8B_seed123.json with huggingface_hub

Browse files

Files changed (1) hide show

logs/train_log_Llama-3.1-8B_seed123.json +22 -22

logs/train_log_Llama-3.1-8B_seed123.json CHANGED Viewed

@@ -1,80 +1,80 @@
 [
   {
-    "loss": 2.3611,
-    "grad_norm": 1.9115740060806274,
     "learning_rate": 0.00018,
     "epoch": 0.5,
     "step": 10
   },
   {
-    "loss": 0.4042,
-    "grad_norm": 1.5066324472427368,
     "learning_rate": 0.00018,
     "epoch": 1.0,
     "step": 20
   },
   {
-    "loss": 0.1868,
-    "grad_norm": 0.6329495310783386,
     "learning_rate": 0.0001577777777777778,
     "epoch": 1.5,
     "step": 30
   },
   {
-    "loss": 0.153,
-    "grad_norm": 0.8787748217582703,
     "learning_rate": 0.00013555555555555556,
     "epoch": 2.0,
     "step": 40
   },
   {
-    "loss": 0.1163,
-    "grad_norm": 0.6138646006584167,
     "learning_rate": 0.00011333333333333334,
     "epoch": 2.5,
     "step": 50
   },
   {
-    "loss": 0.1006,
-    "grad_norm": 0.6085129976272583,
     "learning_rate": 9.111111111111112e-05,
     "epoch": 3.0,
     "step": 60
   },
   {
     "loss": 0.0861,
-    "grad_norm": 0.4830396771430969,
     "learning_rate": 6.88888888888889e-05,
     "epoch": 3.5,
     "step": 70
   },
   {
-    "loss": 0.0607,
-    "grad_norm": 0.9534498453140259,
     "learning_rate": 4.666666666666667e-05,
     "epoch": 4.0,
     "step": 80
   },
   {
     "loss": 0.0493,
-    "grad_norm": 0.6482242941856384,
     "learning_rate": 2.4444444444444445e-05,
     "epoch": 4.5,
     "step": 90
   },
   {
-    "loss": 0.0501,
-    "grad_norm": 0.8297228217124939,
     "learning_rate": 2.2222222222222225e-06,
     "epoch": 5.0,
     "step": 100
   },
   {
-    "train_runtime": 335.1443,
-    "train_samples_per_second": 18.738,
-    "train_steps_per_second": 0.298,
     "total_flos": 5.098583944986624e+16,
-    "train_loss": 0.3568024319410324,
     "epoch": 5.0,
     "step": 100
   }

 [
   {
+    "loss": 2.3613,
+    "grad_norm": 1.9053692817687988,
     "learning_rate": 0.00018,
     "epoch": 0.5,
     "step": 10
   },
   {
+    "loss": 0.4029,
+    "grad_norm": 1.454684853553772,
     "learning_rate": 0.00018,
     "epoch": 1.0,
     "step": 20
   },
   {
+    "loss": 0.1859,
+    "grad_norm": 0.6330903172492981,
     "learning_rate": 0.0001577777777777778,
     "epoch": 1.5,
     "step": 30
   },
   {
+    "loss": 0.1523,
+    "grad_norm": 0.862911581993103,
     "learning_rate": 0.00013555555555555556,
     "epoch": 2.0,
     "step": 40
   },
   {
+    "loss": 0.1158,
+    "grad_norm": 0.6093763113021851,
     "learning_rate": 0.00011333333333333334,
     "epoch": 2.5,
     "step": 50
   },
   {
+    "loss": 0.1004,
+    "grad_norm": 0.6005483269691467,
     "learning_rate": 9.111111111111112e-05,
     "epoch": 3.0,
     "step": 60
   },
   {
     "loss": 0.0861,
+    "grad_norm": 0.47914546728134155,
     "learning_rate": 6.88888888888889e-05,
     "epoch": 3.5,
     "step": 70
   },
   {
+    "loss": 0.0604,
+    "grad_norm": 0.9422203302383423,
     "learning_rate": 4.666666666666667e-05,
     "epoch": 4.0,
     "step": 80
   },
   {
     "loss": 0.0493,
+    "grad_norm": 0.6497517824172974,
     "learning_rate": 2.4444444444444445e-05,
     "epoch": 4.5,
     "step": 90
   },
   {
+    "loss": 0.0495,
+    "grad_norm": 0.820516049861908,
     "learning_rate": 2.2222222222222225e-06,
     "epoch": 5.0,
     "step": 100
   },
   {
+    "train_runtime": 329.7305,
+    "train_samples_per_second": 19.046,
+    "train_steps_per_second": 0.303,
     "total_flos": 5.098583944986624e+16,
+    "train_loss": 0.3564116105437279,
     "epoch": 5.0,
     "step": 100
   }