PuxAI
/

ACSD_ZeroShot_Benchmark

Model card Files and versions

xet

Community

PuxAI committed on Mar 8

Commit

09abb71

verified ·

1 Parent(s): d8013fb

Upload logs/train_log_Gemma3-4B-IT_seed789.json with huggingface_hub

Browse files

Files changed (1) hide show

logs/train_log_Gemma3-4B-IT_seed789.json +23 -23

logs/train_log_Gemma3-4B-IT_seed789.json CHANGED Viewed

@@ -1,80 +1,80 @@
 [
   {
-    "loss": 4.662,
-    "grad_norm": 5.247747421264648,
     "learning_rate": 0.00018,
     "epoch": 0.5,
     "step": 10
   },
   {
-    "loss": 0.693,
-    "grad_norm": 2.682288408279419,
     "learning_rate": 0.00018,
     "epoch": 1.0,
     "step": 20
   },
   {
-    "loss": 0.2551,
-    "grad_norm": 0.7315511703491211,
     "learning_rate": 0.0001577777777777778,
     "epoch": 1.5,
     "step": 30
   },
   {
-    "loss": 0.1824,
-    "grad_norm": 1.0467031002044678,
     "learning_rate": 0.00013555555555555556,
     "epoch": 2.0,
     "step": 40
   },
   {
-    "loss": 0.1345,
-    "grad_norm": 0.9925994277000427,
     "learning_rate": 0.00011333333333333334,
     "epoch": 2.5,
     "step": 50
   },
   {
-    "loss": 0.1294,
-    "grad_norm": 0.866705596446991,
     "learning_rate": 9.111111111111112e-05,
     "epoch": 3.0,
     "step": 60
   },
   {
     "loss": 0.1014,
-    "grad_norm": 0.5562663674354553,
     "learning_rate": 6.88888888888889e-05,
     "epoch": 3.5,
     "step": 70
   },
   {
-    "loss": 0.0787,
-    "grad_norm": 0.7542728185653687,
     "learning_rate": 4.666666666666667e-05,
     "epoch": 4.0,
     "step": 80
   },
   {
-    "loss": 0.0665,
-    "grad_norm": 0.7343875169754028,
     "learning_rate": 2.4444444444444445e-05,
     "epoch": 4.5,
     "step": 90
   },
   {
-    "loss": 0.066,
-    "grad_norm": 1.0010170936584473,
     "learning_rate": 2.2222222222222225e-06,
     "epoch": 5.0,
     "step": 100
   },
   {
-    "train_runtime": 281.3874,
-    "train_samples_per_second": 22.318,
-    "train_steps_per_second": 0.355,
     "total_flos": 2.585479150123776e+16,
-    "train_loss": 0.6368860471248626,
     "epoch": 5.0,
     "step": 100
   }

 [
   {
+    "loss": 4.6746,
+    "grad_norm": 5.330068588256836,
     "learning_rate": 0.00018,
     "epoch": 0.5,
     "step": 10
   },
   {
+    "loss": 0.6937,
+    "grad_norm": 2.7009994983673096,
     "learning_rate": 0.00018,
     "epoch": 1.0,
     "step": 20
   },
   {
+    "loss": 0.2554,
+    "grad_norm": 0.7277244329452515,
     "learning_rate": 0.0001577777777777778,
     "epoch": 1.5,
     "step": 30
   },
   {
+    "loss": 0.183,
+    "grad_norm": 1.0641778707504272,
     "learning_rate": 0.00013555555555555556,
     "epoch": 2.0,
     "step": 40
   },
   {
+    "loss": 0.1349,
+    "grad_norm": 0.9773860573768616,
     "learning_rate": 0.00011333333333333334,
     "epoch": 2.5,
     "step": 50
   },
   {
+    "loss": 0.129,
+    "grad_norm": 0.8620230555534363,
     "learning_rate": 9.111111111111112e-05,
     "epoch": 3.0,
     "step": 60
   },
   {
     "loss": 0.1014,
+    "grad_norm": 0.5703953504562378,
     "learning_rate": 6.88888888888889e-05,
     "epoch": 3.5,
     "step": 70
   },
   {
+    "loss": 0.0786,
+    "grad_norm": 0.7595603466033936,
     "learning_rate": 4.666666666666667e-05,
     "epoch": 4.0,
     "step": 80
   },
   {
+    "loss": 0.0664,
+    "grad_norm": 0.7379135489463806,
     "learning_rate": 2.4444444444444445e-05,
     "epoch": 4.5,
     "step": 90
   },
   {
+    "loss": 0.0657,
+    "grad_norm": 0.9689844846725464,
     "learning_rate": 2.2222222222222225e-06,
     "epoch": 5.0,
     "step": 100
   },
   {
+    "train_runtime": 472.0078,
+    "train_samples_per_second": 13.305,
+    "train_steps_per_second": 0.212,
     "total_flos": 2.585479150123776e+16,
+    "train_loss": 0.6382734912633896,
     "epoch": 5.0,
     "step": 100
   }