Training in progress, epoch 4
Browse files- artifacts/logs.csv +10 -7
- artifacts/logs.jsonl +3 -0
- model.safetensors +1 -1
artifacts/logs.csv
CHANGED
|
@@ -1,7 +1,10 @@
|
|
| 1 |
-
time,step,epoch,loss,grad_norm,learning_rate,eval_loss,eval_accuracy,eval_f1_macro,eval_runtime,eval_samples_per_second,eval_steps_per_second
|
| 2 |
-
1770207344.8248932,3594,1.0,0.22125994767755286,6.724055290222168,1.5005564830272678e-05,,,,,,
|
| 3 |
-
1770207384.0328238,3594,1.0,,,,0.172308087348938,0.939,0.9385995315324059,39.1904,127.582,4.006
|
| 4 |
-
1770207969.8896728,7188,2.0,0.1287970014857662,6.3051323890686035,1.0008347245409015e-05,,,,,,
|
| 5 |
-
1770208012.5958886,7188,2.0,,,,0.17122335731983185,0.9456,0.9452690872397813,42.7021,117.09,3.677
|
| 6 |
-
1770208602.8990612,10782,3.0,0.08359225723699655,0.1423230767250061,5.008347245409015e-06,,,,,,
|
| 7 |
-
1770208643.235154,10782,3.0,,,,0.19895857572555542,0.9478,0.9473008374036693,40.3327,123.969,3.893
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
time,step,epoch,loss,grad_norm,learning_rate,eval_loss,eval_accuracy,eval_f1_macro,eval_runtime,eval_samples_per_second,eval_steps_per_second,train_runtime,train_samples_per_second,train_steps_per_second,total_flos,train_loss
|
| 2 |
+
1770207344.8248932,3594,1.0,0.22125994767755286,6.724055290222168,1.5005564830272678e-05,,,,,,,,,,,
|
| 3 |
+
1770207384.0328238,3594,1.0,,,,0.172308087348938,0.939,0.9385995315324059,39.1904,127.582,4.006,,,,,
|
| 4 |
+
1770207969.8896728,7188,2.0,0.1287970014857662,6.3051323890686035,1.0008347245409015e-05,,,,,,,,,,,
|
| 5 |
+
1770208012.5958886,7188,2.0,,,,0.17122335731983185,0.9456,0.9452690872397813,42.7021,117.09,3.677,,,,,
|
| 6 |
+
1770208602.8990612,10782,3.0,0.08359225723699655,0.1423230767250061,5.008347245409015e-06,,,,,,,,,,,
|
| 7 |
+
1770208643.235154,10782,3.0,,,,0.19895857572555542,0.9478,0.9473008374036693,40.3327,123.969,3.893,,,,,
|
| 8 |
+
1770209258.537886,14376,4.0,0.05389018369238444,3.4260194301605225,1.1129660545353369e-08,,,,,,,,,,,
|
| 9 |
+
1770209297.2570825,14376,4.0,,,,0.23433832824230194,0.946,0.9456142780018644,38.7172,129.142,4.055,,,,,
|
| 10 |
+
1770209299.1378114,14376,4.0,,,,,,,,,,2558.2108,179.813,5.62,2.475637874594976e+16,0.12188484752317502
|
artifacts/logs.jsonl
CHANGED
|
@@ -4,3 +4,6 @@
|
|
| 4 |
{"time": 1770208012.5958886, "step": 7188, "epoch": 2.0, "eval_loss": 0.17122335731983185, "eval_accuracy": 0.9456, "eval_f1_macro": 0.9452690872397813, "eval_runtime": 42.7021, "eval_samples_per_second": 117.09, "eval_steps_per_second": 3.677}
|
| 5 |
{"time": 1770208602.8990612, "step": 10782, "epoch": 3.0, "loss": 0.08359225723699655, "grad_norm": 0.1423230767250061, "learning_rate": 5.008347245409015e-06}
|
| 6 |
{"time": 1770208643.235154, "step": 10782, "epoch": 3.0, "eval_loss": 0.19895857572555542, "eval_accuracy": 0.9478, "eval_f1_macro": 0.9473008374036693, "eval_runtime": 40.3327, "eval_samples_per_second": 123.969, "eval_steps_per_second": 3.893}
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
{"time": 1770208012.5958886, "step": 7188, "epoch": 2.0, "eval_loss": 0.17122335731983185, "eval_accuracy": 0.9456, "eval_f1_macro": 0.9452690872397813, "eval_runtime": 42.7021, "eval_samples_per_second": 117.09, "eval_steps_per_second": 3.677}
|
| 5 |
{"time": 1770208602.8990612, "step": 10782, "epoch": 3.0, "loss": 0.08359225723699655, "grad_norm": 0.1423230767250061, "learning_rate": 5.008347245409015e-06}
|
| 6 |
{"time": 1770208643.235154, "step": 10782, "epoch": 3.0, "eval_loss": 0.19895857572555542, "eval_accuracy": 0.9478, "eval_f1_macro": 0.9473008374036693, "eval_runtime": 40.3327, "eval_samples_per_second": 123.969, "eval_steps_per_second": 3.893}
|
| 7 |
+
{"time": 1770209258.537886, "step": 14376, "epoch": 4.0, "loss": 0.05389018369238444, "grad_norm": 3.4260194301605225, "learning_rate": 1.1129660545353369e-08}
|
| 8 |
+
{"time": 1770209297.2570825, "step": 14376, "epoch": 4.0, "eval_loss": 0.23433832824230194, "eval_accuracy": 0.946, "eval_f1_macro": 0.9456142780018644, "eval_runtime": 38.7172, "eval_samples_per_second": 129.142, "eval_steps_per_second": 4.055}
|
| 9 |
+
{"time": 1770209299.1378114, "step": 14376, "epoch": 4.0, "train_runtime": 2558.2108, "train_samples_per_second": 179.813, "train_steps_per_second": 5.62, "total_flos": 2.475637874594976e+16, "train_loss": 0.12188484752317502}
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 437964776
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3b7f1993041c18196245dd2ff1acddba407c4b7202c67d26f7a1b095e93bb45e
|
| 3 |
size 437964776
|