taherimoalem commited on
Commit
ceb3aa9
·
verified ·
1 Parent(s): ac84f15

Training in progress, epoch 4

Browse files
Files changed (3) hide show
  1. artifacts/logs.csv +10 -7
  2. artifacts/logs.jsonl +3 -0
  3. model.safetensors +1 -1
artifacts/logs.csv CHANGED
@@ -1,7 +1,10 @@
1
- time,step,epoch,loss,grad_norm,learning_rate,eval_loss,eval_accuracy,eval_f1_macro,eval_runtime,eval_samples_per_second,eval_steps_per_second
2
- 1770207344.8248932,3594,1.0,0.22125994767755286,6.724055290222168,1.5005564830272678e-05,,,,,,
3
- 1770207384.0328238,3594,1.0,,,,0.172308087348938,0.939,0.9385995315324059,39.1904,127.582,4.006
4
- 1770207969.8896728,7188,2.0,0.1287970014857662,6.3051323890686035,1.0008347245409015e-05,,,,,,
5
- 1770208012.5958886,7188,2.0,,,,0.17122335731983185,0.9456,0.9452690872397813,42.7021,117.09,3.677
6
- 1770208602.8990612,10782,3.0,0.08359225723699655,0.1423230767250061,5.008347245409015e-06,,,,,,
7
- 1770208643.235154,10782,3.0,,,,0.19895857572555542,0.9478,0.9473008374036693,40.3327,123.969,3.893
 
 
 
 
1
+ time,step,epoch,loss,grad_norm,learning_rate,eval_loss,eval_accuracy,eval_f1_macro,eval_runtime,eval_samples_per_second,eval_steps_per_second,train_runtime,train_samples_per_second,train_steps_per_second,total_flos,train_loss
2
+ 1770207344.8248932,3594,1.0,0.22125994767755286,6.724055290222168,1.5005564830272678e-05,,,,,,,,,,,
3
+ 1770207384.0328238,3594,1.0,,,,0.172308087348938,0.939,0.9385995315324059,39.1904,127.582,4.006,,,,,
4
+ 1770207969.8896728,7188,2.0,0.1287970014857662,6.3051323890686035,1.0008347245409015e-05,,,,,,,,,,,
5
+ 1770208012.5958886,7188,2.0,,,,0.17122335731983185,0.9456,0.9452690872397813,42.7021,117.09,3.677,,,,,
6
+ 1770208602.8990612,10782,3.0,0.08359225723699655,0.1423230767250061,5.008347245409015e-06,,,,,,,,,,,
7
+ 1770208643.235154,10782,3.0,,,,0.19895857572555542,0.9478,0.9473008374036693,40.3327,123.969,3.893,,,,,
8
+ 1770209258.537886,14376,4.0,0.05389018369238444,3.4260194301605225,1.1129660545353369e-08,,,,,,,,,,,
9
+ 1770209297.2570825,14376,4.0,,,,0.23433832824230194,0.946,0.9456142780018644,38.7172,129.142,4.055,,,,,
10
+ 1770209299.1378114,14376,4.0,,,,,,,,,,2558.2108,179.813,5.62,2.475637874594976e+16,0.12188484752317502
artifacts/logs.jsonl CHANGED
@@ -4,3 +4,6 @@
4
  {"time": 1770208012.5958886, "step": 7188, "epoch": 2.0, "eval_loss": 0.17122335731983185, "eval_accuracy": 0.9456, "eval_f1_macro": 0.9452690872397813, "eval_runtime": 42.7021, "eval_samples_per_second": 117.09, "eval_steps_per_second": 3.677}
5
  {"time": 1770208602.8990612, "step": 10782, "epoch": 3.0, "loss": 0.08359225723699655, "grad_norm": 0.1423230767250061, "learning_rate": 5.008347245409015e-06}
6
  {"time": 1770208643.235154, "step": 10782, "epoch": 3.0, "eval_loss": 0.19895857572555542, "eval_accuracy": 0.9478, "eval_f1_macro": 0.9473008374036693, "eval_runtime": 40.3327, "eval_samples_per_second": 123.969, "eval_steps_per_second": 3.893}
 
 
 
 
4
  {"time": 1770208012.5958886, "step": 7188, "epoch": 2.0, "eval_loss": 0.17122335731983185, "eval_accuracy": 0.9456, "eval_f1_macro": 0.9452690872397813, "eval_runtime": 42.7021, "eval_samples_per_second": 117.09, "eval_steps_per_second": 3.677}
5
  {"time": 1770208602.8990612, "step": 10782, "epoch": 3.0, "loss": 0.08359225723699655, "grad_norm": 0.1423230767250061, "learning_rate": 5.008347245409015e-06}
6
  {"time": 1770208643.235154, "step": 10782, "epoch": 3.0, "eval_loss": 0.19895857572555542, "eval_accuracy": 0.9478, "eval_f1_macro": 0.9473008374036693, "eval_runtime": 40.3327, "eval_samples_per_second": 123.969, "eval_steps_per_second": 3.893}
7
+ {"time": 1770209258.537886, "step": 14376, "epoch": 4.0, "loss": 0.05389018369238444, "grad_norm": 3.4260194301605225, "learning_rate": 1.1129660545353369e-08}
8
+ {"time": 1770209297.2570825, "step": 14376, "epoch": 4.0, "eval_loss": 0.23433832824230194, "eval_accuracy": 0.946, "eval_f1_macro": 0.9456142780018644, "eval_runtime": 38.7172, "eval_samples_per_second": 129.142, "eval_steps_per_second": 4.055}
9
+ {"time": 1770209299.1378114, "step": 14376, "epoch": 4.0, "train_runtime": 2558.2108, "train_samples_per_second": 179.813, "train_steps_per_second": 5.62, "total_flos": 2.475637874594976e+16, "train_loss": 0.12188484752317502}
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3cb96d065522b57d11b92ade83936cf5e00884d474034699cf33deaf04488335
3
  size 437964776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b7f1993041c18196245dd2ff1acddba407c4b7202c67d26f7a1b095e93bb45e
3
  size 437964776