gsmyrnis commited on
Commit
766d149
·
verified ·
1 Parent(s): 7deef88

Training in progress, epoch 0

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b94874647a0ef4273f15e44dbc55eab469c13702e6c611faecc6e2298202a2b7
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:817ab32f31232c78e923363130cdc6529c2ce238a0e6cf8fca2bda4fd1385806
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8c9efaca77d3516983c1bd8a4a63121af41d543744e452eb2fb571060b686cd
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6584a09e48663033c119606f085a0e88a5c3f98cb21a1366a0d46470e0e588b6
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00d5ad4a82353b576c19338d7aed8d347587e86bcfa52976cb68c4e2a3257c66
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae6bcc0e8187a3a81b1360c684284e31da9d7909fb1da0535194449b2dfa9eb0
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5027706b18318d7599d1a46806fe245c5d6b7a4cbd24a765328743bcf8dcddfd
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb26593b786cba2828c84385eff2df42c45b50dc7e27d4954d55efba734f48a6
3
  size 1089994880
trainer_log.jsonl CHANGED
@@ -1,31 +1,10 @@
1
- {"current_steps": 1, "total_steps": 30, "loss": 1.0597, "lr": 3.3333333333333333e-06, "epoch": 0.09375, "percentage": 3.33, "elapsed_time": "0:00:14", "remaining_time": "0:06:47"}
2
- {"current_steps": 2, "total_steps": 30, "loss": 1.0411, "lr": 6.666666666666667e-06, "epoch": 0.1875, "percentage": 6.67, "elapsed_time": "0:00:25", "remaining_time": "0:05:59"}
3
- {"current_steps": 3, "total_steps": 30, "loss": 1.022, "lr": 1e-05, "epoch": 0.28125, "percentage": 10.0, "elapsed_time": "0:00:37", "remaining_time": "0:05:41"}
4
- {"current_steps": 4, "total_steps": 30, "loss": 0.8987, "lr": 9.966191788709716e-06, "epoch": 0.375, "percentage": 13.33, "elapsed_time": "0:00:53", "remaining_time": "0:05:48"}
5
- {"current_steps": 5, "total_steps": 30, "loss": 0.9115, "lr": 9.86522435289912e-06, "epoch": 0.46875, "percentage": 16.67, "elapsed_time": "0:01:05", "remaining_time": "0:05:29"}
6
- {"current_steps": 6, "total_steps": 30, "loss": 0.9104, "lr": 9.698463103929542e-06, "epoch": 0.5625, "percentage": 20.0, "elapsed_time": "0:01:16", "remaining_time": "0:05:07"}
7
- {"current_steps": 7, "total_steps": 30, "loss": 0.8984, "lr": 9.468163201617063e-06, "epoch": 0.65625, "percentage": 23.33, "elapsed_time": "0:01:26", "remaining_time": "0:04:44"}
8
- {"current_steps": 8, "total_steps": 30, "loss": 0.8972, "lr": 9.177439057064684e-06, "epoch": 0.75, "percentage": 26.67, "elapsed_time": "0:01:38", "remaining_time": "0:04:31"}
9
- {"current_steps": 9, "total_steps": 30, "loss": 0.8385, "lr": 8.83022221559489e-06, "epoch": 0.84375, "percentage": 30.0, "elapsed_time": "0:01:53", "remaining_time": "0:04:25"}
10
- {"current_steps": 10, "total_steps": 30, "loss": 0.8219, "lr": 8.43120818934367e-06, "epoch": 0.9375, "percentage": 33.33, "elapsed_time": "0:02:08", "remaining_time": "0:04:16"}
11
- {"current_steps": 11, "total_steps": 30, "loss": 1.3901, "lr": 7.985792958513932e-06, "epoch": 1.0625, "percentage": 36.67, "elapsed_time": "0:03:20", "remaining_time": "0:05:46"}
12
- {"current_steps": 12, "total_steps": 30, "loss": 0.731, "lr": 7.500000000000001e-06, "epoch": 1.15625, "percentage": 40.0, "elapsed_time": "0:03:33", "remaining_time": "0:05:20"}
13
- {"current_steps": 13, "total_steps": 30, "loss": 0.7711, "lr": 6.980398830195785e-06, "epoch": 1.25, "percentage": 43.33, "elapsed_time": "0:03:47", "remaining_time": "0:04:57"}
14
- {"current_steps": 14, "total_steps": 30, "loss": 0.7208, "lr": 6.434016163555452e-06, "epoch": 1.34375, "percentage": 46.67, "elapsed_time": "0:03:56", "remaining_time": "0:04:30"}
15
- {"current_steps": 15, "total_steps": 30, "loss": 0.768, "lr": 5.8682408883346535e-06, "epoch": 1.4375, "percentage": 50.0, "elapsed_time": "0:04:10", "remaining_time": "0:04:10"}
16
- {"current_steps": 16, "total_steps": 30, "loss": 0.7985, "lr": 5.290724144552379e-06, "epoch": 1.53125, "percentage": 53.33, "elapsed_time": "0:04:23", "remaining_time": "0:03:50"}
17
- {"current_steps": 17, "total_steps": 30, "loss": 0.6938, "lr": 4.7092758554476215e-06, "epoch": 1.625, "percentage": 56.67, "elapsed_time": "0:04:39", "remaining_time": "0:03:33"}
18
- {"current_steps": 18, "total_steps": 30, "loss": 0.7287, "lr": 4.131759111665349e-06, "epoch": 1.71875, "percentage": 60.0, "elapsed_time": "0:04:52", "remaining_time": "0:03:14"}
19
- {"current_steps": 19, "total_steps": 30, "loss": 0.7368, "lr": 3.5659838364445505e-06, "epoch": 1.8125, "percentage": 63.33, "elapsed_time": "0:05:02", "remaining_time": "0:02:55"}
20
- {"current_steps": 20, "total_steps": 30, "loss": 0.6756, "lr": 3.019601169804216e-06, "epoch": 1.90625, "percentage": 66.67, "elapsed_time": "0:05:15", "remaining_time": "0:02:37"}
21
- {"current_steps": 21, "total_steps": 30, "loss": 1.192, "lr": 2.5000000000000015e-06, "epoch": 2.03125, "percentage": 70.0, "elapsed_time": "0:06:19", "remaining_time": "0:02:42"}
22
- {"current_steps": 22, "total_steps": 30, "loss": 0.6696, "lr": 2.0142070414860704e-06, "epoch": 2.125, "percentage": 73.33, "elapsed_time": "0:06:33", "remaining_time": "0:02:23"}
23
- {"current_steps": 23, "total_steps": 30, "loss": 0.6785, "lr": 1.5687918106563326e-06, "epoch": 2.21875, "percentage": 76.67, "elapsed_time": "0:06:44", "remaining_time": "0:02:02"}
24
- {"current_steps": 24, "total_steps": 30, "loss": 0.7032, "lr": 1.1697777844051105e-06, "epoch": 2.3125, "percentage": 80.0, "elapsed_time": "0:06:55", "remaining_time": "0:01:43"}
25
- {"current_steps": 25, "total_steps": 30, "loss": 0.6561, "lr": 8.225609429353187e-07, "epoch": 2.40625, "percentage": 83.33, "elapsed_time": "0:07:10", "remaining_time": "0:01:26"}
26
- {"current_steps": 26, "total_steps": 30, "loss": 0.6628, "lr": 5.318367983829393e-07, "epoch": 2.5, "percentage": 86.67, "elapsed_time": "0:07:20", "remaining_time": "0:01:07"}
27
- {"current_steps": 27, "total_steps": 30, "loss": 0.7113, "lr": 3.015368960704584e-07, "epoch": 2.59375, "percentage": 90.0, "elapsed_time": "0:07:32", "remaining_time": "0:00:50"}
28
- {"current_steps": 28, "total_steps": 30, "loss": 0.6844, "lr": 1.3477564710088097e-07, "epoch": 2.6875, "percentage": 93.33, "elapsed_time": "0:07:47", "remaining_time": "0:00:33"}
29
- {"current_steps": 29, "total_steps": 30, "loss": 0.6778, "lr": 3.3808211290284886e-08, "epoch": 2.78125, "percentage": 96.67, "elapsed_time": "0:08:02", "remaining_time": "0:00:16"}
30
- {"current_steps": 30, "total_steps": 30, "loss": 0.6967, "lr": 0.0, "epoch": 2.875, "percentage": 100.0, "elapsed_time": "0:08:14", "remaining_time": "0:00:00"}
31
- {"current_steps": 30, "total_steps": 30, "epoch": 2.875, "percentage": 100.0, "elapsed_time": "0:10:20", "remaining_time": "0:00:00"}
 
1
+ {"current_steps": 1, "total_steps": 9, "loss": 1.0263, "lr": 1e-05, "epoch": 0.2727272727272727, "percentage": 11.11, "elapsed_time": "0:00:24", "remaining_time": "0:03:18"}
2
+ {"current_steps": 2, "total_steps": 9, "loss": 1.0116, "lr": 9.619397662556434e-06, "epoch": 0.5454545454545454, "percentage": 22.22, "elapsed_time": "0:00:43", "remaining_time": "0:02:32"}
3
+ {"current_steps": 3, "total_steps": 9, "loss": 0.9324, "lr": 8.535533905932739e-06, "epoch": 0.8181818181818182, "percentage": 33.33, "elapsed_time": "0:01:02", "remaining_time": "0:02:04"}
4
+ {"current_steps": 4, "total_steps": 9, "loss": 1.2052, "lr": 6.913417161825449e-06, "epoch": 1.0909090909090908, "percentage": 44.44, "elapsed_time": "0:02:28", "remaining_time": "0:03:06"}
5
+ {"current_steps": 5, "total_steps": 9, "loss": 0.8996, "lr": 5e-06, "epoch": 1.3636363636363638, "percentage": 55.56, "elapsed_time": "0:02:47", "remaining_time": "0:02:14"}
6
+ {"current_steps": 6, "total_steps": 9, "loss": 0.8354, "lr": 3.0865828381745515e-06, "epoch": 1.6363636363636362, "percentage": 66.67, "elapsed_time": "0:03:06", "remaining_time": "0:01:33"}
7
+ {"current_steps": 7, "total_steps": 9, "loss": 0.9686, "lr": 1.4644660940672628e-06, "epoch": 1.9090909090909092, "percentage": 77.78, "elapsed_time": "0:03:26", "remaining_time": "0:00:58"}
8
+ {"current_steps": 8, "total_steps": 9, "loss": 0.9383, "lr": 3.8060233744356634e-07, "epoch": 2.1818181818181817, "percentage": 88.89, "elapsed_time": "0:04:44", "remaining_time": "0:00:35"}
9
+ {"current_steps": 9, "total_steps": 9, "loss": 0.8515, "lr": 0.0, "epoch": 2.4545454545454546, "percentage": 100.0, "elapsed_time": "0:05:02", "remaining_time": "0:00:00"}
10
+ {"current_steps": 9, "total_steps": 9, "epoch": 2.4545454545454546, "percentage": 100.0, "elapsed_time": "0:06:56", "remaining_time": "0:00:00"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0553a31864cc1776bf32c202b170d207a4209b96fbf0264d1140466c264055cc
3
  size 7480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd1a2c70a583b12f137e9a245ed23845641d27bfaba53379ed1f021982d8bf20
3
  size 7480