gsmyrnis commited on
Commit
f240629
·
verified ·
1 Parent(s): 4848ecd

Training in progress, epoch 1

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7738873d7263edd23aa303f8c8adb4ac2dadb0b3f56b82e1c6f926b7e1582c92
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6bb3b33076c82793c47333e198edd8fb880df1c25aa657336d25f38c28aceec
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6632066997c3538025405e874cd8c60e637ab17d2485794a93d440e32386d057
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3612d7f712ab11fd9a51ef6e90ed66b0a8c30be8306767bbc9cf18b8c396bbb3
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b24b972e834ccb4803a9e9c5206d739ca2e57ddf45dfde940f5f6dcd7c1925b
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c40b3e01c89c651855dc7f5fb24657fcac3cff6ddbbd93279ffd49d281d1a2b
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ea4bfdfad57f865e69819e38bfafe39d6dd5d54dcbb08d72c12bfb4a2507f4d
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d097ed9acd9b44e7bb2a6aa96aecdfe026b0a8650a145e35861d2f8087f9f4bb
3
  size 1168138808
trainer_log.jsonl CHANGED
@@ -1,67 +1,52 @@
1
- {"current_steps": 10, "total_steps": 636, "loss": 0.723, "lr": 5e-06, "epoch": 0.04716981132075472, "percentage": 1.57, "elapsed_time": "0:00:59", "remaining_time": "1:02:23"}
2
- {"current_steps": 20, "total_steps": 636, "loss": 0.6454, "lr": 5e-06, "epoch": 0.09433962264150944, "percentage": 3.14, "elapsed_time": "0:01:58", "remaining_time": "1:00:40"}
3
- {"current_steps": 30, "total_steps": 636, "loss": 0.608, "lr": 5e-06, "epoch": 0.14150943396226415, "percentage": 4.72, "elapsed_time": "0:02:56", "remaining_time": "0:59:31"}
4
- {"current_steps": 40, "total_steps": 636, "loss": 0.5866, "lr": 5e-06, "epoch": 0.18867924528301888, "percentage": 6.29, "elapsed_time": "0:03:55", "remaining_time": "0:58:23"}
5
- {"current_steps": 50, "total_steps": 636, "loss": 0.5803, "lr": 5e-06, "epoch": 0.2358490566037736, "percentage": 7.86, "elapsed_time": "0:04:53", "remaining_time": "0:57:19"}
6
- {"current_steps": 60, "total_steps": 636, "loss": 0.571, "lr": 5e-06, "epoch": 0.2830188679245283, "percentage": 9.43, "elapsed_time": "0:05:52", "remaining_time": "0:56:22"}
7
- {"current_steps": 70, "total_steps": 636, "loss": 0.567, "lr": 5e-06, "epoch": 0.330188679245283, "percentage": 11.01, "elapsed_time": "0:06:50", "remaining_time": "0:55:22"}
8
- {"current_steps": 80, "total_steps": 636, "loss": 0.5609, "lr": 5e-06, "epoch": 0.37735849056603776, "percentage": 12.58, "elapsed_time": "0:07:49", "remaining_time": "0:54:21"}
9
- {"current_steps": 90, "total_steps": 636, "loss": 0.5614, "lr": 5e-06, "epoch": 0.42452830188679247, "percentage": 14.15, "elapsed_time": "0:08:47", "remaining_time": "0:53:21"}
10
- {"current_steps": 100, "total_steps": 636, "loss": 0.555, "lr": 5e-06, "epoch": 0.4716981132075472, "percentage": 15.72, "elapsed_time": "0:09:46", "remaining_time": "0:52:22"}
11
- {"current_steps": 110, "total_steps": 636, "loss": 0.5485, "lr": 5e-06, "epoch": 0.5188679245283019, "percentage": 17.3, "elapsed_time": "0:10:44", "remaining_time": "0:51:22"}
12
- {"current_steps": 120, "total_steps": 636, "loss": 0.5481, "lr": 5e-06, "epoch": 0.5660377358490566, "percentage": 18.87, "elapsed_time": "0:11:43", "remaining_time": "0:50:23"}
13
- {"current_steps": 130, "total_steps": 636, "loss": 0.5516, "lr": 5e-06, "epoch": 0.6132075471698113, "percentage": 20.44, "elapsed_time": "0:12:41", "remaining_time": "0:49:24"}
14
- {"current_steps": 140, "total_steps": 636, "loss": 0.5441, "lr": 5e-06, "epoch": 0.660377358490566, "percentage": 22.01, "elapsed_time": "0:13:40", "remaining_time": "0:48:26"}
15
- {"current_steps": 150, "total_steps": 636, "loss": 0.5444, "lr": 5e-06, "epoch": 0.7075471698113207, "percentage": 23.58, "elapsed_time": "0:14:38", "remaining_time": "0:47:27"}
16
- {"current_steps": 160, "total_steps": 636, "loss": 0.5405, "lr": 5e-06, "epoch": 0.7547169811320755, "percentage": 25.16, "elapsed_time": "0:15:37", "remaining_time": "0:46:28"}
17
- {"current_steps": 170, "total_steps": 636, "loss": 0.5383, "lr": 5e-06, "epoch": 0.8018867924528302, "percentage": 26.73, "elapsed_time": "0:16:35", "remaining_time": "0:45:29"}
18
- {"current_steps": 180, "total_steps": 636, "loss": 0.5398, "lr": 5e-06, "epoch": 0.8490566037735849, "percentage": 28.3, "elapsed_time": "0:17:34", "remaining_time": "0:44:31"}
19
- {"current_steps": 190, "total_steps": 636, "loss": 0.5423, "lr": 5e-06, "epoch": 0.8962264150943396, "percentage": 29.87, "elapsed_time": "0:18:32", "remaining_time": "0:43:32"}
20
- {"current_steps": 200, "total_steps": 636, "loss": 0.5401, "lr": 5e-06, "epoch": 0.9433962264150944, "percentage": 31.45, "elapsed_time": "0:19:31", "remaining_time": "0:42:33"}
21
- {"current_steps": 210, "total_steps": 636, "loss": 0.5355, "lr": 5e-06, "epoch": 0.9905660377358491, "percentage": 33.02, "elapsed_time": "0:20:29", "remaining_time": "0:41:34"}
22
- {"current_steps": 212, "total_steps": 636, "eval_loss": 0.5338753461837769, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:21:02", "remaining_time": "0:42:04"}
23
- {"current_steps": 220, "total_steps": 636, "loss": 0.5182, "lr": 5e-06, "epoch": 1.0377358490566038, "percentage": 34.59, "elapsed_time": "0:23:15", "remaining_time": "0:43:59"}
24
- {"current_steps": 230, "total_steps": 636, "loss": 0.5151, "lr": 5e-06, "epoch": 1.0849056603773586, "percentage": 36.16, "elapsed_time": "0:24:14", "remaining_time": "0:42:47"}
25
- {"current_steps": 240, "total_steps": 636, "loss": 0.5092, "lr": 5e-06, "epoch": 1.1320754716981132, "percentage": 37.74, "elapsed_time": "0:25:12", "remaining_time": "0:41:35"}
26
- {"current_steps": 250, "total_steps": 636, "loss": 0.511, "lr": 5e-06, "epoch": 1.179245283018868, "percentage": 39.31, "elapsed_time": "0:26:11", "remaining_time": "0:40:25"}
27
- {"current_steps": 260, "total_steps": 636, "loss": 0.5141, "lr": 5e-06, "epoch": 1.2264150943396226, "percentage": 40.88, "elapsed_time": "0:27:09", "remaining_time": "0:39:17"}
28
- {"current_steps": 270, "total_steps": 636, "loss": 0.5091, "lr": 5e-06, "epoch": 1.2735849056603774, "percentage": 42.45, "elapsed_time": "0:28:08", "remaining_time": "0:38:08"}
29
- {"current_steps": 280, "total_steps": 636, "loss": 0.5106, "lr": 5e-06, "epoch": 1.320754716981132, "percentage": 44.03, "elapsed_time": "0:29:06", "remaining_time": "0:37:00"}
30
- {"current_steps": 290, "total_steps": 636, "loss": 0.5143, "lr": 5e-06, "epoch": 1.3679245283018868, "percentage": 45.6, "elapsed_time": "0:30:05", "remaining_time": "0:35:54"}
31
- {"current_steps": 300, "total_steps": 636, "loss": 0.5046, "lr": 5e-06, "epoch": 1.4150943396226414, "percentage": 47.17, "elapsed_time": "0:31:03", "remaining_time": "0:34:47"}
32
- {"current_steps": 310, "total_steps": 636, "loss": 0.512, "lr": 5e-06, "epoch": 1.4622641509433962, "percentage": 48.74, "elapsed_time": "0:32:02", "remaining_time": "0:33:41"}
33
- {"current_steps": 320, "total_steps": 636, "loss": 0.5072, "lr": 5e-06, "epoch": 1.509433962264151, "percentage": 50.31, "elapsed_time": "0:33:00", "remaining_time": "0:32:36"}
34
- {"current_steps": 330, "total_steps": 636, "loss": 0.5095, "lr": 5e-06, "epoch": 1.5566037735849056, "percentage": 51.89, "elapsed_time": "0:33:59", "remaining_time": "0:31:31"}
35
- {"current_steps": 340, "total_steps": 636, "loss": 0.5066, "lr": 5e-06, "epoch": 1.6037735849056602, "percentage": 53.46, "elapsed_time": "0:34:57", "remaining_time": "0:30:26"}
36
- {"current_steps": 350, "total_steps": 636, "loss": 0.5105, "lr": 5e-06, "epoch": 1.650943396226415, "percentage": 55.03, "elapsed_time": "0:35:56", "remaining_time": "0:29:22"}
37
- {"current_steps": 360, "total_steps": 636, "loss": 0.5064, "lr": 5e-06, "epoch": 1.6981132075471699, "percentage": 56.6, "elapsed_time": "0:36:54", "remaining_time": "0:28:18"}
38
- {"current_steps": 370, "total_steps": 636, "loss": 0.5044, "lr": 5e-06, "epoch": 1.7452830188679245, "percentage": 58.18, "elapsed_time": "0:37:53", "remaining_time": "0:27:14"}
39
- {"current_steps": 380, "total_steps": 636, "loss": 0.5086, "lr": 5e-06, "epoch": 1.7924528301886793, "percentage": 59.75, "elapsed_time": "0:38:52", "remaining_time": "0:26:11"}
40
- {"current_steps": 390, "total_steps": 636, "loss": 0.5078, "lr": 5e-06, "epoch": 1.8396226415094339, "percentage": 61.32, "elapsed_time": "0:39:50", "remaining_time": "0:25:07"}
41
- {"current_steps": 400, "total_steps": 636, "loss": 0.5088, "lr": 5e-06, "epoch": 1.8867924528301887, "percentage": 62.89, "elapsed_time": "0:40:48", "remaining_time": "0:24:04"}
42
- {"current_steps": 410, "total_steps": 636, "loss": 0.5074, "lr": 5e-06, "epoch": 1.9339622641509435, "percentage": 64.47, "elapsed_time": "0:41:48", "remaining_time": "0:23:02"}
43
- {"current_steps": 420, "total_steps": 636, "loss": 0.5104, "lr": 5e-06, "epoch": 1.9811320754716981, "percentage": 66.04, "elapsed_time": "0:42:47", "remaining_time": "0:22:00"}
44
- {"current_steps": 424, "total_steps": 636, "eval_loss": 0.5254501104354858, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "0:43:31", "remaining_time": "0:21:45"}
45
- {"current_steps": 430, "total_steps": 636, "loss": 0.4885, "lr": 5e-06, "epoch": 2.0283018867924527, "percentage": 67.61, "elapsed_time": "0:45:37", "remaining_time": "0:21:51"}
46
- {"current_steps": 440, "total_steps": 636, "loss": 0.4778, "lr": 5e-06, "epoch": 2.0754716981132075, "percentage": 69.18, "elapsed_time": "0:46:36", "remaining_time": "0:20:45"}
47
- {"current_steps": 450, "total_steps": 636, "loss": 0.476, "lr": 5e-06, "epoch": 2.1226415094339623, "percentage": 70.75, "elapsed_time": "0:47:35", "remaining_time": "0:19:40"}
48
- {"current_steps": 460, "total_steps": 636, "loss": 0.4773, "lr": 5e-06, "epoch": 2.169811320754717, "percentage": 72.33, "elapsed_time": "0:48:33", "remaining_time": "0:18:34"}
49
- {"current_steps": 470, "total_steps": 636, "loss": 0.4811, "lr": 5e-06, "epoch": 2.2169811320754715, "percentage": 73.9, "elapsed_time": "0:49:32", "remaining_time": "0:17:29"}
50
- {"current_steps": 480, "total_steps": 636, "loss": 0.479, "lr": 5e-06, "epoch": 2.2641509433962264, "percentage": 75.47, "elapsed_time": "0:50:30", "remaining_time": "0:16:24"}
51
- {"current_steps": 490, "total_steps": 636, "loss": 0.4816, "lr": 5e-06, "epoch": 2.311320754716981, "percentage": 77.04, "elapsed_time": "0:51:29", "remaining_time": "0:15:20"}
52
- {"current_steps": 500, "total_steps": 636, "loss": 0.4833, "lr": 5e-06, "epoch": 2.358490566037736, "percentage": 78.62, "elapsed_time": "0:52:28", "remaining_time": "0:14:16"}
53
- {"current_steps": 510, "total_steps": 636, "loss": 0.4807, "lr": 5e-06, "epoch": 2.4056603773584904, "percentage": 80.19, "elapsed_time": "0:53:27", "remaining_time": "0:13:12"}
54
- {"current_steps": 520, "total_steps": 636, "loss": 0.4811, "lr": 5e-06, "epoch": 2.452830188679245, "percentage": 81.76, "elapsed_time": "0:54:25", "remaining_time": "0:12:08"}
55
- {"current_steps": 530, "total_steps": 636, "loss": 0.4818, "lr": 5e-06, "epoch": 2.5, "percentage": 83.33, "elapsed_time": "0:55:23", "remaining_time": "0:11:04"}
56
- {"current_steps": 540, "total_steps": 636, "loss": 0.4775, "lr": 5e-06, "epoch": 2.547169811320755, "percentage": 84.91, "elapsed_time": "0:56:22", "remaining_time": "0:10:01"}
57
- {"current_steps": 550, "total_steps": 636, "loss": 0.4769, "lr": 5e-06, "epoch": 2.5943396226415096, "percentage": 86.48, "elapsed_time": "0:57:21", "remaining_time": "0:08:58"}
58
- {"current_steps": 560, "total_steps": 636, "loss": 0.4815, "lr": 5e-06, "epoch": 2.641509433962264, "percentage": 88.05, "elapsed_time": "0:58:19", "remaining_time": "0:07:54"}
59
- {"current_steps": 570, "total_steps": 636, "loss": 0.478, "lr": 5e-06, "epoch": 2.688679245283019, "percentage": 89.62, "elapsed_time": "0:59:18", "remaining_time": "0:06:52"}
60
- {"current_steps": 580, "total_steps": 636, "loss": 0.4799, "lr": 5e-06, "epoch": 2.7358490566037736, "percentage": 91.19, "elapsed_time": "1:00:16", "remaining_time": "0:05:49"}
61
- {"current_steps": 590, "total_steps": 636, "loss": 0.4794, "lr": 5e-06, "epoch": 2.7830188679245285, "percentage": 92.77, "elapsed_time": "1:01:15", "remaining_time": "0:04:46"}
62
- {"current_steps": 600, "total_steps": 636, "loss": 0.4824, "lr": 5e-06, "epoch": 2.830188679245283, "percentage": 94.34, "elapsed_time": "1:02:13", "remaining_time": "0:03:44"}
63
- {"current_steps": 610, "total_steps": 636, "loss": 0.4787, "lr": 5e-06, "epoch": 2.8773584905660377, "percentage": 95.91, "elapsed_time": "1:03:12", "remaining_time": "0:02:41"}
64
- {"current_steps": 620, "total_steps": 636, "loss": 0.4827, "lr": 5e-06, "epoch": 2.9245283018867925, "percentage": 97.48, "elapsed_time": "1:04:11", "remaining_time": "0:01:39"}
65
- {"current_steps": 630, "total_steps": 636, "loss": 0.4809, "lr": 5e-06, "epoch": 2.9716981132075473, "percentage": 99.06, "elapsed_time": "1:05:11", "remaining_time": "0:00:37"}
66
- {"current_steps": 636, "total_steps": 636, "eval_loss": 0.5257502794265747, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1:07:33", "remaining_time": "0:00:00"}
67
- {"current_steps": 636, "total_steps": 636, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1:08:56", "remaining_time": "0:00:00"}
 
1
+ {"current_steps": 10, "total_steps": 636, "loss": 0.7225, "lr": 5e-06, "epoch": 0.04716981132075472, "percentage": 1.57, "elapsed_time": "0:01:02", "remaining_time": "1:05:28"}
2
+ {"current_steps": 20, "total_steps": 636, "loss": 0.6548, "lr": 5e-06, "epoch": 0.09433962264150944, "percentage": 3.14, "elapsed_time": "0:02:03", "remaining_time": "1:03:12"}
3
+ {"current_steps": 30, "total_steps": 636, "loss": 0.6156, "lr": 5e-06, "epoch": 0.14150943396226415, "percentage": 4.72, "elapsed_time": "0:03:03", "remaining_time": "1:01:44"}
4
+ {"current_steps": 40, "total_steps": 636, "loss": 0.5884, "lr": 5e-06, "epoch": 0.18867924528301888, "percentage": 6.29, "elapsed_time": "0:04:02", "remaining_time": "1:00:12"}
5
+ {"current_steps": 50, "total_steps": 636, "loss": 0.5811, "lr": 5e-06, "epoch": 0.2358490566037736, "percentage": 7.86, "elapsed_time": "0:05:01", "remaining_time": "0:58:51"}
6
+ {"current_steps": 60, "total_steps": 636, "loss": 0.5714, "lr": 5e-06, "epoch": 0.2830188679245283, "percentage": 9.43, "elapsed_time": "0:06:00", "remaining_time": "0:57:38"}
7
+ {"current_steps": 70, "total_steps": 636, "loss": 0.5674, "lr": 5e-06, "epoch": 0.330188679245283, "percentage": 11.01, "elapsed_time": "0:06:59", "remaining_time": "0:56:32"}
8
+ {"current_steps": 80, "total_steps": 636, "loss": 0.5611, "lr": 5e-06, "epoch": 0.37735849056603776, "percentage": 12.58, "elapsed_time": "0:07:58", "remaining_time": "0:55:28"}
9
+ {"current_steps": 90, "total_steps": 636, "loss": 0.5615, "lr": 5e-06, "epoch": 0.42452830188679247, "percentage": 14.15, "elapsed_time": "0:08:58", "remaining_time": "0:54:24"}
10
+ {"current_steps": 100, "total_steps": 636, "loss": 0.5549, "lr": 5e-06, "epoch": 0.4716981132075472, "percentage": 15.72, "elapsed_time": "0:09:57", "remaining_time": "0:53:22"}
11
+ {"current_steps": 110, "total_steps": 636, "loss": 0.5486, "lr": 5e-06, "epoch": 0.5188679245283019, "percentage": 17.3, "elapsed_time": "0:10:56", "remaining_time": "0:52:20"}
12
+ {"current_steps": 120, "total_steps": 636, "loss": 0.5481, "lr": 5e-06, "epoch": 0.5660377358490566, "percentage": 18.87, "elapsed_time": "0:11:56", "remaining_time": "0:51:19"}
13
+ {"current_steps": 130, "total_steps": 636, "loss": 0.5514, "lr": 5e-06, "epoch": 0.6132075471698113, "percentage": 20.44, "elapsed_time": "0:12:55", "remaining_time": "0:50:17"}
14
+ {"current_steps": 140, "total_steps": 636, "loss": 0.544, "lr": 5e-06, "epoch": 0.660377358490566, "percentage": 22.01, "elapsed_time": "0:13:54", "remaining_time": "0:49:18"}
15
+ {"current_steps": 150, "total_steps": 636, "loss": 0.5442, "lr": 5e-06, "epoch": 0.7075471698113207, "percentage": 23.58, "elapsed_time": "0:14:54", "remaining_time": "0:48:17"}
16
+ {"current_steps": 160, "total_steps": 636, "loss": 0.5403, "lr": 5e-06, "epoch": 0.7547169811320755, "percentage": 25.16, "elapsed_time": "0:15:53", "remaining_time": "0:47:16"}
17
+ {"current_steps": 170, "total_steps": 636, "loss": 0.5381, "lr": 5e-06, "epoch": 0.8018867924528302, "percentage": 26.73, "elapsed_time": "0:16:53", "remaining_time": "0:46:17"}
18
+ {"current_steps": 180, "total_steps": 636, "loss": 0.5396, "lr": 5e-06, "epoch": 0.8490566037735849, "percentage": 28.3, "elapsed_time": "0:17:52", "remaining_time": "0:45:16"}
19
+ {"current_steps": 190, "total_steps": 636, "loss": 0.5421, "lr": 5e-06, "epoch": 0.8962264150943396, "percentage": 29.87, "elapsed_time": "0:18:51", "remaining_time": "0:44:15"}
20
+ {"current_steps": 200, "total_steps": 636, "loss": 0.54, "lr": 5e-06, "epoch": 0.9433962264150944, "percentage": 31.45, "elapsed_time": "0:19:50", "remaining_time": "0:43:15"}
21
+ {"current_steps": 210, "total_steps": 636, "loss": 0.5354, "lr": 5e-06, "epoch": 0.9905660377358491, "percentage": 33.02, "elapsed_time": "0:20:50", "remaining_time": "0:42:16"}
22
+ {"current_steps": 212, "total_steps": 636, "eval_loss": 0.5338487029075623, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:21:23", "remaining_time": "0:42:46"}
23
+ {"current_steps": 220, "total_steps": 636, "loss": 0.5179, "lr": 5e-06, "epoch": 1.0377358490566038, "percentage": 34.59, "elapsed_time": "0:23:29", "remaining_time": "0:44:25"}
24
+ {"current_steps": 230, "total_steps": 636, "loss": 0.5142, "lr": 5e-06, "epoch": 1.0849056603773586, "percentage": 36.16, "elapsed_time": "0:24:28", "remaining_time": "0:43:12"}
25
+ {"current_steps": 240, "total_steps": 636, "loss": 0.5084, "lr": 5e-06, "epoch": 1.1320754716981132, "percentage": 37.74, "elapsed_time": "0:25:27", "remaining_time": "0:42:00"}
26
+ {"current_steps": 250, "total_steps": 636, "loss": 0.5104, "lr": 5e-06, "epoch": 1.179245283018868, "percentage": 39.31, "elapsed_time": "0:26:26", "remaining_time": "0:40:50"}
27
+ {"current_steps": 260, "total_steps": 636, "loss": 0.5135, "lr": 5e-06, "epoch": 1.2264150943396226, "percentage": 40.88, "elapsed_time": "0:27:26", "remaining_time": "0:39:40"}
28
+ {"current_steps": 270, "total_steps": 636, "loss": 0.5085, "lr": 5e-06, "epoch": 1.2735849056603774, "percentage": 42.45, "elapsed_time": "0:28:25", "remaining_time": "0:38:32"}
29
+ {"current_steps": 280, "total_steps": 636, "loss": 0.51, "lr": 5e-06, "epoch": 1.320754716981132, "percentage": 44.03, "elapsed_time": "0:29:24", "remaining_time": "0:37:23"}
30
+ {"current_steps": 290, "total_steps": 636, "loss": 0.5136, "lr": 5e-06, "epoch": 1.3679245283018868, "percentage": 45.6, "elapsed_time": "0:30:24", "remaining_time": "0:36:16"}
31
+ {"current_steps": 300, "total_steps": 636, "loss": 0.504, "lr": 5e-06, "epoch": 1.4150943396226414, "percentage": 47.17, "elapsed_time": "0:31:24", "remaining_time": "0:35:10"}
32
+ {"current_steps": 310, "total_steps": 636, "loss": 0.5114, "lr": 5e-06, "epoch": 1.4622641509433962, "percentage": 48.74, "elapsed_time": "0:32:24", "remaining_time": "0:34:04"}
33
+ {"current_steps": 320, "total_steps": 636, "loss": 0.5065, "lr": 5e-06, "epoch": 1.509433962264151, "percentage": 50.31, "elapsed_time": "0:33:24", "remaining_time": "0:32:59"}
34
+ {"current_steps": 330, "total_steps": 636, "loss": 0.509, "lr": 5e-06, "epoch": 1.5566037735849056, "percentage": 51.89, "elapsed_time": "0:34:25", "remaining_time": "0:31:55"}
35
+ {"current_steps": 340, "total_steps": 636, "loss": 0.5062, "lr": 5e-06, "epoch": 1.6037735849056602, "percentage": 53.46, "elapsed_time": "0:35:25", "remaining_time": "0:30:50"}
36
+ {"current_steps": 350, "total_steps": 636, "loss": 0.51, "lr": 5e-06, "epoch": 1.650943396226415, "percentage": 55.03, "elapsed_time": "0:36:24", "remaining_time": "0:29:44"}
37
+ {"current_steps": 360, "total_steps": 636, "loss": 0.5059, "lr": 5e-06, "epoch": 1.6981132075471699, "percentage": 56.6, "elapsed_time": "0:37:23", "remaining_time": "0:28:40"}
38
+ {"current_steps": 370, "total_steps": 636, "loss": 0.5039, "lr": 5e-06, "epoch": 1.7452830188679245, "percentage": 58.18, "elapsed_time": "0:38:22", "remaining_time": "0:27:35"}
39
+ {"current_steps": 380, "total_steps": 636, "loss": 0.5081, "lr": 5e-06, "epoch": 1.7924528301886793, "percentage": 59.75, "elapsed_time": "0:39:21", "remaining_time": "0:26:31"}
40
+ {"current_steps": 390, "total_steps": 636, "loss": 0.5074, "lr": 5e-06, "epoch": 1.8396226415094339, "percentage": 61.32, "elapsed_time": "0:40:21", "remaining_time": "0:25:27"}
41
+ {"current_steps": 400, "total_steps": 636, "loss": 0.5084, "lr": 5e-06, "epoch": 1.8867924528301887, "percentage": 62.89, "elapsed_time": "0:41:21", "remaining_time": "0:24:24"}
42
+ {"current_steps": 410, "total_steps": 636, "loss": 0.5069, "lr": 5e-06, "epoch": 1.9339622641509435, "percentage": 64.47, "elapsed_time": "0:42:20", "remaining_time": "0:23:20"}
43
+ {"current_steps": 420, "total_steps": 636, "loss": 0.51, "lr": 5e-06, "epoch": 1.9811320754716981, "percentage": 66.04, "elapsed_time": "0:43:20", "remaining_time": "0:22:17"}
44
+ {"current_steps": 424, "total_steps": 636, "eval_loss": 0.5252274870872498, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "0:44:04", "remaining_time": "0:22:02"}
45
+ {"current_steps": 430, "total_steps": 636, "loss": 0.4878, "lr": 5e-06, "epoch": 2.0283018867924527, "percentage": 67.61, "elapsed_time": "0:45:54", "remaining_time": "0:21:59"}
46
+ {"current_steps": 440, "total_steps": 636, "loss": 0.4771, "lr": 5e-06, "epoch": 2.0754716981132075, "percentage": 69.18, "elapsed_time": "0:46:53", "remaining_time": "0:20:53"}
47
+ {"current_steps": 450, "total_steps": 636, "loss": 0.4753, "lr": 5e-06, "epoch": 2.1226415094339623, "percentage": 70.75, "elapsed_time": "0:47:52", "remaining_time": "0:19:47"}
48
+ {"current_steps": 460, "total_steps": 636, "loss": 0.4765, "lr": 5e-06, "epoch": 2.169811320754717, "percentage": 72.33, "elapsed_time": "0:48:52", "remaining_time": "0:18:41"}
49
+ {"current_steps": 470, "total_steps": 636, "loss": 0.4804, "lr": 5e-06, "epoch": 2.2169811320754715, "percentage": 73.9, "elapsed_time": "0:49:51", "remaining_time": "0:17:36"}
50
+ {"current_steps": 480, "total_steps": 636, "loss": 0.4784, "lr": 5e-06, "epoch": 2.2641509433962264, "percentage": 75.47, "elapsed_time": "0:50:50", "remaining_time": "0:16:31"}
51
+ {"current_steps": 490, "total_steps": 636, "loss": 0.4809, "lr": 5e-06, "epoch": 2.311320754716981, "percentage": 77.04, "elapsed_time": "0:51:49", "remaining_time": "0:15:26"}
52
+ {"current_steps": 500, "total_steps": 636, "loss": 0.4826, "lr": 5e-06, "epoch": 2.358490566037736, "percentage": 78.62, "elapsed_time": "0:52:48", "remaining_time": "0:14:21"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac0cda98f0faf084335dcbc5796efa073982fa8258e3c6fbd5a730ea0a908e0d
3
  size 7288
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f06f40c242d3017d497ab54d78362b0bd7fdd9fef3fa0cc4133c54b92e3d174
3
  size 7288