gsmyrnis commited on
Commit
bcb6e2c
·
verified ·
1 Parent(s): e58d992

Training in progress, epoch 0

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ab7eeaba30e27e987825f250fbc985ff8d1a74f5f37f181c0b2498f4fe8e822
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c86c0ec2727674130120ed7e92dec1433c00e11824d1e4839b85f043001e364
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bdc2772d053f6eedcfb63c778f181f9a142b157cf3e5e0061eeb81a6a87390e3
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d72d2847577cc4910439175e19910ca6ed26b2263bfd010c3d7a2aadb26aec18
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:863abe32ba6bfe786655c14e0d74e4cd0ba8b8022b01977b0ae9179a477d3823
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f882cbc77c3791b5ca83d7233fbe401b10bb4d3749212ba77ffd1e316b71741
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e930e450e5f46041da6f42431f442139660eb72ba3f894592c553563e4df85df
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a1a1f4c55b1105c59875cae5c9026276d4e0eef0ac84b17ef7d3c4189c2751f
3
  size 1089994880
trainer_log.jsonl CHANGED
@@ -1,109 +1,70 @@
1
- {"current_steps": 1, "total_steps": 108, "loss": 0.6855, "lr": 9.090909090909091e-07, "epoch": 0.027777777777777776, "percentage": 0.93, "elapsed_time": "0:00:04", "remaining_time": "0:08:06"}
2
- {"current_steps": 2, "total_steps": 108, "loss": 0.6554, "lr": 1.8181818181818183e-06, "epoch": 0.05555555555555555, "percentage": 1.85, "elapsed_time": "0:00:06", "remaining_time": "0:05:28"}
3
- {"current_steps": 3, "total_steps": 108, "loss": 0.631, "lr": 2.7272727272727272e-06, "epoch": 0.08333333333333333, "percentage": 2.78, "elapsed_time": "0:00:07", "remaining_time": "0:04:32"}
4
- {"current_steps": 4, "total_steps": 108, "loss": 0.6684, "lr": 3.6363636363636366e-06, "epoch": 0.1111111111111111, "percentage": 3.7, "elapsed_time": "0:00:09", "remaining_time": "0:03:57"}
5
- {"current_steps": 5, "total_steps": 108, "loss": 0.551, "lr": 4.5454545454545455e-06, "epoch": 0.1388888888888889, "percentage": 4.63, "elapsed_time": "0:00:10", "remaining_time": "0:03:37"}
6
- {"current_steps": 6, "total_steps": 108, "loss": 0.5989, "lr": 5.4545454545454545e-06, "epoch": 0.16666666666666666, "percentage": 5.56, "elapsed_time": "0:00:11", "remaining_time": "0:03:22"}
7
- {"current_steps": 7, "total_steps": 108, "loss": 0.5333, "lr": 6.363636363636364e-06, "epoch": 0.19444444444444445, "percentage": 6.48, "elapsed_time": "0:00:13", "remaining_time": "0:03:19"}
8
- {"current_steps": 8, "total_steps": 108, "loss": 0.5054, "lr": 7.272727272727273e-06, "epoch": 0.2222222222222222, "percentage": 7.41, "elapsed_time": "0:00:15", "remaining_time": "0:03:09"}
9
- {"current_steps": 9, "total_steps": 108, "loss": 0.5322, "lr": 8.181818181818183e-06, "epoch": 0.25, "percentage": 8.33, "elapsed_time": "0:00:16", "remaining_time": "0:03:01"}
10
- {"current_steps": 10, "total_steps": 108, "loss": 0.5433, "lr": 9.090909090909091e-06, "epoch": 0.2777777777777778, "percentage": 9.26, "elapsed_time": "0:00:17", "remaining_time": "0:02:54"}
11
- {"current_steps": 11, "total_steps": 108, "loss": 0.5709, "lr": 1e-05, "epoch": 0.3055555555555556, "percentage": 10.19, "elapsed_time": "0:00:19", "remaining_time": "0:02:48"}
12
- {"current_steps": 12, "total_steps": 108, "loss": 0.4906, "lr": 9.997377845227577e-06, "epoch": 0.3333333333333333, "percentage": 11.11, "elapsed_time": "0:00:20", "remaining_time": "0:02:44"}
13
- {"current_steps": 13, "total_steps": 108, "loss": 0.6254, "lr": 9.98951413118856e-06, "epoch": 0.3611111111111111, "percentage": 12.04, "elapsed_time": "0:00:21", "remaining_time": "0:02:39"}
14
- {"current_steps": 14, "total_steps": 108, "loss": 0.5566, "lr": 9.97641710583307e-06, "epoch": 0.3888888888888889, "percentage": 12.96, "elapsed_time": "0:00:23", "remaining_time": "0:02:35"}
15
- {"current_steps": 15, "total_steps": 108, "loss": 0.5086, "lr": 9.958100506132127e-06, "epoch": 0.4166666666666667, "percentage": 13.89, "elapsed_time": "0:00:24", "remaining_time": "0:02:34"}
16
- {"current_steps": 16, "total_steps": 108, "loss": 0.5798, "lr": 9.934583543669454e-06, "epoch": 0.4444444444444444, "percentage": 14.81, "elapsed_time": "0:00:26", "remaining_time": "0:02:30"}
17
- {"current_steps": 17, "total_steps": 108, "loss": 0.547, "lr": 9.905890884491196e-06, "epoch": 0.4722222222222222, "percentage": 15.74, "elapsed_time": "0:00:27", "remaining_time": "0:02:27"}
18
- {"current_steps": 18, "total_steps": 108, "loss": 0.4698, "lr": 9.872052623234632e-06, "epoch": 0.5, "percentage": 16.67, "elapsed_time": "0:00:28", "remaining_time": "0:02:24"}
19
- {"current_steps": 19, "total_steps": 108, "loss": 0.6414, "lr": 9.833104251563058e-06, "epoch": 0.5277777777777778, "percentage": 17.59, "elapsed_time": "0:00:30", "remaining_time": "0:02:21"}
20
- {"current_steps": 20, "total_steps": 108, "loss": 0.4365, "lr": 9.789086620939936e-06, "epoch": 0.5555555555555556, "percentage": 18.52, "elapsed_time": "0:00:31", "remaining_time": "0:02:18"}
21
- {"current_steps": 21, "total_steps": 108, "loss": 0.4913, "lr": 9.740045899781353e-06, "epoch": 0.5833333333333334, "percentage": 19.44, "elapsed_time": "0:00:33", "remaining_time": "0:02:20"}
22
- {"current_steps": 22, "total_steps": 108, "loss": 0.5518, "lr": 9.68603352503172e-06, "epoch": 0.6111111111111112, "percentage": 20.37, "elapsed_time": "0:00:35", "remaining_time": "0:02:17"}
23
- {"current_steps": 23, "total_steps": 108, "loss": 0.5354, "lr": 9.627106148213521e-06, "epoch": 0.6388888888888888, "percentage": 21.3, "elapsed_time": "0:00:36", "remaining_time": "0:02:15"}
24
- {"current_steps": 24, "total_steps": 108, "loss": 0.5213, "lr": 9.563325576007702e-06, "epoch": 0.6666666666666666, "percentage": 22.22, "elapsed_time": "0:00:37", "remaining_time": "0:02:12"}
25
- {"current_steps": 25, "total_steps": 108, "loss": 0.598, "lr": 9.494758705426978e-06, "epoch": 0.6944444444444444, "percentage": 23.15, "elapsed_time": "0:00:39", "remaining_time": "0:02:10"}
26
- {"current_steps": 26, "total_steps": 108, "loss": 0.4658, "lr": 9.421477453650118e-06, "epoch": 0.7222222222222222, "percentage": 24.07, "elapsed_time": "0:00:40", "remaining_time": "0:02:07"}
27
- {"current_steps": 27, "total_steps": 108, "loss": 0.6228, "lr": 9.343558682590757e-06, "epoch": 0.75, "percentage": 25.0, "elapsed_time": "0:00:44", "remaining_time": "0:02:13"}
28
- {"current_steps": 28, "total_steps": 108, "loss": 0.5333, "lr": 9.261084118279846e-06, "epoch": 0.7777777777777778, "percentage": 25.93, "elapsed_time": "0:00:45", "remaining_time": "0:02:10"}
29
- {"current_steps": 29, "total_steps": 108, "loss": 0.4741, "lr": 9.174140265146356e-06, "epoch": 0.8055555555555556, "percentage": 26.85, "elapsed_time": "0:00:47", "remaining_time": "0:02:08"}
30
- {"current_steps": 30, "total_steps": 108, "loss": 0.5714, "lr": 9.082818315286054e-06, "epoch": 0.8333333333333334, "percentage": 27.78, "elapsed_time": "0:00:55", "remaining_time": "0:02:23"}
31
- {"current_steps": 31, "total_steps": 108, "loss": 0.5379, "lr": 8.987214052813605e-06, "epoch": 0.8611111111111112, "percentage": 28.7, "elapsed_time": "0:00:56", "remaining_time": "0:02:20"}
32
- {"current_steps": 32, "total_steps": 108, "loss": 0.5221, "lr": 8.887427753398249e-06, "epoch": 0.8888888888888888, "percentage": 29.63, "elapsed_time": "0:00:58", "remaining_time": "0:02:18"}
33
- {"current_steps": 33, "total_steps": 108, "loss": 0.5237, "lr": 8.783564079088478e-06, "epoch": 0.9166666666666666, "percentage": 30.56, "elapsed_time": "0:00:59", "remaining_time": "0:02:15"}
34
- {"current_steps": 34, "total_steps": 108, "loss": 0.5525, "lr": 8.675731968536004e-06, "epoch": 0.9444444444444444, "percentage": 31.48, "elapsed_time": "0:01:00", "remaining_time": "0:02:12"}
35
- {"current_steps": 35, "total_steps": 108, "loss": 0.4881, "lr": 8.564044522734147e-06, "epoch": 0.9722222222222222, "percentage": 32.41, "elapsed_time": "0:01:02", "remaining_time": "0:02:09"}
36
- {"current_steps": 36, "total_steps": 108, "loss": 0.4985, "lr": 8.448618886390523e-06, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:01:03", "remaining_time": "0:02:06"}
37
- {"current_steps": 37, "total_steps": 108, "loss": 0.4365, "lr": 8.329576125058406e-06, "epoch": 1.0277777777777777, "percentage": 34.26, "elapsed_time": "0:02:04", "remaining_time": "0:03:58"}
38
- {"current_steps": 38, "total_steps": 108, "loss": 0.5556, "lr": 8.207041098155701e-06, "epoch": 1.0555555555555556, "percentage": 35.19, "elapsed_time": "0:02:12", "remaining_time": "0:04:04"}
39
- {"current_steps": 39, "total_steps": 108, "loss": 0.4846, "lr": 8.081142328004638e-06, "epoch": 1.0833333333333333, "percentage": 36.11, "elapsed_time": "0:02:13", "remaining_time": "0:03:56"}
40
- {"current_steps": 40, "total_steps": 108, "loss": 0.4603, "lr": 7.952011865029614e-06, "epoch": 1.1111111111111112, "percentage": 37.04, "elapsed_time": "0:02:15", "remaining_time": "0:03:49"}
41
- {"current_steps": 41, "total_steps": 108, "loss": 0.4483, "lr": 7.819785149254534e-06, "epoch": 1.1388888888888888, "percentage": 37.96, "elapsed_time": "0:02:19", "remaining_time": "0:03:47"}
42
- {"current_steps": 42, "total_steps": 108, "loss": 0.5091, "lr": 7.68460086824492e-06, "epoch": 1.1666666666666667, "percentage": 38.89, "elapsed_time": "0:02:20", "remaining_time": "0:03:40"}
43
- {"current_steps": 43, "total_steps": 108, "loss": 0.4768, "lr": 7.546600811643816e-06, "epoch": 1.1944444444444444, "percentage": 39.81, "elapsed_time": "0:02:21", "remaining_time": "0:03:34"}
44
- {"current_steps": 44, "total_steps": 108, "loss": 0.4879, "lr": 7.405929722454026e-06, "epoch": 1.2222222222222223, "percentage": 40.74, "elapsed_time": "0:02:23", "remaining_time": "0:03:28"}
45
- {"current_steps": 45, "total_steps": 108, "loss": 0.4424, "lr": 7.262735145222696e-06, "epoch": 1.25, "percentage": 41.67, "elapsed_time": "0:02:25", "remaining_time": "0:03:23"}
46
- {"current_steps": 46, "total_steps": 108, "loss": 0.4268, "lr": 7.117167271287453e-06, "epoch": 1.2777777777777777, "percentage": 42.59, "elapsed_time": "0:02:26", "remaining_time": "0:03:17"}
47
- {"current_steps": 47, "total_steps": 108, "loss": 0.3586, "lr": 6.969378781246436e-06, "epoch": 1.3055555555555556, "percentage": 43.52, "elapsed_time": "0:02:27", "remaining_time": "0:03:11"}
48
- {"current_steps": 48, "total_steps": 108, "loss": 0.4861, "lr": 6.819524684817439e-06, "epoch": 1.3333333333333333, "percentage": 44.44, "elapsed_time": "0:02:29", "remaining_time": "0:03:06"}
49
- {"current_steps": 49, "total_steps": 108, "loss": 0.4543, "lr": 6.667762158254104e-06, "epoch": 1.3611111111111112, "percentage": 45.37, "elapsed_time": "0:02:30", "remaining_time": "0:03:00"}
50
- {"current_steps": 50, "total_steps": 108, "loss": 0.4277, "lr": 6.514250379489754e-06, "epoch": 1.3888888888888888, "percentage": 46.3, "elapsed_time": "0:02:31", "remaining_time": "0:02:56"}
51
- {"current_steps": 51, "total_steps": 108, "loss": 0.4245, "lr": 6.3591503611817155e-06, "epoch": 1.4166666666666667, "percentage": 47.22, "elapsed_time": "0:02:33", "remaining_time": "0:02:51"}
52
- {"current_steps": 52, "total_steps": 108, "loss": 0.4865, "lr": 6.202624781831269e-06, "epoch": 1.4444444444444444, "percentage": 48.15, "elapsed_time": "0:02:34", "remaining_time": "0:02:46"}
53
- {"current_steps": 53, "total_steps": 108, "loss": 0.467, "lr": 6.044837815156377e-06, "epoch": 1.4722222222222223, "percentage": 49.07, "elapsed_time": "0:02:35", "remaining_time": "0:02:41"}
54
- {"current_steps": 54, "total_steps": 108, "loss": 0.4313, "lr": 5.885954957896115e-06, "epoch": 1.5, "percentage": 50.0, "elapsed_time": "0:02:37", "remaining_time": "0:02:37"}
55
- {"current_steps": 55, "total_steps": 108, "loss": 0.4161, "lr": 5.726142856227453e-06, "epoch": 1.5277777777777777, "percentage": 50.93, "elapsed_time": "0:02:38", "remaining_time": "0:02:32"}
56
- {"current_steps": 56, "total_steps": 108, "loss": 0.4562, "lr": 5.5655691309764225e-06, "epoch": 1.5555555555555556, "percentage": 51.85, "elapsed_time": "0:02:39", "remaining_time": "0:02:28"}
57
- {"current_steps": 57, "total_steps": 108, "loss": 0.4636, "lr": 5.404402201807022e-06, "epoch": 1.5833333333333335, "percentage": 52.78, "elapsed_time": "0:02:41", "remaining_time": "0:02:24"}
58
- {"current_steps": 58, "total_steps": 108, "loss": 0.3946, "lr": 5.242811110572243e-06, "epoch": 1.6111111111111112, "percentage": 53.7, "elapsed_time": "0:02:42", "remaining_time": "0:02:20"}
59
- {"current_steps": 59, "total_steps": 108, "loss": 0.4234, "lr": 5.080965344012509e-06, "epoch": 1.6388888888888888, "percentage": 54.63, "elapsed_time": "0:02:43", "remaining_time": "0:02:16"}
60
- {"current_steps": 60, "total_steps": 108, "loss": 0.4956, "lr": 4.919034655987493e-06, "epoch": 1.6666666666666665, "percentage": 55.56, "elapsed_time": "0:02:45", "remaining_time": "0:02:12"}
61
- {"current_steps": 61, "total_steps": 108, "loss": 0.4513, "lr": 4.757188889427761e-06, "epoch": 1.6944444444444444, "percentage": 56.48, "elapsed_time": "0:02:46", "remaining_time": "0:02:08"}
62
- {"current_steps": 62, "total_steps": 108, "loss": 0.3554, "lr": 4.59559779819298e-06, "epoch": 1.7222222222222223, "percentage": 57.41, "elapsed_time": "0:02:47", "remaining_time": "0:02:04"}
63
- {"current_steps": 63, "total_steps": 108, "loss": 0.423, "lr": 4.434430869023579e-06, "epoch": 1.75, "percentage": 58.33, "elapsed_time": "0:02:49", "remaining_time": "0:02:00"}
64
- {"current_steps": 64, "total_steps": 108, "loss": 0.3804, "lr": 4.27385714377255e-06, "epoch": 1.7777777777777777, "percentage": 59.26, "elapsed_time": "0:02:50", "remaining_time": "0:01:57"}
65
- {"current_steps": 65, "total_steps": 108, "loss": 0.46, "lr": 4.1140450421038865e-06, "epoch": 1.8055555555555556, "percentage": 60.19, "elapsed_time": "0:02:51", "remaining_time": "0:01:53"}
66
- {"current_steps": 66, "total_steps": 108, "loss": 0.5136, "lr": 3.955162184843625e-06, "epoch": 1.8333333333333335, "percentage": 61.11, "elapsed_time": "0:02:53", "remaining_time": "0:01:50"}
67
- {"current_steps": 67, "total_steps": 108, "loss": 0.4108, "lr": 3.7973752181687336e-06, "epoch": 1.8611111111111112, "percentage": 62.04, "elapsed_time": "0:02:54", "remaining_time": "0:01:46"}
68
- {"current_steps": 68, "total_steps": 108, "loss": 0.5141, "lr": 3.6408496388182857e-06, "epoch": 1.8888888888888888, "percentage": 62.96, "elapsed_time": "0:02:55", "remaining_time": "0:01:43"}
69
- {"current_steps": 69, "total_steps": 108, "loss": 0.4591, "lr": 3.4857496205102475e-06, "epoch": 1.9166666666666665, "percentage": 63.89, "elapsed_time": "0:02:57", "remaining_time": "0:01:40"}
70
- {"current_steps": 70, "total_steps": 108, "loss": 0.4447, "lr": 3.3322378417458985e-06, "epoch": 1.9444444444444444, "percentage": 64.81, "elapsed_time": "0:02:59", "remaining_time": "0:01:37"}
71
- {"current_steps": 71, "total_steps": 108, "loss": 0.3775, "lr": 3.180475315182563e-06, "epoch": 1.9722222222222223, "percentage": 65.74, "elapsed_time": "0:03:01", "remaining_time": "0:01:34"}
72
- {"current_steps": 72, "total_steps": 108, "loss": 0.362, "lr": 3.0306212187535653e-06, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "0:03:02", "remaining_time": "0:01:31"}
73
- {"current_steps": 73, "total_steps": 108, "loss": 0.4395, "lr": 2.882832728712551e-06, "epoch": 2.0277777777777777, "percentage": 67.59, "elapsed_time": "0:04:02", "remaining_time": "0:01:56"}
74
- {"current_steps": 74, "total_steps": 108, "loss": 0.385, "lr": 2.7372648547773063e-06, "epoch": 2.0555555555555554, "percentage": 68.52, "elapsed_time": "0:04:03", "remaining_time": "0:01:52"}
75
- {"current_steps": 75, "total_steps": 108, "loss": 0.3634, "lr": 2.594070277545975e-06, "epoch": 2.0833333333333335, "percentage": 69.44, "elapsed_time": "0:04:06", "remaining_time": "0:01:48"}
76
- {"current_steps": 76, "total_steps": 108, "loss": 0.3352, "lr": 2.4533991883561868e-06, "epoch": 2.111111111111111, "percentage": 70.37, "elapsed_time": "0:04:07", "remaining_time": "0:01:44"}
77
- {"current_steps": 77, "total_steps": 108, "loss": 0.3654, "lr": 2.315399131755081e-06, "epoch": 2.138888888888889, "percentage": 71.3, "elapsed_time": "0:04:08", "remaining_time": "0:01:40"}
78
- {"current_steps": 78, "total_steps": 108, "loss": 0.3648, "lr": 2.1802148507454675e-06, "epoch": 2.1666666666666665, "percentage": 72.22, "elapsed_time": "0:04:10", "remaining_time": "0:01:36"}
79
- {"current_steps": 79, "total_steps": 108, "loss": 0.3567, "lr": 2.0479881349703885e-06, "epoch": 2.1944444444444446, "percentage": 73.15, "elapsed_time": "0:04:11", "remaining_time": "0:01:32"}
80
- {"current_steps": 80, "total_steps": 108, "loss": 0.5043, "lr": 1.9188576719953635e-06, "epoch": 2.2222222222222223, "percentage": 74.07, "elapsed_time": "0:04:19", "remaining_time": "0:01:30"}
81
- {"current_steps": 81, "total_steps": 108, "loss": 0.3835, "lr": 1.7929589018443016e-06, "epoch": 2.25, "percentage": 75.0, "elapsed_time": "0:04:21", "remaining_time": "0:01:27"}
82
- {"current_steps": 82, "total_steps": 108, "loss": 0.4195, "lr": 1.6704238749415958e-06, "epoch": 2.2777777777777777, "percentage": 75.93, "elapsed_time": "0:04:22", "remaining_time": "0:01:23"}
83
- {"current_steps": 83, "total_steps": 108, "loss": 0.3808, "lr": 1.5513811136094786e-06, "epoch": 2.3055555555555554, "percentage": 76.85, "elapsed_time": "0:04:23", "remaining_time": "0:01:19"}
84
- {"current_steps": 84, "total_steps": 108, "loss": 0.3484, "lr": 1.4359554772658551e-06, "epoch": 2.3333333333333335, "percentage": 77.78, "elapsed_time": "0:04:25", "remaining_time": "0:01:15"}
85
- {"current_steps": 85, "total_steps": 108, "loss": 0.3442, "lr": 1.3242680314639995e-06, "epoch": 2.361111111111111, "percentage": 78.7, "elapsed_time": "0:04:27", "remaining_time": "0:01:12"}
86
- {"current_steps": 86, "total_steps": 108, "loss": 0.3701, "lr": 1.2164359209115235e-06, "epoch": 2.388888888888889, "percentage": 79.63, "elapsed_time": "0:04:31", "remaining_time": "0:01:09"}
87
- {"current_steps": 87, "total_steps": 108, "loss": 0.4531, "lr": 1.1125722466017547e-06, "epoch": 2.4166666666666665, "percentage": 80.56, "elapsed_time": "0:04:32", "remaining_time": "0:01:05"}
88
- {"current_steps": 88, "total_steps": 108, "loss": 0.3512, "lr": 1.012785947186397e-06, "epoch": 2.4444444444444446, "percentage": 81.48, "elapsed_time": "0:04:33", "remaining_time": "0:01:02"}
89
- {"current_steps": 89, "total_steps": 108, "loss": 0.3224, "lr": 9.171816847139447e-07, "epoch": 2.4722222222222223, "percentage": 82.41, "elapsed_time": "0:04:35", "remaining_time": "0:00:58"}
90
- {"current_steps": 90, "total_steps": 108, "loss": 0.3708, "lr": 8.258597348536452e-07, "epoch": 2.5, "percentage": 83.33, "elapsed_time": "0:04:36", "remaining_time": "0:00:55"}
91
- {"current_steps": 91, "total_steps": 108, "loss": 0.4313, "lr": 7.389158817201541e-07, "epoch": 2.5277777777777777, "percentage": 84.26, "elapsed_time": "0:04:37", "remaining_time": "0:00:51"}
92
- {"current_steps": 92, "total_steps": 108, "loss": 0.3964, "lr": 6.564413174092443e-07, "epoch": 2.5555555555555554, "percentage": 85.19, "elapsed_time": "0:04:39", "remaining_time": "0:00:48"}
93
- {"current_steps": 93, "total_steps": 108, "loss": 0.3827, "lr": 5.785225463498828e-07, "epoch": 2.5833333333333335, "percentage": 86.11, "elapsed_time": "0:04:40", "remaining_time": "0:00:45"}
94
- {"current_steps": 94, "total_steps": 108, "loss": 0.4383, "lr": 5.05241294573024e-07, "epoch": 2.611111111111111, "percentage": 87.04, "elapsed_time": "0:04:41", "remaining_time": "0:00:41"}
95
- {"current_steps": 95, "total_steps": 108, "loss": 0.4371, "lr": 4.3667442399229985e-07, "epoch": 2.638888888888889, "percentage": 87.96, "elapsed_time": "0:04:43", "remaining_time": "0:00:38"}
96
- {"current_steps": 96, "total_steps": 108, "loss": 0.3829, "lr": 3.728938517864794e-07, "epoch": 2.6666666666666665, "percentage": 88.89, "elapsed_time": "0:04:44", "remaining_time": "0:00:35"}
97
- {"current_steps": 97, "total_steps": 108, "loss": 0.3965, "lr": 3.1396647496828245e-07, "epoch": 2.6944444444444446, "percentage": 89.81, "elapsed_time": "0:04:45", "remaining_time": "0:00:32"}
98
- {"current_steps": 98, "total_steps": 108, "loss": 0.3122, "lr": 2.599541002186479e-07, "epoch": 2.7222222222222223, "percentage": 90.74, "elapsed_time": "0:04:47", "remaining_time": "0:00:29"}
99
- {"current_steps": 99, "total_steps": 108, "loss": 0.4733, "lr": 2.109133790600648e-07, "epoch": 2.75, "percentage": 91.67, "elapsed_time": "0:04:48", "remaining_time": "0:00:26"}
100
- {"current_steps": 100, "total_steps": 108, "loss": 0.4024, "lr": 1.6689574843694433e-07, "epoch": 2.7777777777777777, "percentage": 92.59, "elapsed_time": "0:04:50", "remaining_time": "0:00:23"}
101
- {"current_steps": 101, "total_steps": 108, "loss": 0.4336, "lr": 1.2794737676536993e-07, "epoch": 2.8055555555555554, "percentage": 93.52, "elapsed_time": "0:04:51", "remaining_time": "0:00:20"}
102
- {"current_steps": 102, "total_steps": 108, "loss": 0.3595, "lr": 9.410911550880474e-08, "epoch": 2.8333333333333335, "percentage": 94.44, "elapsed_time": "0:04:52", "remaining_time": "0:00:17"}
103
- {"current_steps": 103, "total_steps": 108, "loss": 0.4163, "lr": 6.54164563305465e-08, "epoch": 2.861111111111111, "percentage": 95.37, "elapsed_time": "0:04:54", "remaining_time": "0:00:14"}
104
- {"current_steps": 104, "total_steps": 108, "loss": 0.4264, "lr": 4.189949386787462e-08, "epoch": 2.888888888888889, "percentage": 96.3, "elapsed_time": "0:04:55", "remaining_time": "0:00:11"}
105
- {"current_steps": 105, "total_steps": 108, "loss": 0.4073, "lr": 2.358289416693027e-08, "epoch": 2.9166666666666665, "percentage": 97.22, "elapsed_time": "0:04:56", "remaining_time": "0:00:08"}
106
- {"current_steps": 106, "total_steps": 108, "loss": 0.4084, "lr": 1.0485868811441757e-08, "epoch": 2.9444444444444446, "percentage": 98.15, "elapsed_time": "0:04:58", "remaining_time": "0:00:05"}
107
- {"current_steps": 107, "total_steps": 108, "loss": 0.3967, "lr": 2.6221547724253337e-09, "epoch": 2.9722222222222223, "percentage": 99.07, "elapsed_time": "0:04:59", "remaining_time": "0:00:02"}
108
- {"current_steps": 108, "total_steps": 108, "loss": 0.3847, "lr": 0.0, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:05:01", "remaining_time": "0:00:00"}
109
- {"current_steps": 108, "total_steps": 108, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:07:13", "remaining_time": "0:00:00"}
 
1
+ {"current_steps": 1, "total_steps": 105, "loss": 0.6948, "lr": 9.090909090909091e-07, "epoch": 0.028037383177570093, "percentage": 0.95, "elapsed_time": "0:00:04", "remaining_time": "0:08:29"}
2
+ {"current_steps": 2, "total_steps": 105, "loss": 0.659, "lr": 1.8181818181818183e-06, "epoch": 0.056074766355140186, "percentage": 1.9, "elapsed_time": "0:00:08", "remaining_time": "0:07:10"}
3
+ {"current_steps": 3, "total_steps": 105, "loss": 0.6481, "lr": 2.7272727272727272e-06, "epoch": 0.08411214953271028, "percentage": 2.86, "elapsed_time": "0:00:11", "remaining_time": "0:06:39"}
4
+ {"current_steps": 4, "total_steps": 105, "loss": 0.6641, "lr": 3.6363636363636366e-06, "epoch": 0.11214953271028037, "percentage": 3.81, "elapsed_time": "0:00:15", "remaining_time": "0:06:24"}
5
+ {"current_steps": 5, "total_steps": 105, "loss": 0.5579, "lr": 4.5454545454545455e-06, "epoch": 0.14018691588785046, "percentage": 4.76, "elapsed_time": "0:00:18", "remaining_time": "0:06:13"}
6
+ {"current_steps": 6, "total_steps": 105, "loss": 0.6036, "lr": 5.4545454545454545e-06, "epoch": 0.16822429906542055, "percentage": 5.71, "elapsed_time": "0:00:22", "remaining_time": "0:06:04"}
7
+ {"current_steps": 7, "total_steps": 105, "loss": 0.5288, "lr": 6.363636363636364e-06, "epoch": 0.19626168224299065, "percentage": 6.67, "elapsed_time": "0:00:25", "remaining_time": "0:05:58"}
8
+ {"current_steps": 8, "total_steps": 105, "loss": 0.5054, "lr": 7.272727272727273e-06, "epoch": 0.22429906542056074, "percentage": 7.62, "elapsed_time": "0:00:29", "remaining_time": "0:05:52"}
9
+ {"current_steps": 9, "total_steps": 105, "loss": 0.5339, "lr": 8.181818181818183e-06, "epoch": 0.2523364485981308, "percentage": 8.57, "elapsed_time": "0:00:32", "remaining_time": "0:05:46"}
10
+ {"current_steps": 10, "total_steps": 105, "loss": 0.5433, "lr": 9.090909090909091e-06, "epoch": 0.2803738317757009, "percentage": 9.52, "elapsed_time": "0:00:35", "remaining_time": "0:05:41"}
11
+ {"current_steps": 11, "total_steps": 105, "loss": 0.5537, "lr": 1e-05, "epoch": 0.308411214953271, "percentage": 10.48, "elapsed_time": "0:00:39", "remaining_time": "0:05:36"}
12
+ {"current_steps": 12, "total_steps": 105, "loss": 0.498, "lr": 9.997207818651273e-06, "epoch": 0.3364485981308411, "percentage": 11.43, "elapsed_time": "0:00:42", "remaining_time": "0:05:31"}
13
+ {"current_steps": 13, "total_steps": 105, "loss": 0.6412, "lr": 9.988834393115768e-06, "epoch": 0.3644859813084112, "percentage": 12.38, "elapsed_time": "0:00:46", "remaining_time": "0:05:26"}
14
+ {"current_steps": 14, "total_steps": 105, "loss": 0.5796, "lr": 9.97488907544252e-06, "epoch": 0.3925233644859813, "percentage": 13.33, "elapsed_time": "0:00:49", "remaining_time": "0:05:21"}
15
+ {"current_steps": 15, "total_steps": 105, "loss": 0.5019, "lr": 9.955387440773902e-06, "epoch": 0.4205607476635514, "percentage": 14.29, "elapsed_time": "0:00:52", "remaining_time": "0:05:17"}
16
+ {"current_steps": 16, "total_steps": 105, "loss": 0.5982, "lr": 9.930351269950144e-06, "epoch": 0.4485981308411215, "percentage": 15.24, "elapsed_time": "0:00:56", "remaining_time": "0:05:13"}
17
+ {"current_steps": 17, "total_steps": 105, "loss": 0.5377, "lr": 9.899808525182935e-06, "epoch": 0.4766355140186916, "percentage": 16.19, "elapsed_time": "0:00:59", "remaining_time": "0:05:09"}
18
+ {"current_steps": 18, "total_steps": 105, "loss": 0.4776, "lr": 9.863793318825186e-06, "epoch": 0.5046728971962616, "percentage": 17.14, "elapsed_time": "0:01:03", "remaining_time": "0:05:05"}
19
+ {"current_steps": 19, "total_steps": 105, "loss": 0.648, "lr": 9.822345875271884e-06, "epoch": 0.5327102803738317, "percentage": 18.1, "elapsed_time": "0:01:06", "remaining_time": "0:05:01"}
20
+ {"current_steps": 20, "total_steps": 105, "loss": 0.4544, "lr": 9.775512486034564e-06, "epoch": 0.5607476635514018, "percentage": 19.05, "elapsed_time": "0:01:09", "remaining_time": "0:04:57"}
21
+ {"current_steps": 21, "total_steps": 105, "loss": 0.4936, "lr": 9.723345458039595e-06, "epoch": 0.5887850467289719, "percentage": 20.0, "elapsed_time": "0:01:13", "remaining_time": "0:04:54"}
22
+ {"current_steps": 22, "total_steps": 105, "loss": 0.5819, "lr": 9.665903055208013e-06, "epoch": 0.616822429906542, "percentage": 20.95, "elapsed_time": "0:01:16", "remaining_time": "0:04:50"}
23
+ {"current_steps": 23, "total_steps": 105, "loss": 0.5506, "lr": 9.603249433382145e-06, "epoch": 0.6448598130841121, "percentage": 21.9, "elapsed_time": "0:01:20", "remaining_time": "0:04:46"}
24
+ {"current_steps": 24, "total_steps": 105, "loss": 0.4932, "lr": 9.535454568671705e-06, "epoch": 0.6728971962616822, "percentage": 22.86, "elapsed_time": "0:01:23", "remaining_time": "0:04:42"}
25
+ {"current_steps": 25, "total_steps": 105, "loss": 0.5776, "lr": 9.462594179299408e-06, "epoch": 0.7009345794392523, "percentage": 23.81, "elapsed_time": "0:01:27", "remaining_time": "0:04:38"}
26
+ {"current_steps": 26, "total_steps": 105, "loss": 0.4665, "lr": 9.384749641033358e-06, "epoch": 0.7289719626168224, "percentage": 24.76, "elapsed_time": "0:01:30", "remaining_time": "0:04:34"}
27
+ {"current_steps": 27, "total_steps": 105, "loss": 0.6547, "lr": 9.302007896300697e-06, "epoch": 0.7570093457943925, "percentage": 25.71, "elapsed_time": "0:01:34", "remaining_time": "0:04:32"}
28
+ {"current_steps": 28, "total_steps": 105, "loss": 0.5403, "lr": 9.214461357083986e-06, "epoch": 0.7850467289719626, "percentage": 26.67, "elapsed_time": "0:01:37", "remaining_time": "0:04:29"}
29
+ {"current_steps": 29, "total_steps": 105, "loss": 0.4792, "lr": 9.122207801708802e-06, "epoch": 0.8130841121495327, "percentage": 27.62, "elapsed_time": "0:01:41", "remaining_time": "0:04:25"}
30
+ {"current_steps": 30, "total_steps": 105, "loss": 0.5767, "lr": 9.025350265637816e-06, "epoch": 0.8411214953271028, "percentage": 28.57, "elapsed_time": "0:01:46", "remaining_time": "0:04:25"}
31
+ {"current_steps": 31, "total_steps": 105, "loss": 0.5339, "lr": 8.923996926393306e-06, "epoch": 0.8691588785046729, "percentage": 29.52, "elapsed_time": "0:01:49", "remaining_time": "0:04:21"}
32
+ {"current_steps": 32, "total_steps": 105, "loss": 0.5102, "lr": 8.818260982736662e-06, "epoch": 0.897196261682243, "percentage": 30.48, "elapsed_time": "0:01:53", "remaining_time": "0:04:17"}
33
+ {"current_steps": 33, "total_steps": 105, "loss": 0.5196, "lr": 8.708260528239788e-06, "epoch": 0.9252336448598131, "percentage": 31.43, "elapsed_time": "0:01:56", "remaining_time": "0:04:14"}
34
+ {"current_steps": 34, "total_steps": 105, "loss": 0.5486, "lr": 8.594118419389648e-06, "epoch": 0.9532710280373832, "percentage": 32.38, "elapsed_time": "0:01:59", "remaining_time": "0:04:10"}
35
+ {"current_steps": 35, "total_steps": 105, "loss": 0.4885, "lr": 8.475962138373212e-06, "epoch": 0.9813084112149533, "percentage": 33.33, "elapsed_time": "0:02:03", "remaining_time": "0:04:06"}
36
+ {"current_steps": 36, "total_steps": 105, "loss": 0.7283, "lr": 8.353923650696119e-06, "epoch": 1.0186915887850467, "percentage": 34.29, "elapsed_time": "0:03:05", "remaining_time": "0:05:55"}
37
+ {"current_steps": 37, "total_steps": 105, "loss": 0.5858, "lr": 8.228139257794012e-06, "epoch": 1.0467289719626167, "percentage": 35.24, "elapsed_time": "0:03:10", "remaining_time": "0:05:50"}
38
+ {"current_steps": 38, "total_steps": 105, "loss": 0.4984, "lr": 8.098749444801226e-06, "epoch": 1.074766355140187, "percentage": 36.19, "elapsed_time": "0:03:13", "remaining_time": "0:05:41"}
39
+ {"current_steps": 39, "total_steps": 105, "loss": 0.4133, "lr": 7.965898723646777e-06, "epoch": 1.102803738317757, "percentage": 37.14, "elapsed_time": "0:03:17", "remaining_time": "0:05:33"}
40
+ {"current_steps": 40, "total_steps": 105, "loss": 0.4737, "lr": 7.829735471652978e-06, "epoch": 1.1308411214953271, "percentage": 38.1, "elapsed_time": "0:03:21", "remaining_time": "0:05:26"}
41
+ {"current_steps": 41, "total_steps": 105, "loss": 0.461, "lr": 7.690411765816864e-06, "epoch": 1.158878504672897, "percentage": 39.05, "elapsed_time": "0:03:24", "remaining_time": "0:05:19"}
42
+ {"current_steps": 42, "total_steps": 105, "loss": 0.4878, "lr": 7.548083212959588e-06, "epoch": 1.1869158878504673, "percentage": 40.0, "elapsed_time": "0:03:28", "remaining_time": "0:05:12"}
43
+ {"current_steps": 43, "total_steps": 105, "loss": 0.5398, "lr": 7.402908775933419e-06, "epoch": 1.2149532710280373, "percentage": 40.95, "elapsed_time": "0:03:31", "remaining_time": "0:05:04"}
44
+ {"current_steps": 44, "total_steps": 105, "loss": 0.4282, "lr": 7.25505059608051e-06, "epoch": 1.2429906542056075, "percentage": 41.9, "elapsed_time": "0:03:34", "remaining_time": "0:04:57"}
45
+ {"current_steps": 45, "total_steps": 105, "loss": 0.4352, "lr": 7.104673812141676e-06, "epoch": 1.2710280373831775, "percentage": 42.86, "elapsed_time": "0:03:38", "remaining_time": "0:04:51"}
46
+ {"current_steps": 46, "total_steps": 105, "loss": 0.3746, "lr": 6.9519463758174745e-06, "epoch": 1.2990654205607477, "percentage": 43.81, "elapsed_time": "0:03:41", "remaining_time": "0:04:44"}
47
+ {"current_steps": 47, "total_steps": 105, "loss": 0.4648, "lr": 6.797038864187564e-06, "epoch": 1.3271028037383177, "percentage": 44.76, "elapsed_time": "0:03:45", "remaining_time": "0:04:38"}
48
+ {"current_steps": 48, "total_steps": 105, "loss": 0.412, "lr": 6.640124289197845e-06, "epoch": 1.355140186915888, "percentage": 45.71, "elapsed_time": "0:03:48", "remaining_time": "0:04:31"}
49
+ {"current_steps": 49, "total_steps": 105, "loss": 0.4639, "lr": 6.481377904428171e-06, "epoch": 1.3831775700934579, "percentage": 46.67, "elapsed_time": "0:03:52", "remaining_time": "0:04:25"}
50
+ {"current_steps": 50, "total_steps": 105, "loss": 0.4511, "lr": 6.3209770093564315e-06, "epoch": 1.411214953271028, "percentage": 47.62, "elapsed_time": "0:03:55", "remaining_time": "0:04:19"}
51
+ {"current_steps": 51, "total_steps": 105, "loss": 0.4204, "lr": 6.1591007513376425e-06, "epoch": 1.439252336448598, "percentage": 48.57, "elapsed_time": "0:03:59", "remaining_time": "0:04:13"}
52
+ {"current_steps": 52, "total_steps": 105, "loss": 0.4717, "lr": 5.995929925519181e-06, "epoch": 1.4672897196261683, "percentage": 49.52, "elapsed_time": "0:04:02", "remaining_time": "0:04:07"}
53
+ {"current_steps": 53, "total_steps": 105, "loss": 0.451, "lr": 5.831646772915651e-06, "epoch": 1.4953271028037383, "percentage": 50.48, "elapsed_time": "0:04:05", "remaining_time": "0:04:01"}
54
+ {"current_steps": 54, "total_steps": 105, "loss": 0.4598, "lr": 5.666434776868895e-06, "epoch": 1.5233644859813085, "percentage": 51.43, "elapsed_time": "0:04:09", "remaining_time": "0:03:55"}
55
+ {"current_steps": 55, "total_steps": 105, "loss": 0.4471, "lr": 5.500478458120493e-06, "epoch": 1.5514018691588785, "percentage": 52.38, "elapsed_time": "0:04:12", "remaining_time": "0:03:49"}
56
+ {"current_steps": 56, "total_steps": 105, "loss": 0.4323, "lr": 5.3339631687256085e-06, "epoch": 1.5794392523364484, "percentage": 53.33, "elapsed_time": "0:04:16", "remaining_time": "0:03:44"}
57
+ {"current_steps": 57, "total_steps": 105, "loss": 0.421, "lr": 5.1670748850383734e-06, "epoch": 1.6074766355140186, "percentage": 54.29, "elapsed_time": "0:04:19", "remaining_time": "0:03:38"}
58
+ {"current_steps": 58, "total_steps": 105, "loss": 0.3956, "lr": 5e-06, "epoch": 1.6355140186915889, "percentage": 55.24, "elapsed_time": "0:04:22", "remaining_time": "0:03:33"}
59
+ {"current_steps": 59, "total_steps": 105, "loss": 0.4661, "lr": 4.832925114961629e-06, "epoch": 1.6635514018691588, "percentage": 56.19, "elapsed_time": "0:04:26", "remaining_time": "0:03:27"}
60
+ {"current_steps": 60, "total_steps": 105, "loss": 0.4704, "lr": 4.666036831274392e-06, "epoch": 1.6915887850467288, "percentage": 57.14, "elapsed_time": "0:04:29", "remaining_time": "0:03:22"}
61
+ {"current_steps": 61, "total_steps": 105, "loss": 0.465, "lr": 4.499521541879508e-06, "epoch": 1.719626168224299, "percentage": 58.1, "elapsed_time": "0:04:33", "remaining_time": "0:03:17"}
62
+ {"current_steps": 62, "total_steps": 105, "loss": 0.3283, "lr": 4.333565223131107e-06, "epoch": 1.7476635514018692, "percentage": 59.05, "elapsed_time": "0:04:36", "remaining_time": "0:03:11"}
63
+ {"current_steps": 63, "total_steps": 105, "loss": 0.4216, "lr": 4.1683532270843505e-06, "epoch": 1.7757009345794392, "percentage": 60.0, "elapsed_time": "0:04:39", "remaining_time": "0:03:06"}
64
+ {"current_steps": 64, "total_steps": 105, "loss": 0.4443, "lr": 4.004070074480821e-06, "epoch": 1.8037383177570092, "percentage": 60.95, "elapsed_time": "0:04:43", "remaining_time": "0:03:01"}
65
+ {"current_steps": 65, "total_steps": 105, "loss": 0.4901, "lr": 3.840899248662358e-06, "epoch": 1.8317757009345794, "percentage": 61.9, "elapsed_time": "0:04:46", "remaining_time": "0:02:56"}
66
+ {"current_steps": 66, "total_steps": 105, "loss": 0.4687, "lr": 3.6790229906435706e-06, "epoch": 1.8598130841121496, "percentage": 62.86, "elapsed_time": "0:04:50", "remaining_time": "0:02:51"}
67
+ {"current_steps": 67, "total_steps": 105, "loss": 0.5103, "lr": 3.518622095571831e-06, "epoch": 1.8878504672897196, "percentage": 63.81, "elapsed_time": "0:04:53", "remaining_time": "0:02:46"}
68
+ {"current_steps": 68, "total_steps": 105, "loss": 0.4563, "lr": 3.3598757108021546e-06, "epoch": 1.9158878504672896, "percentage": 64.76, "elapsed_time": "0:04:57", "remaining_time": "0:02:41"}
69
+ {"current_steps": 69, "total_steps": 105, "loss": 0.4273, "lr": 3.202961135812437e-06, "epoch": 1.9439252336448598, "percentage": 65.71, "elapsed_time": "0:05:00", "remaining_time": "0:02:36"}
70
+ {"current_steps": 70, "total_steps": 105, "loss": 0.3991, "lr": 3.0480536241825263e-06, "epoch": 1.97196261682243, "percentage": 66.67, "elapsed_time": "0:05:04", "remaining_time": "0:02:32"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:999b38a72071e34358d62d311c3f202721ead9a5c7fa78efde04491d39966809
3
  size 7288
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:369789d3cae672f3e32df0b1f11694493ba673b92b3443e1bb7bf201c2650c62
3
  size 7288