gsmyrnis commited on
Commit
8b53066
·
verified ·
1 Parent(s): 1d30274

Training in progress, epoch 1

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:733e0c36c10a960fa9770a20f696b159718fc3f443e2ce6eed82c6fe5e4058e3
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:442164fc656c9843f705329c58cf65e24cfc26b7baf46e1adea9d8487ac056e1
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b72776056b6f4ddc046a46aecaf0d6fea0de1bbfbc06d87f58881e41a2faca8
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:379b3d56d12c02758fb003c91d4a2e77f4b27b4db486f073c3775defc20086a1
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40e1208a1c534cc96e4ca346ddff3de64125fcaec5eebdb00ad7a8aceab96519
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48bf26b77f5b9335b38fe7ad9e7ac0c0f3d255f093270dae041b6839096755f3
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74ae3361785901963b286d81b46373b6c1dadf399806316b4fd00a868b019e2a
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7432de9a36c7e824bb91122c32f09929a09f007a6a2dd56a21afba2bea9a7886
3
  size 1089994880
trainer_log.jsonl CHANGED
@@ -87,3 +87,81 @@
87
  {"current_steps": 87, "total_steps": 234, "loss": 0.7185, "lr": 7.938926261462366e-06, "epoch": 1.1106382978723404, "percentage": 37.18, "elapsed_time": "0:21:48", "remaining_time": "0:36:50"}
88
  {"current_steps": 88, "total_steps": 234, "loss": 0.7618, "lr": 7.87808532842837e-06, "epoch": 1.123404255319149, "percentage": 37.61, "elapsed_time": "0:22:01", "remaining_time": "0:36:32"}
89
  {"current_steps": 89, "total_steps": 234, "loss": 0.7565, "lr": 7.81660029031811e-06, "epoch": 1.1361702127659574, "percentage": 38.03, "elapsed_time": "0:22:17", "remaining_time": "0:36:19"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  {"current_steps": 87, "total_steps": 234, "loss": 0.7185, "lr": 7.938926261462366e-06, "epoch": 1.1106382978723404, "percentage": 37.18, "elapsed_time": "0:21:48", "remaining_time": "0:36:50"}
88
  {"current_steps": 88, "total_steps": 234, "loss": 0.7618, "lr": 7.87808532842837e-06, "epoch": 1.123404255319149, "percentage": 37.61, "elapsed_time": "0:22:01", "remaining_time": "0:36:32"}
89
  {"current_steps": 89, "total_steps": 234, "loss": 0.7565, "lr": 7.81660029031811e-06, "epoch": 1.1361702127659574, "percentage": 38.03, "elapsed_time": "0:22:17", "remaining_time": "0:36:19"}
90
+ {"current_steps": 90, "total_steps": 234, "loss": 0.7626, "lr": 7.754484907260513e-06, "epoch": 1.148936170212766, "percentage": 38.46, "elapsed_time": "0:22:32", "remaining_time": "0:36:03"}
91
+ {"current_steps": 91, "total_steps": 234, "loss": 0.7504, "lr": 7.691753080453413e-06, "epoch": 1.1617021276595745, "percentage": 38.89, "elapsed_time": "0:22:44", "remaining_time": "0:35:43"}
92
+ {"current_steps": 92, "total_steps": 234, "loss": 0.7695, "lr": 7.628418849052523e-06, "epoch": 1.174468085106383, "percentage": 39.32, "elapsed_time": "0:22:59", "remaining_time": "0:35:29"}
93
+ {"current_steps": 93, "total_steps": 234, "loss": 0.8094, "lr": 7.564496387029532e-06, "epoch": 1.1872340425531915, "percentage": 39.74, "elapsed_time": "0:23:14", "remaining_time": "0:35:14"}
94
+ {"current_steps": 94, "total_steps": 234, "loss": 0.7346, "lr": 7.500000000000001e-06, "epoch": 1.2, "percentage": 40.17, "elapsed_time": "0:23:31", "remaining_time": "0:35:01"}
95
+ {"current_steps": 95, "total_steps": 234, "loss": 0.8103, "lr": 7.434944122021837e-06, "epoch": 1.2127659574468086, "percentage": 40.6, "elapsed_time": "0:23:43", "remaining_time": "0:34:42"}
96
+ {"current_steps": 96, "total_steps": 234, "loss": 0.7658, "lr": 7.369343312364994e-06, "epoch": 1.225531914893617, "percentage": 41.03, "elapsed_time": "0:23:56", "remaining_time": "0:34:25"}
97
+ {"current_steps": 97, "total_steps": 234, "loss": 0.7674, "lr": 7.303212252253163e-06, "epoch": 1.2382978723404254, "percentage": 41.45, "elapsed_time": "0:24:09", "remaining_time": "0:34:07"}
98
+ {"current_steps": 98, "total_steps": 234, "loss": 0.7526, "lr": 7.236565741578163e-06, "epoch": 1.251063829787234, "percentage": 41.88, "elapsed_time": "0:24:24", "remaining_time": "0:33:52"}
99
+ {"current_steps": 99, "total_steps": 234, "loss": 0.7459, "lr": 7.169418695587791e-06, "epoch": 1.2638297872340425, "percentage": 42.31, "elapsed_time": "0:24:36", "remaining_time": "0:33:33"}
100
+ {"current_steps": 100, "total_steps": 234, "loss": 0.7433, "lr": 7.101786141547829e-06, "epoch": 1.2765957446808511, "percentage": 42.74, "elapsed_time": "0:24:50", "remaining_time": "0:33:17"}
101
+ {"current_steps": 101, "total_steps": 234, "loss": 0.8091, "lr": 7.033683215379002e-06, "epoch": 1.2893617021276595, "percentage": 43.16, "elapsed_time": "0:25:02", "remaining_time": "0:32:58"}
102
+ {"current_steps": 102, "total_steps": 234, "loss": 0.7685, "lr": 6.965125158269619e-06, "epoch": 1.302127659574468, "percentage": 43.59, "elapsed_time": "0:25:17", "remaining_time": "0:32:43"}
103
+ {"current_steps": 103, "total_steps": 234, "loss": 0.7512, "lr": 6.896127313264643e-06, "epoch": 1.3148936170212766, "percentage": 44.02, "elapsed_time": "0:25:31", "remaining_time": "0:32:27"}
104
+ {"current_steps": 104, "total_steps": 234, "loss": 0.7207, "lr": 6.8267051218319766e-06, "epoch": 1.327659574468085, "percentage": 44.44, "elapsed_time": "0:25:46", "remaining_time": "0:32:12"}
105
+ {"current_steps": 105, "total_steps": 234, "loss": 0.7743, "lr": 6.7568741204067145e-06, "epoch": 1.3404255319148937, "percentage": 44.87, "elapsed_time": "0:25:57", "remaining_time": "0:31:54"}
106
+ {"current_steps": 106, "total_steps": 234, "loss": 0.7671, "lr": 6.686649936914151e-06, "epoch": 1.353191489361702, "percentage": 45.3, "elapsed_time": "0:26:10", "remaining_time": "0:31:36"}
107
+ {"current_steps": 107, "total_steps": 234, "loss": 0.6478, "lr": 6.616048287272301e-06, "epoch": 1.3659574468085105, "percentage": 45.73, "elapsed_time": "0:26:21", "remaining_time": "0:31:17"}
108
+ {"current_steps": 108, "total_steps": 234, "loss": 0.8928, "lr": 6.545084971874738e-06, "epoch": 1.3787234042553191, "percentage": 46.15, "elapsed_time": "0:26:36", "remaining_time": "0:31:02"}
109
+ {"current_steps": 109, "total_steps": 234, "loss": 0.8068, "lr": 6.473775872054522e-06, "epoch": 1.3914893617021278, "percentage": 46.58, "elapsed_time": "0:26:52", "remaining_time": "0:30:49"}
110
+ {"current_steps": 110, "total_steps": 234, "loss": 0.6719, "lr": 6.402136946530014e-06, "epoch": 1.4042553191489362, "percentage": 47.01, "elapsed_time": "0:27:03", "remaining_time": "0:30:30"}
111
+ {"current_steps": 111, "total_steps": 234, "loss": 0.7651, "lr": 6.330184227833376e-06, "epoch": 1.4170212765957446, "percentage": 47.44, "elapsed_time": "0:27:14", "remaining_time": "0:30:11"}
112
+ {"current_steps": 112, "total_steps": 234, "loss": 0.8234, "lr": 6.257933818722544e-06, "epoch": 1.4297872340425533, "percentage": 47.86, "elapsed_time": "0:27:29", "remaining_time": "0:29:56"}
113
+ {"current_steps": 113, "total_steps": 234, "loss": 0.7144, "lr": 6.185401888577488e-06, "epoch": 1.4425531914893617, "percentage": 48.29, "elapsed_time": "0:27:41", "remaining_time": "0:29:38"}
114
+ {"current_steps": 114, "total_steps": 234, "loss": 0.8006, "lr": 6.112604669781572e-06, "epoch": 1.4553191489361703, "percentage": 48.72, "elapsed_time": "0:27:57", "remaining_time": "0:29:26"}
115
+ {"current_steps": 115, "total_steps": 234, "loss": 0.69, "lr": 6.039558454088796e-06, "epoch": 1.4680851063829787, "percentage": 49.15, "elapsed_time": "0:28:12", "remaining_time": "0:29:11"}
116
+ {"current_steps": 116, "total_steps": 234, "loss": 0.8544, "lr": 5.9662795889777666e-06, "epoch": 1.4808510638297872, "percentage": 49.57, "elapsed_time": "0:28:28", "remaining_time": "0:28:58"}
117
+ {"current_steps": 117, "total_steps": 234, "loss": 0.7449, "lr": 5.892784473993184e-06, "epoch": 1.4936170212765958, "percentage": 50.0, "elapsed_time": "0:28:44", "remaining_time": "0:28:44"}
118
+ {"current_steps": 118, "total_steps": 234, "loss": 0.7141, "lr": 5.819089557075689e-06, "epoch": 1.5063829787234042, "percentage": 50.43, "elapsed_time": "0:28:58", "remaining_time": "0:28:28"}
119
+ {"current_steps": 119, "total_steps": 234, "loss": 0.8249, "lr": 5.745211330880872e-06, "epoch": 1.5191489361702128, "percentage": 50.85, "elapsed_time": "0:29:14", "remaining_time": "0:28:15"}
120
+ {"current_steps": 120, "total_steps": 234, "loss": 0.7202, "lr": 5.671166329088278e-06, "epoch": 1.5319148936170213, "percentage": 51.28, "elapsed_time": "0:29:25", "remaining_time": "0:27:57"}
121
+ {"current_steps": 121, "total_steps": 234, "loss": 0.7705, "lr": 5.596971122701221e-06, "epoch": 1.5446808510638297, "percentage": 51.71, "elapsed_time": "0:29:39", "remaining_time": "0:27:41"}
122
+ {"current_steps": 122, "total_steps": 234, "loss": 0.7026, "lr": 5.522642316338268e-06, "epoch": 1.5574468085106383, "percentage": 52.14, "elapsed_time": "0:29:54", "remaining_time": "0:27:27"}
123
+ {"current_steps": 123, "total_steps": 234, "loss": 0.7979, "lr": 5.448196544517168e-06, "epoch": 1.570212765957447, "percentage": 52.56, "elapsed_time": "0:30:10", "remaining_time": "0:27:13"}
124
+ {"current_steps": 124, "total_steps": 234, "loss": 0.7143, "lr": 5.373650467932122e-06, "epoch": 1.5829787234042554, "percentage": 52.99, "elapsed_time": "0:30:20", "remaining_time": "0:26:54"}
125
+ {"current_steps": 125, "total_steps": 234, "loss": 0.7873, "lr": 5.299020769725172e-06, "epoch": 1.5957446808510638, "percentage": 53.42, "elapsed_time": "0:30:31", "remaining_time": "0:26:37"}
126
+ {"current_steps": 126, "total_steps": 234, "loss": 0.7365, "lr": 5.224324151752575e-06, "epoch": 1.6085106382978722, "percentage": 53.85, "elapsed_time": "0:30:42", "remaining_time": "0:26:19"}
127
+ {"current_steps": 127, "total_steps": 234, "loss": 0.8644, "lr": 5.1495773308469935e-06, "epoch": 1.6212765957446809, "percentage": 54.27, "elapsed_time": "0:30:56", "remaining_time": "0:26:03"}
128
+ {"current_steps": 128, "total_steps": 234, "loss": 0.645, "lr": 5.074797035076319e-06, "epoch": 1.6340425531914895, "percentage": 54.7, "elapsed_time": "0:31:05", "remaining_time": "0:25:44"}
129
+ {"current_steps": 129, "total_steps": 234, "loss": 0.7568, "lr": 5e-06, "epoch": 1.646808510638298, "percentage": 55.13, "elapsed_time": "0:31:20", "remaining_time": "0:25:30"}
130
+ {"current_steps": 130, "total_steps": 234, "loss": 0.7629, "lr": 4.9252029649236835e-06, "epoch": 1.6595744680851063, "percentage": 55.56, "elapsed_time": "0:31:33", "remaining_time": "0:25:14"}
131
+ {"current_steps": 131, "total_steps": 234, "loss": 0.7951, "lr": 4.850422669153009e-06, "epoch": 1.6723404255319148, "percentage": 55.98, "elapsed_time": "0:31:45", "remaining_time": "0:24:58"}
132
+ {"current_steps": 132, "total_steps": 234, "loss": 0.742, "lr": 4.775675848247427e-06, "epoch": 1.6851063829787234, "percentage": 56.41, "elapsed_time": "0:32:00", "remaining_time": "0:24:44"}
133
+ {"current_steps": 133, "total_steps": 234, "loss": 0.885, "lr": 4.700979230274829e-06, "epoch": 1.697872340425532, "percentage": 56.84, "elapsed_time": "0:32:14", "remaining_time": "0:24:28"}
134
+ {"current_steps": 134, "total_steps": 234, "loss": 0.6209, "lr": 4.626349532067879e-06, "epoch": 1.7106382978723405, "percentage": 57.26, "elapsed_time": "0:32:21", "remaining_time": "0:24:08"}
135
+ {"current_steps": 135, "total_steps": 234, "loss": 0.7317, "lr": 4.551803455482833e-06, "epoch": 1.7234042553191489, "percentage": 57.69, "elapsed_time": "0:32:33", "remaining_time": "0:23:52"}
136
+ {"current_steps": 136, "total_steps": 234, "loss": 0.7886, "lr": 4.477357683661734e-06, "epoch": 1.7361702127659573, "percentage": 58.12, "elapsed_time": "0:32:49", "remaining_time": "0:23:39"}
137
+ {"current_steps": 137, "total_steps": 234, "loss": 0.6998, "lr": 4.4030288772987795e-06, "epoch": 1.748936170212766, "percentage": 58.55, "elapsed_time": "0:33:02", "remaining_time": "0:23:23"}
138
+ {"current_steps": 138, "total_steps": 234, "loss": 0.7685, "lr": 4.3288336709117246e-06, "epoch": 1.7617021276595746, "percentage": 58.97, "elapsed_time": "0:33:15", "remaining_time": "0:23:07"}
139
+ {"current_steps": 139, "total_steps": 234, "loss": 0.8504, "lr": 4.254788669119127e-06, "epoch": 1.774468085106383, "percentage": 59.4, "elapsed_time": "0:33:31", "remaining_time": "0:22:54"}
140
+ {"current_steps": 140, "total_steps": 234, "loss": 0.6402, "lr": 4.180910442924312e-06, "epoch": 1.7872340425531914, "percentage": 59.83, "elapsed_time": "0:33:43", "remaining_time": "0:22:38"}
141
+ {"current_steps": 141, "total_steps": 234, "loss": 0.8257, "lr": 4.107215526006818e-06, "epoch": 1.8, "percentage": 60.26, "elapsed_time": "0:33:57", "remaining_time": "0:22:24"}
142
+ {"current_steps": 142, "total_steps": 234, "loss": 0.7743, "lr": 4.033720411022235e-06, "epoch": 1.8127659574468085, "percentage": 60.68, "elapsed_time": "0:34:14", "remaining_time": "0:22:11"}
143
+ {"current_steps": 143, "total_steps": 234, "loss": 0.6946, "lr": 3.960441545911205e-06, "epoch": 1.825531914893617, "percentage": 61.11, "elapsed_time": "0:34:23", "remaining_time": "0:21:53"}
144
+ {"current_steps": 144, "total_steps": 234, "loss": 0.7032, "lr": 3.887395330218429e-06, "epoch": 1.8382978723404255, "percentage": 61.54, "elapsed_time": "0:34:34", "remaining_time": "0:21:36"}
145
+ {"current_steps": 145, "total_steps": 234, "loss": 0.7748, "lr": 3.8145981114225135e-06, "epoch": 1.851063829787234, "percentage": 61.97, "elapsed_time": "0:34:49", "remaining_time": "0:21:22"}
146
+ {"current_steps": 146, "total_steps": 234, "loss": 0.8035, "lr": 3.7420661812774577e-06, "epoch": 1.8638297872340426, "percentage": 62.39, "elapsed_time": "0:35:03", "remaining_time": "0:21:07"}
147
+ {"current_steps": 147, "total_steps": 234, "loss": 0.7535, "lr": 3.669815772166625e-06, "epoch": 1.8765957446808512, "percentage": 62.82, "elapsed_time": "0:35:18", "remaining_time": "0:20:53"}
148
+ {"current_steps": 148, "total_steps": 234, "loss": 0.7643, "lr": 3.5978630534699873e-06, "epoch": 1.8893617021276596, "percentage": 63.25, "elapsed_time": "0:35:31", "remaining_time": "0:20:38"}
149
+ {"current_steps": 149, "total_steps": 234, "loss": 0.7609, "lr": 3.526224127945479e-06, "epoch": 1.902127659574468, "percentage": 63.68, "elapsed_time": "0:35:46", "remaining_time": "0:20:24"}
150
+ {"current_steps": 150, "total_steps": 234, "loss": 0.7115, "lr": 3.4549150281252635e-06, "epoch": 1.9148936170212765, "percentage": 64.1, "elapsed_time": "0:36:02", "remaining_time": "0:20:11"}
151
+ {"current_steps": 151, "total_steps": 234, "loss": 0.858, "lr": 3.383951712727701e-06, "epoch": 1.9276595744680851, "percentage": 64.53, "elapsed_time": "0:36:16", "remaining_time": "0:19:56"}
152
+ {"current_steps": 152, "total_steps": 234, "loss": 0.7184, "lr": 3.3133500630858507e-06, "epoch": 1.9404255319148938, "percentage": 64.96, "elapsed_time": "0:36:30", "remaining_time": "0:19:41"}
153
+ {"current_steps": 153, "total_steps": 234, "loss": 0.7139, "lr": 3.2431258795932863e-06, "epoch": 1.9531914893617022, "percentage": 65.38, "elapsed_time": "0:36:43", "remaining_time": "0:19:26"}
154
+ {"current_steps": 154, "total_steps": 234, "loss": 0.7809, "lr": 3.173294878168025e-06, "epoch": 1.9659574468085106, "percentage": 65.81, "elapsed_time": "0:36:58", "remaining_time": "0:19:12"}
155
+ {"current_steps": 155, "total_steps": 234, "loss": 0.7562, "lr": 3.1038726867353587e-06, "epoch": 1.978723404255319, "percentage": 66.24, "elapsed_time": "0:37:13", "remaining_time": "0:18:58"}
156
+ {"current_steps": 156, "total_steps": 234, "loss": 0.76, "lr": 3.0348748417303826e-06, "epoch": 1.9914893617021276, "percentage": 66.67, "elapsed_time": "0:37:27", "remaining_time": "0:18:43"}
157
+ {"current_steps": 157, "total_steps": 234, "loss": 1.1093, "lr": 2.966316784621e-06, "epoch": 2.0042553191489363, "percentage": 67.09, "elapsed_time": "0:38:48", "remaining_time": "0:19:01"}
158
+ {"current_steps": 158, "total_steps": 234, "loss": 0.6804, "lr": 2.8982138584521734e-06, "epoch": 2.0170212765957447, "percentage": 67.52, "elapsed_time": "0:39:00", "remaining_time": "0:18:45"}
159
+ {"current_steps": 159, "total_steps": 234, "loss": 0.7346, "lr": 2.83058130441221e-06, "epoch": 2.029787234042553, "percentage": 67.95, "elapsed_time": "0:39:09", "remaining_time": "0:18:28"}
160
+ {"current_steps": 160, "total_steps": 234, "loss": 0.7322, "lr": 2.7634342584218364e-06, "epoch": 2.0425531914893615, "percentage": 68.38, "elapsed_time": "0:39:26", "remaining_time": "0:18:14"}
161
+ {"current_steps": 161, "total_steps": 234, "loss": 0.6979, "lr": 2.6967877477468394e-06, "epoch": 2.0553191489361704, "percentage": 68.8, "elapsed_time": "0:39:42", "remaining_time": "0:18:00"}
162
+ {"current_steps": 162, "total_steps": 234, "loss": 0.7996, "lr": 2.6306566876350072e-06, "epoch": 2.068085106382979, "percentage": 69.23, "elapsed_time": "0:39:57", "remaining_time": "0:17:45"}
163
+ {"current_steps": 163, "total_steps": 234, "loss": 0.5683, "lr": 2.5650558779781635e-06, "epoch": 2.0808510638297872, "percentage": 69.66, "elapsed_time": "0:40:09", "remaining_time": "0:17:29"}
164
+ {"current_steps": 164, "total_steps": 234, "loss": 0.6968, "lr": 2.5000000000000015e-06, "epoch": 2.0936170212765957, "percentage": 70.09, "elapsed_time": "0:40:25", "remaining_time": "0:17:15"}
165
+ {"current_steps": 165, "total_steps": 234, "loss": 0.8232, "lr": 2.43550361297047e-06, "epoch": 2.106382978723404, "percentage": 70.51, "elapsed_time": "0:40:42", "remaining_time": "0:17:01"}
166
+ {"current_steps": 166, "total_steps": 234, "loss": 0.6172, "lr": 2.371581150947476e-06, "epoch": 2.119148936170213, "percentage": 70.94, "elapsed_time": "0:40:56", "remaining_time": "0:16:46"}
167
+ {"current_steps": 167, "total_steps": 234, "loss": 0.7489, "lr": 2.3082469195465893e-06, "epoch": 2.1319148936170214, "percentage": 71.37, "elapsed_time": "0:41:12", "remaining_time": "0:16:32"}