Reyad-Ahmmed commited on
Commit
fa7a965
·
verified ·
1 Parent(s): 52bb4a6

Push MLM model

Browse files
bert_embeddings_finetune/checkpoint-4662/trainer_state.json CHANGED
@@ -11,9 +11,9 @@
11
  {
12
  "epoch": 1.0,
13
  "eval_loss": 2.5209195613861084,
14
- "eval_runtime": 7.238,
15
- "eval_samples_per_second": 735.839,
16
- "eval_steps_per_second": 46.007,
17
  "step": 333
18
  },
19
  {
@@ -26,17 +26,17 @@
26
  {
27
  "epoch": 2.0,
28
  "eval_loss": 2.278496742248535,
29
- "eval_runtime": 7.3227,
30
- "eval_samples_per_second": 727.328,
31
- "eval_steps_per_second": 45.475,
32
  "step": 666
33
  },
34
  {
35
  "epoch": 3.0,
36
  "eval_loss": NaN,
37
- "eval_runtime": 7.4657,
38
- "eval_samples_per_second": 713.401,
39
- "eval_steps_per_second": 44.604,
40
  "step": 999
41
  },
42
  {
@@ -49,9 +49,9 @@
49
  {
50
  "epoch": 4.0,
51
  "eval_loss": NaN,
52
- "eval_runtime": 7.4388,
53
- "eval_samples_per_second": 715.977,
54
- "eval_steps_per_second": 44.765,
55
  "step": 1332
56
  },
57
  {
@@ -64,17 +64,17 @@
64
  {
65
  "epoch": 5.0,
66
  "eval_loss": 1.7883461713790894,
67
- "eval_runtime": 7.3242,
68
- "eval_samples_per_second": 727.175,
69
- "eval_steps_per_second": 45.466,
70
  "step": 1665
71
  },
72
  {
73
  "epoch": 6.0,
74
  "eval_loss": 1.617417573928833,
75
- "eval_runtime": 7.305,
76
- "eval_samples_per_second": 729.085,
77
- "eval_steps_per_second": 45.585,
78
  "step": 1998
79
  },
80
  {
@@ -87,9 +87,9 @@
87
  {
88
  "epoch": 7.0,
89
  "eval_loss": 1.5227739810943604,
90
- "eval_runtime": 7.3236,
91
- "eval_samples_per_second": 727.238,
92
- "eval_steps_per_second": 45.469,
93
  "step": 2331
94
  },
95
  {
@@ -102,17 +102,17 @@
102
  {
103
  "epoch": 8.0,
104
  "eval_loss": 1.448709487915039,
105
- "eval_runtime": 7.3349,
106
- "eval_samples_per_second": 726.119,
107
- "eval_steps_per_second": 45.399,
108
  "step": 2664
109
  },
110
  {
111
  "epoch": 9.0,
112
  "eval_loss": NaN,
113
- "eval_runtime": 7.4098,
114
- "eval_samples_per_second": 718.774,
115
- "eval_steps_per_second": 44.94,
116
  "step": 2997
117
  },
118
  {
@@ -125,9 +125,9 @@
125
  {
126
  "epoch": 10.0,
127
  "eval_loss": NaN,
128
- "eval_runtime": 7.4467,
129
- "eval_samples_per_second": 715.218,
130
- "eval_steps_per_second": 44.718,
131
  "step": 3330
132
  },
133
  {
@@ -140,17 +140,17 @@
140
  {
141
  "epoch": 11.0,
142
  "eval_loss": 1.2851027250289917,
143
- "eval_runtime": 7.5809,
144
- "eval_samples_per_second": 702.554,
145
- "eval_steps_per_second": 43.926,
146
  "step": 3663
147
  },
148
  {
149
  "epoch": 12.0,
150
  "eval_loss": 1.2175213098526,
151
- "eval_runtime": 7.6804,
152
- "eval_samples_per_second": 693.45,
153
- "eval_steps_per_second": 43.357,
154
  "step": 3996
155
  },
156
  {
@@ -163,9 +163,9 @@
163
  {
164
  "epoch": 13.0,
165
  "eval_loss": NaN,
166
- "eval_runtime": 7.5087,
167
- "eval_samples_per_second": 709.31,
168
- "eval_steps_per_second": 44.349,
169
  "step": 4329
170
  },
171
  {
@@ -178,9 +178,9 @@
178
  {
179
  "epoch": 14.0,
180
  "eval_loss": 1.1719356775283813,
181
- "eval_runtime": 7.5831,
182
- "eval_samples_per_second": 702.355,
183
- "eval_steps_per_second": 43.914,
184
  "step": 4662
185
  }
186
  ],
 
11
  {
12
  "epoch": 1.0,
13
  "eval_loss": 2.5209195613861084,
14
+ "eval_runtime": 7.2095,
15
+ "eval_samples_per_second": 738.75,
16
+ "eval_steps_per_second": 46.189,
17
  "step": 333
18
  },
19
  {
 
26
  {
27
  "epoch": 2.0,
28
  "eval_loss": 2.278496742248535,
29
+ "eval_runtime": 7.3416,
30
+ "eval_samples_per_second": 725.453,
31
+ "eval_steps_per_second": 45.358,
32
  "step": 666
33
  },
34
  {
35
  "epoch": 3.0,
36
  "eval_loss": NaN,
37
+ "eval_runtime": 7.386,
38
+ "eval_samples_per_second": 721.09,
39
+ "eval_steps_per_second": 45.085,
40
  "step": 999
41
  },
42
  {
 
49
  {
50
  "epoch": 4.0,
51
  "eval_loss": NaN,
52
+ "eval_runtime": 7.5366,
53
+ "eval_samples_per_second": 706.68,
54
+ "eval_steps_per_second": 44.184,
55
  "step": 1332
56
  },
57
  {
 
64
  {
65
  "epoch": 5.0,
66
  "eval_loss": 1.7883461713790894,
67
+ "eval_runtime": 7.5591,
68
+ "eval_samples_per_second": 704.577,
69
+ "eval_steps_per_second": 44.053,
70
  "step": 1665
71
  },
72
  {
73
  "epoch": 6.0,
74
  "eval_loss": 1.617417573928833,
75
+ "eval_runtime": 7.5921,
76
+ "eval_samples_per_second": 701.518,
77
+ "eval_steps_per_second": 43.861,
78
  "step": 1998
79
  },
80
  {
 
87
  {
88
  "epoch": 7.0,
89
  "eval_loss": 1.5227739810943604,
90
+ "eval_runtime": 7.4262,
91
+ "eval_samples_per_second": 717.188,
92
+ "eval_steps_per_second": 44.841,
93
  "step": 2331
94
  },
95
  {
 
102
  {
103
  "epoch": 8.0,
104
  "eval_loss": 1.448709487915039,
105
+ "eval_runtime": 7.4455,
106
+ "eval_samples_per_second": 715.331,
107
+ "eval_steps_per_second": 44.725,
108
  "step": 2664
109
  },
110
  {
111
  "epoch": 9.0,
112
  "eval_loss": NaN,
113
+ "eval_runtime": 7.5963,
114
+ "eval_samples_per_second": 701.13,
115
+ "eval_steps_per_second": 43.837,
116
  "step": 2997
117
  },
118
  {
 
125
  {
126
  "epoch": 10.0,
127
  "eval_loss": NaN,
128
+ "eval_runtime": 7.4766,
129
+ "eval_samples_per_second": 712.357,
130
+ "eval_steps_per_second": 44.539,
131
  "step": 3330
132
  },
133
  {
 
140
  {
141
  "epoch": 11.0,
142
  "eval_loss": 1.2851027250289917,
143
+ "eval_runtime": 7.5315,
144
+ "eval_samples_per_second": 707.164,
145
+ "eval_steps_per_second": 44.214,
146
  "step": 3663
147
  },
148
  {
149
  "epoch": 12.0,
150
  "eval_loss": 1.2175213098526,
151
+ "eval_runtime": 7.5286,
152
+ "eval_samples_per_second": 707.433,
153
+ "eval_steps_per_second": 44.231,
154
  "step": 3996
155
  },
156
  {
 
163
  {
164
  "epoch": 13.0,
165
  "eval_loss": NaN,
166
+ "eval_runtime": 7.4167,
167
+ "eval_samples_per_second": 718.113,
168
+ "eval_steps_per_second": 44.899,
169
  "step": 4329
170
  },
171
  {
 
178
  {
179
  "epoch": 14.0,
180
  "eval_loss": 1.1719356775283813,
181
+ "eval_runtime": 7.391,
182
+ "eval_samples_per_second": 720.609,
183
+ "eval_steps_per_second": 45.055,
184
  "step": 4662
185
  }
186
  ],
bert_embeddings_finetune/checkpoint-4995/trainer_state.json CHANGED
@@ -11,9 +11,9 @@
11
  {
12
  "epoch": 1.0,
13
  "eval_loss": 2.5209195613861084,
14
- "eval_runtime": 7.238,
15
- "eval_samples_per_second": 735.839,
16
- "eval_steps_per_second": 46.007,
17
  "step": 333
18
  },
19
  {
@@ -26,17 +26,17 @@
26
  {
27
  "epoch": 2.0,
28
  "eval_loss": 2.278496742248535,
29
- "eval_runtime": 7.3227,
30
- "eval_samples_per_second": 727.328,
31
- "eval_steps_per_second": 45.475,
32
  "step": 666
33
  },
34
  {
35
  "epoch": 3.0,
36
  "eval_loss": NaN,
37
- "eval_runtime": 7.4657,
38
- "eval_samples_per_second": 713.401,
39
- "eval_steps_per_second": 44.604,
40
  "step": 999
41
  },
42
  {
@@ -49,9 +49,9 @@
49
  {
50
  "epoch": 4.0,
51
  "eval_loss": NaN,
52
- "eval_runtime": 7.4388,
53
- "eval_samples_per_second": 715.977,
54
- "eval_steps_per_second": 44.765,
55
  "step": 1332
56
  },
57
  {
@@ -64,17 +64,17 @@
64
  {
65
  "epoch": 5.0,
66
  "eval_loss": 1.7883461713790894,
67
- "eval_runtime": 7.3242,
68
- "eval_samples_per_second": 727.175,
69
- "eval_steps_per_second": 45.466,
70
  "step": 1665
71
  },
72
  {
73
  "epoch": 6.0,
74
  "eval_loss": 1.617417573928833,
75
- "eval_runtime": 7.305,
76
- "eval_samples_per_second": 729.085,
77
- "eval_steps_per_second": 45.585,
78
  "step": 1998
79
  },
80
  {
@@ -87,9 +87,9 @@
87
  {
88
  "epoch": 7.0,
89
  "eval_loss": 1.5227739810943604,
90
- "eval_runtime": 7.3236,
91
- "eval_samples_per_second": 727.238,
92
- "eval_steps_per_second": 45.469,
93
  "step": 2331
94
  },
95
  {
@@ -102,17 +102,17 @@
102
  {
103
  "epoch": 8.0,
104
  "eval_loss": 1.448709487915039,
105
- "eval_runtime": 7.3349,
106
- "eval_samples_per_second": 726.119,
107
- "eval_steps_per_second": 45.399,
108
  "step": 2664
109
  },
110
  {
111
  "epoch": 9.0,
112
  "eval_loss": NaN,
113
- "eval_runtime": 7.4098,
114
- "eval_samples_per_second": 718.774,
115
- "eval_steps_per_second": 44.94,
116
  "step": 2997
117
  },
118
  {
@@ -125,9 +125,9 @@
125
  {
126
  "epoch": 10.0,
127
  "eval_loss": NaN,
128
- "eval_runtime": 7.4467,
129
- "eval_samples_per_second": 715.218,
130
- "eval_steps_per_second": 44.718,
131
  "step": 3330
132
  },
133
  {
@@ -140,17 +140,17 @@
140
  {
141
  "epoch": 11.0,
142
  "eval_loss": 1.2851027250289917,
143
- "eval_runtime": 7.5809,
144
- "eval_samples_per_second": 702.554,
145
- "eval_steps_per_second": 43.926,
146
  "step": 3663
147
  },
148
  {
149
  "epoch": 12.0,
150
  "eval_loss": 1.2175213098526,
151
- "eval_runtime": 7.6804,
152
- "eval_samples_per_second": 693.45,
153
- "eval_steps_per_second": 43.357,
154
  "step": 3996
155
  },
156
  {
@@ -163,9 +163,9 @@
163
  {
164
  "epoch": 13.0,
165
  "eval_loss": NaN,
166
- "eval_runtime": 7.5087,
167
- "eval_samples_per_second": 709.31,
168
- "eval_steps_per_second": 44.349,
169
  "step": 4329
170
  },
171
  {
@@ -178,17 +178,17 @@
178
  {
179
  "epoch": 14.0,
180
  "eval_loss": 1.1719356775283813,
181
- "eval_runtime": 7.5831,
182
- "eval_samples_per_second": 702.355,
183
- "eval_steps_per_second": 43.914,
184
  "step": 4662
185
  },
186
  {
187
  "epoch": 15.0,
188
  "eval_loss": 1.2066022157669067,
189
- "eval_runtime": 7.6392,
190
- "eval_samples_per_second": 697.198,
191
- "eval_steps_per_second": 43.591,
192
  "step": 4995
193
  }
194
  ],
 
11
  {
12
  "epoch": 1.0,
13
  "eval_loss": 2.5209195613861084,
14
+ "eval_runtime": 7.2095,
15
+ "eval_samples_per_second": 738.75,
16
+ "eval_steps_per_second": 46.189,
17
  "step": 333
18
  },
19
  {
 
26
  {
27
  "epoch": 2.0,
28
  "eval_loss": 2.278496742248535,
29
+ "eval_runtime": 7.3416,
30
+ "eval_samples_per_second": 725.453,
31
+ "eval_steps_per_second": 45.358,
32
  "step": 666
33
  },
34
  {
35
  "epoch": 3.0,
36
  "eval_loss": NaN,
37
+ "eval_runtime": 7.386,
38
+ "eval_samples_per_second": 721.09,
39
+ "eval_steps_per_second": 45.085,
40
  "step": 999
41
  },
42
  {
 
49
  {
50
  "epoch": 4.0,
51
  "eval_loss": NaN,
52
+ "eval_runtime": 7.5366,
53
+ "eval_samples_per_second": 706.68,
54
+ "eval_steps_per_second": 44.184,
55
  "step": 1332
56
  },
57
  {
 
64
  {
65
  "epoch": 5.0,
66
  "eval_loss": 1.7883461713790894,
67
+ "eval_runtime": 7.5591,
68
+ "eval_samples_per_second": 704.577,
69
+ "eval_steps_per_second": 44.053,
70
  "step": 1665
71
  },
72
  {
73
  "epoch": 6.0,
74
  "eval_loss": 1.617417573928833,
75
+ "eval_runtime": 7.5921,
76
+ "eval_samples_per_second": 701.518,
77
+ "eval_steps_per_second": 43.861,
78
  "step": 1998
79
  },
80
  {
 
87
  {
88
  "epoch": 7.0,
89
  "eval_loss": 1.5227739810943604,
90
+ "eval_runtime": 7.4262,
91
+ "eval_samples_per_second": 717.188,
92
+ "eval_steps_per_second": 44.841,
93
  "step": 2331
94
  },
95
  {
 
102
  {
103
  "epoch": 8.0,
104
  "eval_loss": 1.448709487915039,
105
+ "eval_runtime": 7.4455,
106
+ "eval_samples_per_second": 715.331,
107
+ "eval_steps_per_second": 44.725,
108
  "step": 2664
109
  },
110
  {
111
  "epoch": 9.0,
112
  "eval_loss": NaN,
113
+ "eval_runtime": 7.5963,
114
+ "eval_samples_per_second": 701.13,
115
+ "eval_steps_per_second": 43.837,
116
  "step": 2997
117
  },
118
  {
 
125
  {
126
  "epoch": 10.0,
127
  "eval_loss": NaN,
128
+ "eval_runtime": 7.4766,
129
+ "eval_samples_per_second": 712.357,
130
+ "eval_steps_per_second": 44.539,
131
  "step": 3330
132
  },
133
  {
 
140
  {
141
  "epoch": 11.0,
142
  "eval_loss": 1.2851027250289917,
143
+ "eval_runtime": 7.5315,
144
+ "eval_samples_per_second": 707.164,
145
+ "eval_steps_per_second": 44.214,
146
  "step": 3663
147
  },
148
  {
149
  "epoch": 12.0,
150
  "eval_loss": 1.2175213098526,
151
+ "eval_runtime": 7.5286,
152
+ "eval_samples_per_second": 707.433,
153
+ "eval_steps_per_second": 44.231,
154
  "step": 3996
155
  },
156
  {
 
163
  {
164
  "epoch": 13.0,
165
  "eval_loss": NaN,
166
+ "eval_runtime": 7.4167,
167
+ "eval_samples_per_second": 718.113,
168
+ "eval_steps_per_second": 44.899,
169
  "step": 4329
170
  },
171
  {
 
178
  {
179
  "epoch": 14.0,
180
  "eval_loss": 1.1719356775283813,
181
+ "eval_runtime": 7.391,
182
+ "eval_samples_per_second": 720.609,
183
+ "eval_steps_per_second": 45.055,
184
  "step": 4662
185
  },
186
  {
187
  "epoch": 15.0,
188
  "eval_loss": 1.2066022157669067,
189
+ "eval_runtime": 7.4171,
190
+ "eval_samples_per_second": 718.067,
191
+ "eval_steps_per_second": 44.896,
192
  "step": 4995
193
  }
194
  ],