Reyad-Ahmmed committed on
Commit
52bb4a6
·
verified ·
1 Parent(s): 38a542d

Push MLM model

Browse files
bert_embeddings_finetune/checkpoint-4662/config.json CHANGED
@@ -19,7 +19,7 @@
19
  "pad_token_id": 0,
20
  "position_embedding_type": "absolute",
21
  "torch_dtype": "float32",
22
- "transformers_version": "4.48.0",
23
  "type_vocab_size": 2,
24
  "use_cache": true,
25
  "vocab_size": 30522
 
19
  "pad_token_id": 0,
20
  "position_embedding_type": "absolute",
21
  "torch_dtype": "float32",
22
+ "transformers_version": "4.48.2",
23
  "type_vocab_size": 2,
24
  "use_cache": true,
25
  "vocab_size": 30522
bert_embeddings_finetune/checkpoint-4662/generation_config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
  "_from_model_config": true,
3
  "pad_token_id": 0,
4
- "transformers_version": "4.48.0"
5
  }
 
1
  {
2
  "_from_model_config": true,
3
  "pad_token_id": 0,
4
+ "transformers_version": "4.48.2"
5
  }
bert_embeddings_finetune/checkpoint-4662/trainer_state.json CHANGED
@@ -11,9 +11,9 @@
11
  {
12
  "epoch": 1.0,
13
  "eval_loss": 2.5209195613861084,
14
- "eval_runtime": 7.3239,
15
- "eval_samples_per_second": 727.207,
16
- "eval_steps_per_second": 45.468,
17
  "step": 333
18
  },
19
  {
@@ -26,17 +26,17 @@
26
  {
27
  "epoch": 2.0,
28
  "eval_loss": 2.278496742248535,
29
- "eval_runtime": 7.4414,
30
- "eval_samples_per_second": 715.73,
31
- "eval_steps_per_second": 44.75,
32
  "step": 666
33
  },
34
  {
35
  "epoch": 3.0,
36
  "eval_loss": NaN,
37
- "eval_runtime": 7.4958,
38
- "eval_samples_per_second": 710.531,
39
- "eval_steps_per_second": 44.425,
40
  "step": 999
41
  },
42
  {
@@ -49,9 +49,9 @@
49
  {
50
  "epoch": 4.0,
51
  "eval_loss": NaN,
52
- "eval_runtime": 7.4729,
53
- "eval_samples_per_second": 712.709,
54
- "eval_steps_per_second": 44.561,
55
  "step": 1332
56
  },
57
  {
@@ -64,17 +64,17 @@
64
  {
65
  "epoch": 5.0,
66
  "eval_loss": 1.7883461713790894,
67
- "eval_runtime": 7.4123,
68
- "eval_samples_per_second": 718.54,
69
- "eval_steps_per_second": 44.926,
70
  "step": 1665
71
  },
72
  {
73
  "epoch": 6.0,
74
  "eval_loss": 1.617417573928833,
75
- "eval_runtime": 7.4583,
76
- "eval_samples_per_second": 714.103,
77
- "eval_steps_per_second": 44.648,
78
  "step": 1998
79
  },
80
  {
@@ -87,9 +87,9 @@
87
  {
88
  "epoch": 7.0,
89
  "eval_loss": 1.5227739810943604,
90
- "eval_runtime": 7.5807,
91
- "eval_samples_per_second": 702.576,
92
- "eval_steps_per_second": 43.928,
93
  "step": 2331
94
  },
95
  {
@@ -102,17 +102,17 @@
102
  {
103
  "epoch": 8.0,
104
  "eval_loss": 1.448709487915039,
105
- "eval_runtime": 7.4971,
106
- "eval_samples_per_second": 710.405,
107
- "eval_steps_per_second": 44.417,
108
  "step": 2664
109
  },
110
  {
111
  "epoch": 9.0,
112
  "eval_loss": NaN,
113
- "eval_runtime": 7.416,
114
- "eval_samples_per_second": 718.181,
115
- "eval_steps_per_second": 44.903,
116
  "step": 2997
117
  },
118
  {
@@ -125,9 +125,9 @@
125
  {
126
  "epoch": 10.0,
127
  "eval_loss": NaN,
128
- "eval_runtime": 7.5506,
129
- "eval_samples_per_second": 705.37,
130
- "eval_steps_per_second": 44.102,
131
  "step": 3330
132
  },
133
  {
@@ -140,17 +140,17 @@
140
  {
141
  "epoch": 11.0,
142
  "eval_loss": 1.2851027250289917,
143
- "eval_runtime": 7.3621,
144
- "eval_samples_per_second": 723.436,
145
- "eval_steps_per_second": 45.232,
146
  "step": 3663
147
  },
148
  {
149
  "epoch": 12.0,
150
  "eval_loss": 1.2175213098526,
151
- "eval_runtime": 7.3326,
152
- "eval_samples_per_second": 726.348,
153
- "eval_steps_per_second": 45.414,
154
  "step": 3996
155
  },
156
  {
@@ -163,9 +163,9 @@
163
  {
164
  "epoch": 13.0,
165
  "eval_loss": NaN,
166
- "eval_runtime": 7.2691,
167
- "eval_samples_per_second": 732.695,
168
- "eval_steps_per_second": 45.811,
169
  "step": 4329
170
  },
171
  {
@@ -178,9 +178,9 @@
178
  {
179
  "epoch": 14.0,
180
  "eval_loss": 1.1719356775283813,
181
- "eval_runtime": 7.2773,
182
- "eval_samples_per_second": 731.868,
183
- "eval_steps_per_second": 45.759,
184
  "step": 4662
185
  }
186
  ],
 
11
  {
12
  "epoch": 1.0,
13
  "eval_loss": 2.5209195613861084,
14
+ "eval_runtime": 7.238,
15
+ "eval_samples_per_second": 735.839,
16
+ "eval_steps_per_second": 46.007,
17
  "step": 333
18
  },
19
  {
 
26
  {
27
  "epoch": 2.0,
28
  "eval_loss": 2.278496742248535,
29
+ "eval_runtime": 7.3227,
30
+ "eval_samples_per_second": 727.328,
31
+ "eval_steps_per_second": 45.475,
32
  "step": 666
33
  },
34
  {
35
  "epoch": 3.0,
36
  "eval_loss": NaN,
37
+ "eval_runtime": 7.4657,
38
+ "eval_samples_per_second": 713.401,
39
+ "eval_steps_per_second": 44.604,
40
  "step": 999
41
  },
42
  {
 
49
  {
50
  "epoch": 4.0,
51
  "eval_loss": NaN,
52
+ "eval_runtime": 7.4388,
53
+ "eval_samples_per_second": 715.977,
54
+ "eval_steps_per_second": 44.765,
55
  "step": 1332
56
  },
57
  {
 
64
  {
65
  "epoch": 5.0,
66
  "eval_loss": 1.7883461713790894,
67
+ "eval_runtime": 7.3242,
68
+ "eval_samples_per_second": 727.175,
69
+ "eval_steps_per_second": 45.466,
70
  "step": 1665
71
  },
72
  {
73
  "epoch": 6.0,
74
  "eval_loss": 1.617417573928833,
75
+ "eval_runtime": 7.305,
76
+ "eval_samples_per_second": 729.085,
77
+ "eval_steps_per_second": 45.585,
78
  "step": 1998
79
  },
80
  {
 
87
  {
88
  "epoch": 7.0,
89
  "eval_loss": 1.5227739810943604,
90
+ "eval_runtime": 7.3236,
91
+ "eval_samples_per_second": 727.238,
92
+ "eval_steps_per_second": 45.469,
93
  "step": 2331
94
  },
95
  {
 
102
  {
103
  "epoch": 8.0,
104
  "eval_loss": 1.448709487915039,
105
+ "eval_runtime": 7.3349,
106
+ "eval_samples_per_second": 726.119,
107
+ "eval_steps_per_second": 45.399,
108
  "step": 2664
109
  },
110
  {
111
  "epoch": 9.0,
112
  "eval_loss": NaN,
113
+ "eval_runtime": 7.4098,
114
+ "eval_samples_per_second": 718.774,
115
+ "eval_steps_per_second": 44.94,
116
  "step": 2997
117
  },
118
  {
 
125
  {
126
  "epoch": 10.0,
127
  "eval_loss": NaN,
128
+ "eval_runtime": 7.4467,
129
+ "eval_samples_per_second": 715.218,
130
+ "eval_steps_per_second": 44.718,
131
  "step": 3330
132
  },
133
  {
 
140
  {
141
  "epoch": 11.0,
142
  "eval_loss": 1.2851027250289917,
143
+ "eval_runtime": 7.5809,
144
+ "eval_samples_per_second": 702.554,
145
+ "eval_steps_per_second": 43.926,
146
  "step": 3663
147
  },
148
  {
149
  "epoch": 12.0,
150
  "eval_loss": 1.2175213098526,
151
+ "eval_runtime": 7.6804,
152
+ "eval_samples_per_second": 693.45,
153
+ "eval_steps_per_second": 43.357,
154
  "step": 3996
155
  },
156
  {
 
163
  {
164
  "epoch": 13.0,
165
  "eval_loss": NaN,
166
+ "eval_runtime": 7.5087,
167
+ "eval_samples_per_second": 709.31,
168
+ "eval_steps_per_second": 44.349,
169
  "step": 4329
170
  },
171
  {
 
178
  {
179
  "epoch": 14.0,
180
  "eval_loss": 1.1719356775283813,
181
+ "eval_runtime": 7.5831,
182
+ "eval_samples_per_second": 702.355,
183
+ "eval_steps_per_second": 43.914,
184
  "step": 4662
185
  }
186
  ],
bert_embeddings_finetune/checkpoint-4995/config.json CHANGED
@@ -19,7 +19,7 @@
19
  "pad_token_id": 0,
20
  "position_embedding_type": "absolute",
21
  "torch_dtype": "float32",
22
- "transformers_version": "4.48.0",
23
  "type_vocab_size": 2,
24
  "use_cache": true,
25
  "vocab_size": 30522
 
19
  "pad_token_id": 0,
20
  "position_embedding_type": "absolute",
21
  "torch_dtype": "float32",
22
+ "transformers_version": "4.48.2",
23
  "type_vocab_size": 2,
24
  "use_cache": true,
25
  "vocab_size": 30522
bert_embeddings_finetune/checkpoint-4995/generation_config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
  "_from_model_config": true,
3
  "pad_token_id": 0,
4
- "transformers_version": "4.48.0"
5
  }
 
1
  {
2
  "_from_model_config": true,
3
  "pad_token_id": 0,
4
+ "transformers_version": "4.48.2"
5
  }
bert_embeddings_finetune/checkpoint-4995/trainer_state.json CHANGED
@@ -11,9 +11,9 @@
11
  {
12
  "epoch": 1.0,
13
  "eval_loss": 2.5209195613861084,
14
- "eval_runtime": 7.3239,
15
- "eval_samples_per_second": 727.207,
16
- "eval_steps_per_second": 45.468,
17
  "step": 333
18
  },
19
  {
@@ -26,17 +26,17 @@
26
  {
27
  "epoch": 2.0,
28
  "eval_loss": 2.278496742248535,
29
- "eval_runtime": 7.4414,
30
- "eval_samples_per_second": 715.73,
31
- "eval_steps_per_second": 44.75,
32
  "step": 666
33
  },
34
  {
35
  "epoch": 3.0,
36
  "eval_loss": NaN,
37
- "eval_runtime": 7.4958,
38
- "eval_samples_per_second": 710.531,
39
- "eval_steps_per_second": 44.425,
40
  "step": 999
41
  },
42
  {
@@ -49,9 +49,9 @@
49
  {
50
  "epoch": 4.0,
51
  "eval_loss": NaN,
52
- "eval_runtime": 7.4729,
53
- "eval_samples_per_second": 712.709,
54
- "eval_steps_per_second": 44.561,
55
  "step": 1332
56
  },
57
  {
@@ -64,17 +64,17 @@
64
  {
65
  "epoch": 5.0,
66
  "eval_loss": 1.7883461713790894,
67
- "eval_runtime": 7.4123,
68
- "eval_samples_per_second": 718.54,
69
- "eval_steps_per_second": 44.926,
70
  "step": 1665
71
  },
72
  {
73
  "epoch": 6.0,
74
  "eval_loss": 1.617417573928833,
75
- "eval_runtime": 7.4583,
76
- "eval_samples_per_second": 714.103,
77
- "eval_steps_per_second": 44.648,
78
  "step": 1998
79
  },
80
  {
@@ -87,9 +87,9 @@
87
  {
88
  "epoch": 7.0,
89
  "eval_loss": 1.5227739810943604,
90
- "eval_runtime": 7.5807,
91
- "eval_samples_per_second": 702.576,
92
- "eval_steps_per_second": 43.928,
93
  "step": 2331
94
  },
95
  {
@@ -102,17 +102,17 @@
102
  {
103
  "epoch": 8.0,
104
  "eval_loss": 1.448709487915039,
105
- "eval_runtime": 7.4971,
106
- "eval_samples_per_second": 710.405,
107
- "eval_steps_per_second": 44.417,
108
  "step": 2664
109
  },
110
  {
111
  "epoch": 9.0,
112
  "eval_loss": NaN,
113
- "eval_runtime": 7.416,
114
- "eval_samples_per_second": 718.181,
115
- "eval_steps_per_second": 44.903,
116
  "step": 2997
117
  },
118
  {
@@ -125,9 +125,9 @@
125
  {
126
  "epoch": 10.0,
127
  "eval_loss": NaN,
128
- "eval_runtime": 7.5506,
129
- "eval_samples_per_second": 705.37,
130
- "eval_steps_per_second": 44.102,
131
  "step": 3330
132
  },
133
  {
@@ -140,17 +140,17 @@
140
  {
141
  "epoch": 11.0,
142
  "eval_loss": 1.2851027250289917,
143
- "eval_runtime": 7.3621,
144
- "eval_samples_per_second": 723.436,
145
- "eval_steps_per_second": 45.232,
146
  "step": 3663
147
  },
148
  {
149
  "epoch": 12.0,
150
  "eval_loss": 1.2175213098526,
151
- "eval_runtime": 7.3326,
152
- "eval_samples_per_second": 726.348,
153
- "eval_steps_per_second": 45.414,
154
  "step": 3996
155
  },
156
  {
@@ -163,9 +163,9 @@
163
  {
164
  "epoch": 13.0,
165
  "eval_loss": NaN,
166
- "eval_runtime": 7.2691,
167
- "eval_samples_per_second": 732.695,
168
- "eval_steps_per_second": 45.811,
169
  "step": 4329
170
  },
171
  {
@@ -178,17 +178,17 @@
178
  {
179
  "epoch": 14.0,
180
  "eval_loss": 1.1719356775283813,
181
- "eval_runtime": 7.2773,
182
- "eval_samples_per_second": 731.868,
183
- "eval_steps_per_second": 45.759,
184
  "step": 4662
185
  },
186
  {
187
  "epoch": 15.0,
188
  "eval_loss": 1.2066022157669067,
189
- "eval_runtime": 7.2996,
190
- "eval_samples_per_second": 729.633,
191
- "eval_steps_per_second": 45.619,
192
  "step": 4995
193
  }
194
  ],
 
11
  {
12
  "epoch": 1.0,
13
  "eval_loss": 2.5209195613861084,
14
+ "eval_runtime": 7.238,
15
+ "eval_samples_per_second": 735.839,
16
+ "eval_steps_per_second": 46.007,
17
  "step": 333
18
  },
19
  {
 
26
  {
27
  "epoch": 2.0,
28
  "eval_loss": 2.278496742248535,
29
+ "eval_runtime": 7.3227,
30
+ "eval_samples_per_second": 727.328,
31
+ "eval_steps_per_second": 45.475,
32
  "step": 666
33
  },
34
  {
35
  "epoch": 3.0,
36
  "eval_loss": NaN,
37
+ "eval_runtime": 7.4657,
38
+ "eval_samples_per_second": 713.401,
39
+ "eval_steps_per_second": 44.604,
40
  "step": 999
41
  },
42
  {
 
49
  {
50
  "epoch": 4.0,
51
  "eval_loss": NaN,
52
+ "eval_runtime": 7.4388,
53
+ "eval_samples_per_second": 715.977,
54
+ "eval_steps_per_second": 44.765,
55
  "step": 1332
56
  },
57
  {
 
64
  {
65
  "epoch": 5.0,
66
  "eval_loss": 1.7883461713790894,
67
+ "eval_runtime": 7.3242,
68
+ "eval_samples_per_second": 727.175,
69
+ "eval_steps_per_second": 45.466,
70
  "step": 1665
71
  },
72
  {
73
  "epoch": 6.0,
74
  "eval_loss": 1.617417573928833,
75
+ "eval_runtime": 7.305,
76
+ "eval_samples_per_second": 729.085,
77
+ "eval_steps_per_second": 45.585,
78
  "step": 1998
79
  },
80
  {
 
87
  {
88
  "epoch": 7.0,
89
  "eval_loss": 1.5227739810943604,
90
+ "eval_runtime": 7.3236,
91
+ "eval_samples_per_second": 727.238,
92
+ "eval_steps_per_second": 45.469,
93
  "step": 2331
94
  },
95
  {
 
102
  {
103
  "epoch": 8.0,
104
  "eval_loss": 1.448709487915039,
105
+ "eval_runtime": 7.3349,
106
+ "eval_samples_per_second": 726.119,
107
+ "eval_steps_per_second": 45.399,
108
  "step": 2664
109
  },
110
  {
111
  "epoch": 9.0,
112
  "eval_loss": NaN,
113
+ "eval_runtime": 7.4098,
114
+ "eval_samples_per_second": 718.774,
115
+ "eval_steps_per_second": 44.94,
116
  "step": 2997
117
  },
118
  {
 
125
  {
126
  "epoch": 10.0,
127
  "eval_loss": NaN,
128
+ "eval_runtime": 7.4467,
129
+ "eval_samples_per_second": 715.218,
130
+ "eval_steps_per_second": 44.718,
131
  "step": 3330
132
  },
133
  {
 
140
  {
141
  "epoch": 11.0,
142
  "eval_loss": 1.2851027250289917,
143
+ "eval_runtime": 7.5809,
144
+ "eval_samples_per_second": 702.554,
145
+ "eval_steps_per_second": 43.926,
146
  "step": 3663
147
  },
148
  {
149
  "epoch": 12.0,
150
  "eval_loss": 1.2175213098526,
151
+ "eval_runtime": 7.6804,
152
+ "eval_samples_per_second": 693.45,
153
+ "eval_steps_per_second": 43.357,
154
  "step": 3996
155
  },
156
  {
 
163
  {
164
  "epoch": 13.0,
165
  "eval_loss": NaN,
166
+ "eval_runtime": 7.5087,
167
+ "eval_samples_per_second": 709.31,
168
+ "eval_steps_per_second": 44.349,
169
  "step": 4329
170
  },
171
  {
 
178
  {
179
  "epoch": 14.0,
180
  "eval_loss": 1.1719356775283813,
181
+ "eval_runtime": 7.5831,
182
+ "eval_samples_per_second": 702.355,
183
+ "eval_steps_per_second": 43.914,
184
  "step": 4662
185
  },
186
  {
187
  "epoch": 15.0,
188
  "eval_loss": 1.2066022157669067,
189
+ "eval_runtime": 7.6392,
190
+ "eval_samples_per_second": 697.198,
191
+ "eval_steps_per_second": 43.591,
192
  "step": 4995
193
  }
194
  ],
bert_embeddings_finetune/config.json CHANGED
@@ -19,7 +19,7 @@
19
  "pad_token_id": 0,
20
  "position_embedding_type": "absolute",
21
  "torch_dtype": "float32",
22
- "transformers_version": "4.48.0",
23
  "type_vocab_size": 2,
24
  "use_cache": true,
25
  "vocab_size": 30522
 
19
  "pad_token_id": 0,
20
  "position_embedding_type": "absolute",
21
  "torch_dtype": "float32",
22
+ "transformers_version": "4.48.2",
23
  "type_vocab_size": 2,
24
  "use_cache": true,
25
  "vocab_size": 30522
bert_embeddings_finetune/generation_config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
  "_from_model_config": true,
3
  "pad_token_id": 0,
4
- "transformers_version": "4.48.0"
5
  }
 
1
  {
2
  "_from_model_config": true,
3
  "pad_token_id": 0,
4
+ "transformers_version": "4.48.2"
5
  }