HK0712 commited on
Commit
cb2f31e
·
1 Parent(s): c49a63a
bert_sentiment_model/checkpoint-10252/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:873e0bfc4370c12d3d114845be4849002d2871bc76ff75ef1a25526addb6008d
3
  size 267835644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45f05fef0c477156a0ee0355c569be98ea80244f4e2c86194707b971086dbba6
3
  size 267835644
bert_sentiment_model/checkpoint-10252/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ce435dc5353a073f95c1258305b3f4eaf966a71d457368b94b343212f087bc9
3
  size 535733434
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e08d2d6e825e0432082a9a0f7f907decfe780f3cad7ed2d1fc00c0e2cfefff2
3
  size 535733434
bert_sentiment_model/checkpoint-10252/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 5126,
3
- "best_metric": 0.14533720910549164,
4
  "best_model_checkpoint": "bert_sentiment_model/checkpoint-5126",
5
  "epoch": 2.0,
6
  "eval_steps": 500,
@@ -11,160 +11,160 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.09754194303550527,
14
- "grad_norm": 1.0485230684280396,
15
  "learning_rate": 1.9351020939003774e-05,
16
- "loss": 0.2321,
17
  "step": 500
18
  },
19
  {
20
  "epoch": 0.19508388607101054,
21
- "grad_norm": 3.7028684616088867,
22
  "learning_rate": 1.870074131876707e-05,
23
- "loss": 0.1714,
24
  "step": 1000
25
  },
26
  {
27
  "epoch": 0.2926258291065158,
28
- "grad_norm": 0.6240711212158203,
29
  "learning_rate": 1.805046169853037e-05,
30
- "loss": 0.1624,
31
  "step": 1500
32
  },
33
  {
34
  "epoch": 0.3901677721420211,
35
- "grad_norm": 1.1150747537612915,
36
  "learning_rate": 1.7400182078293668e-05,
37
- "loss": 0.1627,
38
  "step": 2000
39
  },
40
  {
41
  "epoch": 0.4877097151775263,
42
- "grad_norm": 0.31245458126068115,
43
  "learning_rate": 1.6749902458056965e-05,
44
- "loss": 0.1477,
45
  "step": 2500
46
  },
47
  {
48
  "epoch": 0.5852516582130316,
49
- "grad_norm": 1.4625613689422607,
50
  "learning_rate": 1.6099622837820262e-05,
51
- "loss": 0.1574,
52
  "step": 3000
53
  },
54
  {
55
  "epoch": 0.6827936012485368,
56
- "grad_norm": 0.7397491931915283,
57
  "learning_rate": 1.5449343217583563e-05,
58
- "loss": 0.146,
59
  "step": 3500
60
  },
61
  {
62
  "epoch": 0.7803355442840422,
63
- "grad_norm": 3.28501033782959,
64
  "learning_rate": 1.4799063597346861e-05,
65
- "loss": 0.1471,
66
  "step": 4000
67
  },
68
  {
69
  "epoch": 0.8778774873195474,
70
- "grad_norm": 5.020526885986328,
71
  "learning_rate": 1.4148783977110158e-05,
72
- "loss": 0.1509,
73
  "step": 4500
74
  },
75
  {
76
  "epoch": 0.9754194303550526,
77
- "grad_norm": 3.897925615310669,
78
  "learning_rate": 1.3498504356873457e-05,
79
- "loss": 0.1454,
80
  "step": 5000
81
  },
82
  {
83
  "epoch": 1.0,
84
- "eval_accuracy": 0.9512789875393206,
85
- "eval_loss": 0.14533720910549164,
86
- "eval_runtime": 74.7449,
87
- "eval_samples_per_second": 548.653,
88
- "eval_steps_per_second": 17.152,
89
  "step": 5126
90
  },
91
  {
92
  "epoch": 1.0729613733905579,
93
- "grad_norm": 1.9805785417556763,
94
  "learning_rate": 1.2848224736636754e-05,
95
- "loss": 0.13,
96
  "step": 5500
97
  },
98
  {
99
  "epoch": 1.1705033164260632,
100
- "grad_norm": 1.3755881786346436,
101
  "learning_rate": 1.2197945116400054e-05,
102
- "loss": 0.1276,
103
  "step": 6000
104
  },
105
  {
106
  "epoch": 1.2680452594615685,
107
- "grad_norm": 3.5536131858825684,
108
  "learning_rate": 1.1547665496163351e-05,
109
  "loss": 0.1185,
110
  "step": 6500
111
  },
112
  {
113
  "epoch": 1.3655872024970737,
114
- "grad_norm": 1.3367644548416138,
115
  "learning_rate": 1.089738587592665e-05,
116
- "loss": 0.1262,
117
  "step": 7000
118
  },
119
  {
120
  "epoch": 1.463129145532579,
121
- "grad_norm": 3.0645148754119873,
122
  "learning_rate": 1.0247106255689946e-05,
123
- "loss": 0.1253,
124
  "step": 7500
125
  },
126
  {
127
  "epoch": 1.5606710885680841,
128
- "grad_norm": 0.6243263483047485,
129
  "learning_rate": 9.596826635453245e-06,
130
- "loss": 0.1209,
131
  "step": 8000
132
  },
133
  {
134
  "epoch": 1.6582130316035895,
135
- "grad_norm": 4.116105556488037,
136
  "learning_rate": 8.946547015216544e-06,
137
  "loss": 0.1242,
138
  "step": 8500
139
  },
140
  {
141
  "epoch": 1.7557549746390948,
142
- "grad_norm": 4.457709789276123,
143
  "learning_rate": 8.296267394979842e-06,
144
- "loss": 0.1267,
145
  "step": 9000
146
  },
147
  {
148
  "epoch": 1.8532969176746001,
149
- "grad_norm": 0.8020169734954834,
150
  "learning_rate": 7.64598777474314e-06,
151
- "loss": 0.1289,
152
  "step": 9500
153
  },
154
  {
155
  "epoch": 1.9508388607101055,
156
- "grad_norm": 3.107414722442627,
157
  "learning_rate": 6.995708154506439e-06,
158
- "loss": 0.1166,
159
  "step": 10000
160
  },
161
  {
162
  "epoch": 2.0,
163
- "eval_accuracy": 0.9529371601355799,
164
- "eval_loss": 0.1457434594631195,
165
- "eval_runtime": 74.98,
166
- "eval_samples_per_second": 546.933,
167
- "eval_steps_per_second": 17.098,
168
  "step": 10252
169
  }
170
  ],
 
1
  {
2
  "best_global_step": 5126,
3
+ "best_metric": 0.14522501826286316,
4
  "best_model_checkpoint": "bert_sentiment_model/checkpoint-5126",
5
  "epoch": 2.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.09754194303550527,
14
+ "grad_norm": 0.7001327872276306,
15
  "learning_rate": 1.9351020939003774e-05,
16
+ "loss": 0.2293,
17
  "step": 500
18
  },
19
  {
20
  "epoch": 0.19508388607101054,
21
+ "grad_norm": 4.297205924987793,
22
  "learning_rate": 1.870074131876707e-05,
23
+ "loss": 0.1698,
24
  "step": 1000
25
  },
26
  {
27
  "epoch": 0.2926258291065158,
28
+ "grad_norm": 1.2200896739959717,
29
  "learning_rate": 1.805046169853037e-05,
30
+ "loss": 0.1634,
31
  "step": 1500
32
  },
33
  {
34
  "epoch": 0.3901677721420211,
35
+ "grad_norm": 1.0850228071212769,
36
  "learning_rate": 1.7400182078293668e-05,
37
+ "loss": 0.1635,
38
  "step": 2000
39
  },
40
  {
41
  "epoch": 0.4877097151775263,
42
+ "grad_norm": 0.29681870341300964,
43
  "learning_rate": 1.6749902458056965e-05,
44
+ "loss": 0.1485,
45
  "step": 2500
46
  },
47
  {
48
  "epoch": 0.5852516582130316,
49
+ "grad_norm": 1.5155694484710693,
50
  "learning_rate": 1.6099622837820262e-05,
51
+ "loss": 0.1566,
52
  "step": 3000
53
  },
54
  {
55
  "epoch": 0.6827936012485368,
56
+ "grad_norm": 0.7628584504127502,
57
  "learning_rate": 1.5449343217583563e-05,
58
+ "loss": 0.1469,
59
  "step": 3500
60
  },
61
  {
62
  "epoch": 0.7803355442840422,
63
+ "grad_norm": 2.7121143341064453,
64
  "learning_rate": 1.4799063597346861e-05,
65
+ "loss": 0.1467,
66
  "step": 4000
67
  },
68
  {
69
  "epoch": 0.8778774873195474,
70
+ "grad_norm": 4.958296298980713,
71
  "learning_rate": 1.4148783977110158e-05,
72
+ "loss": 0.1512,
73
  "step": 4500
74
  },
75
  {
76
  "epoch": 0.9754194303550526,
77
+ "grad_norm": 3.2470967769622803,
78
  "learning_rate": 1.3498504356873457e-05,
79
+ "loss": 0.1445,
80
  "step": 5000
81
  },
82
  {
83
  "epoch": 1.0,
84
+ "eval_accuracy": 0.9502548221122192,
85
+ "eval_loss": 0.14522501826286316,
86
+ "eval_runtime": 77.7071,
87
+ "eval_samples_per_second": 527.738,
88
+ "eval_steps_per_second": 16.498,
89
  "step": 5126
90
  },
91
  {
92
  "epoch": 1.0729613733905579,
93
+ "grad_norm": 1.8167076110839844,
94
  "learning_rate": 1.2848224736636754e-05,
95
+ "loss": 0.1307,
96
  "step": 5500
97
  },
98
  {
99
  "epoch": 1.1705033164260632,
100
+ "grad_norm": 1.4334732294082642,
101
  "learning_rate": 1.2197945116400054e-05,
102
+ "loss": 0.1273,
103
  "step": 6000
104
  },
105
  {
106
  "epoch": 1.2680452594615685,
107
+ "grad_norm": 3.130772590637207,
108
  "learning_rate": 1.1547665496163351e-05,
109
  "loss": 0.1185,
110
  "step": 6500
111
  },
112
  {
113
  "epoch": 1.3655872024970737,
114
+ "grad_norm": 1.2805956602096558,
115
  "learning_rate": 1.089738587592665e-05,
116
+ "loss": 0.1254,
117
  "step": 7000
118
  },
119
  {
120
  "epoch": 1.463129145532579,
121
+ "grad_norm": 6.393189907073975,
122
  "learning_rate": 1.0247106255689946e-05,
123
+ "loss": 0.1246,
124
  "step": 7500
125
  },
126
  {
127
  "epoch": 1.5606710885680841,
128
+ "grad_norm": 0.682952880859375,
129
  "learning_rate": 9.596826635453245e-06,
130
+ "loss": 0.1213,
131
  "step": 8000
132
  },
133
  {
134
  "epoch": 1.6582130316035895,
135
+ "grad_norm": 3.8420190811157227,
136
  "learning_rate": 8.946547015216544e-06,
137
  "loss": 0.1242,
138
  "step": 8500
139
  },
140
  {
141
  "epoch": 1.7557549746390948,
142
+ "grad_norm": 5.53094482421875,
143
  "learning_rate": 8.296267394979842e-06,
144
+ "loss": 0.1266,
145
  "step": 9000
146
  },
147
  {
148
  "epoch": 1.8532969176746001,
149
+ "grad_norm": 2.091581344604492,
150
  "learning_rate": 7.64598777474314e-06,
151
+ "loss": 0.1296,
152
  "step": 9500
153
  },
154
  {
155
  "epoch": 1.9508388607101055,
156
+ "grad_norm": 1.8294498920440674,
157
  "learning_rate": 6.995708154506439e-06,
158
+ "loss": 0.1175,
159
  "step": 10000
160
  },
161
  {
162
  "epoch": 2.0,
163
+ "eval_accuracy": 0.9524006925309079,
164
+ "eval_loss": 0.147334024310112,
165
+ "eval_runtime": 77.7359,
166
+ "eval_samples_per_second": 527.543,
167
+ "eval_steps_per_second": 16.492,
168
  "step": 10252
169
  }
170
  ],
bert_sentiment_model/checkpoint-10252/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:821493876e4c41dba842c49ce5fab3b065233a721a9d4cedcdac3b251c6aa960
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bef23a9df09a87b02e3a926fa1cef1d6042ebe2dff78b46cbd238fd398f8e34
3
  size 5368
bert_sentiment_model/checkpoint-15378/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68b0a910bf6f5c17ac24cb683004cc4075d767dc2160f7b643cacd7ac667869d
3
  size 267835644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:969b24c73843c7220531eedd087942b39833f2227968cc49ec78d6308e3699c9
3
  size 267835644
bert_sentiment_model/checkpoint-15378/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a00717569fd9026ec62b066c6023adbd6790e5956d07de6ad2c7fb500848cb2
3
  size 535733434
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3be434bcf8328bcaf1237acdb938faaaa0f5498001aa4866aabb6186e812dd8a
3
  size 535733434
bert_sentiment_model/checkpoint-15378/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 5126,
3
- "best_metric": 0.14533720910549164,
4
  "best_model_checkpoint": "bert_sentiment_model/checkpoint-5126",
5
  "epoch": 3.0,
6
  "eval_steps": 500,
@@ -11,239 +11,239 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.09754194303550527,
14
- "grad_norm": 1.0485230684280396,
15
  "learning_rate": 1.9351020939003774e-05,
16
- "loss": 0.2321,
17
  "step": 500
18
  },
19
  {
20
  "epoch": 0.19508388607101054,
21
- "grad_norm": 3.7028684616088867,
22
  "learning_rate": 1.870074131876707e-05,
23
- "loss": 0.1714,
24
  "step": 1000
25
  },
26
  {
27
  "epoch": 0.2926258291065158,
28
- "grad_norm": 0.6240711212158203,
29
  "learning_rate": 1.805046169853037e-05,
30
- "loss": 0.1624,
31
  "step": 1500
32
  },
33
  {
34
  "epoch": 0.3901677721420211,
35
- "grad_norm": 1.1150747537612915,
36
  "learning_rate": 1.7400182078293668e-05,
37
- "loss": 0.1627,
38
  "step": 2000
39
  },
40
  {
41
  "epoch": 0.4877097151775263,
42
- "grad_norm": 0.31245458126068115,
43
  "learning_rate": 1.6749902458056965e-05,
44
- "loss": 0.1477,
45
  "step": 2500
46
  },
47
  {
48
  "epoch": 0.5852516582130316,
49
- "grad_norm": 1.4625613689422607,
50
  "learning_rate": 1.6099622837820262e-05,
51
- "loss": 0.1574,
52
  "step": 3000
53
  },
54
  {
55
  "epoch": 0.6827936012485368,
56
- "grad_norm": 0.7397491931915283,
57
  "learning_rate": 1.5449343217583563e-05,
58
- "loss": 0.146,
59
  "step": 3500
60
  },
61
  {
62
  "epoch": 0.7803355442840422,
63
- "grad_norm": 3.28501033782959,
64
  "learning_rate": 1.4799063597346861e-05,
65
- "loss": 0.1471,
66
  "step": 4000
67
  },
68
  {
69
  "epoch": 0.8778774873195474,
70
- "grad_norm": 5.020526885986328,
71
  "learning_rate": 1.4148783977110158e-05,
72
- "loss": 0.1509,
73
  "step": 4500
74
  },
75
  {
76
  "epoch": 0.9754194303550526,
77
- "grad_norm": 3.897925615310669,
78
  "learning_rate": 1.3498504356873457e-05,
79
- "loss": 0.1454,
80
  "step": 5000
81
  },
82
  {
83
  "epoch": 1.0,
84
- "eval_accuracy": 0.9512789875393206,
85
- "eval_loss": 0.14533720910549164,
86
- "eval_runtime": 74.7449,
87
- "eval_samples_per_second": 548.653,
88
- "eval_steps_per_second": 17.152,
89
  "step": 5126
90
  },
91
  {
92
  "epoch": 1.0729613733905579,
93
- "grad_norm": 1.9805785417556763,
94
  "learning_rate": 1.2848224736636754e-05,
95
- "loss": 0.13,
96
  "step": 5500
97
  },
98
  {
99
  "epoch": 1.1705033164260632,
100
- "grad_norm": 1.3755881786346436,
101
  "learning_rate": 1.2197945116400054e-05,
102
- "loss": 0.1276,
103
  "step": 6000
104
  },
105
  {
106
  "epoch": 1.2680452594615685,
107
- "grad_norm": 3.5536131858825684,
108
  "learning_rate": 1.1547665496163351e-05,
109
  "loss": 0.1185,
110
  "step": 6500
111
  },
112
  {
113
  "epoch": 1.3655872024970737,
114
- "grad_norm": 1.3367644548416138,
115
  "learning_rate": 1.089738587592665e-05,
116
- "loss": 0.1262,
117
  "step": 7000
118
  },
119
  {
120
  "epoch": 1.463129145532579,
121
- "grad_norm": 3.0645148754119873,
122
  "learning_rate": 1.0247106255689946e-05,
123
- "loss": 0.1253,
124
  "step": 7500
125
  },
126
  {
127
  "epoch": 1.5606710885680841,
128
- "grad_norm": 0.6243263483047485,
129
  "learning_rate": 9.596826635453245e-06,
130
- "loss": 0.1209,
131
  "step": 8000
132
  },
133
  {
134
  "epoch": 1.6582130316035895,
135
- "grad_norm": 4.116105556488037,
136
  "learning_rate": 8.946547015216544e-06,
137
  "loss": 0.1242,
138
  "step": 8500
139
  },
140
  {
141
  "epoch": 1.7557549746390948,
142
- "grad_norm": 4.457709789276123,
143
  "learning_rate": 8.296267394979842e-06,
144
- "loss": 0.1267,
145
  "step": 9000
146
  },
147
  {
148
  "epoch": 1.8532969176746001,
149
- "grad_norm": 0.8020169734954834,
150
  "learning_rate": 7.64598777474314e-06,
151
- "loss": 0.1289,
152
  "step": 9500
153
  },
154
  {
155
  "epoch": 1.9508388607101055,
156
- "grad_norm": 3.107414722442627,
157
  "learning_rate": 6.995708154506439e-06,
158
- "loss": 0.1166,
159
  "step": 10000
160
  },
161
  {
162
  "epoch": 2.0,
163
- "eval_accuracy": 0.9529371601355799,
164
- "eval_loss": 0.1457434594631195,
165
- "eval_runtime": 74.98,
166
- "eval_samples_per_second": 546.933,
167
- "eval_steps_per_second": 17.098,
168
  "step": 10252
169
  },
170
  {
171
  "epoch": 2.048380803745611,
172
- "grad_norm": 1.5238676071166992,
173
  "learning_rate": 6.345428534269737e-06,
174
- "loss": 0.1146,
175
  "step": 10500
176
  },
177
  {
178
  "epoch": 2.1459227467811157,
179
- "grad_norm": 0.5615836977958679,
180
  "learning_rate": 5.695148914033035e-06,
181
- "loss": 0.0978,
182
  "step": 11000
183
  },
184
  {
185
  "epoch": 2.243464689816621,
186
- "grad_norm": 2.8565406799316406,
187
  "learning_rate": 5.044869293796333e-06,
188
- "loss": 0.1008,
189
  "step": 11500
190
  },
191
  {
192
  "epoch": 2.3410066328521264,
193
- "grad_norm": 0.9610119462013245,
194
  "learning_rate": 4.394589673559631e-06,
195
- "loss": 0.1066,
196
  "step": 12000
197
  },
198
  {
199
  "epoch": 2.4385485758876317,
200
- "grad_norm": 5.30699348449707,
201
  "learning_rate": 3.7443100533229294e-06,
202
  "loss": 0.1002,
203
  "step": 12500
204
  },
205
  {
206
  "epoch": 2.536090518923137,
207
- "grad_norm": 3.4920408725738525,
208
  "learning_rate": 3.0940304330862276e-06,
209
- "loss": 0.0966,
210
  "step": 13000
211
  },
212
  {
213
  "epoch": 2.6336324619586424,
214
- "grad_norm": 1.7962068319320679,
215
  "learning_rate": 2.4437508128495254e-06,
216
- "loss": 0.1044,
217
  "step": 13500
218
  },
219
  {
220
  "epoch": 2.7311744049941473,
221
- "grad_norm": 1.224229097366333,
222
  "learning_rate": 1.7934711926128236e-06,
223
- "loss": 0.0972,
224
  "step": 14000
225
  },
226
  {
227
  "epoch": 2.8287163480296527,
228
- "grad_norm": 1.4311927556991577,
229
  "learning_rate": 1.1431915723761218e-06,
230
- "loss": 0.1059,
231
  "step": 14500
232
  },
233
  {
234
  "epoch": 2.926258291065158,
235
- "grad_norm": 0.935365617275238,
236
  "learning_rate": 4.9291195213942e-07,
237
- "loss": 0.1074,
238
  "step": 15000
239
  },
240
  {
241
  "epoch": 3.0,
242
- "eval_accuracy": 0.9533273183935234,
243
- "eval_loss": 0.15521423518657684,
244
- "eval_runtime": 74.605,
245
- "eval_samples_per_second": 549.682,
246
- "eval_steps_per_second": 17.184,
247
  "step": 15378
248
  }
249
  ],
 
1
  {
2
  "best_global_step": 5126,
3
+ "best_metric": 0.14522501826286316,
4
  "best_model_checkpoint": "bert_sentiment_model/checkpoint-5126",
5
  "epoch": 3.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.09754194303550527,
14
+ "grad_norm": 0.7001327872276306,
15
  "learning_rate": 1.9351020939003774e-05,
16
+ "loss": 0.2293,
17
  "step": 500
18
  },
19
  {
20
  "epoch": 0.19508388607101054,
21
+ "grad_norm": 4.297205924987793,
22
  "learning_rate": 1.870074131876707e-05,
23
+ "loss": 0.1698,
24
  "step": 1000
25
  },
26
  {
27
  "epoch": 0.2926258291065158,
28
+ "grad_norm": 1.2200896739959717,
29
  "learning_rate": 1.805046169853037e-05,
30
+ "loss": 0.1634,
31
  "step": 1500
32
  },
33
  {
34
  "epoch": 0.3901677721420211,
35
+ "grad_norm": 1.0850228071212769,
36
  "learning_rate": 1.7400182078293668e-05,
37
+ "loss": 0.1635,
38
  "step": 2000
39
  },
40
  {
41
  "epoch": 0.4877097151775263,
42
+ "grad_norm": 0.29681870341300964,
43
  "learning_rate": 1.6749902458056965e-05,
44
+ "loss": 0.1485,
45
  "step": 2500
46
  },
47
  {
48
  "epoch": 0.5852516582130316,
49
+ "grad_norm": 1.5155694484710693,
50
  "learning_rate": 1.6099622837820262e-05,
51
+ "loss": 0.1566,
52
  "step": 3000
53
  },
54
  {
55
  "epoch": 0.6827936012485368,
56
+ "grad_norm": 0.7628584504127502,
57
  "learning_rate": 1.5449343217583563e-05,
58
+ "loss": 0.1469,
59
  "step": 3500
60
  },
61
  {
62
  "epoch": 0.7803355442840422,
63
+ "grad_norm": 2.7121143341064453,
64
  "learning_rate": 1.4799063597346861e-05,
65
+ "loss": 0.1467,
66
  "step": 4000
67
  },
68
  {
69
  "epoch": 0.8778774873195474,
70
+ "grad_norm": 4.958296298980713,
71
  "learning_rate": 1.4148783977110158e-05,
72
+ "loss": 0.1512,
73
  "step": 4500
74
  },
75
  {
76
  "epoch": 0.9754194303550526,
77
+ "grad_norm": 3.2470967769622803,
78
  "learning_rate": 1.3498504356873457e-05,
79
+ "loss": 0.1445,
80
  "step": 5000
81
  },
82
  {
83
  "epoch": 1.0,
84
+ "eval_accuracy": 0.9502548221122192,
85
+ "eval_loss": 0.14522501826286316,
86
+ "eval_runtime": 77.7071,
87
+ "eval_samples_per_second": 527.738,
88
+ "eval_steps_per_second": 16.498,
89
  "step": 5126
90
  },
91
  {
92
  "epoch": 1.0729613733905579,
93
+ "grad_norm": 1.8167076110839844,
94
  "learning_rate": 1.2848224736636754e-05,
95
+ "loss": 0.1307,
96
  "step": 5500
97
  },
98
  {
99
  "epoch": 1.1705033164260632,
100
+ "grad_norm": 1.4334732294082642,
101
  "learning_rate": 1.2197945116400054e-05,
102
+ "loss": 0.1273,
103
  "step": 6000
104
  },
105
  {
106
  "epoch": 1.2680452594615685,
107
+ "grad_norm": 3.130772590637207,
108
  "learning_rate": 1.1547665496163351e-05,
109
  "loss": 0.1185,
110
  "step": 6500
111
  },
112
  {
113
  "epoch": 1.3655872024970737,
114
+ "grad_norm": 1.2805956602096558,
115
  "learning_rate": 1.089738587592665e-05,
116
+ "loss": 0.1254,
117
  "step": 7000
118
  },
119
  {
120
  "epoch": 1.463129145532579,
121
+ "grad_norm": 6.393189907073975,
122
  "learning_rate": 1.0247106255689946e-05,
123
+ "loss": 0.1246,
124
  "step": 7500
125
  },
126
  {
127
  "epoch": 1.5606710885680841,
128
+ "grad_norm": 0.682952880859375,
129
  "learning_rate": 9.596826635453245e-06,
130
+ "loss": 0.1213,
131
  "step": 8000
132
  },
133
  {
134
  "epoch": 1.6582130316035895,
135
+ "grad_norm": 3.8420190811157227,
136
  "learning_rate": 8.946547015216544e-06,
137
  "loss": 0.1242,
138
  "step": 8500
139
  },
140
  {
141
  "epoch": 1.7557549746390948,
142
+ "grad_norm": 5.53094482421875,
143
  "learning_rate": 8.296267394979842e-06,
144
+ "loss": 0.1266,
145
  "step": 9000
146
  },
147
  {
148
  "epoch": 1.8532969176746001,
149
+ "grad_norm": 2.091581344604492,
150
  "learning_rate": 7.64598777474314e-06,
151
+ "loss": 0.1296,
152
  "step": 9500
153
  },
154
  {
155
  "epoch": 1.9508388607101055,
156
+ "grad_norm": 1.8294498920440674,
157
  "learning_rate": 6.995708154506439e-06,
158
+ "loss": 0.1175,
159
  "step": 10000
160
  },
161
  {
162
  "epoch": 2.0,
163
+ "eval_accuracy": 0.9524006925309079,
164
+ "eval_loss": 0.147334024310112,
165
+ "eval_runtime": 77.7359,
166
+ "eval_samples_per_second": 527.543,
167
+ "eval_steps_per_second": 16.492,
168
  "step": 10252
169
  },
170
  {
171
  "epoch": 2.048380803745611,
172
+ "grad_norm": 0.9975255727767944,
173
  "learning_rate": 6.345428534269737e-06,
174
+ "loss": 0.1142,
175
  "step": 10500
176
  },
177
  {
178
  "epoch": 2.1459227467811157,
179
+ "grad_norm": 0.9234778881072998,
180
  "learning_rate": 5.695148914033035e-06,
181
+ "loss": 0.0986,
182
  "step": 11000
183
  },
184
  {
185
  "epoch": 2.243464689816621,
186
+ "grad_norm": 2.015749216079712,
187
  "learning_rate": 5.044869293796333e-06,
188
+ "loss": 0.1014,
189
  "step": 11500
190
  },
191
  {
192
  "epoch": 2.3410066328521264,
193
+ "grad_norm": 1.0170682668685913,
194
  "learning_rate": 4.394589673559631e-06,
195
+ "loss": 0.1075,
196
  "step": 12000
197
  },
198
  {
199
  "epoch": 2.4385485758876317,
200
+ "grad_norm": 4.830516815185547,
201
  "learning_rate": 3.7443100533229294e-06,
202
  "loss": 0.1002,
203
  "step": 12500
204
  },
205
  {
206
  "epoch": 2.536090518923137,
207
+ "grad_norm": 4.4720377922058105,
208
  "learning_rate": 3.0940304330862276e-06,
209
+ "loss": 0.0968,
210
  "step": 13000
211
  },
212
  {
213
  "epoch": 2.6336324619586424,
214
+ "grad_norm": 1.8125988245010376,
215
  "learning_rate": 2.4437508128495254e-06,
216
+ "loss": 0.1053,
217
  "step": 13500
218
  },
219
  {
220
  "epoch": 2.7311744049941473,
221
+ "grad_norm": 2.168368101119995,
222
  "learning_rate": 1.7934711926128236e-06,
223
+ "loss": 0.0984,
224
  "step": 14000
225
  },
226
  {
227
  "epoch": 2.8287163480296527,
228
+ "grad_norm": 1.9152271747589111,
229
  "learning_rate": 1.1431915723761218e-06,
230
+ "loss": 0.1065,
231
  "step": 14500
232
  },
233
  {
234
  "epoch": 2.926258291065158,
235
+ "grad_norm": 1.1602133512496948,
236
  "learning_rate": 4.9291195213942e-07,
237
+ "loss": 0.1058,
238
  "step": 15000
239
  },
240
  {
241
  "epoch": 3.0,
242
+ "eval_accuracy": 0.9531322392645517,
243
+ "eval_loss": 0.1548861563205719,
244
+ "eval_runtime": 77.3477,
245
+ "eval_samples_per_second": 530.19,
246
+ "eval_steps_per_second": 16.575,
247
  "step": 15378
248
  }
249
  ],
bert_sentiment_model/checkpoint-15378/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:821493876e4c41dba842c49ce5fab3b065233a721a9d4cedcdac3b251c6aa960
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bef23a9df09a87b02e3a926fa1cef1d6042ebe2dff78b46cbd238fd398f8e34
3
  size 5368
bert_sentiment_model/checkpoint-5126/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dec3328f2136cd119e3e22196d97cfabe5f711be78842ec0fe551baad38456b0
3
  size 267835644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bfabd5bd5d8b50816a72f021346d8f92e6559146162f66ea47e8af3d8239663
3
  size 267835644
bert_sentiment_model/checkpoint-5126/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e60f8ba4cc8d1bca385deeba83bf27f0b2d3b9c39e8de40068d97b0d194fc21b
3
  size 535733434
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dc5d01664fff60d7e83241bc732fb15b5a9a0c1df8024101d648b6104539c65
3
  size 535733434
bert_sentiment_model/checkpoint-5126/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 5126,
3
- "best_metric": 0.14533720910549164,
4
  "best_model_checkpoint": "bert_sentiment_model/checkpoint-5126",
5
  "epoch": 1.0,
6
  "eval_steps": 500,
@@ -11,81 +11,81 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.09754194303550527,
14
- "grad_norm": 1.0485230684280396,
15
  "learning_rate": 1.9351020939003774e-05,
16
- "loss": 0.2321,
17
  "step": 500
18
  },
19
  {
20
  "epoch": 0.19508388607101054,
21
- "grad_norm": 3.7028684616088867,
22
  "learning_rate": 1.870074131876707e-05,
23
- "loss": 0.1714,
24
  "step": 1000
25
  },
26
  {
27
  "epoch": 0.2926258291065158,
28
- "grad_norm": 0.6240711212158203,
29
  "learning_rate": 1.805046169853037e-05,
30
- "loss": 0.1624,
31
  "step": 1500
32
  },
33
  {
34
  "epoch": 0.3901677721420211,
35
- "grad_norm": 1.1150747537612915,
36
  "learning_rate": 1.7400182078293668e-05,
37
- "loss": 0.1627,
38
  "step": 2000
39
  },
40
  {
41
  "epoch": 0.4877097151775263,
42
- "grad_norm": 0.31245458126068115,
43
  "learning_rate": 1.6749902458056965e-05,
44
- "loss": 0.1477,
45
  "step": 2500
46
  },
47
  {
48
  "epoch": 0.5852516582130316,
49
- "grad_norm": 1.4625613689422607,
50
  "learning_rate": 1.6099622837820262e-05,
51
- "loss": 0.1574,
52
  "step": 3000
53
  },
54
  {
55
  "epoch": 0.6827936012485368,
56
- "grad_norm": 0.7397491931915283,
57
  "learning_rate": 1.5449343217583563e-05,
58
- "loss": 0.146,
59
  "step": 3500
60
  },
61
  {
62
  "epoch": 0.7803355442840422,
63
- "grad_norm": 3.28501033782959,
64
  "learning_rate": 1.4799063597346861e-05,
65
- "loss": 0.1471,
66
  "step": 4000
67
  },
68
  {
69
  "epoch": 0.8778774873195474,
70
- "grad_norm": 5.020526885986328,
71
  "learning_rate": 1.4148783977110158e-05,
72
- "loss": 0.1509,
73
  "step": 4500
74
  },
75
  {
76
  "epoch": 0.9754194303550526,
77
- "grad_norm": 3.897925615310669,
78
  "learning_rate": 1.3498504356873457e-05,
79
- "loss": 0.1454,
80
  "step": 5000
81
  },
82
  {
83
  "epoch": 1.0,
84
- "eval_accuracy": 0.9512789875393206,
85
- "eval_loss": 0.14533720910549164,
86
- "eval_runtime": 74.7449,
87
- "eval_samples_per_second": 548.653,
88
- "eval_steps_per_second": 17.152,
89
  "step": 5126
90
  }
91
  ],
 
1
  {
2
  "best_global_step": 5126,
3
+ "best_metric": 0.14522501826286316,
4
  "best_model_checkpoint": "bert_sentiment_model/checkpoint-5126",
5
  "epoch": 1.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.09754194303550527,
14
+ "grad_norm": 0.7001327872276306,
15
  "learning_rate": 1.9351020939003774e-05,
16
+ "loss": 0.2293,
17
  "step": 500
18
  },
19
  {
20
  "epoch": 0.19508388607101054,
21
+ "grad_norm": 4.297205924987793,
22
  "learning_rate": 1.870074131876707e-05,
23
+ "loss": 0.1698,
24
  "step": 1000
25
  },
26
  {
27
  "epoch": 0.2926258291065158,
28
+ "grad_norm": 1.2200896739959717,
29
  "learning_rate": 1.805046169853037e-05,
30
+ "loss": 0.1634,
31
  "step": 1500
32
  },
33
  {
34
  "epoch": 0.3901677721420211,
35
+ "grad_norm": 1.0850228071212769,
36
  "learning_rate": 1.7400182078293668e-05,
37
+ "loss": 0.1635,
38
  "step": 2000
39
  },
40
  {
41
  "epoch": 0.4877097151775263,
42
+ "grad_norm": 0.29681870341300964,
43
  "learning_rate": 1.6749902458056965e-05,
44
+ "loss": 0.1485,
45
  "step": 2500
46
  },
47
  {
48
  "epoch": 0.5852516582130316,
49
+ "grad_norm": 1.5155694484710693,
50
  "learning_rate": 1.6099622837820262e-05,
51
+ "loss": 0.1566,
52
  "step": 3000
53
  },
54
  {
55
  "epoch": 0.6827936012485368,
56
+ "grad_norm": 0.7628584504127502,
57
  "learning_rate": 1.5449343217583563e-05,
58
+ "loss": 0.1469,
59
  "step": 3500
60
  },
61
  {
62
  "epoch": 0.7803355442840422,
63
+ "grad_norm": 2.7121143341064453,
64
  "learning_rate": 1.4799063597346861e-05,
65
+ "loss": 0.1467,
66
  "step": 4000
67
  },
68
  {
69
  "epoch": 0.8778774873195474,
70
+ "grad_norm": 4.958296298980713,
71
  "learning_rate": 1.4148783977110158e-05,
72
+ "loss": 0.1512,
73
  "step": 4500
74
  },
75
  {
76
  "epoch": 0.9754194303550526,
77
+ "grad_norm": 3.2470967769622803,
78
  "learning_rate": 1.3498504356873457e-05,
79
+ "loss": 0.1445,
80
  "step": 5000
81
  },
82
  {
83
  "epoch": 1.0,
84
+ "eval_accuracy": 0.9502548221122192,
85
+ "eval_loss": 0.14522501826286316,
86
+ "eval_runtime": 77.7071,
87
+ "eval_samples_per_second": 527.738,
88
+ "eval_steps_per_second": 16.498,
89
  "step": 5126
90
  }
91
  ],
bert_sentiment_model/checkpoint-5126/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:821493876e4c41dba842c49ce5fab3b065233a721a9d4cedcdac3b251c6aa960
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bef23a9df09a87b02e3a926fa1cef1d6042ebe2dff78b46cbd238fd398f8e34
3
  size 5368
bert_sentiment_model/runs/{Nov22_15-25-42_0db92e0bbeed/events.out.tfevents.1763825143.0db92e0bbeed.48.0 → Nov23_06-43-46_010a7f9e6512/events.out.tfevents.1763880227.010a7f9e6512.48.0} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:722143b2b87a2fa3bee692b56d80fb7ca82226bfd46543009ef47db25caece7c
3
  size 12660
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc7b837e644ca3a40eb6a6c7628bfea2c578369bac6aa7808bcb675aea001d91
3
  size 12660
lstm_model.keras CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae7cc154c422d9a844884d821176f4702f054bd25bc940a75d0ed8f5861d1f6e
3
  size 33928028
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7544f463eeaa33ae7572edb6328f22ab4305ffc3071761caddf5eea89d577b16
3
  size 33928028
tokenizer.pickle → lstm_tokenizer.pickle RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21e4fd61bb2bccd17a7fa7cf3d3d4bfd11152717e35c7bc22be7d005183dbada
3
- size 467490
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9424507a7b1583b0466691aa520b3d4977f77237c89c51cc9d18ee693b4110e
3
+ size 1546136
tfidf_vectorizer.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:098fb11fbad95b6e55cfe4233c13eff78fc2ac3ade0c6ec86d2d849553adcdff
3
  size 1244923
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24915c16f607a8c931c095cda107118bb24c14b0bf539a7553eefae0842900af
3
  size 1244923