mayaram committed on
Commit
5a89227
·
1 Parent(s): 0e32756

Upload 7 files

Browse files
Files changed (3) hide show
  1. rng_state.pth +1 -1
  2. scheduler.pt +1 -1
  3. trainer_state.json +109 -3
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a24023cc6c46dd166b1fdbd0c2279c2ac131964234801d28f0e5db34de8a7535
3
  size 13553
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dbbe18d76df09c22fa36f245fba82f2fe9a2b4776d99c603305bc23ef70fd21
3
  size 13553
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:620a2c6044c4ec2a4dcd175f5fe80f8acb66bb949178847e0257327ae79e1185
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:183a7ce17aefcfb041aca645cba320fd6d7d899b8807bffb9a11937d9e30ac58
3
  size 627
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": 3.395401954650879,
3
  "best_model_checkpoint": "/content/drive/MyDrive/ICModel/arabertVit/checkpoint-726",
4
- "epoch": 2.0,
5
- "global_step": 726,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -106,11 +106,117 @@
106
  "eval_samples_per_second": 5.563,
107
  "eval_steps_per_second": 0.088,
108
  "step": 726
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  }
110
  ],
111
  "max_steps": 1452,
112
  "num_train_epochs": 4,
113
- "total_flos": 8.411884290016543e+18,
114
  "trial_name": null,
115
  "trial_params": null
116
  }
 
1
  {
2
  "best_metric": 3.395401954650879,
3
  "best_model_checkpoint": "/content/drive/MyDrive/ICModel/arabertVit/checkpoint-726",
4
+ "epoch": 4.0,
5
+ "global_step": 1452,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
106
  "eval_samples_per_second": 5.563,
107
  "eval_steps_per_second": 0.088,
108
  "step": 726
109
+ },
110
+ {
111
+ "epoch": 2.07,
112
+ "learning_rate": 2.4173553719008264e-05,
113
+ "loss": 3.0104,
114
+ "step": 750
115
+ },
116
+ {
117
+ "epoch": 2.2,
118
+ "learning_rate": 2.2451790633608817e-05,
119
+ "loss": 2.8998,
120
+ "step": 800
121
+ },
122
+ {
123
+ "epoch": 2.34,
124
+ "learning_rate": 2.073002754820937e-05,
125
+ "loss": 2.8714,
126
+ "step": 850
127
+ },
128
+ {
129
+ "epoch": 2.48,
130
+ "learning_rate": 1.900826446280992e-05,
131
+ "loss": 2.8648,
132
+ "step": 900
133
+ },
134
+ {
135
+ "epoch": 2.62,
136
+ "learning_rate": 1.728650137741047e-05,
137
+ "loss": 2.8893,
138
+ "step": 950
139
+ },
140
+ {
141
+ "epoch": 2.75,
142
+ "learning_rate": 1.5564738292011018e-05,
143
+ "loss": 2.8479,
144
+ "step": 1000
145
+ },
146
+ {
147
+ "epoch": 2.89,
148
+ "learning_rate": 1.3842975206611573e-05,
149
+ "loss": 2.8732,
150
+ "step": 1050
151
+ },
152
+ {
153
+ "epoch": 3.0,
154
+ "eval_loss": 3.4181885719299316,
155
+ "eval_runtime": 474.579,
156
+ "eval_samples_per_second": 5.441,
157
+ "eval_steps_per_second": 0.086,
158
+ "step": 1089
159
+ },
160
+ {
161
+ "epoch": 3.03,
162
+ "learning_rate": 1.2121212121212122e-05,
163
+ "loss": 2.8077,
164
+ "step": 1100
165
+ },
166
+ {
167
+ "epoch": 3.17,
168
+ "learning_rate": 1.0399449035812673e-05,
169
+ "loss": 2.6918,
170
+ "step": 1150
171
+ },
172
+ {
173
+ "epoch": 3.31,
174
+ "learning_rate": 8.677685950413224e-06,
175
+ "loss": 2.6974,
176
+ "step": 1200
177
+ },
178
+ {
179
+ "epoch": 3.44,
180
+ "learning_rate": 6.955922865013774e-06,
181
+ "loss": 2.6855,
182
+ "step": 1250
183
+ },
184
+ {
185
+ "epoch": 3.58,
186
+ "learning_rate": 5.234159779614326e-06,
187
+ "loss": 2.6727,
188
+ "step": 1300
189
+ },
190
+ {
191
+ "epoch": 3.72,
192
+ "learning_rate": 3.5123966942148763e-06,
193
+ "loss": 2.6863,
194
+ "step": 1350
195
+ },
196
+ {
197
+ "epoch": 3.86,
198
+ "learning_rate": 1.7906336088154272e-06,
199
+ "loss": 2.6866,
200
+ "step": 1400
201
+ },
202
+ {
203
+ "epoch": 3.99,
204
+ "learning_rate": 6.887052341597797e-08,
205
+ "loss": 2.6622,
206
+ "step": 1450
207
+ },
208
+ {
209
+ "epoch": 4.0,
210
+ "eval_loss": 3.4961369037628174,
211
+ "eval_runtime": 465.9454,
212
+ "eval_samples_per_second": 5.541,
213
+ "eval_steps_per_second": 0.088,
214
+ "step": 1452
215
  }
216
  ],
217
  "max_steps": 1452,
218
  "num_train_epochs": 4,
219
+ "total_flos": 1.6823768580033085e+19,
220
  "trial_name": null,
221
  "trial_params": null
222
  }