Raihan004 committed on
Commit
dfafb38
·
1 Parent(s): c0791f8

🍻 cheers

Browse files
README.md CHANGED
@@ -2,6 +2,7 @@
2
  license: apache-2.0
3
  base_model: Raihan004/Hierarchical_Agent_Action
4
  tags:
 
5
  - generated_from_trainer
6
  datasets:
7
  - image_folder
@@ -14,7 +15,7 @@ model-index:
14
  name: Image Classification
15
  type: image-classification
16
  dataset:
17
- name: image_folder
18
  type: image_folder
19
  config: hierarchical-action-agent
20
  split: train
@@ -22,7 +23,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.7913669064748201
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -30,10 +31,10 @@ should probably proofread and complete it, then remove this comment. -->
30
 
31
  # Hierarchical_Agent_Action
32
 
33
- This model is a fine-tuned version of [Raihan004/Hierarchical_Agent_Action](https://huggingface.co/Raihan004/Hierarchical_Agent_Action) on the image_folder dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.8794
36
- - Accuracy: 0.7914
37
 
38
  ## Model description
39
 
 
2
  license: apache-2.0
3
  base_model: Raihan004/Hierarchical_Agent_Action
4
  tags:
5
+ - image-classification
6
  - generated_from_trainer
7
  datasets:
8
  - image_folder
 
15
  name: Image Classification
16
  type: image-classification
17
  dataset:
18
+ name: agent_action_class
19
  type: image_folder
20
  config: hierarchical-action-agent
21
  split: train
 
23
  metrics:
24
  - name: Accuracy
25
  type: accuracy
26
+ value: 0.7856115107913669
27
  ---
28
 
29
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
31
 
32
  # Hierarchical_Agent_Action
33
 
34
+ This model is a fine-tuned version of [Raihan004/Hierarchical_Agent_Action](https://huggingface.co/Raihan004/Hierarchical_Agent_Action) on the agent_action_class dataset.
35
  It achieves the following results on the evaluation set:
36
+ - Loss: 0.8558
37
+ - Accuracy: 0.7856
38
 
39
  ## Model description
40
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 15.0,
3
- "eval_accuracy": 0.8129496402877698,
4
- "eval_loss": 0.6429479122161865,
5
- "eval_runtime": 13.5771,
6
- "eval_samples_per_second": 51.189,
7
- "eval_steps_per_second": 6.408,
8
- "total_flos": 4.5776019534039245e+18,
9
- "train_loss": 0.7704572311011694,
10
- "train_runtime": 1921.5202,
11
- "train_samples_per_second": 30.733,
12
- "train_steps_per_second": 0.484
13
  }
 
1
  {
2
+ "epoch": 30.0,
3
+ "eval_accuracy": 0.7856115107913669,
4
+ "eval_loss": 0.855847954750061,
5
+ "eval_runtime": 14.4633,
6
+ "eval_samples_per_second": 48.053,
7
+ "eval_steps_per_second": 6.015,
8
+ "total_flos": 9.155203906807849e+18,
9
+ "train_loss": 0.21589128868554228,
10
+ "train_runtime": 3907.045,
11
+ "train_samples_per_second": 30.23,
12
+ "train_steps_per_second": 0.476
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 15.0,
3
- "eval_accuracy": 0.8129496402877698,
4
- "eval_loss": 0.6429479122161865,
5
- "eval_runtime": 13.5771,
6
- "eval_samples_per_second": 51.189,
7
- "eval_steps_per_second": 6.408
8
  }
 
1
  {
2
+ "epoch": 30.0,
3
+ "eval_accuracy": 0.7856115107913669,
4
+ "eval_loss": 0.855847954750061,
5
+ "eval_runtime": 14.4633,
6
+ "eval_samples_per_second": 48.053,
7
+ "eval_steps_per_second": 6.015
8
  }
runs/Dec26_19-24-41_37ea9fdc1c3c/events.out.tfevents.1703622620.37ea9fdc1c3c.47.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4fb94f5cd1121f29cceec3fdfea694dfb0af68896b299af4f9a87d678e68a2e
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 15.0,
3
- "total_flos": 4.5776019534039245e+18,
4
- "train_loss": 0.7704572311011694,
5
- "train_runtime": 1921.5202,
6
- "train_samples_per_second": 30.733,
7
- "train_steps_per_second": 0.484
8
  }
 
1
  {
2
+ "epoch": 30.0,
3
+ "total_flos": 9.155203906807849e+18,
4
+ "train_loss": 0.21589128868554228,
5
+ "train_runtime": 3907.045,
6
+ "train_samples_per_second": 30.23,
7
+ "train_steps_per_second": 0.476
8
  }
trainer_state.json CHANGED
@@ -1,385 +1,748 @@
1
  {
2
- "best_metric": 0.6429479122161865,
3
- "best_model_checkpoint": "Hierarchical_Agent_Action/checkpoint-900",
4
- "epoch": 15.0,
5
  "eval_steps": 100,
6
- "global_step": 930,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.32,
13
- "learning_rate": 0.0001956989247311828,
14
- "loss": 3.2878,
15
  "step": 20
16
  },
17
  {
18
  "epoch": 0.65,
19
- "learning_rate": 0.0001913978494623656,
20
- "loss": 2.7048,
21
  "step": 40
22
  },
23
  {
24
  "epoch": 0.97,
25
- "learning_rate": 0.0001870967741935484,
26
- "loss": 2.285,
27
  "step": 60
28
  },
29
  {
30
  "epoch": 1.29,
31
- "learning_rate": 0.0001827956989247312,
32
- "loss": 1.934,
33
  "step": 80
34
  },
35
  {
36
  "epoch": 1.61,
37
- "learning_rate": 0.00017849462365591398,
38
- "loss": 1.6851,
39
  "step": 100
40
  },
41
  {
42
  "epoch": 1.61,
43
- "eval_accuracy": 0.7568345323741007,
44
- "eval_loss": 1.5225424766540527,
45
- "eval_runtime": 13.5302,
46
- "eval_samples_per_second": 51.366,
47
- "eval_steps_per_second": 6.43,
48
  "step": 100
49
  },
50
  {
51
  "epoch": 1.94,
52
- "learning_rate": 0.00017419354838709678,
53
- "loss": 1.5182,
54
  "step": 120
55
  },
56
  {
57
  "epoch": 2.26,
58
- "learning_rate": 0.00016989247311827957,
59
- "loss": 1.2911,
60
  "step": 140
61
  },
62
  {
63
  "epoch": 2.58,
64
- "learning_rate": 0.0001655913978494624,
65
- "loss": 1.1929,
66
  "step": 160
67
  },
68
  {
69
  "epoch": 2.9,
70
- "learning_rate": 0.00016129032258064516,
71
- "loss": 1.0997,
72
  "step": 180
73
  },
74
  {
75
  "epoch": 3.23,
76
- "learning_rate": 0.00015698924731182796,
77
- "loss": 0.9569,
78
  "step": 200
79
  },
80
  {
81
  "epoch": 3.23,
82
- "eval_accuracy": 0.7769784172661871,
83
- "eval_loss": 0.9738827347755432,
84
- "eval_runtime": 13.5802,
85
- "eval_samples_per_second": 51.177,
86
- "eval_steps_per_second": 6.406,
87
  "step": 200
88
  },
89
  {
90
  "epoch": 3.55,
91
- "learning_rate": 0.00015268817204301075,
92
- "loss": 0.9299,
93
  "step": 220
94
  },
95
  {
96
  "epoch": 3.87,
97
- "learning_rate": 0.00014838709677419355,
98
- "loss": 0.8698,
99
  "step": 240
100
  },
101
  {
102
  "epoch": 4.19,
103
- "learning_rate": 0.00014408602150537637,
104
- "loss": 0.8559,
105
  "step": 260
106
  },
107
  {
108
  "epoch": 4.52,
109
- "learning_rate": 0.00013978494623655916,
110
- "loss": 0.7397,
111
  "step": 280
112
  },
113
  {
114
  "epoch": 4.84,
115
- "learning_rate": 0.00013548387096774193,
116
- "loss": 0.7947,
117
  "step": 300
118
  },
119
  {
120
  "epoch": 4.84,
121
- "eval_accuracy": 0.8071942446043165,
122
- "eval_loss": 0.7677171230316162,
123
- "eval_runtime": 13.5251,
124
- "eval_samples_per_second": 51.386,
125
- "eval_steps_per_second": 6.432,
126
  "step": 300
127
  },
128
  {
129
  "epoch": 5.16,
130
- "learning_rate": 0.00013118279569892472,
131
- "loss": 0.7,
132
  "step": 320
133
  },
134
  {
135
  "epoch": 5.48,
136
- "learning_rate": 0.00012688172043010752,
137
- "loss": 0.6085,
138
  "step": 340
139
  },
140
  {
141
  "epoch": 5.81,
142
- "learning_rate": 0.00012258064516129034,
143
- "loss": 0.6686,
144
  "step": 360
145
  },
146
  {
147
  "epoch": 6.13,
148
- "learning_rate": 0.00011827956989247313,
149
- "loss": 0.6064,
150
  "step": 380
151
  },
152
  {
153
  "epoch": 6.45,
154
- "learning_rate": 0.00011397849462365593,
155
- "loss": 0.6196,
156
  "step": 400
157
  },
158
  {
159
  "epoch": 6.45,
160
- "eval_accuracy": 0.7928057553956834,
161
- "eval_loss": 0.7337872385978699,
162
- "eval_runtime": 13.5683,
163
- "eval_samples_per_second": 51.222,
164
- "eval_steps_per_second": 6.412,
165
  "step": 400
166
  },
167
  {
168
  "epoch": 6.77,
169
- "learning_rate": 0.00010967741935483871,
170
- "loss": 0.575,
171
  "step": 420
172
  },
173
  {
174
  "epoch": 7.1,
175
- "learning_rate": 0.0001053763440860215,
176
- "loss": 0.5475,
177
  "step": 440
178
  },
179
  {
180
  "epoch": 7.42,
181
- "learning_rate": 0.0001010752688172043,
182
- "loss": 0.5107,
183
  "step": 460
184
  },
185
  {
186
  "epoch": 7.74,
187
- "learning_rate": 9.677419354838711e-05,
188
- "loss": 0.5313,
189
  "step": 480
190
  },
191
  {
192
  "epoch": 8.06,
193
- "learning_rate": 9.268817204301076e-05,
194
- "loss": 0.4951,
195
  "step": 500
196
  },
197
  {
198
  "epoch": 8.06,
199
- "eval_accuracy": 0.8014388489208633,
200
- "eval_loss": 0.7010597586631775,
201
- "eval_runtime": 13.5437,
202
- "eval_samples_per_second": 51.315,
203
- "eval_steps_per_second": 6.424,
204
  "step": 500
205
  },
206
  {
207
  "epoch": 8.39,
208
- "learning_rate": 8.838709677419355e-05,
209
- "loss": 0.4507,
210
  "step": 520
211
  },
212
  {
213
  "epoch": 8.71,
214
- "learning_rate": 8.408602150537634e-05,
215
- "loss": 0.4825,
216
  "step": 540
217
  },
218
  {
219
  "epoch": 9.03,
220
- "learning_rate": 7.978494623655914e-05,
221
- "loss": 0.4517,
222
  "step": 560
223
  },
224
  {
225
  "epoch": 9.35,
226
- "learning_rate": 7.548387096774195e-05,
227
- "loss": 0.4049,
228
  "step": 580
229
  },
230
  {
231
  "epoch": 9.68,
232
- "learning_rate": 7.13978494623656e-05,
233
- "loss": 0.3935,
234
  "step": 600
235
  },
236
  {
237
  "epoch": 9.68,
238
- "eval_accuracy": 0.7913669064748201,
239
- "eval_loss": 0.7296842336654663,
240
- "eval_runtime": 13.4431,
241
- "eval_samples_per_second": 51.699,
242
- "eval_steps_per_second": 6.472,
243
  "step": 600
244
  },
245
  {
246
  "epoch": 10.0,
247
- "learning_rate": 6.709677419354839e-05,
248
- "loss": 0.4355,
249
  "step": 620
250
  },
251
  {
252
  "epoch": 10.32,
253
- "learning_rate": 6.279569892473119e-05,
254
- "loss": 0.4131,
255
  "step": 640
256
  },
257
  {
258
  "epoch": 10.65,
259
- "learning_rate": 5.849462365591398e-05,
260
- "loss": 0.4059,
261
  "step": 660
262
  },
263
  {
264
  "epoch": 10.97,
265
- "learning_rate": 5.419354838709678e-05,
266
- "loss": 0.3698,
267
  "step": 680
268
  },
269
  {
270
  "epoch": 11.29,
271
- "learning_rate": 4.989247311827957e-05,
272
- "loss": 0.3646,
273
  "step": 700
274
  },
275
  {
276
  "epoch": 11.29,
277
- "eval_accuracy": 0.8100719424460432,
278
- "eval_loss": 0.6603276133537292,
279
- "eval_runtime": 13.4983,
280
- "eval_samples_per_second": 51.488,
281
- "eval_steps_per_second": 6.445,
282
  "step": 700
283
  },
284
  {
285
  "epoch": 11.61,
286
- "learning_rate": 4.559139784946237e-05,
287
- "loss": 0.3585,
288
  "step": 720
289
  },
290
  {
291
  "epoch": 11.94,
292
- "learning_rate": 4.1290322580645165e-05,
293
- "loss": 0.3363,
294
  "step": 740
295
  },
296
  {
297
  "epoch": 12.26,
298
- "learning_rate": 3.698924731182796e-05,
299
- "loss": 0.343,
300
  "step": 760
301
  },
302
  {
303
  "epoch": 12.58,
304
- "learning_rate": 3.2688172043010754e-05,
305
- "loss": 0.3284,
306
  "step": 780
307
  },
308
  {
309
  "epoch": 12.9,
310
- "learning_rate": 2.838709677419355e-05,
311
- "loss": 0.3382,
312
  "step": 800
313
  },
314
  {
315
  "epoch": 12.9,
316
- "eval_accuracy": 0.8158273381294964,
317
- "eval_loss": 0.6626977324485779,
318
- "eval_runtime": 13.6829,
319
- "eval_samples_per_second": 50.793,
320
- "eval_steps_per_second": 6.358,
321
  "step": 800
322
  },
323
  {
324
  "epoch": 13.23,
325
- "learning_rate": 2.4086021505376347e-05,
326
- "loss": 0.3016,
327
  "step": 820
328
  },
329
  {
330
  "epoch": 13.55,
331
- "learning_rate": 1.978494623655914e-05,
332
- "loss": 0.2865,
333
  "step": 840
334
  },
335
  {
336
  "epoch": 13.87,
337
- "learning_rate": 1.5483870967741936e-05,
338
- "loss": 0.308,
339
  "step": 860
340
  },
341
  {
342
  "epoch": 14.19,
343
- "learning_rate": 1.1182795698924732e-05,
344
- "loss": 0.3115,
345
  "step": 880
346
  },
347
  {
348
  "epoch": 14.52,
349
- "learning_rate": 6.881720430107527e-06,
350
- "loss": 0.3133,
351
  "step": 900
352
  },
353
  {
354
  "epoch": 14.52,
355
- "eval_accuracy": 0.8129496402877698,
356
- "eval_loss": 0.6429479122161865,
357
- "eval_runtime": 13.0863,
358
- "eval_samples_per_second": 53.109,
359
- "eval_steps_per_second": 6.648,
360
  "step": 900
361
  },
362
  {
363
  "epoch": 14.84,
364
- "learning_rate": 2.580645161290323e-06,
365
- "loss": 0.3048,
366
  "step": 920
367
  },
368
  {
369
- "epoch": 15.0,
370
- "step": 930,
371
- "total_flos": 4.5776019534039245e+18,
372
- "train_loss": 0.7704572311011694,
373
- "train_runtime": 1921.5202,
374
- "train_samples_per_second": 30.733,
375
- "train_steps_per_second": 0.484
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376
  }
377
  ],
378
  "logging_steps": 20,
379
- "max_steps": 930,
380
- "num_train_epochs": 15,
381
  "save_steps": 100,
382
- "total_flos": 4.5776019534039245e+18,
383
  "trial_name": null,
384
  "trial_params": null
385
  }
 
1
  {
2
+ "best_metric": 0.855847954750061,
3
+ "best_model_checkpoint": "Hierarchical_Agent_Action/checkpoint-1600",
4
+ "epoch": 30.0,
5
  "eval_steps": 100,
6
+ "global_step": 1860,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.32,
13
+ "learning_rate": 0.00019784946236559142,
14
+ "loss": 0.4563,
15
  "step": 20
16
  },
17
  {
18
  "epoch": 0.65,
19
+ "learning_rate": 0.0001956989247311828,
20
+ "loss": 0.4173,
21
  "step": 40
22
  },
23
  {
24
  "epoch": 0.97,
25
+ "learning_rate": 0.00019365591397849463,
26
+ "loss": 0.3669,
27
  "step": 60
28
  },
29
  {
30
  "epoch": 1.29,
31
+ "learning_rate": 0.00019150537634408604,
32
+ "loss": 0.2848,
33
  "step": 80
34
  },
35
  {
36
  "epoch": 1.61,
37
+ "learning_rate": 0.00018935483870967742,
38
+ "loss": 0.2855,
39
  "step": 100
40
  },
41
  {
42
  "epoch": 1.61,
43
+ "eval_accuracy": 0.7841726618705036,
44
+ "eval_loss": 0.8655692934989929,
45
+ "eval_runtime": 14.3186,
46
+ "eval_samples_per_second": 48.538,
47
+ "eval_steps_per_second": 6.076,
48
  "step": 100
49
  },
50
  {
51
  "epoch": 1.94,
52
+ "learning_rate": 0.00018720430107526883,
53
+ "loss": 0.2924,
54
  "step": 120
55
  },
56
  {
57
  "epoch": 2.26,
58
+ "learning_rate": 0.00018505376344086022,
59
+ "loss": 0.2681,
60
  "step": 140
61
  },
62
  {
63
  "epoch": 2.58,
64
+ "learning_rate": 0.00018290322580645163,
65
+ "loss": 0.2849,
66
  "step": 160
67
  },
68
  {
69
  "epoch": 2.9,
70
+ "learning_rate": 0.00018075268817204304,
71
+ "loss": 0.2872,
72
  "step": 180
73
  },
74
  {
75
  "epoch": 3.23,
76
+ "learning_rate": 0.00017860215053763442,
77
+ "loss": 0.2304,
78
  "step": 200
79
  },
80
  {
81
  "epoch": 3.23,
82
+ "eval_accuracy": 0.7496402877697842,
83
+ "eval_loss": 0.9936912655830383,
84
+ "eval_runtime": 14.2202,
85
+ "eval_samples_per_second": 48.874,
86
+ "eval_steps_per_second": 6.118,
87
  "step": 200
88
  },
89
  {
90
  "epoch": 3.55,
91
+ "learning_rate": 0.0001764516129032258,
92
+ "loss": 0.2441,
93
  "step": 220
94
  },
95
  {
96
  "epoch": 3.87,
97
+ "learning_rate": 0.00017430107526881722,
98
+ "loss": 0.2692,
99
  "step": 240
100
  },
101
  {
102
  "epoch": 4.19,
103
+ "learning_rate": 0.0001721505376344086,
104
+ "loss": 0.2872,
105
  "step": 260
106
  },
107
  {
108
  "epoch": 4.52,
109
+ "learning_rate": 0.00017,
110
+ "loss": 0.2347,
111
  "step": 280
112
  },
113
  {
114
  "epoch": 4.84,
115
+ "learning_rate": 0.0001678494623655914,
116
+ "loss": 0.3009,
117
  "step": 300
118
  },
119
  {
120
  "epoch": 4.84,
121
+ "eval_accuracy": 0.7482014388489209,
122
+ "eval_loss": 1.0166507959365845,
123
+ "eval_runtime": 14.2233,
124
+ "eval_samples_per_second": 48.863,
125
+ "eval_steps_per_second": 6.117,
126
  "step": 300
127
  },
128
  {
129
  "epoch": 5.16,
130
+ "learning_rate": 0.0001656989247311828,
131
+ "loss": 0.2292,
132
  "step": 320
133
  },
134
  {
135
  "epoch": 5.48,
136
+ "learning_rate": 0.0001635483870967742,
137
+ "loss": 0.2113,
138
  "step": 340
139
  },
140
  {
141
  "epoch": 5.81,
142
+ "learning_rate": 0.0001613978494623656,
143
+ "loss": 0.2343,
144
  "step": 360
145
  },
146
  {
147
  "epoch": 6.13,
148
+ "learning_rate": 0.000159247311827957,
149
+ "loss": 0.2554,
150
  "step": 380
151
  },
152
  {
153
  "epoch": 6.45,
154
+ "learning_rate": 0.0001570967741935484,
155
+ "loss": 0.2438,
156
  "step": 400
157
  },
158
  {
159
  "epoch": 6.45,
160
+ "eval_accuracy": 0.7424460431654676,
161
+ "eval_loss": 1.0789364576339722,
162
+ "eval_runtime": 14.0121,
163
+ "eval_samples_per_second": 49.6,
164
+ "eval_steps_per_second": 6.209,
165
  "step": 400
166
  },
167
  {
168
  "epoch": 6.77,
169
+ "learning_rate": 0.0001549462365591398,
170
+ "loss": 0.2667,
171
  "step": 420
172
  },
173
  {
174
  "epoch": 7.1,
175
+ "learning_rate": 0.0001527956989247312,
176
+ "loss": 0.2425,
177
  "step": 440
178
  },
179
  {
180
  "epoch": 7.42,
181
+ "learning_rate": 0.00015064516129032257,
182
+ "loss": 0.2405,
183
  "step": 460
184
  },
185
  {
186
  "epoch": 7.74,
187
+ "learning_rate": 0.00014849462365591399,
188
+ "loss": 0.2509,
189
  "step": 480
190
  },
191
  {
192
  "epoch": 8.06,
193
+ "learning_rate": 0.00014634408602150537,
194
+ "loss": 0.222,
195
  "step": 500
196
  },
197
  {
198
  "epoch": 8.06,
199
+ "eval_accuracy": 0.7697841726618705,
200
+ "eval_loss": 0.886641800403595,
201
+ "eval_runtime": 14.2992,
202
+ "eval_samples_per_second": 48.604,
203
+ "eval_steps_per_second": 6.084,
204
  "step": 500
205
  },
206
  {
207
  "epoch": 8.39,
208
+ "learning_rate": 0.00014419354838709678,
209
+ "loss": 0.2118,
210
  "step": 520
211
  },
212
  {
213
  "epoch": 8.71,
214
+ "learning_rate": 0.00014204301075268816,
215
+ "loss": 0.2346,
216
  "step": 540
217
  },
218
  {
219
  "epoch": 9.03,
220
+ "learning_rate": 0.00013989247311827957,
221
+ "loss": 0.2422,
222
  "step": 560
223
  },
224
  {
225
  "epoch": 9.35,
226
+ "learning_rate": 0.00013774193548387099,
227
+ "loss": 0.2259,
228
  "step": 580
229
  },
230
  {
231
  "epoch": 9.68,
232
+ "learning_rate": 0.00013559139784946237,
233
+ "loss": 0.2174,
234
  "step": 600
235
  },
236
  {
237
  "epoch": 9.68,
238
+ "eval_accuracy": 0.7798561151079136,
239
+ "eval_loss": 0.8679013252258301,
240
+ "eval_runtime": 14.2534,
241
+ "eval_samples_per_second": 48.76,
242
+ "eval_steps_per_second": 6.104,
243
  "step": 600
244
  },
245
  {
246
  "epoch": 10.0,
247
+ "learning_rate": 0.00013344086021505378,
248
+ "loss": 0.2519,
249
  "step": 620
250
  },
251
  {
252
  "epoch": 10.32,
253
+ "learning_rate": 0.00013129032258064516,
254
+ "loss": 0.2167,
255
  "step": 640
256
  },
257
  {
258
  "epoch": 10.65,
259
+ "learning_rate": 0.00012913978494623657,
260
+ "loss": 0.2184,
261
  "step": 660
262
  },
263
  {
264
  "epoch": 10.97,
265
+ "learning_rate": 0.00012698924731182796,
266
+ "loss": 0.2143,
267
  "step": 680
268
  },
269
  {
270
  "epoch": 11.29,
271
+ "learning_rate": 0.00012483870967741934,
272
+ "loss": 0.2177,
273
  "step": 700
274
  },
275
  {
276
  "epoch": 11.29,
277
+ "eval_accuracy": 0.7654676258992805,
278
+ "eval_loss": 0.979772686958313,
279
+ "eval_runtime": 14.1285,
280
+ "eval_samples_per_second": 49.191,
281
+ "eval_steps_per_second": 6.158,
282
  "step": 700
283
  },
284
  {
285
  "epoch": 11.61,
286
+ "learning_rate": 0.00012268817204301075,
287
+ "loss": 0.2061,
288
  "step": 720
289
  },
290
  {
291
  "epoch": 11.94,
292
+ "learning_rate": 0.00012053763440860215,
293
+ "loss": 0.2389,
294
  "step": 740
295
  },
296
  {
297
  "epoch": 12.26,
298
+ "learning_rate": 0.00011838709677419355,
299
+ "loss": 0.2323,
300
  "step": 760
301
  },
302
  {
303
  "epoch": 12.58,
304
+ "learning_rate": 0.00011623655913978494,
305
+ "loss": 0.2404,
306
  "step": 780
307
  },
308
  {
309
  "epoch": 12.9,
310
+ "learning_rate": 0.00011408602150537636,
311
+ "loss": 0.2471,
312
  "step": 800
313
  },
314
  {
315
  "epoch": 12.9,
316
+ "eval_accuracy": 0.7726618705035971,
317
+ "eval_loss": 1.0291355848312378,
318
+ "eval_runtime": 14.1227,
319
+ "eval_samples_per_second": 49.212,
320
+ "eval_steps_per_second": 6.16,
321
  "step": 800
322
  },
323
  {
324
  "epoch": 13.23,
325
+ "learning_rate": 0.00011193548387096775,
326
+ "loss": 0.2054,
327
  "step": 820
328
  },
329
  {
330
  "epoch": 13.55,
331
+ "learning_rate": 0.00010978494623655915,
332
+ "loss": 0.2303,
333
  "step": 840
334
  },
335
  {
336
  "epoch": 13.87,
337
+ "learning_rate": 0.00010763440860215055,
338
+ "loss": 0.2336,
339
  "step": 860
340
  },
341
  {
342
  "epoch": 14.19,
343
+ "learning_rate": 0.00010548387096774195,
344
+ "loss": 0.249,
345
  "step": 880
346
  },
347
  {
348
  "epoch": 14.52,
349
+ "learning_rate": 0.00010333333333333334,
350
+ "loss": 0.2782,
351
  "step": 900
352
  },
353
  {
354
  "epoch": 14.52,
355
+ "eval_accuracy": 0.7798561151079136,
356
+ "eval_loss": 0.8910142779350281,
357
+ "eval_runtime": 14.1009,
358
+ "eval_samples_per_second": 49.288,
359
+ "eval_steps_per_second": 6.17,
360
  "step": 900
361
  },
362
  {
363
  "epoch": 14.84,
364
+ "learning_rate": 0.00010118279569892473,
365
+ "loss": 0.2677,
366
  "step": 920
367
  },
368
  {
369
+ "epoch": 15.16,
370
+ "learning_rate": 9.903225806451614e-05,
371
+ "loss": 0.2355,
372
+ "step": 940
373
+ },
374
+ {
375
+ "epoch": 15.48,
376
+ "learning_rate": 9.688172043010753e-05,
377
+ "loss": 0.2132,
378
+ "step": 960
379
+ },
380
+ {
381
+ "epoch": 15.81,
382
+ "learning_rate": 9.473118279569893e-05,
383
+ "loss": 0.2291,
384
+ "step": 980
385
+ },
386
+ {
387
+ "epoch": 16.13,
388
+ "learning_rate": 9.258064516129033e-05,
389
+ "loss": 0.2149,
390
+ "step": 1000
391
+ },
392
+ {
393
+ "epoch": 16.13,
394
+ "eval_accuracy": 0.7712230215827338,
395
+ "eval_loss": 0.87116539478302,
396
+ "eval_runtime": 14.1714,
397
+ "eval_samples_per_second": 49.042,
398
+ "eval_steps_per_second": 6.139,
399
+ "step": 1000
400
+ },
401
+ {
402
+ "epoch": 16.45,
403
+ "learning_rate": 9.053763440860215e-05,
404
+ "loss": 0.2691,
405
+ "step": 1020
406
+ },
407
+ {
408
+ "epoch": 16.77,
409
+ "learning_rate": 8.838709677419355e-05,
410
+ "loss": 0.2236,
411
+ "step": 1040
412
+ },
413
+ {
414
+ "epoch": 17.1,
415
+ "learning_rate": 8.623655913978495e-05,
416
+ "loss": 0.2445,
417
+ "step": 1060
418
+ },
419
+ {
420
+ "epoch": 17.42,
421
+ "learning_rate": 8.408602150537634e-05,
422
+ "loss": 0.2325,
423
+ "step": 1080
424
+ },
425
+ {
426
+ "epoch": 17.74,
427
+ "learning_rate": 8.193548387096774e-05,
428
+ "loss": 0.2141,
429
+ "step": 1100
430
+ },
431
+ {
432
+ "epoch": 17.74,
433
+ "eval_accuracy": 0.7741007194244605,
434
+ "eval_loss": 0.9116857051849365,
435
+ "eval_runtime": 14.1821,
436
+ "eval_samples_per_second": 49.005,
437
+ "eval_steps_per_second": 6.134,
438
+ "step": 1100
439
+ },
440
+ {
441
+ "epoch": 18.06,
442
+ "learning_rate": 7.978494623655914e-05,
443
+ "loss": 0.2006,
444
+ "step": 1120
445
+ },
446
+ {
447
+ "epoch": 18.39,
448
+ "learning_rate": 7.763440860215054e-05,
449
+ "loss": 0.2067,
450
+ "step": 1140
451
+ },
452
+ {
453
+ "epoch": 18.71,
454
+ "learning_rate": 7.548387096774195e-05,
455
+ "loss": 0.2099,
456
+ "step": 1160
457
+ },
458
+ {
459
+ "epoch": 19.03,
460
+ "learning_rate": 7.333333333333333e-05,
461
+ "loss": 0.2355,
462
+ "step": 1180
463
+ },
464
+ {
465
+ "epoch": 19.35,
466
+ "learning_rate": 7.118279569892473e-05,
467
+ "loss": 0.1863,
468
+ "step": 1200
469
+ },
470
+ {
471
+ "epoch": 19.35,
472
+ "eval_accuracy": 0.7769784172661871,
473
+ "eval_loss": 0.8968440890312195,
474
+ "eval_runtime": 14.2697,
475
+ "eval_samples_per_second": 48.704,
476
+ "eval_steps_per_second": 6.097,
477
+ "step": 1200
478
+ },
479
+ {
480
+ "epoch": 19.68,
481
+ "learning_rate": 6.903225806451613e-05,
482
+ "loss": 0.1932,
483
+ "step": 1220
484
+ },
485
+ {
486
+ "epoch": 20.0,
487
+ "learning_rate": 6.688172043010754e-05,
488
+ "loss": 0.1745,
489
+ "step": 1240
490
+ },
491
+ {
492
+ "epoch": 20.32,
493
+ "learning_rate": 6.473118279569893e-05,
494
+ "loss": 0.1846,
495
+ "step": 1260
496
+ },
497
+ {
498
+ "epoch": 20.65,
499
+ "learning_rate": 6.258064516129033e-05,
500
+ "loss": 0.1794,
501
+ "step": 1280
502
+ },
503
+ {
504
+ "epoch": 20.97,
505
+ "learning_rate": 6.0430107526881715e-05,
506
+ "loss": 0.2194,
507
+ "step": 1300
508
+ },
509
+ {
510
+ "epoch": 20.97,
511
+ "eval_accuracy": 0.7726618705035971,
512
+ "eval_loss": 0.8560822606086731,
513
+ "eval_runtime": 14.1334,
514
+ "eval_samples_per_second": 49.174,
515
+ "eval_steps_per_second": 6.156,
516
+ "step": 1300
517
+ },
518
+ {
519
+ "epoch": 21.29,
520
+ "learning_rate": 5.827956989247312e-05,
521
+ "loss": 0.1749,
522
+ "step": 1320
523
+ },
524
+ {
525
+ "epoch": 21.61,
526
+ "learning_rate": 5.612903225806452e-05,
527
+ "loss": 0.1527,
528
+ "step": 1340
529
+ },
530
+ {
531
+ "epoch": 21.94,
532
+ "learning_rate": 5.397849462365592e-05,
533
+ "loss": 0.2119,
534
+ "step": 1360
535
+ },
536
+ {
537
+ "epoch": 22.26,
538
+ "learning_rate": 5.182795698924732e-05,
539
+ "loss": 0.1753,
540
+ "step": 1380
541
+ },
542
+ {
543
+ "epoch": 22.58,
544
+ "learning_rate": 4.967741935483871e-05,
545
+ "loss": 0.1635,
546
+ "step": 1400
547
+ },
548
+ {
549
+ "epoch": 22.58,
550
+ "eval_accuracy": 0.781294964028777,
551
+ "eval_loss": 0.856007993221283,
552
+ "eval_runtime": 14.183,
553
+ "eval_samples_per_second": 49.002,
554
+ "eval_steps_per_second": 6.134,
555
+ "step": 1400
556
+ },
557
+ {
558
+ "epoch": 22.9,
559
+ "learning_rate": 4.752688172043011e-05,
560
+ "loss": 0.19,
561
+ "step": 1420
562
+ },
563
+ {
564
+ "epoch": 23.23,
565
+ "learning_rate": 4.53763440860215e-05,
566
+ "loss": 0.1711,
567
+ "step": 1440
568
+ },
569
+ {
570
+ "epoch": 23.55,
571
+ "learning_rate": 4.322580645161291e-05,
572
+ "loss": 0.1491,
573
+ "step": 1460
574
+ },
575
+ {
576
+ "epoch": 23.87,
577
+ "learning_rate": 4.1075268817204305e-05,
578
+ "loss": 0.1536,
579
+ "step": 1480
580
+ },
581
+ {
582
+ "epoch": 24.19,
583
+ "learning_rate": 3.8924731182795695e-05,
584
+ "loss": 0.1528,
585
+ "step": 1500
586
+ },
587
+ {
588
+ "epoch": 24.19,
589
+ "eval_accuracy": 0.7884892086330936,
590
+ "eval_loss": 0.8994219899177551,
591
+ "eval_runtime": 14.0742,
592
+ "eval_samples_per_second": 49.381,
593
+ "eval_steps_per_second": 6.182,
594
+ "step": 1500
595
+ },
596
+ {
597
+ "epoch": 24.52,
598
+ "learning_rate": 3.67741935483871e-05,
599
+ "loss": 0.1572,
600
+ "step": 1520
601
+ },
602
+ {
603
+ "epoch": 24.84,
604
+ "learning_rate": 3.4623655913978497e-05,
605
+ "loss": 0.1478,
606
+ "step": 1540
607
+ },
608
+ {
609
+ "epoch": 25.16,
610
+ "learning_rate": 3.2473118279569894e-05,
611
+ "loss": 0.1322,
612
+ "step": 1560
613
+ },
614
+ {
615
+ "epoch": 25.48,
616
+ "learning_rate": 3.032258064516129e-05,
617
+ "loss": 0.1408,
618
+ "step": 1580
619
+ },
620
+ {
621
+ "epoch": 25.81,
622
+ "learning_rate": 2.8172043010752692e-05,
623
+ "loss": 0.1416,
624
+ "step": 1600
625
+ },
626
+ {
627
+ "epoch": 25.81,
628
+ "eval_accuracy": 0.7856115107913669,
629
+ "eval_loss": 0.855847954750061,
630
+ "eval_runtime": 14.3352,
631
+ "eval_samples_per_second": 48.482,
632
+ "eval_steps_per_second": 6.069,
633
+ "step": 1600
634
+ },
635
+ {
636
+ "epoch": 26.13,
637
+ "learning_rate": 2.6021505376344086e-05,
638
+ "loss": 0.1351,
639
+ "step": 1620
640
+ },
641
+ {
642
+ "epoch": 26.45,
643
+ "learning_rate": 2.3870967741935486e-05,
644
+ "loss": 0.1345,
645
+ "step": 1640
646
+ },
647
+ {
648
+ "epoch": 26.77,
649
+ "learning_rate": 2.172043010752688e-05,
650
+ "loss": 0.1551,
651
+ "step": 1660
652
+ },
653
+ {
654
+ "epoch": 27.1,
655
+ "learning_rate": 1.956989247311828e-05,
656
+ "loss": 0.1727,
657
+ "step": 1680
658
+ },
659
+ {
660
+ "epoch": 27.42,
661
+ "learning_rate": 1.741935483870968e-05,
662
+ "loss": 0.1454,
663
+ "step": 1700
664
+ },
665
+ {
666
+ "epoch": 27.42,
667
+ "eval_accuracy": 0.7956834532374101,
668
+ "eval_loss": 0.8568124175071716,
669
+ "eval_runtime": 14.2245,
670
+ "eval_samples_per_second": 48.859,
671
+ "eval_steps_per_second": 6.116,
672
+ "step": 1700
673
+ },
674
+ {
675
+ "epoch": 27.74,
676
+ "learning_rate": 1.5268817204301076e-05,
677
+ "loss": 0.1293,
678
+ "step": 1720
679
+ },
680
+ {
681
+ "epoch": 28.06,
682
+ "learning_rate": 1.3118279569892475e-05,
683
+ "loss": 0.1059,
684
+ "step": 1740
685
+ },
686
+ {
687
+ "epoch": 28.39,
688
+ "learning_rate": 1.0967741935483872e-05,
689
+ "loss": 0.1655,
690
+ "step": 1760
691
+ },
692
+ {
693
+ "epoch": 28.71,
694
+ "learning_rate": 8.81720430107527e-06,
695
+ "loss": 0.1181,
696
+ "step": 1780
697
+ },
698
+ {
699
+ "epoch": 29.03,
700
+ "learning_rate": 6.774193548387098e-06,
701
+ "loss": 0.1642,
702
+ "step": 1800
703
+ },
704
+ {
705
+ "epoch": 29.03,
706
+ "eval_accuracy": 0.7913669064748201,
707
+ "eval_loss": 0.8794375658035278,
708
+ "eval_runtime": 14.2696,
709
+ "eval_samples_per_second": 48.705,
710
+ "eval_steps_per_second": 6.097,
711
+ "step": 1800
712
+ },
713
+ {
714
+ "epoch": 29.35,
715
+ "learning_rate": 4.623655913978495e-06,
716
+ "loss": 0.1297,
717
+ "step": 1820
718
+ },
719
+ {
720
+ "epoch": 29.68,
721
+ "learning_rate": 2.4731182795698927e-06,
722
+ "loss": 0.1257,
723
+ "step": 1840
724
+ },
725
+ {
726
+ "epoch": 30.0,
727
+ "learning_rate": 3.2258064516129035e-07,
728
+ "loss": 0.1283,
729
+ "step": 1860
730
+ },
731
+ {
732
+ "epoch": 30.0,
733
+ "step": 1860,
734
+ "total_flos": 9.155203906807849e+18,
735
+ "train_loss": 0.21589128868554228,
736
+ "train_runtime": 3907.045,
737
+ "train_samples_per_second": 30.23,
738
+ "train_steps_per_second": 0.476
739
  }
740
  ],
741
  "logging_steps": 20,
742
+ "max_steps": 1860,
743
+ "num_train_epochs": 30,
744
  "save_steps": 100,
745
+ "total_flos": 9.155203906807849e+18,
746
  "trial_name": null,
747
  "trial_params": null
748
  }