JulesGo committed on
Commit
062237a
·
verified ·
1 Parent(s): f9d4c4b

Fin de l'entraînement

Browse files
Files changed (5) hide show
  1. README.md +3 -3
  2. all_results.json +11 -11
  3. eval_results.json +7 -7
  4. train_results.json +5 -5
  5. trainer_state.json +237 -133
README.md CHANGED
@@ -14,9 +14,9 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
16
  It achieves the following results on the evaluation set:
17
- - Loss: 0.0609
18
- - Mse: 0.1301
19
- - Mae: 0.3117
20
 
21
  ## Model description
22
 
 
14
 
15
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
16
  It achieves the following results on the evaluation set:
17
+ - Loss: 0.0658
18
+ - Mse: 0.1253
19
+ - Mae: 0.3059
20
 
21
  ## Model description
22
 
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 18.0,
3
- "eval_loss": 0.08588916808366776,
4
- "eval_mae": 0.3119707405567169,
5
- "eval_mse": 0.1291341334581375,
6
- "eval_runtime": 9.945,
7
- "eval_samples_per_second": 7.642,
8
- "eval_steps_per_second": 1.006,
9
  "total_flos": 0.0,
10
- "train_loss": 0.2600840449333191,
11
- "train_runtime": 1002.0455,
12
- "train_samples_per_second": 6.048,
13
- "train_steps_per_second": 0.18
14
  }
 
1
  {
2
+ "epoch": 27.0,
3
+ "eval_loss": 0.06580791622400284,
4
+ "eval_mae": 0.3059428930282593,
5
+ "eval_mse": 0.12533096969127655,
6
+ "eval_runtime": 10.4766,
7
+ "eval_samples_per_second": 7.254,
8
+ "eval_steps_per_second": 0.955,
9
  "total_flos": 0.0,
10
+ "train_loss": 0.13240765200720894,
11
+ "train_runtime": 1534.1197,
12
+ "train_samples_per_second": 5.925,
13
+ "train_steps_per_second": 0.176
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 18.0,
3
- "eval_loss": 0.08588916808366776,
4
- "eval_mae": 0.3119707405567169,
5
- "eval_mse": 0.1291341334581375,
6
- "eval_runtime": 9.945,
7
- "eval_samples_per_second": 7.642,
8
- "eval_steps_per_second": 1.006
9
  }
 
1
  {
2
+ "epoch": 27.0,
3
+ "eval_loss": 0.06580791622400284,
4
+ "eval_mae": 0.3059428930282593,
5
+ "eval_mse": 0.12533096969127655,
6
+ "eval_runtime": 10.4766,
7
+ "eval_samples_per_second": 7.254,
8
+ "eval_steps_per_second": 0.955
9
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 18.0,
3
  "total_flos": 0.0,
4
- "train_loss": 0.2600840449333191,
5
- "train_runtime": 1002.0455,
6
- "train_samples_per_second": 6.048,
7
- "train_steps_per_second": 0.18
8
  }
 
1
  {
2
+ "epoch": 27.0,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.13240765200720894,
5
+ "train_runtime": 1534.1197,
6
+ "train_samples_per_second": 5.925,
7
+ "train_steps_per_second": 0.176
8
  }
trainer_state.json CHANGED
@@ -1,236 +1,340 @@
1
  {
2
- "best_global_step": 100,
3
- "best_metric": 0.1291341334581375,
4
- "best_model_checkpoint": "./vit_focus/checkpoint-100",
5
- "epoch": 18.0,
6
  "eval_steps": 500,
7
- "global_step": 180,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_loss": 0.11766688525676727,
15
- "eval_mae": 0.3475768268108368,
16
- "eval_mse": 0.1613956242799759,
17
- "eval_runtime": 10.0195,
18
- "eval_samples_per_second": 7.585,
19
- "eval_steps_per_second": 0.998,
20
  "step": 10
21
  },
22
  {
23
  "epoch": 2.0,
24
- "eval_loss": 0.09316065162420273,
25
- "eval_mae": 0.32877787947654724,
26
- "eval_mse": 0.14106006920337677,
27
- "eval_runtime": 9.4019,
28
- "eval_samples_per_second": 8.083,
29
- "eval_steps_per_second": 1.064,
30
  "step": 20
31
  },
32
  {
33
  "epoch": 3.0,
34
- "eval_loss": 0.09138184040784836,
35
- "eval_mae": 0.3351140022277832,
36
- "eval_mse": 0.14778681099414825,
37
- "eval_runtime": 9.4168,
38
- "eval_samples_per_second": 8.071,
39
- "eval_steps_per_second": 1.062,
40
  "step": 30
41
  },
42
  {
43
  "epoch": 4.0,
44
- "grad_norm": 3.422793388366699,
45
- "learning_rate": 7.833333333333333e-05,
46
- "loss": 0.3379,
47
  "step": 40
48
  },
49
  {
50
  "epoch": 4.0,
51
- "eval_loss": 0.07990685850381851,
52
- "eval_mae": 0.3216111361980438,
53
- "eval_mse": 0.1368674337863922,
54
- "eval_runtime": 9.6691,
55
- "eval_samples_per_second": 7.86,
56
- "eval_steps_per_second": 1.034,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 5.0,
61
- "eval_loss": 0.07730501145124435,
62
- "eval_mae": 0.3210676312446594,
63
- "eval_mse": 0.1364751160144806,
64
- "eval_runtime": 9.6377,
65
- "eval_samples_per_second": 7.886,
66
- "eval_steps_per_second": 1.038,
67
  "step": 50
68
  },
69
  {
70
  "epoch": 6.0,
71
- "eval_loss": 0.08424032479524612,
72
- "eval_mae": 0.3250683844089508,
73
- "eval_mse": 0.14131243526935577,
74
- "eval_runtime": 9.4489,
75
- "eval_samples_per_second": 8.043,
76
- "eval_steps_per_second": 1.058,
77
  "step": 60
78
  },
79
  {
80
  "epoch": 7.0,
81
- "eval_loss": 0.08766830712556839,
82
- "eval_mae": 0.3169863820075989,
83
- "eval_mse": 0.13288582861423492,
84
- "eval_runtime": 10.5663,
85
- "eval_samples_per_second": 7.193,
86
- "eval_steps_per_second": 0.946,
87
  "step": 70
88
  },
89
  {
90
  "epoch": 8.0,
91
- "grad_norm": 3.210268020629883,
92
- "learning_rate": 5.6111111111111114e-05,
93
- "loss": 0.2829,
94
  "step": 80
95
  },
96
  {
97
  "epoch": 8.0,
98
- "eval_loss": 0.08300431817770004,
99
- "eval_mae": 0.3192159831523895,
100
- "eval_mse": 0.1370069533586502,
101
- "eval_runtime": 9.75,
102
- "eval_samples_per_second": 7.795,
103
- "eval_steps_per_second": 1.026,
104
  "step": 80
105
  },
106
  {
107
  "epoch": 9.0,
108
- "eval_loss": 0.08274427056312561,
109
- "eval_mae": 0.32542383670806885,
110
- "eval_mse": 0.140847310423851,
111
- "eval_runtime": 9.5472,
112
- "eval_samples_per_second": 7.96,
113
- "eval_steps_per_second": 1.047,
114
  "step": 90
115
  },
116
  {
117
  "epoch": 10.0,
118
- "eval_loss": 0.08588916808366776,
119
- "eval_mae": 0.3119707405567169,
120
- "eval_mse": 0.1291341334581375,
121
- "eval_runtime": 10.6888,
122
- "eval_samples_per_second": 7.11,
123
- "eval_steps_per_second": 0.936,
124
  "step": 100
125
  },
126
  {
127
  "epoch": 11.0,
128
- "eval_loss": 0.10085263103246689,
129
- "eval_mae": 0.330853134393692,
130
- "eval_mse": 0.14782297611236572,
131
- "eval_runtime": 9.484,
132
- "eval_samples_per_second": 8.013,
133
- "eval_steps_per_second": 1.054,
134
  "step": 110
135
  },
136
  {
137
  "epoch": 12.0,
138
- "grad_norm": 2.0312862396240234,
139
- "learning_rate": 3.388888888888889e-05,
140
- "loss": 0.2461,
141
  "step": 120
142
  },
143
  {
144
  "epoch": 12.0,
145
- "eval_loss": 0.08102226257324219,
146
- "eval_mae": 0.3174217641353607,
147
- "eval_mse": 0.135166734457016,
148
- "eval_runtime": 9.5097,
149
- "eval_samples_per_second": 7.992,
150
- "eval_steps_per_second": 1.052,
151
  "step": 120
152
  },
153
  {
154
  "epoch": 13.0,
155
- "eval_loss": 0.08370836824178696,
156
- "eval_mae": 0.31886667013168335,
157
- "eval_mse": 0.13684500753879547,
158
- "eval_runtime": 9.4388,
159
- "eval_samples_per_second": 8.052,
160
- "eval_steps_per_second": 1.059,
161
  "step": 130
162
  },
163
  {
164
  "epoch": 14.0,
165
- "eval_loss": 0.08873885869979858,
166
- "eval_mae": 0.32301220297813416,
167
- "eval_mse": 0.1409922093153,
168
- "eval_runtime": 9.629,
169
- "eval_samples_per_second": 7.893,
170
- "eval_steps_per_second": 1.039,
171
  "step": 140
172
  },
173
  {
174
  "epoch": 15.0,
175
- "eval_loss": 0.08366208523511887,
176
- "eval_mae": 0.31394028663635254,
177
- "eval_mse": 0.13212691247463226,
178
- "eval_runtime": 9.4772,
179
- "eval_samples_per_second": 8.019,
180
- "eval_steps_per_second": 1.055,
181
  "step": 150
182
  },
183
  {
184
  "epoch": 16.0,
185
- "grad_norm": 2.5568466186523438,
186
- "learning_rate": 1.1666666666666668e-05,
187
- "loss": 0.2104,
188
  "step": 160
189
  },
190
  {
191
  "epoch": 16.0,
192
- "eval_loss": 0.09077440947294235,
193
- "eval_mae": 0.3225802481174469,
194
- "eval_mse": 0.14043577015399933,
195
- "eval_runtime": 9.6623,
196
- "eval_samples_per_second": 7.866,
197
- "eval_steps_per_second": 1.035,
198
  "step": 160
199
  },
200
  {
201
  "epoch": 17.0,
202
- "eval_loss": 0.08574231714010239,
203
- "eval_mae": 0.31825631856918335,
204
- "eval_mse": 0.13667234778404236,
205
- "eval_runtime": 9.4595,
206
- "eval_samples_per_second": 8.034,
207
- "eval_steps_per_second": 1.057,
208
  "step": 170
209
  },
210
  {
211
  "epoch": 18.0,
212
- "eval_loss": 0.08340632170438766,
213
- "eval_mae": 0.3171162009239197,
214
- "eval_mse": 0.13509555160999298,
215
- "eval_runtime": 9.4982,
216
- "eval_samples_per_second": 8.002,
217
- "eval_steps_per_second": 1.053,
218
  "step": 180
219
  },
220
  {
221
- "epoch": 18.0,
222
- "step": 180,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
  "total_flos": 0.0,
224
- "train_loss": 0.2600840449333191,
225
- "train_runtime": 1002.0455,
226
- "train_samples_per_second": 6.048,
227
- "train_steps_per_second": 0.18
228
  }
229
  ],
230
  "logging_steps": 40,
231
- "max_steps": 180,
232
  "num_input_tokens_seen": 0,
233
- "num_train_epochs": 20,
234
  "save_steps": 500,
235
  "stateful_callbacks": {
236
  "TrainerControl": {
 
1
  {
2
+ "best_global_step": 120,
3
+ "best_metric": 0.12533096969127655,
4
+ "best_model_checkpoint": "./vit_focus/checkpoint-120",
5
+ "epoch": 27.0,
6
  "eval_steps": 500,
7
+ "global_step": 270,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_loss": 0.0993029847741127,
15
+ "eval_mae": 0.3380415141582489,
16
+ "eval_mse": 0.15286438167095184,
17
+ "eval_runtime": 9.9578,
18
+ "eval_samples_per_second": 7.632,
19
+ "eval_steps_per_second": 1.004,
20
  "step": 10
21
  },
22
  {
23
  "epoch": 2.0,
24
+ "eval_loss": 0.1050349548459053,
25
+ "eval_mae": 0.34093156456947327,
26
+ "eval_mse": 0.15535660088062286,
27
+ "eval_runtime": 9.7265,
28
+ "eval_samples_per_second": 7.814,
29
+ "eval_steps_per_second": 1.028,
30
  "step": 20
31
  },
32
  {
33
  "epoch": 3.0,
34
+ "eval_loss": 0.09966066479682922,
35
+ "eval_mae": 0.3352396786212921,
36
+ "eval_mse": 0.14932329952716827,
37
+ "eval_runtime": 9.4644,
38
+ "eval_samples_per_second": 8.03,
39
+ "eval_steps_per_second": 1.057,
40
  "step": 30
41
  },
42
  {
43
  "epoch": 4.0,
44
+ "grad_norm": 6.926674842834473,
45
+ "learning_rate": 4.277777777777778e-05,
46
+ "loss": 0.313,
47
  "step": 40
48
  },
49
  {
50
  "epoch": 4.0,
51
+ "eval_loss": 0.06556536257266998,
52
+ "eval_mae": 0.3157392740249634,
53
+ "eval_mse": 0.13447947800159454,
54
+ "eval_runtime": 10.1308,
55
+ "eval_samples_per_second": 7.502,
56
+ "eval_steps_per_second": 0.987,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 5.0,
61
+ "eval_loss": 0.06592569500207901,
62
+ "eval_mae": 0.3202681839466095,
63
+ "eval_mse": 0.13659903407096863,
64
+ "eval_runtime": 9.7043,
65
+ "eval_samples_per_second": 7.832,
66
+ "eval_steps_per_second": 1.03,
67
  "step": 50
68
  },
69
  {
70
  "epoch": 6.0,
71
+ "eval_loss": 0.0638844296336174,
72
+ "eval_mae": 0.31192123889923096,
73
+ "eval_mse": 0.12961846590042114,
74
+ "eval_runtime": 9.5179,
75
+ "eval_samples_per_second": 7.985,
76
+ "eval_steps_per_second": 1.051,
77
  "step": 60
78
  },
79
  {
80
  "epoch": 7.0,
81
+ "eval_loss": 0.06389027088880539,
82
+ "eval_mae": 0.3178236484527588,
83
+ "eval_mse": 0.13510307669639587,
84
+ "eval_runtime": 9.5025,
85
+ "eval_samples_per_second": 7.998,
86
+ "eval_steps_per_second": 1.052,
87
  "step": 70
88
  },
89
  {
90
  "epoch": 8.0,
91
+ "grad_norm": 4.451300144195557,
92
+ "learning_rate": 3.537037037037037e-05,
93
+ "loss": 0.1742,
94
  "step": 80
95
  },
96
  {
97
  "epoch": 8.0,
98
+ "eval_loss": 0.06391099840402603,
99
+ "eval_mae": 0.3085971772670746,
100
+ "eval_mse": 0.12736700475215912,
101
+ "eval_runtime": 10.1592,
102
+ "eval_samples_per_second": 7.481,
103
+ "eval_steps_per_second": 0.984,
104
  "step": 80
105
  },
106
  {
107
  "epoch": 9.0,
108
+ "eval_loss": 0.07279632240533829,
109
+ "eval_mae": 0.3096161186695099,
110
+ "eval_mse": 0.12943950295448303,
111
+ "eval_runtime": 9.4821,
112
+ "eval_samples_per_second": 8.015,
113
+ "eval_steps_per_second": 1.055,
114
  "step": 90
115
  },
116
  {
117
  "epoch": 10.0,
118
+ "eval_loss": 0.06712160259485245,
119
+ "eval_mae": 0.3150458335876465,
120
+ "eval_mse": 0.13300836086273193,
121
+ "eval_runtime": 9.7046,
122
+ "eval_samples_per_second": 7.831,
123
+ "eval_steps_per_second": 1.03,
124
  "step": 100
125
  },
126
  {
127
  "epoch": 11.0,
128
+ "eval_loss": 0.06695493310689926,
129
+ "eval_mae": 0.30665045976638794,
130
+ "eval_mse": 0.12600918114185333,
131
+ "eval_runtime": 9.7852,
132
+ "eval_samples_per_second": 7.767,
133
+ "eval_steps_per_second": 1.022,
134
  "step": 110
135
  },
136
  {
137
  "epoch": 12.0,
138
+ "grad_norm": 2.9957473278045654,
139
+ "learning_rate": 2.7962962962962965e-05,
140
+ "loss": 0.1284,
141
  "step": 120
142
  },
143
  {
144
  "epoch": 12.0,
145
+ "eval_loss": 0.06580791622400284,
146
+ "eval_mae": 0.3059428930282593,
147
+ "eval_mse": 0.12533096969127655,
148
+ "eval_runtime": 9.7135,
149
+ "eval_samples_per_second": 7.824,
150
+ "eval_steps_per_second": 1.029,
151
  "step": 120
152
  },
153
  {
154
  "epoch": 13.0,
155
+ "eval_loss": 0.06405826658010483,
156
+ "eval_mae": 0.3104270100593567,
157
+ "eval_mse": 0.1280805468559265,
158
+ "eval_runtime": 12.0741,
159
+ "eval_samples_per_second": 6.294,
160
+ "eval_steps_per_second": 0.828,
161
  "step": 130
162
  },
163
  {
164
  "epoch": 14.0,
165
+ "eval_loss": 0.06428611278533936,
166
+ "eval_mae": 0.3104848563671112,
167
+ "eval_mse": 0.12893278896808624,
168
+ "eval_runtime": 9.5891,
169
+ "eval_samples_per_second": 7.926,
170
+ "eval_steps_per_second": 1.043,
171
  "step": 140
172
  },
173
  {
174
  "epoch": 15.0,
175
+ "eval_loss": 0.06487523764371872,
176
+ "eval_mae": 0.3171584904193878,
177
+ "eval_mse": 0.13420797884464264,
178
+ "eval_runtime": 9.5632,
179
+ "eval_samples_per_second": 7.947,
180
+ "eval_steps_per_second": 1.046,
181
  "step": 150
182
  },
183
  {
184
  "epoch": 16.0,
185
+ "grad_norm": 1.922245740890503,
186
+ "learning_rate": 2.0555555555555555e-05,
187
+ "loss": 0.0981,
188
  "step": 160
189
  },
190
  {
191
  "epoch": 16.0,
192
+ "eval_loss": 0.06558659672737122,
193
+ "eval_mae": 0.30849871039390564,
194
+ "eval_mse": 0.12756428122520447,
195
+ "eval_runtime": 9.5905,
196
+ "eval_samples_per_second": 7.924,
197
+ "eval_steps_per_second": 1.043,
198
  "step": 160
199
  },
200
  {
201
  "epoch": 17.0,
202
+ "eval_loss": 0.06274469941854477,
203
+ "eval_mae": 0.3136182427406311,
204
+ "eval_mse": 0.13160544633865356,
205
+ "eval_runtime": 10.0109,
206
+ "eval_samples_per_second": 7.592,
207
+ "eval_steps_per_second": 0.999,
208
  "step": 170
209
  },
210
  {
211
  "epoch": 18.0,
212
+ "eval_loss": 0.06201491877436638,
213
+ "eval_mae": 0.3168633282184601,
214
+ "eval_mse": 0.1343080997467041,
215
+ "eval_runtime": 9.9918,
216
+ "eval_samples_per_second": 7.606,
217
+ "eval_steps_per_second": 1.001,
218
  "step": 180
219
  },
220
  {
221
+ "epoch": 19.0,
222
+ "eval_loss": 0.0631915032863617,
223
+ "eval_mae": 0.31292420625686646,
224
+ "eval_mse": 0.13110676407814026,
225
+ "eval_runtime": 9.5351,
226
+ "eval_samples_per_second": 7.971,
227
+ "eval_steps_per_second": 1.049,
228
+ "step": 190
229
+ },
230
+ {
231
+ "epoch": 20.0,
232
+ "grad_norm": 1.9687647819519043,
233
+ "learning_rate": 1.3148148148148148e-05,
234
+ "loss": 0.0767,
235
+ "step": 200
236
+ },
237
+ {
238
+ "epoch": 20.0,
239
+ "eval_loss": 0.06296339631080627,
240
+ "eval_mae": 0.3142727017402649,
241
+ "eval_mse": 0.1326274573802948,
242
+ "eval_runtime": 9.7999,
243
+ "eval_samples_per_second": 7.755,
244
+ "eval_steps_per_second": 1.02,
245
+ "step": 200
246
+ },
247
+ {
248
+ "epoch": 21.0,
249
+ "eval_loss": 0.06408733129501343,
250
+ "eval_mae": 0.311717689037323,
251
+ "eval_mse": 0.12986762821674347,
252
+ "eval_runtime": 9.6462,
253
+ "eval_samples_per_second": 7.879,
254
+ "eval_steps_per_second": 1.037,
255
+ "step": 210
256
+ },
257
+ {
258
+ "epoch": 22.0,
259
+ "eval_loss": 0.06340750306844711,
260
+ "eval_mae": 0.3114081621170044,
261
+ "eval_mse": 0.12940751016139984,
262
+ "eval_runtime": 9.5394,
263
+ "eval_samples_per_second": 7.967,
264
+ "eval_steps_per_second": 1.048,
265
+ "step": 220
266
+ },
267
+ {
268
+ "epoch": 23.0,
269
+ "eval_loss": 0.06285858899354935,
270
+ "eval_mae": 0.31304195523262024,
271
+ "eval_mse": 0.13149800896644592,
272
+ "eval_runtime": 9.8923,
273
+ "eval_samples_per_second": 7.683,
274
+ "eval_steps_per_second": 1.011,
275
+ "step": 230
276
+ },
277
+ {
278
+ "epoch": 24.0,
279
+ "grad_norm": 1.0159116983413696,
280
+ "learning_rate": 5.740740740740741e-06,
281
+ "loss": 0.0615,
282
+ "step": 240
283
+ },
284
+ {
285
+ "epoch": 24.0,
286
+ "eval_loss": 0.06115531921386719,
287
+ "eval_mae": 0.3123721480369568,
288
+ "eval_mse": 0.13078482449054718,
289
+ "eval_runtime": 9.6638,
290
+ "eval_samples_per_second": 7.864,
291
+ "eval_steps_per_second": 1.035,
292
+ "step": 240
293
+ },
294
+ {
295
+ "epoch": 25.0,
296
+ "eval_loss": 0.059913910925388336,
297
+ "eval_mae": 0.31175902485847473,
298
+ "eval_mse": 0.13015513122081757,
299
+ "eval_runtime": 9.6921,
300
+ "eval_samples_per_second": 7.841,
301
+ "eval_steps_per_second": 1.032,
302
+ "step": 250
303
+ },
304
+ {
305
+ "epoch": 26.0,
306
+ "eval_loss": 0.06085545942187309,
307
+ "eval_mae": 0.313151478767395,
308
+ "eval_mse": 0.13129989802837372,
309
+ "eval_runtime": 9.5449,
310
+ "eval_samples_per_second": 7.962,
311
+ "eval_steps_per_second": 1.048,
312
+ "step": 260
313
+ },
314
+ {
315
+ "epoch": 27.0,
316
+ "eval_loss": 0.060885023325681686,
317
+ "eval_mae": 0.3116842210292816,
318
+ "eval_mse": 0.13011318445205688,
319
+ "eval_runtime": 9.6423,
320
+ "eval_samples_per_second": 7.882,
321
+ "eval_steps_per_second": 1.037,
322
+ "step": 270
323
+ },
324
+ {
325
+ "epoch": 27.0,
326
+ "step": 270,
327
  "total_flos": 0.0,
328
+ "train_loss": 0.13240765200720894,
329
+ "train_runtime": 1534.1197,
330
+ "train_samples_per_second": 5.925,
331
+ "train_steps_per_second": 0.176
332
  }
333
  ],
334
  "logging_steps": 40,
335
+ "max_steps": 270,
336
  "num_input_tokens_seen": 0,
337
+ "num_train_epochs": 30,
338
  "save_steps": 500,
339
  "stateful_callbacks": {
340
  "TrainerControl": {