rbelanec committed on
Commit
150f73c
·
verified ·
1 Parent(s): 3e30826

End of training

Browse files
Files changed (5) hide show
  1. README.md +3 -3
  2. all_results.json +6 -6
  3. eval_results.json +3 -3
  4. train_results.json +3 -3
  5. trainer_state.json +54 -54
README.md CHANGED
@@ -17,10 +17,10 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  # test
19
 
20
- This model is a fine-tuned version of [meta-llama/Llama-3.2-1B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct) on an unknown dataset.
21
  It achieves the following results on the evaluation set:
22
- - Loss: 0.3492
23
- - Num Input Tokens Seen: 46944
24
 
25
  ## Model description
26
 
 
17
 
18
  # test
19
 
20
+ This model is a fine-tuned version of [meta-llama/Llama-3.2-1B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct) on the wsc dataset.
21
  It achieves the following results on the evaluation set:
22
+ - Loss: 0.3459
23
+ - Num Input Tokens Seen: 49376
24
 
25
  ## Model description
26
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 1.0,
3
  "eval_loss": 0.34589245915412903,
4
- "eval_runtime": 0.8481,
5
- "eval_samples_per_second": 66.033,
6
- "eval_steps_per_second": 16.508,
7
  "num_input_tokens_seen": 49376,
8
  "total_flos": 497127920369664.0,
9
  "train_loss": 1.1438678817749024,
10
- "train_runtime": 264.1495,
11
- "train_samples_per_second": 1.885,
12
- "train_steps_per_second": 0.473
13
  }
 
1
  {
2
  "epoch": 1.0,
3
  "eval_loss": 0.34589245915412903,
4
+ "eval_runtime": 0.7925,
5
+ "eval_samples_per_second": 70.661,
6
+ "eval_steps_per_second": 17.665,
7
  "num_input_tokens_seen": 49376,
8
  "total_flos": 497127920369664.0,
9
  "train_loss": 1.1438678817749024,
10
+ "train_runtime": 224.169,
11
+ "train_samples_per_second": 2.222,
12
+ "train_steps_per_second": 0.558
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
  "eval_loss": 0.34589245915412903,
4
- "eval_runtime": 0.8481,
5
- "eval_samples_per_second": 66.033,
6
- "eval_steps_per_second": 16.508,
7
  "num_input_tokens_seen": 49376
8
  }
 
1
  {
2
  "epoch": 1.0,
3
  "eval_loss": 0.34589245915412903,
4
+ "eval_runtime": 0.7925,
5
+ "eval_samples_per_second": 70.661,
6
+ "eval_steps_per_second": 17.665,
7
  "num_input_tokens_seen": 49376
8
  }
train_results.json CHANGED
@@ -3,7 +3,7 @@
3
  "num_input_tokens_seen": 49376,
4
  "total_flos": 497127920369664.0,
5
  "train_loss": 1.1438678817749024,
6
- "train_runtime": 264.1495,
7
- "train_samples_per_second": 1.885,
8
- "train_steps_per_second": 0.473
9
  }
 
3
  "num_input_tokens_seen": 49376,
4
  "total_flos": 497127920369664.0,
5
  "train_loss": 1.1438678817749024,
6
+ "train_runtime": 224.169,
7
+ "train_samples_per_second": 2.222,
8
+ "train_steps_per_second": 0.558
9
  }
trainer_state.json CHANGED
@@ -20,9 +20,9 @@
20
  {
21
  "epoch": 0.056,
22
  "eval_loss": 6.5227251052856445,
23
- "eval_runtime": 0.7016,
24
- "eval_samples_per_second": 79.812,
25
- "eval_steps_per_second": 19.953,
26
  "num_input_tokens_seen": 2880,
27
  "step": 7
28
  },
@@ -37,9 +37,9 @@
37
  {
38
  "epoch": 0.112,
39
  "eval_loss": 1.382468581199646,
40
- "eval_runtime": 0.7422,
41
- "eval_samples_per_second": 75.454,
42
- "eval_steps_per_second": 18.864,
43
  "num_input_tokens_seen": 5920,
44
  "step": 14
45
  },
@@ -62,9 +62,9 @@
62
  {
63
  "epoch": 0.168,
64
  "eval_loss": 0.4987373352050781,
65
- "eval_runtime": 0.7195,
66
- "eval_samples_per_second": 77.827,
67
- "eval_steps_per_second": 19.457,
68
  "num_input_tokens_seen": 8416,
69
  "step": 21
70
  },
@@ -79,9 +79,9 @@
79
  {
80
  "epoch": 0.224,
81
  "eval_loss": 0.4531269073486328,
82
- "eval_runtime": 0.7843,
83
- "eval_samples_per_second": 71.406,
84
- "eval_steps_per_second": 17.851,
85
  "num_input_tokens_seen": 11264,
86
  "step": 28
87
  },
@@ -104,9 +104,9 @@
104
  {
105
  "epoch": 0.28,
106
  "eval_loss": 0.36931881308555603,
107
- "eval_runtime": 0.8719,
108
- "eval_samples_per_second": 64.225,
109
- "eval_steps_per_second": 16.056,
110
  "num_input_tokens_seen": 13824,
111
  "step": 35
112
  },
@@ -121,9 +121,9 @@
121
  {
122
  "epoch": 0.336,
123
  "eval_loss": 0.3872639238834381,
124
- "eval_runtime": 0.7387,
125
- "eval_samples_per_second": 75.812,
126
- "eval_steps_per_second": 18.953,
127
  "num_input_tokens_seen": 16672,
128
  "step": 42
129
  },
@@ -138,9 +138,9 @@
138
  {
139
  "epoch": 0.392,
140
  "eval_loss": 0.3777945637702942,
141
- "eval_runtime": 0.7665,
142
- "eval_samples_per_second": 73.06,
143
- "eval_steps_per_second": 18.265,
144
  "num_input_tokens_seen": 19296,
145
  "step": 49
146
  },
@@ -163,9 +163,9 @@
163
  {
164
  "epoch": 0.448,
165
  "eval_loss": 0.4436803460121155,
166
- "eval_runtime": 0.7702,
167
- "eval_samples_per_second": 72.71,
168
- "eval_steps_per_second": 18.178,
169
  "num_input_tokens_seen": 22432,
170
  "step": 56
171
  },
@@ -180,9 +180,9 @@
180
  {
181
  "epoch": 0.504,
182
  "eval_loss": 0.35032057762145996,
183
- "eval_runtime": 0.9256,
184
- "eval_samples_per_second": 60.502,
185
- "eval_steps_per_second": 15.125,
186
  "num_input_tokens_seen": 25504,
187
  "step": 63
188
  },
@@ -205,9 +205,9 @@
205
  {
206
  "epoch": 0.56,
207
  "eval_loss": 0.37164703011512756,
208
- "eval_runtime": 0.816,
209
- "eval_samples_per_second": 68.627,
210
- "eval_steps_per_second": 17.157,
211
  "num_input_tokens_seen": 28064,
212
  "step": 70
213
  },
@@ -222,9 +222,9 @@
222
  {
223
  "epoch": 0.616,
224
  "eval_loss": 0.3748786747455597,
225
- "eval_runtime": 0.7992,
226
- "eval_samples_per_second": 70.066,
227
- "eval_steps_per_second": 17.516,
228
  "num_input_tokens_seen": 30720,
229
  "step": 77
230
  },
@@ -239,9 +239,9 @@
239
  {
240
  "epoch": 0.672,
241
  "eval_loss": 0.3603578209877014,
242
- "eval_runtime": 0.7982,
243
- "eval_samples_per_second": 70.16,
244
- "eval_steps_per_second": 17.54,
245
  "num_input_tokens_seen": 33504,
246
  "step": 84
247
  },
@@ -264,9 +264,9 @@
264
  {
265
  "epoch": 0.728,
266
  "eval_loss": 0.35710158944129944,
267
- "eval_runtime": 0.8602,
268
- "eval_samples_per_second": 65.102,
269
- "eval_steps_per_second": 16.276,
270
  "num_input_tokens_seen": 36128,
271
  "step": 91
272
  },
@@ -281,9 +281,9 @@
281
  {
282
  "epoch": 0.784,
283
  "eval_loss": 0.3520326614379883,
284
- "eval_runtime": 0.7911,
285
- "eval_samples_per_second": 70.792,
286
- "eval_steps_per_second": 17.698,
287
  "num_input_tokens_seen": 38592,
288
  "step": 98
289
  },
@@ -306,9 +306,9 @@
306
  {
307
  "epoch": 0.84,
308
  "eval_loss": 0.34758228063583374,
309
- "eval_runtime": 0.8072,
310
- "eval_samples_per_second": 69.373,
311
- "eval_steps_per_second": 17.343,
312
  "num_input_tokens_seen": 41280,
313
  "step": 105
314
  },
@@ -323,9 +323,9 @@
323
  {
324
  "epoch": 0.896,
325
  "eval_loss": 0.34589245915412903,
326
- "eval_runtime": 0.822,
327
- "eval_samples_per_second": 68.123,
328
- "eval_steps_per_second": 17.031,
329
  "num_input_tokens_seen": 44160,
330
  "step": 112
331
  },
@@ -340,9 +340,9 @@
340
  {
341
  "epoch": 0.952,
342
  "eval_loss": 0.3492301404476166,
343
- "eval_runtime": 0.8757,
344
- "eval_samples_per_second": 63.945,
345
- "eval_steps_per_second": 15.986,
346
  "num_input_tokens_seen": 46944,
347
  "step": 119
348
  },
@@ -368,9 +368,9 @@
368
  "step": 125,
369
  "total_flos": 497127920369664.0,
370
  "train_loss": 1.1438678817749024,
371
- "train_runtime": 264.1495,
372
- "train_samples_per_second": 1.885,
373
- "train_steps_per_second": 0.473
374
  }
375
  ],
376
  "logging_steps": 5,
 
20
  {
21
  "epoch": 0.056,
22
  "eval_loss": 6.5227251052856445,
23
+ "eval_runtime": 0.703,
24
+ "eval_samples_per_second": 79.659,
25
+ "eval_steps_per_second": 19.915,
26
  "num_input_tokens_seen": 2880,
27
  "step": 7
28
  },
 
37
  {
38
  "epoch": 0.112,
39
  "eval_loss": 1.382468581199646,
40
+ "eval_runtime": 0.7137,
41
+ "eval_samples_per_second": 78.462,
42
+ "eval_steps_per_second": 19.616,
43
  "num_input_tokens_seen": 5920,
44
  "step": 14
45
  },
 
62
  {
63
  "epoch": 0.168,
64
  "eval_loss": 0.4987373352050781,
65
+ "eval_runtime": 0.5096,
66
+ "eval_samples_per_second": 109.892,
67
+ "eval_steps_per_second": 27.473,
68
  "num_input_tokens_seen": 8416,
69
  "step": 21
70
  },
 
79
  {
80
  "epoch": 0.224,
81
  "eval_loss": 0.4531269073486328,
82
+ "eval_runtime": 0.5083,
83
+ "eval_samples_per_second": 110.162,
84
+ "eval_steps_per_second": 27.54,
85
  "num_input_tokens_seen": 11264,
86
  "step": 28
87
  },
 
104
  {
105
  "epoch": 0.28,
106
  "eval_loss": 0.36931881308555603,
107
+ "eval_runtime": 0.7118,
108
+ "eval_samples_per_second": 78.679,
109
+ "eval_steps_per_second": 19.67,
110
  "num_input_tokens_seen": 13824,
111
  "step": 35
112
  },
 
121
  {
122
  "epoch": 0.336,
123
  "eval_loss": 0.3872639238834381,
124
+ "eval_runtime": 0.7546,
125
+ "eval_samples_per_second": 74.213,
126
+ "eval_steps_per_second": 18.553,
127
  "num_input_tokens_seen": 16672,
128
  "step": 42
129
  },
 
138
  {
139
  "epoch": 0.392,
140
  "eval_loss": 0.3777945637702942,
141
+ "eval_runtime": 0.7935,
142
+ "eval_samples_per_second": 70.573,
143
+ "eval_steps_per_second": 17.643,
144
  "num_input_tokens_seen": 19296,
145
  "step": 49
146
  },
 
163
  {
164
  "epoch": 0.448,
165
  "eval_loss": 0.4436803460121155,
166
+ "eval_runtime": 0.7397,
167
+ "eval_samples_per_second": 75.71,
168
+ "eval_steps_per_second": 18.927,
169
  "num_input_tokens_seen": 22432,
170
  "step": 56
171
  },
 
180
  {
181
  "epoch": 0.504,
182
  "eval_loss": 0.35032057762145996,
183
+ "eval_runtime": 0.7463,
184
+ "eval_samples_per_second": 75.04,
185
+ "eval_steps_per_second": 18.76,
186
  "num_input_tokens_seen": 25504,
187
  "step": 63
188
  },
 
205
  {
206
  "epoch": 0.56,
207
  "eval_loss": 0.37164703011512756,
208
+ "eval_runtime": 0.7524,
209
+ "eval_samples_per_second": 74.427,
210
+ "eval_steps_per_second": 18.607,
211
  "num_input_tokens_seen": 28064,
212
  "step": 70
213
  },
 
222
  {
223
  "epoch": 0.616,
224
  "eval_loss": 0.3748786747455597,
225
+ "eval_runtime": 0.6694,
226
+ "eval_samples_per_second": 83.657,
227
+ "eval_steps_per_second": 20.914,
228
  "num_input_tokens_seen": 30720,
229
  "step": 77
230
  },
 
239
  {
240
  "epoch": 0.672,
241
  "eval_loss": 0.3603578209877014,
242
+ "eval_runtime": 0.7578,
243
+ "eval_samples_per_second": 73.897,
244
+ "eval_steps_per_second": 18.474,
245
  "num_input_tokens_seen": 33504,
246
  "step": 84
247
  },
 
264
  {
265
  "epoch": 0.728,
266
  "eval_loss": 0.35710158944129944,
267
+ "eval_runtime": 0.5883,
268
+ "eval_samples_per_second": 95.185,
269
+ "eval_steps_per_second": 23.796,
270
  "num_input_tokens_seen": 36128,
271
  "step": 91
272
  },
 
281
  {
282
  "epoch": 0.784,
283
  "eval_loss": 0.3520326614379883,
284
+ "eval_runtime": 0.8055,
285
+ "eval_samples_per_second": 69.518,
286
+ "eval_steps_per_second": 17.38,
287
  "num_input_tokens_seen": 38592,
288
  "step": 98
289
  },
 
306
  {
307
  "epoch": 0.84,
308
  "eval_loss": 0.34758228063583374,
309
+ "eval_runtime": 0.7519,
310
+ "eval_samples_per_second": 74.483,
311
+ "eval_steps_per_second": 18.621,
312
  "num_input_tokens_seen": 41280,
313
  "step": 105
314
  },
 
323
  {
324
  "epoch": 0.896,
325
  "eval_loss": 0.34589245915412903,
326
+ "eval_runtime": 0.7431,
327
+ "eval_samples_per_second": 75.359,
328
+ "eval_steps_per_second": 18.84,
329
  "num_input_tokens_seen": 44160,
330
  "step": 112
331
  },
 
340
  {
341
  "epoch": 0.952,
342
  "eval_loss": 0.3492301404476166,
343
+ "eval_runtime": 0.7889,
344
+ "eval_samples_per_second": 70.986,
345
+ "eval_steps_per_second": 17.747,
346
  "num_input_tokens_seen": 46944,
347
  "step": 119
348
  },
 
368
  "step": 125,
369
  "total_flos": 497127920369664.0,
370
  "train_loss": 1.1438678817749024,
371
+ "train_runtime": 224.169,
372
+ "train_samples_per_second": 2.222,
373
+ "train_steps_per_second": 0.558
374
  }
375
  ],
376
  "logging_steps": 5,