BilelDJ committed on
Commit
35231e1
·
verified ·
1 Parent(s): 19b7782

End of training

Browse files
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
  "epoch": 2.0,
3
  "eval_loss": 0.051597148180007935,
4
- "eval_runtime": 3.1643,
5
- "eval_samples_per_second": 1.264,
6
- "eval_steps_per_second": 0.316,
7
  "test_loss": 1.4216160774230957,
8
- "test_runtime": 2.9307,
9
- "test_samples_per_second": 1.365,
10
- "test_steps_per_second": 0.341,
11
  "total_flos": 1163339959320.0,
12
  "train_loss": 0.13540960324462503,
13
- "train_runtime": 98.8611,
14
- "train_samples_per_second": 0.202,
15
- "train_steps_per_second": 0.02
16
  }
 
1
  {
2
  "epoch": 2.0,
3
  "eval_loss": 0.051597148180007935,
4
+ "eval_runtime": 3.1317,
5
+ "eval_samples_per_second": 1.277,
6
+ "eval_steps_per_second": 0.319,
7
  "test_loss": 1.4216160774230957,
8
+ "test_runtime": 3.3956,
9
+ "test_samples_per_second": 1.178,
10
+ "test_steps_per_second": 0.294,
11
  "total_flos": 1163339959320.0,
12
  "train_loss": 0.13540960324462503,
13
+ "train_runtime": 108.0249,
14
+ "train_samples_per_second": 0.185,
15
+ "train_steps_per_second": 0.019
16
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 2.0,
3
  "eval_loss": 0.051597148180007935,
4
- "eval_runtime": 3.1643,
5
- "eval_samples_per_second": 1.264,
6
- "eval_steps_per_second": 0.316
7
  }
 
1
  {
2
  "epoch": 2.0,
3
  "eval_loss": 0.051597148180007935,
4
+ "eval_runtime": 3.1317,
5
+ "eval_samples_per_second": 1.277,
6
+ "eval_steps_per_second": 0.319
7
  }
runs/May08_20-26-36_2e4200763c33/events.out.tfevents.1715200240.2e4200763c33.31644.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cecc843cc6a203859c78f88bbad3b1f802cad2cfeb13be7676ee859054950e2e
3
+ size 354
test_results.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "test_loss": 1.4216160774230957,
3
- "test_runtime": 2.9307,
4
- "test_samples_per_second": 1.365,
5
- "test_steps_per_second": 0.341
6
  }
 
1
  {
2
  "test_loss": 1.4216160774230957,
3
+ "test_runtime": 3.3956,
4
+ "test_samples_per_second": 1.178,
5
+ "test_steps_per_second": 0.294
6
  }
tokenizer.json CHANGED
@@ -7,9 +7,7 @@
7
  "stride": 0
8
  },
9
  "padding": {
10
- "strategy": {
11
- "Fixed": 77
12
- },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
15
  "pad_id": 49407,
 
7
  "stride": 0
8
  },
9
  "padding": {
10
+ "strategy": "BatchLongest",
 
 
11
  "direction": "Right",
12
  "pad_to_multiple_of": null,
13
  "pad_id": 49407,
train_results.json CHANGED
@@ -2,7 +2,7 @@
2
  "epoch": 2.0,
3
  "total_flos": 1163339959320.0,
4
  "train_loss": 0.13540960324462503,
5
- "train_runtime": 98.8611,
6
- "train_samples_per_second": 0.202,
7
- "train_steps_per_second": 0.02
8
  }
 
2
  "epoch": 2.0,
3
  "total_flos": 1163339959320.0,
4
  "train_loss": 0.13540960324462503,
5
+ "train_runtime": 108.0249,
6
+ "train_samples_per_second": 0.185,
7
+ "train_steps_per_second": 0.019
8
  }
trainer_state.json CHANGED
@@ -18,9 +18,9 @@
18
  {
19
  "epoch": 1.0,
20
  "eval_loss": 0.00024110873346216977,
21
- "eval_runtime": 2.8556,
22
- "eval_samples_per_second": 1.401,
23
- "eval_steps_per_second": 0.35,
24
  "step": 1
25
  },
26
  {
@@ -33,9 +33,9 @@
33
  {
34
  "epoch": 2.0,
35
  "eval_loss": 0.051597148180007935,
36
- "eval_runtime": 1.127,
37
- "eval_samples_per_second": 3.549,
38
- "eval_steps_per_second": 0.887,
39
  "step": 2
40
  },
41
  {
@@ -43,9 +43,9 @@
43
  "step": 2,
44
  "total_flos": 1163339959320.0,
45
  "train_loss": 0.13540960324462503,
46
- "train_runtime": 98.8611,
47
- "train_samples_per_second": 0.202,
48
- "train_steps_per_second": 0.02
49
  }
50
  ],
51
  "logging_steps": 1.0,
 
18
  {
19
  "epoch": 1.0,
20
  "eval_loss": 0.00024110873346216977,
21
+ "eval_runtime": 6.3219,
22
+ "eval_samples_per_second": 0.633,
23
+ "eval_steps_per_second": 0.158,
24
  "step": 1
25
  },
26
  {
 
33
  {
34
  "epoch": 2.0,
35
  "eval_loss": 0.051597148180007935,
36
+ "eval_runtime": 1.1315,
37
+ "eval_samples_per_second": 3.535,
38
+ "eval_steps_per_second": 0.884,
39
  "step": 2
40
  },
41
  {
 
43
  "step": 2,
44
  "total_flos": 1163339959320.0,
45
  "train_loss": 0.13540960324462503,
46
+ "train_runtime": 108.0249,
47
+ "train_samples_per_second": 0.185,
48
+ "train_steps_per_second": 0.019
49
  }
50
  ],
51
  "logging_steps": 1.0,