Mathildeholst committed on
Commit
5fa5386
·
verified ·
1 Parent(s): 478a5a5

Training in progress, epoch 4

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c30bdd43ef8700852682fdd81d7cfb8b85f7b07fe64e66514f5c74ab85f913a3
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ade954cb3addefcce0810d360fa809c65f180c0974fc2c66eeec8b322557750f
3
  size 598898116
run-1/checkpoint-1252/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:370d3534f9b361c680c32623da6a50591b08544112b8d478f0468d789fee2c8e
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ade954cb3addefcce0810d360fa809c65f180c0974fc2c66eeec8b322557750f
3
  size 598898116
run-1/checkpoint-1252/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6d329c03d57e13e0a4475e000d046b99bdf2268ca265b833b673a4db9cc40da
3
  size 1197886411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd526ba45a1abe63bbaf3ef1ad2b0b245794993a6f1dfc3fbc20ca249336d6af
3
  size 1197886411
run-1/checkpoint-1252/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6327f5ab4e72ed9c0c4a05ebebdbe843085c362a66f43ed9a35b8913cac50eea
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cf6043120806951e527204f266c4987f61ac4c1701320a05415bcdad14ecca1
3
  size 1465
run-1/checkpoint-1252/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 1252,
3
- "best_metric": 0.7973427772521973,
4
  "best_model_checkpoint": "classifier-clinc-MBbase-distilled/run-1/checkpoint-1252",
5
  "epoch": 4.0,
6
  "eval_steps": 500,
@@ -11,59 +11,59 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.8748387096774194,
15
- "eval_loss": 2.3514034748077393,
16
- "eval_runtime": 8.6888,
17
- "eval_samples_per_second": 356.78,
18
- "eval_steps_per_second": 11.164,
19
  "step": 313
20
  },
21
  {
22
  "epoch": 1.5974440894568689,
23
- "grad_norm": 11.41929817199707,
24
- "learning_rate": 1.2028753993610226e-05,
25
- "loss": 4.6814,
26
  "step": 500
27
  },
28
  {
29
  "epoch": 2.0,
30
- "eval_accuracy": 0.9387096774193548,
31
- "eval_loss": 1.0946013927459717,
32
- "eval_runtime": 8.8111,
33
- "eval_samples_per_second": 351.83,
34
- "eval_steps_per_second": 11.009,
35
  "step": 626
36
  },
37
  {
38
  "epoch": 3.0,
39
  "eval_accuracy": 0.9490322580645161,
40
- "eval_loss": 0.8638759851455688,
41
- "eval_runtime": 8.7924,
42
- "eval_samples_per_second": 352.576,
43
- "eval_steps_per_second": 11.032,
44
  "step": 939
45
  },
46
  {
47
  "epoch": 3.194888178913738,
48
- "grad_norm": 2.687797784805298,
49
- "learning_rate": 4.041533546325879e-06,
50
- "loss": 0.8232,
51
  "step": 1000
52
  },
53
  {
54
  "epoch": 4.0,
55
- "eval_accuracy": 0.9487096774193549,
56
- "eval_loss": 0.7973427772521973,
57
- "eval_runtime": 8.9762,
58
- "eval_samples_per_second": 345.357,
59
- "eval_steps_per_second": 10.806,
60
  "step": 1252
61
  }
62
  ],
63
  "logging_steps": 500,
64
- "max_steps": 1252,
65
  "num_input_tokens_seen": 0,
66
- "num_train_epochs": 4,
67
  "save_steps": 500,
68
  "stateful_callbacks": {
69
  "EarlyStoppingCallback": {
@@ -81,7 +81,7 @@
81
  "should_evaluate": false,
82
  "should_log": false,
83
  "should_save": true,
84
- "should_training_stop": true
85
  },
86
  "attributes": {}
87
  }
@@ -90,8 +90,8 @@
90
  "train_batch_size": 32,
91
  "trial_name": null,
92
  "trial_params": {
93
- "alpha": 0.6366341517957751,
94
- "num_train_epochs": 4,
95
- "temperature": 4.356589509372901
96
  }
97
  }
 
1
  {
2
  "best_global_step": 1252,
3
+ "best_metric": 0.8706804513931274,
4
  "best_model_checkpoint": "classifier-clinc-MBbase-distilled/run-1/checkpoint-1252",
5
  "epoch": 4.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.8841935483870967,
15
+ "eval_loss": 2.7125542163848877,
16
+ "eval_runtime": 8.642,
17
+ "eval_samples_per_second": 358.712,
18
+ "eval_steps_per_second": 11.224,
19
  "step": 313
20
  },
21
  {
22
  "epoch": 1.5974440894568689,
23
+ "grad_norm": 15.764195442199707,
24
+ "learning_rate": 1.468583599574015e-05,
25
+ "loss": 5.4057,
26
  "step": 500
27
  },
28
  {
29
  "epoch": 2.0,
30
+ "eval_accuracy": 0.9380645161290323,
31
+ "eval_loss": 1.3560537099838257,
32
+ "eval_runtime": 8.8142,
33
+ "eval_samples_per_second": 351.707,
34
+ "eval_steps_per_second": 11.005,
35
  "step": 626
36
  },
37
  {
38
  "epoch": 3.0,
39
  "eval_accuracy": 0.9490322580645161,
40
+ "eval_loss": 1.0215872526168823,
41
+ "eval_runtime": 8.8098,
42
+ "eval_samples_per_second": 351.879,
43
+ "eval_steps_per_second": 11.01,
44
  "step": 939
45
  },
46
  {
47
  "epoch": 3.194888178913738,
48
+ "grad_norm": 3.932048797607422,
49
+ "learning_rate": 9.361022364217253e-06,
50
+ "loss": 1.0728,
51
  "step": 1000
52
  },
53
  {
54
  "epoch": 4.0,
55
+ "eval_accuracy": 0.9538709677419355,
56
+ "eval_loss": 0.8706804513931274,
57
+ "eval_runtime": 8.7191,
58
+ "eval_samples_per_second": 355.54,
59
+ "eval_steps_per_second": 11.125,
60
  "step": 1252
61
  }
62
  ],
63
  "logging_steps": 500,
64
+ "max_steps": 1878,
65
  "num_input_tokens_seen": 0,
66
+ "num_train_epochs": 6,
67
  "save_steps": 500,
68
  "stateful_callbacks": {
69
  "EarlyStoppingCallback": {
 
81
  "should_evaluate": false,
82
  "should_log": false,
83
  "should_save": true,
84
+ "should_training_stop": false
85
  },
86
  "attributes": {}
87
  }
 
90
  "train_batch_size": 32,
91
  "trial_name": null,
92
  "trial_params": {
93
+ "alpha": 0.43669134703064955,
94
+ "num_train_epochs": 6,
95
+ "temperature": 4.700820204359401
96
  }
97
  }
run-1/checkpoint-1252/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56ff736131b836a7dabe107136a81c1bbf2af9db596535261d17917925265c8c
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:005e75c27b9684ffca09be4a319451b5b3ed9c7d3c39d92ef3bf6ef33af1bd87
3
  size 5905
run-1/checkpoint-626/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9ad87f0bcabd8753af801099adaebcaff62efac5ec99c204a5d30f9340d7c06
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9a39f3d5c808671b9c50e8dd232fb118c2a863cf297a8291f6b6e886ed6ea25
3
  size 598898116
run-1/checkpoint-626/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c231c0cd3fbc47a8b6e27933af7e2b7295464a826f8359b228a3f1fdbf483b23
3
  size 1197886411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df1cbe6c21069771c20c4a22652fd1cb4c9fa8e7c260004ccdbc1bf23ee51db2
3
  size 1197886411
run-1/checkpoint-626/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e9a7b5592a4029f3ec57c7dbcfd35e00b01562cef219c39239e50201a9d22f7
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f79a33daf84d1b784d1517562e7679fa0e6281bfc97ee6961b9f75a9f0ba2a7b
3
  size 1465
run-1/checkpoint-626/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56ff736131b836a7dabe107136a81c1bbf2af9db596535261d17917925265c8c
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:005e75c27b9684ffca09be4a319451b5b3ed9c7d3c39d92ef3bf6ef33af1bd87
3
  size 5905
run-1/checkpoint-939/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e99a38237f5188be1bf3c854a2809e1af914ca807dc52e24f29be9c3cfe94420
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea96ce5e92abdf60ad828f47231aa42a9f2a2f5daf40e1a2bdd0eb325c3ea43e
3
  size 598898116
run-1/checkpoint-939/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4265f1139d29d76272c48fd95b09d238668611bcdcf40b88b561f87c18f83159
3
  size 1197886411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7139dc98d864f192a2cbce0e87baf6c1d34dc5553d9abd2772810498fe2f9960
3
  size 1197886411
run-1/checkpoint-939/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84cfb1e7a62ce13d3bd2b30cd84954b016eca2ceddbf4f35473bd12c1cb4a0c8
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c75b3ffa83808fbdba53567d88e17d4635157498f448356d32b88393fb1d6557
3
  size 1465
run-1/checkpoint-939/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 939,
3
- "best_metric": 0.8638759851455688,
4
  "best_model_checkpoint": "classifier-clinc-MBbase-distilled/run-1/checkpoint-939",
5
  "epoch": 3.0,
6
  "eval_steps": 500,
@@ -11,43 +11,43 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.8748387096774194,
15
- "eval_loss": 2.3514034748077393,
16
- "eval_runtime": 8.6888,
17
- "eval_samples_per_second": 356.78,
18
- "eval_steps_per_second": 11.164,
19
  "step": 313
20
  },
21
  {
22
  "epoch": 1.5974440894568689,
23
- "grad_norm": 11.41929817199707,
24
- "learning_rate": 1.2028753993610226e-05,
25
- "loss": 4.6814,
26
  "step": 500
27
  },
28
  {
29
  "epoch": 2.0,
30
- "eval_accuracy": 0.9387096774193548,
31
- "eval_loss": 1.0946013927459717,
32
- "eval_runtime": 8.8111,
33
- "eval_samples_per_second": 351.83,
34
- "eval_steps_per_second": 11.009,
35
  "step": 626
36
  },
37
  {
38
  "epoch": 3.0,
39
  "eval_accuracy": 0.9490322580645161,
40
- "eval_loss": 0.8638759851455688,
41
- "eval_runtime": 8.7924,
42
- "eval_samples_per_second": 352.576,
43
- "eval_steps_per_second": 11.032,
44
  "step": 939
45
  }
46
  ],
47
  "logging_steps": 500,
48
- "max_steps": 1252,
49
  "num_input_tokens_seen": 0,
50
- "num_train_epochs": 4,
51
  "save_steps": 500,
52
  "stateful_callbacks": {
53
  "EarlyStoppingCallback": {
@@ -74,8 +74,8 @@
74
  "train_batch_size": 32,
75
  "trial_name": null,
76
  "trial_params": {
77
- "alpha": 0.6366341517957751,
78
- "num_train_epochs": 4,
79
- "temperature": 4.356589509372901
80
  }
81
  }
 
1
  {
2
  "best_global_step": 939,
3
+ "best_metric": 1.0215872526168823,
4
  "best_model_checkpoint": "classifier-clinc-MBbase-distilled/run-1/checkpoint-939",
5
  "epoch": 3.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.8841935483870967,
15
+ "eval_loss": 2.7125542163848877,
16
+ "eval_runtime": 8.642,
17
+ "eval_samples_per_second": 358.712,
18
+ "eval_steps_per_second": 11.224,
19
  "step": 313
20
  },
21
  {
22
  "epoch": 1.5974440894568689,
23
+ "grad_norm": 15.764195442199707,
24
+ "learning_rate": 1.468583599574015e-05,
25
+ "loss": 5.4057,
26
  "step": 500
27
  },
28
  {
29
  "epoch": 2.0,
30
+ "eval_accuracy": 0.9380645161290323,
31
+ "eval_loss": 1.3560537099838257,
32
+ "eval_runtime": 8.8142,
33
+ "eval_samples_per_second": 351.707,
34
+ "eval_steps_per_second": 11.005,
35
  "step": 626
36
  },
37
  {
38
  "epoch": 3.0,
39
  "eval_accuracy": 0.9490322580645161,
40
+ "eval_loss": 1.0215872526168823,
41
+ "eval_runtime": 8.8098,
42
+ "eval_samples_per_second": 351.879,
43
+ "eval_steps_per_second": 11.01,
44
  "step": 939
45
  }
46
  ],
47
  "logging_steps": 500,
48
+ "max_steps": 1878,
49
  "num_input_tokens_seen": 0,
50
+ "num_train_epochs": 6,
51
  "save_steps": 500,
52
  "stateful_callbacks": {
53
  "EarlyStoppingCallback": {
 
74
  "train_batch_size": 32,
75
  "trial_name": null,
76
  "trial_params": {
77
+ "alpha": 0.43669134703064955,
78
+ "num_train_epochs": 6,
79
+ "temperature": 4.700820204359401
80
  }
81
  }
run-1/checkpoint-939/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56ff736131b836a7dabe107136a81c1bbf2af9db596535261d17917925265c8c
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:005e75c27b9684ffca09be4a319451b5b3ed9c7d3c39d92ef3bf6ef33af1bd87
3
  size 5905