Mathildeholst committed on
Commit
478a5a5
·
verified ·
1 Parent(s): 26583da

Training in progress, epoch 1

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e78e0191867f8669128a3259b23e8008f38a2bbf9e1a8d6a1ec63143e22832f
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c30bdd43ef8700852682fdd81d7cfb8b85f7b07fe64e66514f5c74ab85f913a3
3
  size 598898116
run-1/checkpoint-313/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e985070cbe84cf0764a1d8314763d133c5b36b06873746fdedbd96fbd5925169
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c30bdd43ef8700852682fdd81d7cfb8b85f7b07fe64e66514f5c74ab85f913a3
3
  size 598898116
run-1/checkpoint-313/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94d6f3c67b7a216b817582bcba8e01ac40f60455307d798c658ebd1a9fd9a6c2
3
  size 1197886411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f10be5aaf79caf149b739ea48b3fece59a8d47b6a3d1d7546ce1ef252764a35
3
  size 1197886411
run-1/checkpoint-313/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:286bfb530c9dac2b032454875e9a2d08d27f169bd4b58a6c68e31dd670e5aaf1
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:081827cb2011e5cb93b09f1991f3547ab57c55e5fa27dab24d0ee2d860e2a68c
3
  size 1465
run-1/checkpoint-313/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 313,
3
- "best_metric": 2.3514034748077393,
4
  "best_model_checkpoint": "classifier-clinc-MBbase-distilled/run-1/checkpoint-313",
5
  "epoch": 1.0,
6
  "eval_steps": 500,
@@ -11,18 +11,18 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.8748387096774194,
15
- "eval_loss": 2.3514034748077393,
16
- "eval_runtime": 8.6888,
17
- "eval_samples_per_second": 356.78,
18
- "eval_steps_per_second": 11.164,
19
  "step": 313
20
  }
21
  ],
22
  "logging_steps": 500,
23
- "max_steps": 1252,
24
  "num_input_tokens_seen": 0,
25
- "num_train_epochs": 4,
26
  "save_steps": 500,
27
  "stateful_callbacks": {
28
  "EarlyStoppingCallback": {
@@ -49,8 +49,8 @@
49
  "train_batch_size": 32,
50
  "trial_name": null,
51
  "trial_params": {
52
- "alpha": 0.6366341517957751,
53
- "num_train_epochs": 4,
54
- "temperature": 4.356589509372901
55
  }
56
  }
 
1
  {
2
  "best_global_step": 313,
3
+ "best_metric": 2.7125542163848877,
4
  "best_model_checkpoint": "classifier-clinc-MBbase-distilled/run-1/checkpoint-313",
5
  "epoch": 1.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.8841935483870967,
15
+ "eval_loss": 2.7125542163848877,
16
+ "eval_runtime": 8.642,
17
+ "eval_samples_per_second": 358.712,
18
+ "eval_steps_per_second": 11.224,
19
  "step": 313
20
  }
21
  ],
22
  "logging_steps": 500,
23
+ "max_steps": 1878,
24
  "num_input_tokens_seen": 0,
25
+ "num_train_epochs": 6,
26
  "save_steps": 500,
27
  "stateful_callbacks": {
28
  "EarlyStoppingCallback": {
 
49
  "train_batch_size": 32,
50
  "trial_name": null,
51
  "trial_params": {
52
+ "alpha": 0.43669134703064955,
53
+ "num_train_epochs": 6,
54
+ "temperature": 4.700820204359401
55
  }
56
  }
run-1/checkpoint-313/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56ff736131b836a7dabe107136a81c1bbf2af9db596535261d17917925265c8c
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:005e75c27b9684ffca09be4a319451b5b3ed9c7d3c39d92ef3bf6ef33af1bd87
3
  size 5905
run-1/checkpoint-626/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 626,
3
- "best_metric": 1.0946013927459717,
4
  "best_model_checkpoint": "classifier-clinc-MBbase-distilled/run-1/checkpoint-626",
5
  "epoch": 2.0,
6
  "eval_steps": 500,
@@ -11,34 +11,34 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.8748387096774194,
15
- "eval_loss": 2.3514034748077393,
16
- "eval_runtime": 8.6888,
17
- "eval_samples_per_second": 356.78,
18
- "eval_steps_per_second": 11.164,
19
  "step": 313
20
  },
21
  {
22
  "epoch": 1.5974440894568689,
23
- "grad_norm": 11.41929817199707,
24
- "learning_rate": 1.2028753993610226e-05,
25
- "loss": 4.6814,
26
  "step": 500
27
  },
28
  {
29
  "epoch": 2.0,
30
- "eval_accuracy": 0.9387096774193548,
31
- "eval_loss": 1.0946013927459717,
32
- "eval_runtime": 8.8111,
33
- "eval_samples_per_second": 351.83,
34
- "eval_steps_per_second": 11.009,
35
  "step": 626
36
  }
37
  ],
38
  "logging_steps": 500,
39
- "max_steps": 1252,
40
  "num_input_tokens_seen": 0,
41
- "num_train_epochs": 4,
42
  "save_steps": 500,
43
  "stateful_callbacks": {
44
  "EarlyStoppingCallback": {
@@ -65,8 +65,8 @@
65
  "train_batch_size": 32,
66
  "trial_name": null,
67
  "trial_params": {
68
- "alpha": 0.6366341517957751,
69
- "num_train_epochs": 4,
70
- "temperature": 4.356589509372901
71
  }
72
  }
 
1
  {
2
  "best_global_step": 626,
3
+ "best_metric": 1.3560537099838257,
4
  "best_model_checkpoint": "classifier-clinc-MBbase-distilled/run-1/checkpoint-626",
5
  "epoch": 2.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.8841935483870967,
15
+ "eval_loss": 2.7125542163848877,
16
+ "eval_runtime": 8.642,
17
+ "eval_samples_per_second": 358.712,
18
+ "eval_steps_per_second": 11.224,
19
  "step": 313
20
  },
21
  {
22
  "epoch": 1.5974440894568689,
23
+ "grad_norm": 15.764195442199707,
24
+ "learning_rate": 1.468583599574015e-05,
25
+ "loss": 5.4057,
26
  "step": 500
27
  },
28
  {
29
  "epoch": 2.0,
30
+ "eval_accuracy": 0.9380645161290323,
31
+ "eval_loss": 1.3560537099838257,
32
+ "eval_runtime": 8.8142,
33
+ "eval_samples_per_second": 351.707,
34
+ "eval_steps_per_second": 11.005,
35
  "step": 626
36
  }
37
  ],
38
  "logging_steps": 500,
39
+ "max_steps": 1878,
40
  "num_input_tokens_seen": 0,
41
+ "num_train_epochs": 6,
42
  "save_steps": 500,
43
  "stateful_callbacks": {
44
  "EarlyStoppingCallback": {
 
65
  "train_batch_size": 32,
66
  "trial_name": null,
67
  "trial_params": {
68
+ "alpha": 0.43669134703064955,
69
+ "num_train_epochs": 6,
70
+ "temperature": 4.700820204359401
71
  }
72
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3774de0dc9784f2ab1918da7fc680b1fa7a61c646dab0571a12594c4ddc726eb
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:005e75c27b9684ffca09be4a319451b5b3ed9c7d3c39d92ef3bf6ef33af1bd87
3
  size 5905