DanielDanielDanielDanielDanielDaniel commited on
Commit
3b8bec9
·
verified ·
1 Parent(s): af80bfa

Training in progress, epoch 6

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f416b06624d5c0d9c88fef618f495f3388aabde44fd315d4840e248486442a6
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1efac210aef14a836eb8349cce523c66e2d6fa5a154e59184f0b9fac6a1455c0
3
  size 598898116
run-0/checkpoint-720/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b4978a36c2bfa514c88e7d77522720cbd803b0a220428a29701d116615b89f8
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1efac210aef14a836eb8349cce523c66e2d6fa5a154e59184f0b9fac6a1455c0
3
  size 598898116
run-0/checkpoint-720/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:568847898d617e33aee75b1974a224f130cca253f6b7d8348831c5432289079d
3
  size 1197886411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccc361d9ba19744a8fac05c2041365476665d92b97a6be454d52ff012130de0b
3
  size 1197886411
run-0/checkpoint-720/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5995efe96d9a5f379fc424e908527d23598b0bf812a8ecdac188a585ddec1b05
3
+ size 1383
run-0/checkpoint-720/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_global_step": 600,
3
- "best_metric": 0.9441935483870968,
4
- "best_model_checkpoint": "student_modernBERT_clinc_oos/run-0/checkpoint-600",
5
  "epoch": 6.0,
6
  "eval_steps": 500,
7
  "global_step": 720,
@@ -11,63 +11,63 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.792258064516129,
15
- "eval_loss": 5.377152442932129,
16
- "eval_runtime": 8.5979,
17
- "eval_samples_per_second": 360.552,
18
- "eval_steps_per_second": 11.282,
19
  "step": 120
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_accuracy": 0.9112903225806451,
24
- "eval_loss": 1.624509572982788,
25
- "eval_runtime": 8.6846,
26
- "eval_samples_per_second": 356.952,
27
- "eval_steps_per_second": 11.169,
28
  "step": 240
29
  },
30
  {
31
  "epoch": 3.0,
32
- "eval_accuracy": 0.9387096774193548,
33
- "eval_loss": 0.9765816926956177,
34
- "eval_runtime": 8.7254,
35
- "eval_samples_per_second": 355.283,
36
- "eval_steps_per_second": 11.117,
37
  "step": 360
38
  },
39
  {
40
  "epoch": 4.0,
41
- "eval_accuracy": 0.9412903225806452,
42
- "eval_loss": 0.799368679523468,
43
- "eval_runtime": 8.7525,
44
- "eval_samples_per_second": 354.185,
45
- "eval_steps_per_second": 11.083,
46
  "step": 480
47
  },
48
  {
49
  "epoch": 4.167714884696017,
50
- "grad_norm": 16.940773010253906,
51
  "learning_rate": 6.13888888888889e-06,
52
- "loss": 15.9509,
53
  "step": 500
54
  },
55
  {
56
  "epoch": 5.0,
57
- "eval_accuracy": 0.9441935483870968,
58
- "eval_loss": 0.7491059899330139,
59
- "eval_runtime": 8.7187,
60
- "eval_samples_per_second": 355.557,
61
- "eval_steps_per_second": 11.125,
62
  "step": 600
63
  },
64
  {
65
  "epoch": 6.0,
66
- "eval_accuracy": 0.9435483870967742,
67
- "eval_loss": 0.7189788818359375,
68
- "eval_runtime": 8.7026,
69
- "eval_samples_per_second": 356.214,
70
- "eval_steps_per_second": 11.146,
71
  "step": 720
72
  }
73
  ],
@@ -92,8 +92,8 @@
92
  "train_batch_size": 32,
93
  "trial_name": null,
94
  "trial_params": {
95
- "alpha": 0.34970567105973893,
96
  "num_train_epochs": 6,
97
- "temperature": 2
98
  }
99
  }
 
1
  {
2
+ "best_global_step": 720,
3
+ "best_metric": 0.9245161290322581,
4
+ "best_model_checkpoint": "student_modernBERT_clinc_oos/run-0/checkpoint-720",
5
  "epoch": 6.0,
6
  "eval_steps": 500,
7
  "global_step": 720,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.5541935483870968,
15
+ "eval_loss": 1.7238802909851074,
16
+ "eval_runtime": 7.0038,
17
+ "eval_samples_per_second": 442.616,
18
+ "eval_steps_per_second": 13.85,
19
  "step": 120
20
  },
21
  {
22
  "epoch": 2.0,
23
+ "eval_accuracy": 0.8570967741935483,
24
+ "eval_loss": 1.1208338737487793,
25
+ "eval_runtime": 6.8486,
26
+ "eval_samples_per_second": 452.646,
27
+ "eval_steps_per_second": 14.163,
28
  "step": 240
29
  },
30
  {
31
  "epoch": 3.0,
32
+ "eval_accuracy": 0.9051612903225806,
33
+ "eval_loss": 0.9070075154304504,
34
+ "eval_runtime": 6.8675,
35
+ "eval_samples_per_second": 451.403,
36
+ "eval_steps_per_second": 14.125,
37
  "step": 360
38
  },
39
  {
40
  "epoch": 4.0,
41
+ "eval_accuracy": 0.9164516129032259,
42
+ "eval_loss": 0.8145634531974792,
43
+ "eval_runtime": 6.8676,
44
+ "eval_samples_per_second": 451.395,
45
+ "eval_steps_per_second": 14.124,
46
  "step": 480
47
  },
48
  {
49
  "epoch": 4.167714884696017,
50
+ "grad_norm": 6.844017505645752,
51
  "learning_rate": 6.13888888888889e-06,
52
+ "loss": 5.4805,
53
  "step": 500
54
  },
55
  {
56
  "epoch": 5.0,
57
+ "eval_accuracy": 0.9219354838709677,
58
+ "eval_loss": 0.7720023989677429,
59
+ "eval_runtime": 6.8461,
60
+ "eval_samples_per_second": 452.813,
61
+ "eval_steps_per_second": 14.169,
62
  "step": 600
63
  },
64
  {
65
  "epoch": 6.0,
66
+ "eval_accuracy": 0.9245161290322581,
67
+ "eval_loss": 0.7577660083770752,
68
+ "eval_runtime": 6.835,
69
+ "eval_samples_per_second": 453.548,
70
+ "eval_steps_per_second": 14.192,
71
  "step": 720
72
  }
73
  ],
 
92
  "train_batch_size": 32,
93
  "trial_name": null,
94
  "trial_params": {
95
+ "alpha": 0.17839594203233045,
96
  "num_train_epochs": 6,
97
+ "temperature": 13
98
  }
99
  }
run-0/checkpoint-720/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12a14a0ccc0ae09af707f7c86d234da20596840e059e696e004c12f6a9fa0185
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:defea8818495be5704f8f89261e105141cdc7770477d9f23e5c577ed2e3f7868
3
  size 5905
runs/Oct26_17-22-45_49ca29516c75/events.out.tfevents.1761500015.49ca29516c75.609.3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a2b0505116aa438a0cf29708e2f0148a0a7f0a4b35c0f9a653554bf943994da
3
- size 15041
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f41c254dc090469b35f8a4d9a7bcc851a64bd7ca9b81cdea3b3c2426806f6cc9
3
+ size 15718