MariaFGI commited on
Commit
8136c67
·
verified ·
1 Parent(s): 27eb9cf

Training in progress, epoch 6

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86a6df00c1d88a0078b8c3ae2e7f91485ea1e5e53eaabe0ce162de126840de80
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e7355b73ebac035ca39c08ab03cc3883ea81e6e0a887448e0d23c867a628ceb
3
  size 598898116
run-1/checkpoint-5724/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7182e860596e3a533cb682a44ccd367d2a63548718a534c235c370acd8871676
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e7355b73ebac035ca39c08ab03cc3883ea81e6e0a887448e0d23c867a628ceb
3
  size 598898116
run-1/checkpoint-5724/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cdb69b8ca84dfedda708daaa4e745e034699ccc94865c9bcb68cb320222dbfbe
3
  size 1197886411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e02ab88d378d05297aed5c6b681e19e34d0b6ae0e2413b951e9606084608b3c
3
  size 1197886411
run-1/checkpoint-5724/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c1191ec6ef2ec1c0aa33d51d1e49844dedd6100b74b90bbbe306c3fc44a0080
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a0b4230f34cfc1b81dc2c15ef8d265bdd348193f5a746ca2018df11549c7ac0
3
  size 1383
run-1/checkpoint-5724/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:842c477c03985d57c436f042185e94f8b68859521c9e0660a688efc82daf99b1
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e140f341ca0a4230bd903c046ed15686cf39544df723eba883508fdb9721c46d
3
  size 1465
run-1/checkpoint-5724/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_global_step": 4770,
3
- "best_metric": 0.9670967741935483,
4
- "best_model_checkpoint": "ModernBERT-base-finetuned-distilled-clinc/run-1/checkpoint-4770",
5
  "epoch": 6.0,
6
  "eval_steps": 500,
7
  "global_step": 5724,
@@ -11,105 +11,105 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.9989517819706499,
14
- "grad_norm": 5.256518363952637,
15
- "learning_rate": 1.7148846960167716e-05,
16
- "loss": 2.94,
17
  "step": 953
18
  },
19
  {
20
  "epoch": 1.0,
21
- "eval_accuracy": 0.9467741935483871,
22
- "eval_loss": 1.2690216302871704,
23
- "eval_runtime": 13.7144,
24
- "eval_samples_per_second": 226.039,
25
- "eval_steps_per_second": 14.146,
26
  "step": 954
27
  },
28
  {
29
  "epoch": 1.9979035639412999,
30
- "grad_norm": 5.380569934844971,
31
- "learning_rate": 1.4294699011680145e-05,
32
- "loss": 0.8842,
33
  "step": 1906
34
  },
35
  {
36
  "epoch": 2.0,
37
- "eval_accuracy": 0.9593548387096774,
38
- "eval_loss": 0.7936070561408997,
39
- "eval_runtime": 13.7,
40
- "eval_samples_per_second": 226.278,
41
- "eval_steps_per_second": 14.161,
42
  "step": 1908
43
  },
44
  {
45
  "epoch": 2.99685534591195,
46
- "grad_norm": 3.613981246948242,
47
- "learning_rate": 1.1440551063192572e-05,
48
- "loss": 0.5699,
49
  "step": 2859
50
  },
51
  {
52
  "epoch": 3.0,
53
- "eval_accuracy": 0.9641935483870968,
54
- "eval_loss": 0.6687456965446472,
55
- "eval_runtime": 13.7363,
56
- "eval_samples_per_second": 225.679,
57
- "eval_steps_per_second": 14.123,
58
  "step": 2862
59
  },
60
  {
61
  "epoch": 3.9958071278825997,
62
- "grad_norm": 3.4474356174468994,
63
- "learning_rate": 8.586403114705001e-06,
64
- "loss": 0.4559,
65
  "step": 3812
66
  },
67
  {
68
  "epoch": 4.0,
69
- "eval_accuracy": 0.9654838709677419,
70
- "eval_loss": 0.6137078404426575,
71
- "eval_runtime": 13.7263,
72
- "eval_samples_per_second": 225.844,
73
- "eval_steps_per_second": 14.133,
74
  "step": 3816
75
  },
76
  {
77
  "epoch": 4.99475890985325,
78
- "grad_norm": 2.365896463394165,
79
- "learning_rate": 5.732255166217431e-06,
80
- "loss": 0.3946,
81
  "step": 4765
82
  },
83
  {
84
  "epoch": 5.0,
85
- "eval_accuracy": 0.9670967741935483,
86
- "eval_loss": 0.5847680568695068,
87
- "eval_runtime": 13.7279,
88
- "eval_samples_per_second": 225.817,
89
- "eval_steps_per_second": 14.132,
90
  "step": 4770
91
  },
92
  {
93
  "epoch": 5.9937106918239,
94
- "grad_norm": 2.425455093383789,
95
- "learning_rate": 2.8781072177298598e-06,
96
- "loss": 0.356,
97
  "step": 5718
98
  },
99
  {
100
  "epoch": 6.0,
101
- "eval_accuracy": 0.9654838709677419,
102
- "eval_loss": 0.5716797709465027,
103
- "eval_runtime": 13.7447,
104
- "eval_samples_per_second": 225.542,
105
- "eval_steps_per_second": 14.115,
106
  "step": 5724
107
  }
108
  ],
109
  "logging_steps": 953,
110
- "max_steps": 6678,
111
  "num_input_tokens_seen": 0,
112
- "num_train_epochs": 7,
113
  "save_steps": 1000000000.0,
114
  "stateful_callbacks": {
115
  "EarlyStoppingCallback": {
@@ -118,7 +118,7 @@
118
  "early_stopping_threshold": 0.0
119
  },
120
  "attributes": {
121
- "early_stopping_patience_counter": 1
122
  }
123
  },
124
  "TrainerControl": {
@@ -127,7 +127,7 @@
127
  "should_evaluate": false,
128
  "should_log": false,
129
  "should_save": true,
130
- "should_training_stop": false
131
  },
132
  "attributes": {}
133
  }
@@ -136,8 +136,8 @@
136
  "train_batch_size": 16,
137
  "trial_name": null,
138
  "trial_params": {
139
- "alpha": 0.15100905033765077,
140
- "num_train_epochs": 7,
141
- "temperature": 18
142
  }
143
  }
 
1
  {
2
+ "best_global_step": 5724,
3
+ "best_metric": 0.9606451612903226,
4
+ "best_model_checkpoint": "ModernBERT-base-finetuned-distilled-clinc/run-1/checkpoint-5724",
5
  "epoch": 6.0,
6
  "eval_steps": 500,
7
  "global_step": 5724,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.9989517819706499,
14
+ "grad_norm": 7.467678070068359,
15
+ "learning_rate": 1.6673654786862336e-05,
16
+ "loss": 3.1301,
17
  "step": 953
18
  },
19
  {
20
  "epoch": 1.0,
21
+ "eval_accuracy": 0.9203225806451613,
22
+ "eval_loss": 1.3439604043960571,
23
+ "eval_runtime": 13.3738,
24
+ "eval_samples_per_second": 231.797,
25
+ "eval_steps_per_second": 14.506,
26
  "step": 954
27
  },
28
  {
29
  "epoch": 1.9979035639412999,
30
+ "grad_norm": 7.362800598144531,
31
+ "learning_rate": 1.3343815513626837e-05,
32
+ "loss": 0.9149,
33
  "step": 1906
34
  },
35
  {
36
  "epoch": 2.0,
37
+ "eval_accuracy": 0.9490322580645161,
38
+ "eval_loss": 0.8044440746307373,
39
+ "eval_runtime": 13.3633,
40
+ "eval_samples_per_second": 231.979,
41
+ "eval_steps_per_second": 14.517,
42
  "step": 1908
43
  },
44
  {
45
  "epoch": 2.99685534591195,
46
+ "grad_norm": 4.091987133026123,
47
+ "learning_rate": 1.0013976240391337e-05,
48
+ "loss": 0.5577,
49
  "step": 2859
50
  },
51
  {
52
  "epoch": 3.0,
53
+ "eval_accuracy": 0.9561290322580646,
54
+ "eval_loss": 0.6619836688041687,
55
+ "eval_runtime": 13.3076,
56
+ "eval_samples_per_second": 232.95,
57
+ "eval_steps_per_second": 14.578,
58
  "step": 2862
59
  },
60
  {
61
  "epoch": 3.9958071278825997,
62
+ "grad_norm": 3.777040958404541,
63
+ "learning_rate": 6.6841369671558355e-06,
64
+ "loss": 0.4371,
65
  "step": 3812
66
  },
67
  {
68
  "epoch": 4.0,
69
+ "eval_accuracy": 0.9590322580645161,
70
+ "eval_loss": 0.5996799468994141,
71
+ "eval_runtime": 13.379,
72
+ "eval_samples_per_second": 231.706,
73
+ "eval_steps_per_second": 14.5,
74
  "step": 3816
75
  },
76
  {
77
  "epoch": 4.99475890985325,
78
+ "grad_norm": 2.1198227405548096,
79
+ "learning_rate": 3.354297693920336e-06,
80
+ "loss": 0.3772,
81
  "step": 4765
82
  },
83
  {
84
  "epoch": 5.0,
85
+ "eval_accuracy": 0.96,
86
+ "eval_loss": 0.5738052725791931,
87
+ "eval_runtime": 13.314,
88
+ "eval_samples_per_second": 232.837,
89
+ "eval_steps_per_second": 14.571,
90
  "step": 4770
91
  },
92
  {
93
  "epoch": 5.9937106918239,
94
+ "grad_norm": 3.388993501663208,
95
+ "learning_rate": 2.445842068483578e-08,
96
+ "loss": 0.3448,
97
  "step": 5718
98
  },
99
  {
100
  "epoch": 6.0,
101
+ "eval_accuracy": 0.9606451612903226,
102
+ "eval_loss": 0.5634305477142334,
103
+ "eval_runtime": 13.4419,
104
+ "eval_samples_per_second": 230.622,
105
+ "eval_steps_per_second": 14.432,
106
  "step": 5724
107
  }
108
  ],
109
  "logging_steps": 953,
110
+ "max_steps": 5724,
111
  "num_input_tokens_seen": 0,
112
+ "num_train_epochs": 6,
113
  "save_steps": 1000000000.0,
114
  "stateful_callbacks": {
115
  "EarlyStoppingCallback": {
 
118
  "early_stopping_threshold": 0.0
119
  },
120
  "attributes": {
121
+ "early_stopping_patience_counter": 0
122
  }
123
  },
124
  "TrainerControl": {
 
127
  "should_evaluate": false,
128
  "should_log": false,
129
  "should_save": true,
130
+ "should_training_stop": true
131
  },
132
  "attributes": {}
133
  }
 
136
  "train_batch_size": 16,
137
  "trial_name": null,
138
  "trial_params": {
139
+ "alpha": 0.9287856252244455,
140
+ "num_train_epochs": 6,
141
+ "temperature": 15
142
  }
143
  }
run-1/checkpoint-5724/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15589d5aeaba3d8b56609cb603fda218d1886c5365ce6493f2d41326202e6ecd
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20ebde890c3b25915c27f2892df04c94cddcc4ef788adf1925833998a07e8392
3
  size 5905