Training in progress, epoch 1
Browse files- model.safetensors +1 -1
- run-2/checkpoint-240/model.safetensors +1 -1
- run-2/checkpoint-240/optimizer.pt +1 -1
- run-2/checkpoint-240/scheduler.pt +1 -1
- run-2/checkpoint-240/training_args.bin +1 -1
- run-2/checkpoint-360/model.safetensors +1 -1
- run-2/checkpoint-360/optimizer.pt +1 -1
- run-2/checkpoint-360/scheduler.pt +1 -1
- run-2/checkpoint-360/trainer_state.json +21 -21
- run-2/checkpoint-360/training_args.bin +1 -1
- run-2/checkpoint-480/model.safetensors +1 -1
- run-2/checkpoint-480/optimizer.pt +1 -1
- run-2/checkpoint-480/scheduler.pt +1 -1
- run-2/checkpoint-480/trainer_state.json +26 -26
- run-2/checkpoint-480/training_args.bin +1 -1
- run-2/checkpoint-600/model.safetensors +1 -1
- run-2/checkpoint-600/optimizer.pt +1 -1
- run-2/checkpoint-600/scheduler.pt +1 -1
- run-2/checkpoint-600/trainer_state.json +34 -34
- run-2/checkpoint-600/training_args.bin +1 -1
- run-2/checkpoint-720/model.safetensors +1 -1
- run-2/checkpoint-720/optimizer.pt +2 -2
- run-2/checkpoint-720/rng_state.pth +3 -0
- run-2/checkpoint-720/scheduler.pt +3 -0
- run-2/checkpoint-720/trainer_state.json +99 -0
- run-2/checkpoint-720/training_args.bin +1 -1
- runs/Oct22_10-42-10_5515f8f51c79/events.out.tfevents.1761132371.5515f8f51c79.36495.3 +2 -2
- runs/Oct22_10-42-10_5515f8f51c79/events.out.tfevents.1761133175.5515f8f51c79.36495.4 +3 -0
- runs/Oct22_15-09-25_5515f8f51c79/events.out.tfevents.1761145777.5515f8f51c79.102745.0 +3 -0
- training_args.bin +1 -1
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 598898116
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e9a93c96af72d82d1948b211536371c8b90e993f97655333941c27153619b48
|
| 3 |
size 598898116
|
run-2/checkpoint-240/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 598898116
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d19f751c835e4a83f407bd290c3cfa629eb5b7d69cc9a6c4aef931fd56b42d58
|
| 3 |
size 598898116
|
run-2/checkpoint-240/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1197886411
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43bc5a5d7df840cb2bcd11c9964b930c621fdcbdaca88129d582aa99bc165459
|
| 3 |
size 1197886411
|
run-2/checkpoint-240/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d17eed1de60c5b0d63a7107d97b5f756179afc2b9ed6fa9240b6c0f51933a5ca
|
| 3 |
size 1465
|
run-2/checkpoint-240/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5905
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:30763bea4e95fddafc920e08c1ea1c5e03c960715d174ba50f005d578e068b60
|
| 3 |
size 5905
|
run-2/checkpoint-360/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 598898116
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e71774b6250dccc404d7a43eb99909369c29006c636d7d5f020a1936f152985
|
| 3 |
size 598898116
|
run-2/checkpoint-360/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1197886411
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6aeb750fd063d0897295ed9b3c704e8ca8affaccbfe391748bb41367334f2ec9
|
| 3 |
size 1197886411
|
run-2/checkpoint-360/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bc5e6ebd5938282b4ca3e8958f53e1e7f1aab10a1a01fad3653bf04d73b15d61
|
| 3 |
size 1465
|
run-2/checkpoint-360/trainer_state.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": 360,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": "student_modernBERT_clinc_oos/run-2/checkpoint-360",
|
| 5 |
"epoch": 3.0,
|
| 6 |
"eval_steps": 500,
|
|
@@ -11,36 +11,36 @@
|
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
"epoch": 1.0,
|
| 14 |
-
"eval_accuracy": 0.
|
| 15 |
-
"eval_loss":
|
| 16 |
-
"eval_runtime": 8.
|
| 17 |
-
"eval_samples_per_second": 358.
|
| 18 |
-
"eval_steps_per_second": 11.
|
| 19 |
"step": 120
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"epoch": 2.0,
|
| 23 |
-
"eval_accuracy": 0.
|
| 24 |
-
"eval_loss":
|
| 25 |
-
"eval_runtime":
|
| 26 |
-
"eval_samples_per_second":
|
| 27 |
-
"eval_steps_per_second":
|
| 28 |
"step": 240
|
| 29 |
},
|
| 30 |
{
|
| 31 |
"epoch": 3.0,
|
| 32 |
-
"eval_accuracy": 0.
|
| 33 |
-
"eval_loss":
|
| 34 |
-
"eval_runtime": 9.
|
| 35 |
-
"eval_samples_per_second":
|
| 36 |
-
"eval_steps_per_second": 10.
|
| 37 |
"step": 360
|
| 38 |
}
|
| 39 |
],
|
| 40 |
"logging_steps": 500,
|
| 41 |
-
"max_steps":
|
| 42 |
"num_input_tokens_seen": 0,
|
| 43 |
-
"num_train_epochs":
|
| 44 |
"save_steps": 500,
|
| 45 |
"stateful_callbacks": {
|
| 46 |
"TrainerControl": {
|
|
@@ -58,8 +58,8 @@
|
|
| 58 |
"train_batch_size": 32,
|
| 59 |
"trial_name": null,
|
| 60 |
"trial_params": {
|
| 61 |
-
"alpha": 0.
|
| 62 |
-
"num_train_epochs":
|
| 63 |
-
"temperature":
|
| 64 |
}
|
| 65 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": 360,
|
| 3 |
+
"best_metric": 0.922258064516129,
|
| 4 |
"best_model_checkpoint": "student_modernBERT_clinc_oos/run-2/checkpoint-360",
|
| 5 |
"epoch": 3.0,
|
| 6 |
"eval_steps": 500,
|
|
|
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
"epoch": 1.0,
|
| 14 |
+
"eval_accuracy": 0.6767741935483871,
|
| 15 |
+
"eval_loss": 7.750784873962402,
|
| 16 |
+
"eval_runtime": 8.6464,
|
| 17 |
+
"eval_samples_per_second": 358.531,
|
| 18 |
+
"eval_steps_per_second": 11.219,
|
| 19 |
"step": 120
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"epoch": 2.0,
|
| 23 |
+
"eval_accuracy": 0.8777419354838709,
|
| 24 |
+
"eval_loss": 3.922584056854248,
|
| 25 |
+
"eval_runtime": 8.802,
|
| 26 |
+
"eval_samples_per_second": 352.193,
|
| 27 |
+
"eval_steps_per_second": 11.02,
|
| 28 |
"step": 240
|
| 29 |
},
|
| 30 |
{
|
| 31 |
"epoch": 3.0,
|
| 32 |
+
"eval_accuracy": 0.922258064516129,
|
| 33 |
+
"eval_loss": 2.6575753688812256,
|
| 34 |
+
"eval_runtime": 9.5831,
|
| 35 |
+
"eval_samples_per_second": 323.485,
|
| 36 |
+
"eval_steps_per_second": 10.122,
|
| 37 |
"step": 360
|
| 38 |
}
|
| 39 |
],
|
| 40 |
"logging_steps": 500,
|
| 41 |
+
"max_steps": 720,
|
| 42 |
"num_input_tokens_seen": 0,
|
| 43 |
+
"num_train_epochs": 6,
|
| 44 |
"save_steps": 500,
|
| 45 |
"stateful_callbacks": {
|
| 46 |
"TrainerControl": {
|
|
|
|
| 58 |
"train_batch_size": 32,
|
| 59 |
"trial_name": null,
|
| 60 |
"trial_params": {
|
| 61 |
+
"alpha": 0.7203792274973846,
|
| 62 |
+
"num_train_epochs": 6,
|
| 63 |
+
"temperature": 7
|
| 64 |
}
|
| 65 |
}
|
run-2/checkpoint-360/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5905
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:30763bea4e95fddafc920e08c1ea1c5e03c960715d174ba50f005d578e068b60
|
| 3 |
size 5905
|
run-2/checkpoint-480/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 598898116
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1e82e0c12aba607099b4d2be138de8f321c2124b057f6f454b315485344be22
|
| 3 |
size 598898116
|
run-2/checkpoint-480/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1197886411
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f795c607132939ca56cee0f6003a8a8eea7b7e4bb0953a26f65ba50ea431cec8
|
| 3 |
size 1197886411
|
run-2/checkpoint-480/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ddd0552961f51613cb77af24bc0e3fccce21e8083c3d1c6f93f74ab36b5046da
|
| 3 |
size 1465
|
run-2/checkpoint-480/trainer_state.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": 480,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": "student_modernBERT_clinc_oos/run-2/checkpoint-480",
|
| 5 |
"epoch": 4.0,
|
| 6 |
"eval_steps": 500,
|
|
@@ -11,45 +11,45 @@
|
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
"epoch": 1.0,
|
| 14 |
-
"eval_accuracy": 0.
|
| 15 |
-
"eval_loss":
|
| 16 |
-
"eval_runtime": 8.
|
| 17 |
-
"eval_samples_per_second": 358.
|
| 18 |
-
"eval_steps_per_second": 11.
|
| 19 |
"step": 120
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"epoch": 2.0,
|
| 23 |
-
"eval_accuracy": 0.
|
| 24 |
-
"eval_loss":
|
| 25 |
-
"eval_runtime":
|
| 26 |
-
"eval_samples_per_second":
|
| 27 |
-
"eval_steps_per_second":
|
| 28 |
"step": 240
|
| 29 |
},
|
| 30 |
{
|
| 31 |
"epoch": 3.0,
|
| 32 |
-
"eval_accuracy": 0.
|
| 33 |
-
"eval_loss":
|
| 34 |
-
"eval_runtime": 9.
|
| 35 |
-
"eval_samples_per_second":
|
| 36 |
-
"eval_steps_per_second": 10.
|
| 37 |
"step": 360
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"epoch": 4.0,
|
| 41 |
-
"eval_accuracy": 0.
|
| 42 |
-
"eval_loss":
|
| 43 |
-
"eval_runtime": 8.
|
| 44 |
-
"eval_samples_per_second":
|
| 45 |
-
"eval_steps_per_second": 10.
|
| 46 |
"step": 480
|
| 47 |
}
|
| 48 |
],
|
| 49 |
"logging_steps": 500,
|
| 50 |
-
"max_steps":
|
| 51 |
"num_input_tokens_seen": 0,
|
| 52 |
-
"num_train_epochs":
|
| 53 |
"save_steps": 500,
|
| 54 |
"stateful_callbacks": {
|
| 55 |
"TrainerControl": {
|
|
@@ -67,8 +67,8 @@
|
|
| 67 |
"train_batch_size": 32,
|
| 68 |
"trial_name": null,
|
| 69 |
"trial_params": {
|
| 70 |
-
"alpha": 0.
|
| 71 |
-
"num_train_epochs":
|
| 72 |
-
"temperature":
|
| 73 |
}
|
| 74 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": 480,
|
| 3 |
+
"best_metric": 0.9312903225806451,
|
| 4 |
"best_model_checkpoint": "student_modernBERT_clinc_oos/run-2/checkpoint-480",
|
| 5 |
"epoch": 4.0,
|
| 6 |
"eval_steps": 500,
|
|
|
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
"epoch": 1.0,
|
| 14 |
+
"eval_accuracy": 0.6767741935483871,
|
| 15 |
+
"eval_loss": 7.750784873962402,
|
| 16 |
+
"eval_runtime": 8.6464,
|
| 17 |
+
"eval_samples_per_second": 358.531,
|
| 18 |
+
"eval_steps_per_second": 11.219,
|
| 19 |
"step": 120
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"epoch": 2.0,
|
| 23 |
+
"eval_accuracy": 0.8777419354838709,
|
| 24 |
+
"eval_loss": 3.922584056854248,
|
| 25 |
+
"eval_runtime": 8.802,
|
| 26 |
+
"eval_samples_per_second": 352.193,
|
| 27 |
+
"eval_steps_per_second": 11.02,
|
| 28 |
"step": 240
|
| 29 |
},
|
| 30 |
{
|
| 31 |
"epoch": 3.0,
|
| 32 |
+
"eval_accuracy": 0.922258064516129,
|
| 33 |
+
"eval_loss": 2.6575753688812256,
|
| 34 |
+
"eval_runtime": 9.5831,
|
| 35 |
+
"eval_samples_per_second": 323.485,
|
| 36 |
+
"eval_steps_per_second": 10.122,
|
| 37 |
"step": 360
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"epoch": 4.0,
|
| 41 |
+
"eval_accuracy": 0.9312903225806451,
|
| 42 |
+
"eval_loss": 2.112471342086792,
|
| 43 |
+
"eval_runtime": 8.867,
|
| 44 |
+
"eval_samples_per_second": 349.611,
|
| 45 |
+
"eval_steps_per_second": 10.939,
|
| 46 |
"step": 480
|
| 47 |
}
|
| 48 |
],
|
| 49 |
"logging_steps": 500,
|
| 50 |
+
"max_steps": 720,
|
| 51 |
"num_input_tokens_seen": 0,
|
| 52 |
+
"num_train_epochs": 6,
|
| 53 |
"save_steps": 500,
|
| 54 |
"stateful_callbacks": {
|
| 55 |
"TrainerControl": {
|
|
|
|
| 67 |
"train_batch_size": 32,
|
| 68 |
"trial_name": null,
|
| 69 |
"trial_params": {
|
| 70 |
+
"alpha": 0.7203792274973846,
|
| 71 |
+
"num_train_epochs": 6,
|
| 72 |
+
"temperature": 7
|
| 73 |
}
|
| 74 |
}
|
run-2/checkpoint-480/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5905
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:30763bea4e95fddafc920e08c1ea1c5e03c960715d174ba50f005d578e068b60
|
| 3 |
size 5905
|
run-2/checkpoint-600/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 598898116
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1dfaeca3a10b2e05d4644ba854079a0f41d2c936606f2375dfbb165a8632c62
|
| 3 |
size 598898116
|
run-2/checkpoint-600/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1197886411
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a5e880c068ce9544975471f48cc4c92dc9f5d68252ba51d7a6ac95e64ec8bdc
|
| 3 |
size 1197886411
|
run-2/checkpoint-600/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5bf3fb53f5149438e86367cf7dc1034ffdf5899cb1e2f8a4da236e4f899094c7
|
| 3 |
size 1465
|
run-2/checkpoint-600/trainer_state.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": 600,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": "student_modernBERT_clinc_oos/run-2/checkpoint-600",
|
| 5 |
"epoch": 5.0,
|
| 6 |
"eval_steps": 500,
|
|
@@ -11,61 +11,61 @@
|
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
"epoch": 1.0,
|
| 14 |
-
"eval_accuracy": 0.
|
| 15 |
-
"eval_loss":
|
| 16 |
-
"eval_runtime": 8.
|
| 17 |
-
"eval_samples_per_second": 358.
|
| 18 |
-
"eval_steps_per_second": 11.
|
| 19 |
"step": 120
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"epoch": 2.0,
|
| 23 |
-
"eval_accuracy": 0.
|
| 24 |
-
"eval_loss":
|
| 25 |
-
"eval_runtime":
|
| 26 |
-
"eval_samples_per_second":
|
| 27 |
-
"eval_steps_per_second":
|
| 28 |
"step": 240
|
| 29 |
},
|
| 30 |
{
|
| 31 |
"epoch": 3.0,
|
| 32 |
-
"eval_accuracy": 0.
|
| 33 |
-
"eval_loss":
|
| 34 |
-
"eval_runtime": 9.
|
| 35 |
-
"eval_samples_per_second":
|
| 36 |
-
"eval_steps_per_second": 10.
|
| 37 |
"step": 360
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"epoch": 4.0,
|
| 41 |
-
"eval_accuracy": 0.
|
| 42 |
-
"eval_loss":
|
| 43 |
-
"eval_runtime": 8.
|
| 44 |
-
"eval_samples_per_second":
|
| 45 |
-
"eval_steps_per_second": 10.
|
| 46 |
"step": 480
|
| 47 |
},
|
| 48 |
{
|
| 49 |
"epoch": 4.167714884696017,
|
| 50 |
-
"grad_norm":
|
| 51 |
-
"learning_rate":
|
| 52 |
-
"loss":
|
| 53 |
"step": 500
|
| 54 |
},
|
| 55 |
{
|
| 56 |
"epoch": 5.0,
|
| 57 |
-
"eval_accuracy": 0.
|
| 58 |
-
"eval_loss": 1.
|
| 59 |
-
"eval_runtime":
|
| 60 |
-
"eval_samples_per_second":
|
| 61 |
-
"eval_steps_per_second": 10.
|
| 62 |
"step": 600
|
| 63 |
}
|
| 64 |
],
|
| 65 |
"logging_steps": 500,
|
| 66 |
-
"max_steps":
|
| 67 |
"num_input_tokens_seen": 0,
|
| 68 |
-
"num_train_epochs":
|
| 69 |
"save_steps": 500,
|
| 70 |
"stateful_callbacks": {
|
| 71 |
"TrainerControl": {
|
|
@@ -83,8 +83,8 @@
|
|
| 83 |
"train_batch_size": 32,
|
| 84 |
"trial_name": null,
|
| 85 |
"trial_params": {
|
| 86 |
-
"alpha": 0.
|
| 87 |
-
"num_train_epochs":
|
| 88 |
-
"temperature":
|
| 89 |
}
|
| 90 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": 600,
|
| 3 |
+
"best_metric": 0.9393548387096774,
|
| 4 |
"best_model_checkpoint": "student_modernBERT_clinc_oos/run-2/checkpoint-600",
|
| 5 |
"epoch": 5.0,
|
| 6 |
"eval_steps": 500,
|
|
|
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
"epoch": 1.0,
|
| 14 |
+
"eval_accuracy": 0.6767741935483871,
|
| 15 |
+
"eval_loss": 7.750784873962402,
|
| 16 |
+
"eval_runtime": 8.6464,
|
| 17 |
+
"eval_samples_per_second": 358.531,
|
| 18 |
+
"eval_steps_per_second": 11.219,
|
| 19 |
"step": 120
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"epoch": 2.0,
|
| 23 |
+
"eval_accuracy": 0.8777419354838709,
|
| 24 |
+
"eval_loss": 3.922584056854248,
|
| 25 |
+
"eval_runtime": 8.802,
|
| 26 |
+
"eval_samples_per_second": 352.193,
|
| 27 |
+
"eval_steps_per_second": 11.02,
|
| 28 |
"step": 240
|
| 29 |
},
|
| 30 |
{
|
| 31 |
"epoch": 3.0,
|
| 32 |
+
"eval_accuracy": 0.922258064516129,
|
| 33 |
+
"eval_loss": 2.6575753688812256,
|
| 34 |
+
"eval_runtime": 9.5831,
|
| 35 |
+
"eval_samples_per_second": 323.485,
|
| 36 |
+
"eval_steps_per_second": 10.122,
|
| 37 |
"step": 360
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"epoch": 4.0,
|
| 41 |
+
"eval_accuracy": 0.9312903225806451,
|
| 42 |
+
"eval_loss": 2.112471342086792,
|
| 43 |
+
"eval_runtime": 8.867,
|
| 44 |
+
"eval_samples_per_second": 349.611,
|
| 45 |
+
"eval_steps_per_second": 10.939,
|
| 46 |
"step": 480
|
| 47 |
},
|
| 48 |
{
|
| 49 |
"epoch": 4.167714884696017,
|
| 50 |
+
"grad_norm": 28.72196388244629,
|
| 51 |
+
"learning_rate": 6.13888888888889e-06,
|
| 52 |
+
"loss": 22.9799,
|
| 53 |
"step": 500
|
| 54 |
},
|
| 55 |
{
|
| 56 |
"epoch": 5.0,
|
| 57 |
+
"eval_accuracy": 0.9393548387096774,
|
| 58 |
+
"eval_loss": 1.863680362701416,
|
| 59 |
+
"eval_runtime": 8.8973,
|
| 60 |
+
"eval_samples_per_second": 348.42,
|
| 61 |
+
"eval_steps_per_second": 10.902,
|
| 62 |
"step": 600
|
| 63 |
}
|
| 64 |
],
|
| 65 |
"logging_steps": 500,
|
| 66 |
+
"max_steps": 720,
|
| 67 |
"num_input_tokens_seen": 0,
|
| 68 |
+
"num_train_epochs": 6,
|
| 69 |
"save_steps": 500,
|
| 70 |
"stateful_callbacks": {
|
| 71 |
"TrainerControl": {
|
|
|
|
| 83 |
"train_batch_size": 32,
|
| 84 |
"trial_name": null,
|
| 85 |
"trial_params": {
|
| 86 |
+
"alpha": 0.7203792274973846,
|
| 87 |
+
"num_train_epochs": 6,
|
| 88 |
+
"temperature": 7
|
| 89 |
}
|
| 90 |
}
|
run-2/checkpoint-600/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5905
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:30763bea4e95fddafc920e08c1ea1c5e03c960715d174ba50f005d578e068b60
|
| 3 |
size 5905
|
run-2/checkpoint-720/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 598898116
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6590962f40d2c0c783a5746d100afd9db7ec6b03610c2894af9467c18e00f0d3
|
| 3 |
size 598898116
|
run-2/checkpoint-720/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aba6bd0191facb8da3d8c41285b6d8db14979919284c22f722d03d1e981bab38
|
| 3 |
+
size 1197886411
|
run-2/checkpoint-720/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06eef7f954d1b09437a102c64a6d35db565f4aaad01b684ed37a17f3c3f88e86
|
| 3 |
+
size 14645
|
run-2/checkpoint-720/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3863a446d67309a219068d04b8e7029d8b680b52900fd8ef93439bde5094d2a4
|
| 3 |
+
size 1465
|
run-2/checkpoint-720/trainer_state.json
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": 720,
|
| 3 |
+
"best_metric": 0.9425806451612904,
|
| 4 |
+
"best_model_checkpoint": "student_modernBERT_clinc_oos/run-2/checkpoint-720",
|
| 5 |
+
"epoch": 6.0,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 720,
|
| 8 |
+
"is_hyper_param_search": true,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 1.0,
|
| 14 |
+
"eval_accuracy": 0.6767741935483871,
|
| 15 |
+
"eval_loss": 7.750784873962402,
|
| 16 |
+
"eval_runtime": 8.6464,
|
| 17 |
+
"eval_samples_per_second": 358.531,
|
| 18 |
+
"eval_steps_per_second": 11.219,
|
| 19 |
+
"step": 120
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"epoch": 2.0,
|
| 23 |
+
"eval_accuracy": 0.8777419354838709,
|
| 24 |
+
"eval_loss": 3.922584056854248,
|
| 25 |
+
"eval_runtime": 8.802,
|
| 26 |
+
"eval_samples_per_second": 352.193,
|
| 27 |
+
"eval_steps_per_second": 11.02,
|
| 28 |
+
"step": 240
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"epoch": 3.0,
|
| 32 |
+
"eval_accuracy": 0.922258064516129,
|
| 33 |
+
"eval_loss": 2.6575753688812256,
|
| 34 |
+
"eval_runtime": 9.5831,
|
| 35 |
+
"eval_samples_per_second": 323.485,
|
| 36 |
+
"eval_steps_per_second": 10.122,
|
| 37 |
+
"step": 360
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 4.0,
|
| 41 |
+
"eval_accuracy": 0.9312903225806451,
|
| 42 |
+
"eval_loss": 2.112471342086792,
|
| 43 |
+
"eval_runtime": 8.867,
|
| 44 |
+
"eval_samples_per_second": 349.611,
|
| 45 |
+
"eval_steps_per_second": 10.939,
|
| 46 |
+
"step": 480
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"epoch": 4.167714884696017,
|
| 50 |
+
"grad_norm": 28.72196388244629,
|
| 51 |
+
"learning_rate": 6.13888888888889e-06,
|
| 52 |
+
"loss": 22.9799,
|
| 53 |
+
"step": 500
|
| 54 |
+
},
|
| 55 |
+
{
|
| 56 |
+
"epoch": 5.0,
|
| 57 |
+
"eval_accuracy": 0.9393548387096774,
|
| 58 |
+
"eval_loss": 1.863680362701416,
|
| 59 |
+
"eval_runtime": 8.8973,
|
| 60 |
+
"eval_samples_per_second": 348.42,
|
| 61 |
+
"eval_steps_per_second": 10.902,
|
| 62 |
+
"step": 600
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
"epoch": 6.0,
|
| 66 |
+
"eval_accuracy": 0.9425806451612904,
|
| 67 |
+
"eval_loss": 1.7904150485992432,
|
| 68 |
+
"eval_runtime": 9.4998,
|
| 69 |
+
"eval_samples_per_second": 326.321,
|
| 70 |
+
"eval_steps_per_second": 10.211,
|
| 71 |
+
"step": 720
|
| 72 |
+
}
|
| 73 |
+
],
|
| 74 |
+
"logging_steps": 500,
|
| 75 |
+
"max_steps": 720,
|
| 76 |
+
"num_input_tokens_seen": 0,
|
| 77 |
+
"num_train_epochs": 6,
|
| 78 |
+
"save_steps": 500,
|
| 79 |
+
"stateful_callbacks": {
|
| 80 |
+
"TrainerControl": {
|
| 81 |
+
"args": {
|
| 82 |
+
"should_epoch_stop": false,
|
| 83 |
+
"should_evaluate": false,
|
| 84 |
+
"should_log": false,
|
| 85 |
+
"should_save": true,
|
| 86 |
+
"should_training_stop": true
|
| 87 |
+
},
|
| 88 |
+
"attributes": {}
|
| 89 |
+
}
|
| 90 |
+
},
|
| 91 |
+
"total_flos": 891058846689456.0,
|
| 92 |
+
"train_batch_size": 32,
|
| 93 |
+
"trial_name": null,
|
| 94 |
+
"trial_params": {
|
| 95 |
+
"alpha": 0.7203792274973846,
|
| 96 |
+
"num_train_epochs": 6,
|
| 97 |
+
"temperature": 7
|
| 98 |
+
}
|
| 99 |
+
}
|
run-2/checkpoint-720/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5905
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:30763bea4e95fddafc920e08c1ea1c5e03c960715d174ba50f005d578e068b60
|
| 3 |
size 5905
|
runs/Oct22_10-42-10_5515f8f51c79/events.out.tfevents.1761132371.5515f8f51c79.36495.3
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e39c74d3b7afbaf88687b5fce085a0d250458d5b92a90bd2bbd77e86e72eb2f4
|
| 3 |
+
size 15718
|
runs/Oct22_10-42-10_5515f8f51c79/events.out.tfevents.1761133175.5515f8f51c79.36495.4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4f6be3cc29f8c83b7ddef83f92deaf61b64c8828e775002346da9c16c147ae4
|
| 3 |
+
size 15725
|
runs/Oct22_15-09-25_5515f8f51c79/events.out.tfevents.1761145777.5515f8f51c79.102745.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:615a882739da2feb430a50d5b9acff8d7bab3eaf80a93bed537e2f1af4450667
|
| 3 |
+
size 13861
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5905
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a9027474d38a9f6a2b10e091b1562daaa0163abd36a73ab4256da426ba345b2
|
| 3 |
size 5905
|