Training in progress, epoch 6
Browse files- model.safetensors +1 -1
- run-0/checkpoint-720/model.safetensors +1 -1
- run-0/checkpoint-720/optimizer.pt +1 -1
- run-0/checkpoint-720/scaler.pt +3 -0
- run-0/checkpoint-720/trainer_state.json +37 -37
- run-0/checkpoint-720/training_args.bin +1 -1
- runs/Oct26_17-22-45_49ca29516c75/events.out.tfevents.1761500015.49ca29516c75.609.3 +2 -2
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 598898116
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1efac210aef14a836eb8349cce523c66e2d6fa5a154e59184f0b9fac6a1455c0
|
| 3 |
size 598898116
|
run-0/checkpoint-720/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 598898116
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1efac210aef14a836eb8349cce523c66e2d6fa5a154e59184f0b9fac6a1455c0
|
| 3 |
size 598898116
|
run-0/checkpoint-720/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1197886411
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ccc361d9ba19744a8fac05c2041365476665d92b97a6be454d52ff012130de0b
|
| 3 |
size 1197886411
|
run-0/checkpoint-720/scaler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5995efe96d9a5f379fc424e908527d23598b0bf812a8ecdac188a585ddec1b05
|
| 3 |
+
size 1383
|
run-0/checkpoint-720/trainer_state.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 0.
|
| 4 |
-
"best_model_checkpoint": "student_modernBERT_clinc_oos/run-0/checkpoint-
|
| 5 |
"epoch": 6.0,
|
| 6 |
"eval_steps": 500,
|
| 7 |
"global_step": 720,
|
|
@@ -11,63 +11,63 @@
|
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
"epoch": 1.0,
|
| 14 |
-
"eval_accuracy": 0.
|
| 15 |
-
"eval_loss":
|
| 16 |
-
"eval_runtime":
|
| 17 |
-
"eval_samples_per_second":
|
| 18 |
-
"eval_steps_per_second":
|
| 19 |
"step": 120
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"epoch": 2.0,
|
| 23 |
-
"eval_accuracy": 0.
|
| 24 |
-
"eval_loss": 1.
|
| 25 |
-
"eval_runtime":
|
| 26 |
-
"eval_samples_per_second":
|
| 27 |
-
"eval_steps_per_second":
|
| 28 |
"step": 240
|
| 29 |
},
|
| 30 |
{
|
| 31 |
"epoch": 3.0,
|
| 32 |
-
"eval_accuracy": 0.
|
| 33 |
-
"eval_loss": 0.
|
| 34 |
-
"eval_runtime":
|
| 35 |
-
"eval_samples_per_second":
|
| 36 |
-
"eval_steps_per_second":
|
| 37 |
"step": 360
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"epoch": 4.0,
|
| 41 |
-
"eval_accuracy": 0.
|
| 42 |
-
"eval_loss": 0.
|
| 43 |
-
"eval_runtime":
|
| 44 |
-
"eval_samples_per_second":
|
| 45 |
-
"eval_steps_per_second":
|
| 46 |
"step": 480
|
| 47 |
},
|
| 48 |
{
|
| 49 |
"epoch": 4.167714884696017,
|
| 50 |
-
"grad_norm":
|
| 51 |
"learning_rate": 6.13888888888889e-06,
|
| 52 |
-
"loss":
|
| 53 |
"step": 500
|
| 54 |
},
|
| 55 |
{
|
| 56 |
"epoch": 5.0,
|
| 57 |
-
"eval_accuracy": 0.
|
| 58 |
-
"eval_loss": 0.
|
| 59 |
-
"eval_runtime":
|
| 60 |
-
"eval_samples_per_second":
|
| 61 |
-
"eval_steps_per_second":
|
| 62 |
"step": 600
|
| 63 |
},
|
| 64 |
{
|
| 65 |
"epoch": 6.0,
|
| 66 |
-
"eval_accuracy": 0.
|
| 67 |
-
"eval_loss": 0.
|
| 68 |
-
"eval_runtime":
|
| 69 |
-
"eval_samples_per_second":
|
| 70 |
-
"eval_steps_per_second":
|
| 71 |
"step": 720
|
| 72 |
}
|
| 73 |
],
|
|
@@ -92,8 +92,8 @@
|
|
| 92 |
"train_batch_size": 32,
|
| 93 |
"trial_name": null,
|
| 94 |
"trial_params": {
|
| 95 |
-
"alpha": 0.
|
| 96 |
"num_train_epochs": 6,
|
| 97 |
-
"temperature":
|
| 98 |
}
|
| 99 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 720,
|
| 3 |
+
"best_metric": 0.9245161290322581,
|
| 4 |
+
"best_model_checkpoint": "student_modernBERT_clinc_oos/run-0/checkpoint-720",
|
| 5 |
"epoch": 6.0,
|
| 6 |
"eval_steps": 500,
|
| 7 |
"global_step": 720,
|
|
|
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
"epoch": 1.0,
|
| 14 |
+
"eval_accuracy": 0.5541935483870968,
|
| 15 |
+
"eval_loss": 1.7238802909851074,
|
| 16 |
+
"eval_runtime": 7.0038,
|
| 17 |
+
"eval_samples_per_second": 442.616,
|
| 18 |
+
"eval_steps_per_second": 13.85,
|
| 19 |
"step": 120
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"epoch": 2.0,
|
| 23 |
+
"eval_accuracy": 0.8570967741935483,
|
| 24 |
+
"eval_loss": 1.1208338737487793,
|
| 25 |
+
"eval_runtime": 6.8486,
|
| 26 |
+
"eval_samples_per_second": 452.646,
|
| 27 |
+
"eval_steps_per_second": 14.163,
|
| 28 |
"step": 240
|
| 29 |
},
|
| 30 |
{
|
| 31 |
"epoch": 3.0,
|
| 32 |
+
"eval_accuracy": 0.9051612903225806,
|
| 33 |
+
"eval_loss": 0.9070075154304504,
|
| 34 |
+
"eval_runtime": 6.8675,
|
| 35 |
+
"eval_samples_per_second": 451.403,
|
| 36 |
+
"eval_steps_per_second": 14.125,
|
| 37 |
"step": 360
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"epoch": 4.0,
|
| 41 |
+
"eval_accuracy": 0.9164516129032259,
|
| 42 |
+
"eval_loss": 0.8145634531974792,
|
| 43 |
+
"eval_runtime": 6.8676,
|
| 44 |
+
"eval_samples_per_second": 451.395,
|
| 45 |
+
"eval_steps_per_second": 14.124,
|
| 46 |
"step": 480
|
| 47 |
},
|
| 48 |
{
|
| 49 |
"epoch": 4.167714884696017,
|
| 50 |
+
"grad_norm": 6.844017505645752,
|
| 51 |
"learning_rate": 6.13888888888889e-06,
|
| 52 |
+
"loss": 5.4805,
|
| 53 |
"step": 500
|
| 54 |
},
|
| 55 |
{
|
| 56 |
"epoch": 5.0,
|
| 57 |
+
"eval_accuracy": 0.9219354838709677,
|
| 58 |
+
"eval_loss": 0.7720023989677429,
|
| 59 |
+
"eval_runtime": 6.8461,
|
| 60 |
+
"eval_samples_per_second": 452.813,
|
| 61 |
+
"eval_steps_per_second": 14.169,
|
| 62 |
"step": 600
|
| 63 |
},
|
| 64 |
{
|
| 65 |
"epoch": 6.0,
|
| 66 |
+
"eval_accuracy": 0.9245161290322581,
|
| 67 |
+
"eval_loss": 0.7577660083770752,
|
| 68 |
+
"eval_runtime": 6.835,
|
| 69 |
+
"eval_samples_per_second": 453.548,
|
| 70 |
+
"eval_steps_per_second": 14.192,
|
| 71 |
"step": 720
|
| 72 |
}
|
| 73 |
],
|
|
|
|
| 92 |
"train_batch_size": 32,
|
| 93 |
"trial_name": null,
|
| 94 |
"trial_params": {
|
| 95 |
+
"alpha": 0.17839594203233045,
|
| 96 |
"num_train_epochs": 6,
|
| 97 |
+
"temperature": 13
|
| 98 |
}
|
| 99 |
}
|
run-0/checkpoint-720/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5905
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:defea8818495be5704f8f89261e105141cdc7770477d9f23e5c577ed2e3f7868
|
| 3 |
size 5905
|
runs/Oct26_17-22-45_49ca29516c75/events.out.tfevents.1761500015.49ca29516c75.609.3
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f41c254dc090469b35f8a4d9a7bcc851a64bd7ca9b81cdea3b3c2426806f6cc9
|
| 3 |
+
size 15718
|