Training in progress, step 270, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73911112
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a96bb255d96fa6a5c36b2a68ed9f157cb7ce4d904166afe1554a5aeab24df468
|
| 3 |
size 73911112
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 148047722
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0bb86e83d07a08af27b3be9f2175e8a0f16b03763e0c8cdd676dff4d98e6b334
|
| 3 |
size 148047722
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e9795343af1ea69aed71e480b971fd4b8490c009519e844b26a34d8fe7d5fb38
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:978b2fc5e17da46b55d130e174b6d196c16b7559a4a5513939af034ef3402d76
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 0.
|
| 4 |
-
"best_model_checkpoint": "/teamspace/studios/this_studio/workspace_3/outputs/Qwen25_Coder_MCQ_5Epochs_0402_2229/checkpoint-
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 30,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -98,6 +98,51 @@
|
|
| 98 |
"eval_samples_per_second": 1.813,
|
| 99 |
"eval_steps_per_second": 0.23,
|
| 100 |
"step": 180
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
}
|
| 102 |
],
|
| 103 |
"logging_steps": 30,
|
|
@@ -117,7 +162,7 @@
|
|
| 117 |
"attributes": {}
|
| 118 |
}
|
| 119 |
},
|
| 120 |
-
"total_flos":
|
| 121 |
"train_batch_size": 32,
|
| 122 |
"trial_name": null,
|
| 123 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 270,
|
| 3 |
+
"best_metric": 0.5840281248092651,
|
| 4 |
+
"best_model_checkpoint": "/teamspace/studios/this_studio/workspace_3/outputs/Qwen25_Coder_MCQ_5Epochs_0402_2229/checkpoint-270",
|
| 5 |
+
"epoch": 2.5233644859813085,
|
| 6 |
"eval_steps": 30,
|
| 7 |
+
"global_step": 270,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 98 |
"eval_samples_per_second": 1.813,
|
| 99 |
"eval_steps_per_second": 0.23,
|
| 100 |
"step": 180
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 1.9626168224299065,
|
| 104 |
+
"grad_norm": 0.17269107699394226,
|
| 105 |
+
"learning_rate": 3.531442281389441e-05,
|
| 106 |
+
"loss": 0.594,
|
| 107 |
+
"step": 210
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"epoch": 1.9626168224299065,
|
| 111 |
+
"eval_loss": 0.5875635147094727,
|
| 112 |
+
"eval_runtime": 78.9327,
|
| 113 |
+
"eval_samples_per_second": 1.799,
|
| 114 |
+
"eval_steps_per_second": 0.228,
|
| 115 |
+
"step": 210
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"epoch": 2.2429906542056073,
|
| 119 |
+
"grad_norm": 0.1980254054069519,
|
| 120 |
+
"learning_rate": 2.0056981513219942e-05,
|
| 121 |
+
"loss": 0.5762,
|
| 122 |
+
"step": 240
|
| 123 |
+
},
|
| 124 |
+
{
|
| 125 |
+
"epoch": 2.2429906542056073,
|
| 126 |
+
"eval_loss": 0.5854137539863586,
|
| 127 |
+
"eval_runtime": 78.9377,
|
| 128 |
+
"eval_samples_per_second": 1.799,
|
| 129 |
+
"eval_steps_per_second": 0.228,
|
| 130 |
+
"step": 240
|
| 131 |
+
},
|
| 132 |
+
{
|
| 133 |
+
"epoch": 2.5233644859813085,
|
| 134 |
+
"grad_norm": 0.19368061423301697,
|
| 135 |
+
"learning_rate": 8.307466849412366e-06,
|
| 136 |
+
"loss": 0.5782,
|
| 137 |
+
"step": 270
|
| 138 |
+
},
|
| 139 |
+
{
|
| 140 |
+
"epoch": 2.5233644859813085,
|
| 141 |
+
"eval_loss": 0.5840281248092651,
|
| 142 |
+
"eval_runtime": 77.425,
|
| 143 |
+
"eval_samples_per_second": 1.834,
|
| 144 |
+
"eval_steps_per_second": 0.232,
|
| 145 |
+
"step": 270
|
| 146 |
}
|
| 147 |
],
|
| 148 |
"logging_steps": 30,
|
|
|
|
| 162 |
"attributes": {}
|
| 163 |
}
|
| 164 |
},
|
| 165 |
+
"total_flos": 1.4052187012123853e+17,
|
| 166 |
"train_batch_size": 32,
|
| 167 |
"trial_name": null,
|
| 168 |
"trial_params": null
|