Training in progress, step 1880, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 100697728
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2afddf3ec7ee4eb13b2ada4bf4720336a66bcb17f46f329977812b67bd67d4e2
|
| 3 |
size 100697728
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 201541754
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c254584d7510ca4eeaff0cca975c08ed55d713bbf9e668172f8cbf97495e77e2
|
| 3 |
size 201541754
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac4550c76533f857f7cf55740ed3623cc9b4550e64e43f48fe63acb894cf87d3
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fcbe60adc58daefe5828383acc365415f8f2c12797679d185c0ae4e7a1e01d05
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -1495,14 +1495,30 @@
|
|
| 1495 |
"loss": 0.3846,
|
| 1496 |
"num_input_tokens_seen": 1260977,
|
| 1497 |
"step": 1860
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1498 |
}
|
| 1499 |
],
|
| 1500 |
"logging_steps": 10,
|
| 1501 |
"max_steps": 2795,
|
| 1502 |
-
"num_input_tokens_seen":
|
| 1503 |
"num_train_epochs": 1,
|
| 1504 |
"save_steps": 20,
|
| 1505 |
-
"total_flos": 2.
|
| 1506 |
"train_batch_size": 1,
|
| 1507 |
"trial_name": null,
|
| 1508 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.6726296958855098,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 1880,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 1495 |
"loss": 0.3846,
|
| 1496 |
"num_input_tokens_seen": 1260977,
|
| 1497 |
"step": 1860
|
| 1498 |
+
},
|
| 1499 |
+
{
|
| 1500 |
+
"epoch": 0.669051878354204,
|
| 1501 |
+
"grad_norm": 0.3520377576351166,
|
| 1502 |
+
"learning_rate": 6.618962432915921e-05,
|
| 1503 |
+
"loss": 0.406,
|
| 1504 |
+
"num_input_tokens_seen": 1268015,
|
| 1505 |
+
"step": 1870
|
| 1506 |
+
},
|
| 1507 |
+
{
|
| 1508 |
+
"epoch": 0.6726296958855098,
|
| 1509 |
+
"grad_norm": 0.48654794692993164,
|
| 1510 |
+
"learning_rate": 6.547406082289803e-05,
|
| 1511 |
+
"loss": 0.3948,
|
| 1512 |
+
"num_input_tokens_seen": 1273256,
|
| 1513 |
+
"step": 1880
|
| 1514 |
}
|
| 1515 |
],
|
| 1516 |
"logging_steps": 10,
|
| 1517 |
"max_steps": 2795,
|
| 1518 |
+
"num_input_tokens_seen": 1273256,
|
| 1519 |
"num_train_epochs": 1,
|
| 1520 |
"save_steps": 20,
|
| 1521 |
+
"total_flos": 2.863103107394765e+16,
|
| 1522 |
"train_batch_size": 1,
|
| 1523 |
"trial_name": null,
|
| 1524 |
"trial_params": null
|