Commit Β·
0c60668
1
Parent(s): a2a6b42
Training in progress, step 1200
Browse files- {checkpoint-800 β checkpoint-1200}/config.json +0 -0
- {checkpoint-800 β checkpoint-1200}/generation_config.json +0 -0
- {checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +1 -1
- {checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +1 -1
- {checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +1 -1
- {checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +1 -1
- {checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +1 -1
- {checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +1 -1
- {checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +1 -1
- {checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +1 -1
- {checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/zero_pp_rank_0_mp_rank_00_model_states.pt +1 -1
- {checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/zero_pp_rank_1_mp_rank_00_model_states.pt +1 -1
- {checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/zero_pp_rank_2_mp_rank_00_model_states.pt +1 -1
- {checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/zero_pp_rank_3_mp_rank_00_model_states.pt +1 -1
- {checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/zero_pp_rank_4_mp_rank_00_model_states.pt +1 -1
- {checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/zero_pp_rank_5_mp_rank_00_model_states.pt +1 -1
- {checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/zero_pp_rank_6_mp_rank_00_model_states.pt +1 -1
- {checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/zero_pp_rank_7_mp_rank_00_model_states.pt +1 -1
- checkpoint-1200/latest +1 -0
- {checkpoint-800 β checkpoint-1200}/model-00001-of-00002.safetensors +1 -1
- {checkpoint-800 β checkpoint-1200}/model-00002-of-00002.safetensors +1 -1
- {checkpoint-800 β checkpoint-1200}/model.safetensors.index.json +0 -0
- {checkpoint-800 β checkpoint-1200}/rng_state_0.pth +0 -0
- {checkpoint-800 β checkpoint-1200}/rng_state_1.pth +0 -0
- {checkpoint-800 β checkpoint-1200}/rng_state_2.pth +0 -0
- {checkpoint-800 β checkpoint-1200}/rng_state_3.pth +0 -0
- {checkpoint-800 β checkpoint-1200}/rng_state_4.pth +0 -0
- {checkpoint-800 β checkpoint-1200}/rng_state_5.pth +0 -0
- {checkpoint-800 β checkpoint-1200}/rng_state_6.pth +0 -0
- {checkpoint-800 β checkpoint-1200}/rng_state_7.pth +0 -0
- {checkpoint-800 β checkpoint-1200}/special_tokens_map.json +0 -0
- {checkpoint-800 β checkpoint-1200}/tokenizer.json +0 -0
- {checkpoint-800 β checkpoint-1200}/tokenizer.model +0 -0
- {checkpoint-800 β checkpoint-1200}/tokenizer_config.json +0 -0
- {checkpoint-800 β checkpoint-1200}/trainer_state.json +243 -3
- {checkpoint-800 β checkpoint-1200}/training_args.bin +0 -0
- {checkpoint-800 β checkpoint-1200}/zero_to_fp32.py +0 -0
- checkpoint-800/latest +0 -1
- runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0 +2 -2
{checkpoint-800 β checkpoint-1200}/config.json
RENAMED
|
File without changes
|
{checkpoint-800 β checkpoint-1200}/generation_config.json
RENAMED
|
File without changes
|
{checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 10107626487
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4fb262812656b3c83b6881b1dd67c5767d8042011235ae6468454646240b5c07
|
| 3 |
size 10107626487
|
{checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 10107626487
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7df89141b87c7b3401cf3dc6d0f0e35b07ee4007b03d264482fa67df22e6e386
|
| 3 |
size 10107626487
|
{checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 10107626487
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:304104b8210689a208414ce338a1a8170d07fd50ede690c06cc17f711af7d23b
|
| 3 |
size 10107626487
|
{checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 10107626487
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:11e7a70ad03df41c4584486b35cde5c60d0c66a22c3f5c4048017d438d1d7dd5
|
| 3 |
size 10107626487
|
{checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 10107626487
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:11e5c69f087a06dfa4218e7205637830d59e2eb6ab4496e20b6099fcccbc9b07
|
| 3 |
size 10107626487
|
{checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 10107626487
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37dd48878dd48bb7d2a1f2ea052444d84a13e6f4cbab8f1d27904757ae116bd0
|
| 3 |
size 10107626487
|
{checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 10107626487
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4d614936212979e10ebba5d41469df3f0f5d966c2da7d3029cfa1ec1e9dd897a
|
| 3 |
size 10107626487
|
{checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 10107626487
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3266f649233e5304749c7c1f07769559935f00b03922e28bff0442991ec9388a
|
| 3 |
size 10107626487
|
{checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/zero_pp_rank_0_mp_rank_00_model_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 168086
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ee838bbe2d8ed873c80d9a5740fcb7f54c91e439e0f4bdde882b4456ea232c7c
|
| 3 |
size 168086
|
{checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/zero_pp_rank_1_mp_rank_00_model_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 168086
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4326e6a49ee2354fb513f4f301a33c21ffb84a9a51a4e3d0ed33bc12d02bd10b
|
| 3 |
size 168086
|
{checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/zero_pp_rank_2_mp_rank_00_model_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 168086
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6c42d8a6eeae2786121f541dc5d4a06728de74fdd785c4e82bf7be14708e787
|
| 3 |
size 168086
|
{checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/zero_pp_rank_3_mp_rank_00_model_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 168086
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca8f493f0fa1429963ac77a6e815a9f189220490b4f4457f7be2b942cdc01726
|
| 3 |
size 168086
|
{checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/zero_pp_rank_4_mp_rank_00_model_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 168086
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c1911bcfb001e09fe12e3ee69beaea09f5f021dc647d8b1040b0d66821cc448
|
| 3 |
size 168086
|
{checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/zero_pp_rank_5_mp_rank_00_model_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 168086
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6c7c6cda3f5f3d82146b47b1ad4b836ecfff16ca2e586ea073dd665626f8757e
|
| 3 |
size 168086
|
{checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/zero_pp_rank_6_mp_rank_00_model_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 168086
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:23cb97ab9d849ada9216fd67b47e13c5ebfbeaeb019c59c09a9cb3298aa097e6
|
| 3 |
size 168086
|
{checkpoint-800/global_step800 β checkpoint-1200/global_step1200}/zero_pp_rank_7_mp_rank_00_model_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 168086
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5bb17570ce5f68b67d4232815d8eb3b18765ca6071753b7ef99040ac8710b3ec
|
| 3 |
size 168086
|
checkpoint-1200/latest
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
global_step1200
|
{checkpoint-800 β checkpoint-1200}/model-00001-of-00002.safetensors
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 9976576392
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:050176f4baa1bd8c33427c5122e69e830566029acf7a4cca7415b3b41047fcbd
|
| 3 |
size 9976576392
|
{checkpoint-800 β checkpoint-1200}/model-00002-of-00002.safetensors
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3500296504
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:30aa4a97478659826d8dbb57cb3e6ba545204647306edfcfec56c36a5b191c85
|
| 3 |
size 3500296504
|
{checkpoint-800 β checkpoint-1200}/model.safetensors.index.json
RENAMED
|
File without changes
|
{checkpoint-800 β checkpoint-1200}/rng_state_0.pth
RENAMED
|
File without changes
|
{checkpoint-800 β checkpoint-1200}/rng_state_1.pth
RENAMED
|
File without changes
|
{checkpoint-800 β checkpoint-1200}/rng_state_2.pth
RENAMED
|
File without changes
|
{checkpoint-800 β checkpoint-1200}/rng_state_3.pth
RENAMED
|
File without changes
|
{checkpoint-800 β checkpoint-1200}/rng_state_4.pth
RENAMED
|
File without changes
|
{checkpoint-800 β checkpoint-1200}/rng_state_5.pth
RENAMED
|
File without changes
|
{checkpoint-800 β checkpoint-1200}/rng_state_6.pth
RENAMED
|
File without changes
|
{checkpoint-800 β checkpoint-1200}/rng_state_7.pth
RENAMED
|
File without changes
|
{checkpoint-800 β checkpoint-1200}/special_tokens_map.json
RENAMED
|
File without changes
|
{checkpoint-800 β checkpoint-1200}/tokenizer.json
RENAMED
|
File without changes
|
{checkpoint-800 β checkpoint-1200}/tokenizer.model
RENAMED
|
File without changes
|
{checkpoint-800 β checkpoint-1200}/tokenizer_config.json
RENAMED
|
File without changes
|
{checkpoint-800 β checkpoint-1200}/trainer_state.json
RENAMED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -486,11 +486,251 @@
|
|
| 486 |
"learning_rate": 0.0003,
|
| 487 |
"loss": 1.118,
|
| 488 |
"step": 800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 489 |
}
|
| 490 |
],
|
| 491 |
"max_steps": 3000,
|
| 492 |
"num_train_epochs": 9223372036854775807,
|
| 493 |
-
"total_flos":
|
| 494 |
"trial_name": null,
|
| 495 |
"trial_params": null
|
| 496 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.4,
|
| 5 |
+
"global_step": 1200,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 486 |
"learning_rate": 0.0003,
|
| 487 |
"loss": 1.118,
|
| 488 |
"step": 800
|
| 489 |
+
},
|
| 490 |
+
{
|
| 491 |
+
"epoch": 0.27,
|
| 492 |
+
"learning_rate": 0.0003,
|
| 493 |
+
"loss": 1.1228,
|
| 494 |
+
"step": 810
|
| 495 |
+
},
|
| 496 |
+
{
|
| 497 |
+
"epoch": 0.27,
|
| 498 |
+
"learning_rate": 0.0003,
|
| 499 |
+
"loss": 1.1339,
|
| 500 |
+
"step": 820
|
| 501 |
+
},
|
| 502 |
+
{
|
| 503 |
+
"epoch": 0.28,
|
| 504 |
+
"learning_rate": 0.0003,
|
| 505 |
+
"loss": 1.0853,
|
| 506 |
+
"step": 830
|
| 507 |
+
},
|
| 508 |
+
{
|
| 509 |
+
"epoch": 0.28,
|
| 510 |
+
"learning_rate": 0.0003,
|
| 511 |
+
"loss": 1.0676,
|
| 512 |
+
"step": 840
|
| 513 |
+
},
|
| 514 |
+
{
|
| 515 |
+
"epoch": 0.28,
|
| 516 |
+
"learning_rate": 0.0003,
|
| 517 |
+
"loss": 1.0905,
|
| 518 |
+
"step": 850
|
| 519 |
+
},
|
| 520 |
+
{
|
| 521 |
+
"epoch": 0.29,
|
| 522 |
+
"learning_rate": 0.0003,
|
| 523 |
+
"loss": 1.076,
|
| 524 |
+
"step": 860
|
| 525 |
+
},
|
| 526 |
+
{
|
| 527 |
+
"epoch": 0.29,
|
| 528 |
+
"learning_rate": 0.0003,
|
| 529 |
+
"loss": 1.0202,
|
| 530 |
+
"step": 870
|
| 531 |
+
},
|
| 532 |
+
{
|
| 533 |
+
"epoch": 0.29,
|
| 534 |
+
"learning_rate": 0.0003,
|
| 535 |
+
"loss": 1.0123,
|
| 536 |
+
"step": 880
|
| 537 |
+
},
|
| 538 |
+
{
|
| 539 |
+
"epoch": 0.3,
|
| 540 |
+
"learning_rate": 0.0003,
|
| 541 |
+
"loss": 0.9863,
|
| 542 |
+
"step": 890
|
| 543 |
+
},
|
| 544 |
+
{
|
| 545 |
+
"epoch": 0.3,
|
| 546 |
+
"learning_rate": 0.0003,
|
| 547 |
+
"loss": 0.9347,
|
| 548 |
+
"step": 900
|
| 549 |
+
},
|
| 550 |
+
{
|
| 551 |
+
"epoch": 0.3,
|
| 552 |
+
"learning_rate": 0.0003,
|
| 553 |
+
"loss": 0.9416,
|
| 554 |
+
"step": 910
|
| 555 |
+
},
|
| 556 |
+
{
|
| 557 |
+
"epoch": 0.31,
|
| 558 |
+
"learning_rate": 0.0003,
|
| 559 |
+
"loss": 0.9165,
|
| 560 |
+
"step": 920
|
| 561 |
+
},
|
| 562 |
+
{
|
| 563 |
+
"epoch": 0.31,
|
| 564 |
+
"learning_rate": 0.0003,
|
| 565 |
+
"loss": 0.8996,
|
| 566 |
+
"step": 930
|
| 567 |
+
},
|
| 568 |
+
{
|
| 569 |
+
"epoch": 0.31,
|
| 570 |
+
"learning_rate": 0.0003,
|
| 571 |
+
"loss": 0.8673,
|
| 572 |
+
"step": 940
|
| 573 |
+
},
|
| 574 |
+
{
|
| 575 |
+
"epoch": 0.32,
|
| 576 |
+
"learning_rate": 0.0003,
|
| 577 |
+
"loss": 0.8449,
|
| 578 |
+
"step": 950
|
| 579 |
+
},
|
| 580 |
+
{
|
| 581 |
+
"epoch": 0.32,
|
| 582 |
+
"learning_rate": 0.0003,
|
| 583 |
+
"loss": 0.8468,
|
| 584 |
+
"step": 960
|
| 585 |
+
},
|
| 586 |
+
{
|
| 587 |
+
"epoch": 0.32,
|
| 588 |
+
"learning_rate": 0.0003,
|
| 589 |
+
"loss": 0.817,
|
| 590 |
+
"step": 970
|
| 591 |
+
},
|
| 592 |
+
{
|
| 593 |
+
"epoch": 0.33,
|
| 594 |
+
"learning_rate": 0.0003,
|
| 595 |
+
"loss": 0.7947,
|
| 596 |
+
"step": 980
|
| 597 |
+
},
|
| 598 |
+
{
|
| 599 |
+
"epoch": 0.33,
|
| 600 |
+
"learning_rate": 0.0003,
|
| 601 |
+
"loss": 0.7706,
|
| 602 |
+
"step": 990
|
| 603 |
+
},
|
| 604 |
+
{
|
| 605 |
+
"epoch": 0.33,
|
| 606 |
+
"learning_rate": 0.0003,
|
| 607 |
+
"loss": 0.7357,
|
| 608 |
+
"step": 1000
|
| 609 |
+
},
|
| 610 |
+
{
|
| 611 |
+
"epoch": 0.34,
|
| 612 |
+
"learning_rate": 0.0003,
|
| 613 |
+
"loss": 0.6983,
|
| 614 |
+
"step": 1010
|
| 615 |
+
},
|
| 616 |
+
{
|
| 617 |
+
"epoch": 0.34,
|
| 618 |
+
"learning_rate": 0.0003,
|
| 619 |
+
"loss": 0.6989,
|
| 620 |
+
"step": 1020
|
| 621 |
+
},
|
| 622 |
+
{
|
| 623 |
+
"epoch": 0.34,
|
| 624 |
+
"learning_rate": 0.0003,
|
| 625 |
+
"loss": 0.6692,
|
| 626 |
+
"step": 1030
|
| 627 |
+
},
|
| 628 |
+
{
|
| 629 |
+
"epoch": 0.35,
|
| 630 |
+
"learning_rate": 0.0003,
|
| 631 |
+
"loss": 0.6553,
|
| 632 |
+
"step": 1040
|
| 633 |
+
},
|
| 634 |
+
{
|
| 635 |
+
"epoch": 0.35,
|
| 636 |
+
"learning_rate": 0.0003,
|
| 637 |
+
"loss": 0.6272,
|
| 638 |
+
"step": 1050
|
| 639 |
+
},
|
| 640 |
+
{
|
| 641 |
+
"epoch": 0.35,
|
| 642 |
+
"learning_rate": 0.0003,
|
| 643 |
+
"loss": 0.6092,
|
| 644 |
+
"step": 1060
|
| 645 |
+
},
|
| 646 |
+
{
|
| 647 |
+
"epoch": 0.36,
|
| 648 |
+
"learning_rate": 0.0003,
|
| 649 |
+
"loss": 0.604,
|
| 650 |
+
"step": 1070
|
| 651 |
+
},
|
| 652 |
+
{
|
| 653 |
+
"epoch": 0.36,
|
| 654 |
+
"learning_rate": 0.0003,
|
| 655 |
+
"loss": 0.6494,
|
| 656 |
+
"step": 1080
|
| 657 |
+
},
|
| 658 |
+
{
|
| 659 |
+
"epoch": 0.36,
|
| 660 |
+
"learning_rate": 0.0003,
|
| 661 |
+
"loss": 0.6155,
|
| 662 |
+
"step": 1090
|
| 663 |
+
},
|
| 664 |
+
{
|
| 665 |
+
"epoch": 0.37,
|
| 666 |
+
"learning_rate": 0.0003,
|
| 667 |
+
"loss": 0.5713,
|
| 668 |
+
"step": 1100
|
| 669 |
+
},
|
| 670 |
+
{
|
| 671 |
+
"epoch": 0.37,
|
| 672 |
+
"learning_rate": 0.0003,
|
| 673 |
+
"loss": 0.5358,
|
| 674 |
+
"step": 1110
|
| 675 |
+
},
|
| 676 |
+
{
|
| 677 |
+
"epoch": 0.37,
|
| 678 |
+
"learning_rate": 0.0003,
|
| 679 |
+
"loss": 0.5005,
|
| 680 |
+
"step": 1120
|
| 681 |
+
},
|
| 682 |
+
{
|
| 683 |
+
"epoch": 0.38,
|
| 684 |
+
"learning_rate": 0.0003,
|
| 685 |
+
"loss": 0.483,
|
| 686 |
+
"step": 1130
|
| 687 |
+
},
|
| 688 |
+
{
|
| 689 |
+
"epoch": 0.38,
|
| 690 |
+
"learning_rate": 0.0003,
|
| 691 |
+
"loss": 0.4736,
|
| 692 |
+
"step": 1140
|
| 693 |
+
},
|
| 694 |
+
{
|
| 695 |
+
"epoch": 0.38,
|
| 696 |
+
"learning_rate": 0.0003,
|
| 697 |
+
"loss": 0.4585,
|
| 698 |
+
"step": 1150
|
| 699 |
+
},
|
| 700 |
+
{
|
| 701 |
+
"epoch": 0.39,
|
| 702 |
+
"learning_rate": 0.0003,
|
| 703 |
+
"loss": 0.4442,
|
| 704 |
+
"step": 1160
|
| 705 |
+
},
|
| 706 |
+
{
|
| 707 |
+
"epoch": 0.39,
|
| 708 |
+
"learning_rate": 0.0003,
|
| 709 |
+
"loss": 0.4392,
|
| 710 |
+
"step": 1170
|
| 711 |
+
},
|
| 712 |
+
{
|
| 713 |
+
"epoch": 0.39,
|
| 714 |
+
"learning_rate": 0.0003,
|
| 715 |
+
"loss": 0.4084,
|
| 716 |
+
"step": 1180
|
| 717 |
+
},
|
| 718 |
+
{
|
| 719 |
+
"epoch": 0.4,
|
| 720 |
+
"learning_rate": 0.0003,
|
| 721 |
+
"loss": 0.4104,
|
| 722 |
+
"step": 1190
|
| 723 |
+
},
|
| 724 |
+
{
|
| 725 |
+
"epoch": 0.4,
|
| 726 |
+
"learning_rate": 0.0003,
|
| 727 |
+
"loss": 0.3855,
|
| 728 |
+
"step": 1200
|
| 729 |
}
|
| 730 |
],
|
| 731 |
"max_steps": 3000,
|
| 732 |
"num_train_epochs": 9223372036854775807,
|
| 733 |
+
"total_flos": 502511173632000.0,
|
| 734 |
"trial_name": null,
|
| 735 |
"trial_params": null
|
| 736 |
}
|
{checkpoint-800 β checkpoint-1200}/training_args.bin
RENAMED
|
File without changes
|
{checkpoint-800 β checkpoint-1200}/zero_to_fp32.py
RENAMED
|
File without changes
|
checkpoint-800/latest
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
global_step800
|
|
|
|
|
|
runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80c6f49f860f2ea4ccc6f90490738df78763e614072a048d7839d11aebd11a94
|
| 3 |
+
size 22991
|