Commit Β·
dd5b800
1
Parent(s): 42d082a
Training in progress, step 1600
Browse files- checkpoint-1200/latest +0 -1
- {checkpoint-1200 β checkpoint-1600}/config.json +0 -0
- {checkpoint-1200 β checkpoint-1600}/generation_config.json +0 -0
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_0_mp_rank_00_model_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_1_mp_rank_00_model_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_2_mp_rank_00_model_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_3_mp_rank_00_model_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_4_mp_rank_00_model_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_5_mp_rank_00_model_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_6_mp_rank_00_model_states.pt +1 -1
- {checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_7_mp_rank_00_model_states.pt +1 -1
- checkpoint-1600/latest +1 -0
- {checkpoint-1200 β checkpoint-1600}/model-00001-of-00002.safetensors +1 -1
- {checkpoint-1200 β checkpoint-1600}/model-00002-of-00002.safetensors +1 -1
- {checkpoint-1200 β checkpoint-1600}/model.safetensors.index.json +0 -0
- {checkpoint-1200 β checkpoint-1600}/rng_state_0.pth +0 -0
- {checkpoint-1200 β checkpoint-1600}/rng_state_1.pth +0 -0
- {checkpoint-1200 β checkpoint-1600}/rng_state_2.pth +0 -0
- {checkpoint-1200 β checkpoint-1600}/rng_state_3.pth +0 -0
- {checkpoint-1200 β checkpoint-1600}/rng_state_4.pth +0 -0
- {checkpoint-1200 β checkpoint-1600}/rng_state_5.pth +0 -0
- {checkpoint-1200 β checkpoint-1600}/rng_state_6.pth +0 -0
- {checkpoint-1200 β checkpoint-1600}/rng_state_7.pth +0 -0
- {checkpoint-1200 β checkpoint-1600}/special_tokens_map.json +0 -0
- {checkpoint-1200 β checkpoint-1600}/tokenizer.json +0 -0
- {checkpoint-1200 β checkpoint-1600}/tokenizer.model +0 -0
- {checkpoint-1200 β checkpoint-1600}/tokenizer_config.json +0 -0
- {checkpoint-1200 β checkpoint-1600}/trainer_state.json +243 -3
- {checkpoint-1200 β checkpoint-1600}/training_args.bin +0 -0
- {checkpoint-1200 β checkpoint-1600}/zero_to_fp32.py +0 -0
- runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0 +2 -2
checkpoint-1200/latest
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
global_step1200
|
|
|
|
|
|
{checkpoint-1200 β checkpoint-1600}/config.json
RENAMED
|
File without changes
|
{checkpoint-1200 β checkpoint-1600}/generation_config.json
RENAMED
|
File without changes
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 10107626487
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1a340884351df9f228d3ba5317543b112e21edc4d1572228a4abd7118e419a6b
|
| 3 |
size 10107626487
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 10107626487
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:27f75017ef2953071a44cde3f1155976fb76dde43b6395726f2683ee1ec2c250
|
| 3 |
size 10107626487
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 10107626487
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fdfad233cd4ea82dc4f02a8e5b074984bb6a29a18f6262838ec7b7f1630e0ac8
|
| 3 |
size 10107626487
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 10107626487
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5919bc8e571e808ff3b2bcb9ad7014597b2ee31e29993555c14491a33d11f095
|
| 3 |
size 10107626487
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 10107626487
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c354eb1ceeeb29fb3a5f70328aa4850898429dd56b0f49cfb1b15ac4000f975
|
| 3 |
size 10107626487
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 10107626487
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fead0347839e08434f2eed3165dad3e70691836ea6e9cf64ef56551331997bca
|
| 3 |
size 10107626487
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 10107626487
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8ffe9fbb7a126ec39112724337caecc4eae7d8e492d63490d8567816ba07929
|
| 3 |
size 10107626487
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 10107626487
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:96e21de250ee92a08fe926befd4f33f363a7c9bd7ca8bbb6aef12bce9df04133
|
| 3 |
size 10107626487
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_0_mp_rank_00_model_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 168086
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91599707db75d7b03814882cbcbb8b854875d9e88102c0831463b1e42ed49ab1
|
| 3 |
size 168086
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_1_mp_rank_00_model_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 168086
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6831020de473339b10b00488798cd0193af1763454bfa118f482faf07d70a44d
|
| 3 |
size 168086
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_2_mp_rank_00_model_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 168086
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ddb9abf6f8ac61756727e9fcc585fd5a1a70c1a20ca3eb82c79388ae9fe06779
|
| 3 |
size 168086
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_3_mp_rank_00_model_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 168086
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a24b271888c79b57824d2821f84a48dcb6381041a8b0c6c06463ebd491bb032d
|
| 3 |
size 168086
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_4_mp_rank_00_model_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 168086
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:93af96588d797250ee8a47897e3e963aa391dabb815f6714e949496001e60f03
|
| 3 |
size 168086
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_5_mp_rank_00_model_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 168086
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:288e31b1bddda038ec9427bc5bb7b953b1d58f1e7c11dbbf18e74dc37512d377
|
| 3 |
size 168086
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_6_mp_rank_00_model_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 168086
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bab9e0ea1b5660b9e2221c0e11c6a20f979a0b1641b3514c45386900d1001699
|
| 3 |
size 168086
|
{checkpoint-1200/global_step1200 β checkpoint-1600/global_step1600}/zero_pp_rank_7_mp_rank_00_model_states.pt
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 168086
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:122e850bb9d0ec838955d8d4d6eabbb7c4437e56754813e6645e6ab2a32d64c8
|
| 3 |
size 168086
|
checkpoint-1600/latest
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
global_step1600
|
{checkpoint-1200 β checkpoint-1600}/model-00001-of-00002.safetensors
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 9976576392
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1bb8059dad24ddb21134712cb2d7c09e74afdb9c0ad8407d026104190bec0ae3
|
| 3 |
size 9976576392
|
{checkpoint-1200 β checkpoint-1600}/model-00002-of-00002.safetensors
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3500296504
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf47ea3281e028d797e91e4208dafdd29c9912946218b26b9fc740a42513cda2
|
| 3 |
size 3500296504
|
{checkpoint-1200 β checkpoint-1600}/model.safetensors.index.json
RENAMED
|
File without changes
|
{checkpoint-1200 β checkpoint-1600}/rng_state_0.pth
RENAMED
|
File without changes
|
{checkpoint-1200 β checkpoint-1600}/rng_state_1.pth
RENAMED
|
File without changes
|
{checkpoint-1200 β checkpoint-1600}/rng_state_2.pth
RENAMED
|
File without changes
|
{checkpoint-1200 β checkpoint-1600}/rng_state_3.pth
RENAMED
|
File without changes
|
{checkpoint-1200 β checkpoint-1600}/rng_state_4.pth
RENAMED
|
File without changes
|
{checkpoint-1200 β checkpoint-1600}/rng_state_5.pth
RENAMED
|
File without changes
|
{checkpoint-1200 β checkpoint-1600}/rng_state_6.pth
RENAMED
|
File without changes
|
{checkpoint-1200 β checkpoint-1600}/rng_state_7.pth
RENAMED
|
File without changes
|
{checkpoint-1200 β checkpoint-1600}/special_tokens_map.json
RENAMED
|
File without changes
|
{checkpoint-1200 β checkpoint-1600}/tokenizer.json
RENAMED
|
File without changes
|
{checkpoint-1200 β checkpoint-1600}/tokenizer.model
RENAMED
|
File without changes
|
{checkpoint-1200 β checkpoint-1600}/tokenizer_config.json
RENAMED
|
File without changes
|
{checkpoint-1200 β checkpoint-1600}/trainer_state.json
RENAMED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -726,11 +726,251 @@
|
|
| 726 |
"learning_rate": 0.0003,
|
| 727 |
"loss": 0.3855,
|
| 728 |
"step": 1200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 729 |
}
|
| 730 |
],
|
| 731 |
"max_steps": 3000,
|
| 732 |
"num_train_epochs": 9223372036854775807,
|
| 733 |
-
"total_flos":
|
| 734 |
"trial_name": null,
|
| 735 |
"trial_params": null
|
| 736 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.5333333333333333,
|
| 5 |
+
"global_step": 1600,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 726 |
"learning_rate": 0.0003,
|
| 727 |
"loss": 0.3855,
|
| 728 |
"step": 1200
|
| 729 |
+
},
|
| 730 |
+
{
|
| 731 |
+
"epoch": 0.4,
|
| 732 |
+
"learning_rate": 0.0003,
|
| 733 |
+
"loss": 0.3662,
|
| 734 |
+
"step": 1210
|
| 735 |
+
},
|
| 736 |
+
{
|
| 737 |
+
"epoch": 0.41,
|
| 738 |
+
"learning_rate": 0.0003,
|
| 739 |
+
"loss": 0.3727,
|
| 740 |
+
"step": 1220
|
| 741 |
+
},
|
| 742 |
+
{
|
| 743 |
+
"epoch": 0.41,
|
| 744 |
+
"learning_rate": 0.0003,
|
| 745 |
+
"loss": 0.3563,
|
| 746 |
+
"step": 1230
|
| 747 |
+
},
|
| 748 |
+
{
|
| 749 |
+
"epoch": 0.41,
|
| 750 |
+
"learning_rate": 0.0003,
|
| 751 |
+
"loss": 0.3297,
|
| 752 |
+
"step": 1240
|
| 753 |
+
},
|
| 754 |
+
{
|
| 755 |
+
"epoch": 0.42,
|
| 756 |
+
"learning_rate": 0.0003,
|
| 757 |
+
"loss": 0.314,
|
| 758 |
+
"step": 1250
|
| 759 |
+
},
|
| 760 |
+
{
|
| 761 |
+
"epoch": 0.42,
|
| 762 |
+
"learning_rate": 0.0003,
|
| 763 |
+
"loss": 0.2996,
|
| 764 |
+
"step": 1260
|
| 765 |
+
},
|
| 766 |
+
{
|
| 767 |
+
"epoch": 0.42,
|
| 768 |
+
"learning_rate": 0.0003,
|
| 769 |
+
"loss": 0.3,
|
| 770 |
+
"step": 1270
|
| 771 |
+
},
|
| 772 |
+
{
|
| 773 |
+
"epoch": 0.43,
|
| 774 |
+
"learning_rate": 0.0003,
|
| 775 |
+
"loss": 0.2773,
|
| 776 |
+
"step": 1280
|
| 777 |
+
},
|
| 778 |
+
{
|
| 779 |
+
"epoch": 0.43,
|
| 780 |
+
"learning_rate": 0.0003,
|
| 781 |
+
"loss": 0.2665,
|
| 782 |
+
"step": 1290
|
| 783 |
+
},
|
| 784 |
+
{
|
| 785 |
+
"epoch": 0.43,
|
| 786 |
+
"learning_rate": 0.0003,
|
| 787 |
+
"loss": 0.2646,
|
| 788 |
+
"step": 1300
|
| 789 |
+
},
|
| 790 |
+
{
|
| 791 |
+
"epoch": 0.44,
|
| 792 |
+
"learning_rate": 0.0003,
|
| 793 |
+
"loss": 0.2406,
|
| 794 |
+
"step": 1310
|
| 795 |
+
},
|
| 796 |
+
{
|
| 797 |
+
"epoch": 0.44,
|
| 798 |
+
"learning_rate": 0.0003,
|
| 799 |
+
"loss": 0.2206,
|
| 800 |
+
"step": 1320
|
| 801 |
+
},
|
| 802 |
+
{
|
| 803 |
+
"epoch": 0.44,
|
| 804 |
+
"learning_rate": 0.0003,
|
| 805 |
+
"loss": 0.2325,
|
| 806 |
+
"step": 1330
|
| 807 |
+
},
|
| 808 |
+
{
|
| 809 |
+
"epoch": 0.45,
|
| 810 |
+
"learning_rate": 0.0003,
|
| 811 |
+
"loss": 0.2152,
|
| 812 |
+
"step": 1340
|
| 813 |
+
},
|
| 814 |
+
{
|
| 815 |
+
"epoch": 0.45,
|
| 816 |
+
"learning_rate": 0.0003,
|
| 817 |
+
"loss": 0.2011,
|
| 818 |
+
"step": 1350
|
| 819 |
+
},
|
| 820 |
+
{
|
| 821 |
+
"epoch": 0.45,
|
| 822 |
+
"learning_rate": 0.0003,
|
| 823 |
+
"loss": 0.1953,
|
| 824 |
+
"step": 1360
|
| 825 |
+
},
|
| 826 |
+
{
|
| 827 |
+
"epoch": 0.46,
|
| 828 |
+
"learning_rate": 0.0003,
|
| 829 |
+
"loss": 0.1862,
|
| 830 |
+
"step": 1370
|
| 831 |
+
},
|
| 832 |
+
{
|
| 833 |
+
"epoch": 0.46,
|
| 834 |
+
"learning_rate": 0.0003,
|
| 835 |
+
"loss": 0.1814,
|
| 836 |
+
"step": 1380
|
| 837 |
+
},
|
| 838 |
+
{
|
| 839 |
+
"epoch": 0.46,
|
| 840 |
+
"learning_rate": 0.0003,
|
| 841 |
+
"loss": 0.1677,
|
| 842 |
+
"step": 1390
|
| 843 |
+
},
|
| 844 |
+
{
|
| 845 |
+
"epoch": 0.47,
|
| 846 |
+
"learning_rate": 0.0003,
|
| 847 |
+
"loss": 0.1657,
|
| 848 |
+
"step": 1400
|
| 849 |
+
},
|
| 850 |
+
{
|
| 851 |
+
"epoch": 0.47,
|
| 852 |
+
"learning_rate": 0.0003,
|
| 853 |
+
"loss": 0.155,
|
| 854 |
+
"step": 1410
|
| 855 |
+
},
|
| 856 |
+
{
|
| 857 |
+
"epoch": 0.47,
|
| 858 |
+
"learning_rate": 0.0003,
|
| 859 |
+
"loss": 0.1517,
|
| 860 |
+
"step": 1420
|
| 861 |
+
},
|
| 862 |
+
{
|
| 863 |
+
"epoch": 0.48,
|
| 864 |
+
"learning_rate": 0.0003,
|
| 865 |
+
"loss": 0.1481,
|
| 866 |
+
"step": 1430
|
| 867 |
+
},
|
| 868 |
+
{
|
| 869 |
+
"epoch": 0.48,
|
| 870 |
+
"learning_rate": 0.0003,
|
| 871 |
+
"loss": 0.1396,
|
| 872 |
+
"step": 1440
|
| 873 |
+
},
|
| 874 |
+
{
|
| 875 |
+
"epoch": 0.48,
|
| 876 |
+
"learning_rate": 0.0003,
|
| 877 |
+
"loss": 0.1301,
|
| 878 |
+
"step": 1450
|
| 879 |
+
},
|
| 880 |
+
{
|
| 881 |
+
"epoch": 0.49,
|
| 882 |
+
"learning_rate": 0.0003,
|
| 883 |
+
"loss": 0.1274,
|
| 884 |
+
"step": 1460
|
| 885 |
+
},
|
| 886 |
+
{
|
| 887 |
+
"epoch": 0.49,
|
| 888 |
+
"learning_rate": 0.0003,
|
| 889 |
+
"loss": 0.1244,
|
| 890 |
+
"step": 1470
|
| 891 |
+
},
|
| 892 |
+
{
|
| 893 |
+
"epoch": 0.49,
|
| 894 |
+
"learning_rate": 0.0003,
|
| 895 |
+
"loss": 0.1172,
|
| 896 |
+
"step": 1480
|
| 897 |
+
},
|
| 898 |
+
{
|
| 899 |
+
"epoch": 0.5,
|
| 900 |
+
"learning_rate": 0.0003,
|
| 901 |
+
"loss": 0.1177,
|
| 902 |
+
"step": 1490
|
| 903 |
+
},
|
| 904 |
+
{
|
| 905 |
+
"epoch": 0.5,
|
| 906 |
+
"learning_rate": 0.0003,
|
| 907 |
+
"loss": 0.1118,
|
| 908 |
+
"step": 1500
|
| 909 |
+
},
|
| 910 |
+
{
|
| 911 |
+
"epoch": 0.5,
|
| 912 |
+
"learning_rate": 0.0003,
|
| 913 |
+
"loss": 0.113,
|
| 914 |
+
"step": 1510
|
| 915 |
+
},
|
| 916 |
+
{
|
| 917 |
+
"epoch": 0.51,
|
| 918 |
+
"learning_rate": 0.0003,
|
| 919 |
+
"loss": 0.3687,
|
| 920 |
+
"step": 1520
|
| 921 |
+
},
|
| 922 |
+
{
|
| 923 |
+
"epoch": 0.51,
|
| 924 |
+
"learning_rate": 0.0003,
|
| 925 |
+
"loss": 0.1801,
|
| 926 |
+
"step": 1530
|
| 927 |
+
},
|
| 928 |
+
{
|
| 929 |
+
"epoch": 0.51,
|
| 930 |
+
"learning_rate": 0.0003,
|
| 931 |
+
"loss": 0.1534,
|
| 932 |
+
"step": 1540
|
| 933 |
+
},
|
| 934 |
+
{
|
| 935 |
+
"epoch": 0.52,
|
| 936 |
+
"learning_rate": 0.0003,
|
| 937 |
+
"loss": 0.1248,
|
| 938 |
+
"step": 1550
|
| 939 |
+
},
|
| 940 |
+
{
|
| 941 |
+
"epoch": 0.52,
|
| 942 |
+
"learning_rate": 0.0003,
|
| 943 |
+
"loss": 0.1091,
|
| 944 |
+
"step": 1560
|
| 945 |
+
},
|
| 946 |
+
{
|
| 947 |
+
"epoch": 0.52,
|
| 948 |
+
"learning_rate": 0.0003,
|
| 949 |
+
"loss": 0.0965,
|
| 950 |
+
"step": 1570
|
| 951 |
+
},
|
| 952 |
+
{
|
| 953 |
+
"epoch": 0.53,
|
| 954 |
+
"learning_rate": 0.0003,
|
| 955 |
+
"loss": 0.098,
|
| 956 |
+
"step": 1580
|
| 957 |
+
},
|
| 958 |
+
{
|
| 959 |
+
"epoch": 0.53,
|
| 960 |
+
"learning_rate": 0.0003,
|
| 961 |
+
"loss": 0.097,
|
| 962 |
+
"step": 1590
|
| 963 |
+
},
|
| 964 |
+
{
|
| 965 |
+
"epoch": 0.53,
|
| 966 |
+
"learning_rate": 0.0003,
|
| 967 |
+
"loss": 0.0885,
|
| 968 |
+
"step": 1600
|
| 969 |
}
|
| 970 |
],
|
| 971 |
"max_steps": 3000,
|
| 972 |
"num_train_epochs": 9223372036854775807,
|
| 973 |
+
"total_flos": 670014898176000.0,
|
| 974 |
"trial_name": null,
|
| 975 |
"trial_params": null
|
| 976 |
}
|
{checkpoint-1200 β checkpoint-1600}/training_args.bin
RENAMED
|
File without changes
|
{checkpoint-1200 β checkpoint-1600}/zero_to_fp32.py
RENAMED
|
File without changes
|
runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:677d38e5d0f2898fe52eabe578b18234ab0985e274317f79127ce656648b82b9
|
| 3 |
+
size 29271
|