Training in progress, epoch 0, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step5200/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5200/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5200/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5200/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step5200/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +160 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1037269336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ec12a67f9dfa7e82f7d1fa27e46947cfa5b2e70dc641605dca0f15edc26ac5b
|
| 3 |
size 1037269336
|
last-checkpoint/global_step5200/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:376d8867ef6a010707f6d5ffde3b70f1fc37fbb25fc4c67986ce621672102162
|
| 3 |
+
size 781993445
|
last-checkpoint/global_step5200/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:94681bee6eff66c9426bb36f1f8b5f82871ec40e34245855236063b0768f353d
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step5200/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43446a308e9f0fe1c37260f8f2fc16128b95eec4bef7121becf3b52c6cd5e5ae
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step5200/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ccdadbf4b397a4566ebe26955a4e21a4bdbf0380614debe2fa34aa6ddd4a065f
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step5200/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c48adfda58d5714ac6a30167ebed520731034b75a271625bea166e61975b263e
|
| 3 |
+
size 2610290277
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step5200
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c05264822189d459f4c1c5b27ebdb9b6b9e8dcee1a009b4f2e28ecf49dc4f5b
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8f645a6078c88ee4b1185c0d7c1ae791e9bd6d926fdbc01aebef5ee84d1159b5
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c59312f152825042f7e3a29466b0959cd08f51130fdfed991ab5bf960815a6dc
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1de6530daf248f1aed878ddb32856047ec142beb86757afc3c303de79cdabc8
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:096748cf67d302b535a039b43df4da991eba6c27882d0848f447a99c87428013
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 1.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -7964,6 +7964,162 @@
|
|
| 7964 |
"eval_samples_per_second": 172.648,
|
| 7965 |
"eval_steps_per_second": 10.826,
|
| 7966 |
"step": 5100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7967 |
}
|
| 7968 |
],
|
| 7969 |
"logging_steps": 5,
|
|
@@ -7992,7 +8148,7 @@
|
|
| 7992 |
"attributes": {}
|
| 7993 |
}
|
| 7994 |
},
|
| 7995 |
-
"total_flos": 1.
|
| 7996 |
"train_batch_size": 4,
|
| 7997 |
"trial_name": null,
|
| 7998 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 1.8277243375778198,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.75592382613752,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 5200,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 7964 |
"eval_samples_per_second": 172.648,
|
| 7965 |
"eval_steps_per_second": 10.826,
|
| 7966 |
"step": 5100
|
| 7967 |
+
},
|
| 7968 |
+
{
|
| 7969 |
+
"epoch": 0.7421136793138537,
|
| 7970 |
+
"grad_norm": 2.1775574684143066,
|
| 7971 |
+
"learning_rate": 4.923992598825168e-05,
|
| 7972 |
+
"loss": 1.8894,
|
| 7973 |
+
"step": 5105
|
| 7974 |
+
},
|
| 7975 |
+
{
|
| 7976 |
+
"epoch": 0.7428405291466783,
|
| 7977 |
+
"grad_norm": 2.293163299560547,
|
| 7978 |
+
"learning_rate": 4.9160449548493304e-05,
|
| 7979 |
+
"loss": 2.0062,
|
| 7980 |
+
"step": 5110
|
| 7981 |
+
},
|
| 7982 |
+
{
|
| 7983 |
+
"epoch": 0.7435673789795029,
|
| 7984 |
+
"grad_norm": 2.4334089756011963,
|
| 7985 |
+
"learning_rate": 4.908097546994249e-05,
|
| 7986 |
+
"loss": 1.8894,
|
| 7987 |
+
"step": 5115
|
| 7988 |
+
},
|
| 7989 |
+
{
|
| 7990 |
+
"epoch": 0.7442942288123273,
|
| 7991 |
+
"grad_norm": 2.508547782897949,
|
| 7992 |
+
"learning_rate": 4.9001503952675144e-05,
|
| 7993 |
+
"loss": 2.1935,
|
| 7994 |
+
"step": 5120
|
| 7995 |
+
},
|
| 7996 |
+
{
|
| 7997 |
+
"epoch": 0.7450210786451519,
|
| 7998 |
+
"grad_norm": 2.257105588912964,
|
| 7999 |
+
"learning_rate": 4.89220351967607e-05,
|
| 8000 |
+
"loss": 1.9659,
|
| 8001 |
+
"step": 5125
|
| 8002 |
+
},
|
| 8003 |
+
{
|
| 8004 |
+
"epoch": 0.7457479284779764,
|
| 8005 |
+
"grad_norm": 2.537111520767212,
|
| 8006 |
+
"learning_rate": 4.884256940226167e-05,
|
| 8007 |
+
"loss": 1.9314,
|
| 8008 |
+
"step": 5130
|
| 8009 |
+
},
|
| 8010 |
+
{
|
| 8011 |
+
"epoch": 0.746474778310801,
|
| 8012 |
+
"grad_norm": 2.178720474243164,
|
| 8013 |
+
"learning_rate": 4.876310676923307e-05,
|
| 8014 |
+
"loss": 1.9614,
|
| 8015 |
+
"step": 5135
|
| 8016 |
+
},
|
| 8017 |
+
{
|
| 8018 |
+
"epoch": 0.7472016281436256,
|
| 8019 |
+
"grad_norm": 2.6238718032836914,
|
| 8020 |
+
"learning_rate": 4.868364749772204e-05,
|
| 8021 |
+
"loss": 1.8404,
|
| 8022 |
+
"step": 5140
|
| 8023 |
+
},
|
| 8024 |
+
{
|
| 8025 |
+
"epoch": 0.74792847797645,
|
| 8026 |
+
"grad_norm": 2.7192604541778564,
|
| 8027 |
+
"learning_rate": 4.860419178776716e-05,
|
| 8028 |
+
"loss": 1.965,
|
| 8029 |
+
"step": 5145
|
| 8030 |
+
},
|
| 8031 |
+
{
|
| 8032 |
+
"epoch": 0.7486553278092746,
|
| 8033 |
+
"grad_norm": 2.0032546520233154,
|
| 8034 |
+
"learning_rate": 4.852473983939808e-05,
|
| 8035 |
+
"loss": 1.9087,
|
| 8036 |
+
"step": 5150
|
| 8037 |
+
},
|
| 8038 |
+
{
|
| 8039 |
+
"epoch": 0.7486553278092746,
|
| 8040 |
+
"eval_loss": 1.838592529296875,
|
| 8041 |
+
"eval_runtime": 21.3886,
|
| 8042 |
+
"eval_samples_per_second": 154.335,
|
| 8043 |
+
"eval_steps_per_second": 9.678,
|
| 8044 |
+
"step": 5150
|
| 8045 |
+
},
|
| 8046 |
+
{
|
| 8047 |
+
"epoch": 0.7493821776420991,
|
| 8048 |
+
"grad_norm": 1.9931970834732056,
|
| 8049 |
+
"learning_rate": 4.844529185263501e-05,
|
| 8050 |
+
"loss": 2.1584,
|
| 8051 |
+
"step": 5155
|
| 8052 |
+
},
|
| 8053 |
+
{
|
| 8054 |
+
"epoch": 0.7501090274749237,
|
| 8055 |
+
"grad_norm": 2.349775791168213,
|
| 8056 |
+
"learning_rate": 4.836584802748814e-05,
|
| 8057 |
+
"loss": 2.0698,
|
| 8058 |
+
"step": 5160
|
| 8059 |
+
},
|
| 8060 |
+
{
|
| 8061 |
+
"epoch": 0.7508358773077483,
|
| 8062 |
+
"grad_norm": 4.791730880737305,
|
| 8063 |
+
"learning_rate": 4.828640856395723e-05,
|
| 8064 |
+
"loss": 2.1494,
|
| 8065 |
+
"step": 5165
|
| 8066 |
+
},
|
| 8067 |
+
{
|
| 8068 |
+
"epoch": 0.7515627271405727,
|
| 8069 |
+
"grad_norm": 2.025981903076172,
|
| 8070 |
+
"learning_rate": 4.8206973662030984e-05,
|
| 8071 |
+
"loss": 2.0689,
|
| 8072 |
+
"step": 5170
|
| 8073 |
+
},
|
| 8074 |
+
{
|
| 8075 |
+
"epoch": 0.7522895769733973,
|
| 8076 |
+
"grad_norm": 2.32045841217041,
|
| 8077 |
+
"learning_rate": 4.8127543521686746e-05,
|
| 8078 |
+
"loss": 2.0441,
|
| 8079 |
+
"step": 5175
|
| 8080 |
+
},
|
| 8081 |
+
{
|
| 8082 |
+
"epoch": 0.7530164268062218,
|
| 8083 |
+
"grad_norm": 2.6872143745422363,
|
| 8084 |
+
"learning_rate": 4.8048118342889746e-05,
|
| 8085 |
+
"loss": 1.863,
|
| 8086 |
+
"step": 5180
|
| 8087 |
+
},
|
| 8088 |
+
{
|
| 8089 |
+
"epoch": 0.7537432766390464,
|
| 8090 |
+
"grad_norm": 2.622974395751953,
|
| 8091 |
+
"learning_rate": 4.7968698325592805e-05,
|
| 8092 |
+
"loss": 2.0201,
|
| 8093 |
+
"step": 5185
|
| 8094 |
+
},
|
| 8095 |
+
{
|
| 8096 |
+
"epoch": 0.754470126471871,
|
| 8097 |
+
"grad_norm": 2.663489818572998,
|
| 8098 |
+
"learning_rate": 4.7889283669735706e-05,
|
| 8099 |
+
"loss": 2.0436,
|
| 8100 |
+
"step": 5190
|
| 8101 |
+
},
|
| 8102 |
+
{
|
| 8103 |
+
"epoch": 0.7551969763046954,
|
| 8104 |
+
"grad_norm": 2.5928540229797363,
|
| 8105 |
+
"learning_rate": 4.780987457524476e-05,
|
| 8106 |
+
"loss": 2.0155,
|
| 8107 |
+
"step": 5195
|
| 8108 |
+
},
|
| 8109 |
+
{
|
| 8110 |
+
"epoch": 0.75592382613752,
|
| 8111 |
+
"grad_norm": 2.380448579788208,
|
| 8112 |
+
"learning_rate": 4.7730471242032245e-05,
|
| 8113 |
+
"loss": 2.0713,
|
| 8114 |
+
"step": 5200
|
| 8115 |
+
},
|
| 8116 |
+
{
|
| 8117 |
+
"epoch": 0.75592382613752,
|
| 8118 |
+
"eval_loss": 1.8277243375778198,
|
| 8119 |
+
"eval_runtime": 19.2213,
|
| 8120 |
+
"eval_samples_per_second": 171.736,
|
| 8121 |
+
"eval_steps_per_second": 10.769,
|
| 8122 |
+
"step": 5200
|
| 8123 |
}
|
| 8124 |
],
|
| 8125 |
"logging_steps": 5,
|
|
|
|
| 8148 |
"attributes": {}
|
| 8149 |
}
|
| 8150 |
},
|
| 8151 |
+
"total_flos": 1.3550775535088435e+18,
|
| 8152 |
"train_batch_size": 4,
|
| 8153 |
"trial_name": null,
|
| 8154 |
"trial_params": null
|