Training in progress, epoch 0, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step3900/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3900/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3900/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3900/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step3900/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +160 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1037269336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0240b6e222ded106342ff50b761da38ad2b38fcb3808077be942ca362e7e7671
|
| 3 |
size 1037269336
|
last-checkpoint/global_step3900/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:502a73599633b052195a6a26af0bedf02509a127fbc1570b8b556d21bdf5d271
|
| 3 |
+
size 781993445
|
last-checkpoint/global_step3900/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:908ea792c39c911c85d9d5492e49693edb28c198dc634bde11de57ff5240ffae
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step3900/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e5996472e52d43ed85c4ac34d5c558f5521176e0c8c6d5d172e3c29ef4c51ab1
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step3900/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d9475970e91582e8a4622d6cb5268e5d8dfe47471a26d20b6489bde951f01d3
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step3900/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa4e30b1691db220ebbbf7a914ce92bda79ed6b8493e61bfbe649f84608bb961
|
| 3 |
+
size 2610290277
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step3900
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b12b40563b99c2baee008fe86357b2292b938122b66c4fd030619ed3a7e249c2
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c324bba1f61bf365a138212f43772e0143abdeacc0a0a8df262a19f5484c461
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66d131ba9a870afc277bffc705ecd17f99202d034a2e308e14148808e10f8866
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:52ad6bb7a439bb1c3f9f1f35e584026ae43dfcd4373e8b47d872d00c633752f2
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5fd6e854e3b09e0cbb5e0b9ed1447e26fda6e84966f68c365186f77f59549fc
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 1.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -5936,6 +5936,162 @@
|
|
| 5936 |
"eval_samples_per_second": 171.941,
|
| 5937 |
"eval_steps_per_second": 10.782,
|
| 5938 |
"step": 3800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5939 |
}
|
| 5940 |
],
|
| 5941 |
"logging_steps": 5,
|
|
@@ -5964,7 +6120,7 @@
|
|
| 5964 |
"attributes": {}
|
| 5965 |
}
|
| 5966 |
},
|
| 5967 |
-
"total_flos":
|
| 5968 |
"train_batch_size": 4,
|
| 5969 |
"trial_name": null,
|
| 5970 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 1.9406747817993164,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.56694286960314,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 3900,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 5936 |
"eval_samples_per_second": 171.941,
|
| 5937 |
"eval_steps_per_second": 10.782,
|
| 5938 |
"step": 3800
|
| 5939 |
+
},
|
| 5940 |
+
{
|
| 5941 |
+
"epoch": 0.5531327227794738,
|
| 5942 |
+
"grad_norm": 3.0287039279937744,
|
| 5943 |
+
"learning_rate": 6.939495836072836e-05,
|
| 5944 |
+
"loss": 2.1146,
|
| 5945 |
+
"step": 3805
|
| 5946 |
+
},
|
| 5947 |
+
{
|
| 5948 |
+
"epoch": 0.5538595726122983,
|
| 5949 |
+
"grad_norm": 2.5071959495544434,
|
| 5950 |
+
"learning_rate": 6.932157795240215e-05,
|
| 5951 |
+
"loss": 2.0004,
|
| 5952 |
+
"step": 3810
|
| 5953 |
+
},
|
| 5954 |
+
{
|
| 5955 |
+
"epoch": 0.5545864224451228,
|
| 5956 |
+
"grad_norm": 2.4799954891204834,
|
| 5957 |
+
"learning_rate": 6.924814914966674e-05,
|
| 5958 |
+
"loss": 2.0815,
|
| 5959 |
+
"step": 3815
|
| 5960 |
+
},
|
| 5961 |
+
{
|
| 5962 |
+
"epoch": 0.5553132722779474,
|
| 5963 |
+
"grad_norm": 2.5911128520965576,
|
| 5964 |
+
"learning_rate": 6.917467213737908e-05,
|
| 5965 |
+
"loss": 2.1649,
|
| 5966 |
+
"step": 3820
|
| 5967 |
+
},
|
| 5968 |
+
{
|
| 5969 |
+
"epoch": 0.5560401221107719,
|
| 5970 |
+
"grad_norm": 2.4524548053741455,
|
| 5971 |
+
"learning_rate": 6.910114710051744e-05,
|
| 5972 |
+
"loss": 2.0344,
|
| 5973 |
+
"step": 3825
|
| 5974 |
+
},
|
| 5975 |
+
{
|
| 5976 |
+
"epoch": 0.5567669719435965,
|
| 5977 |
+
"grad_norm": 2.5558533668518066,
|
| 5978 |
+
"learning_rate": 6.902757422418104e-05,
|
| 5979 |
+
"loss": 2.2114,
|
| 5980 |
+
"step": 3830
|
| 5981 |
+
},
|
| 5982 |
+
{
|
| 5983 |
+
"epoch": 0.557493821776421,
|
| 5984 |
+
"grad_norm": 2.460690498352051,
|
| 5985 |
+
"learning_rate": 6.895395369358949e-05,
|
| 5986 |
+
"loss": 2.0785,
|
| 5987 |
+
"step": 3835
|
| 5988 |
+
},
|
| 5989 |
+
{
|
| 5990 |
+
"epoch": 0.5582206716092455,
|
| 5991 |
+
"grad_norm": 2.2994587421417236,
|
| 5992 |
+
"learning_rate": 6.888028569408238e-05,
|
| 5993 |
+
"loss": 2.0985,
|
| 5994 |
+
"step": 3840
|
| 5995 |
+
},
|
| 5996 |
+
{
|
| 5997 |
+
"epoch": 0.55894752144207,
|
| 5998 |
+
"grad_norm": 2.4622437953948975,
|
| 5999 |
+
"learning_rate": 6.880657041111886e-05,
|
| 6000 |
+
"loss": 2.1873,
|
| 6001 |
+
"step": 3845
|
| 6002 |
+
},
|
| 6003 |
+
{
|
| 6004 |
+
"epoch": 0.5596743712748946,
|
| 6005 |
+
"grad_norm": 2.566040515899658,
|
| 6006 |
+
"learning_rate": 6.873280803027698e-05,
|
| 6007 |
+
"loss": 2.0761,
|
| 6008 |
+
"step": 3850
|
| 6009 |
+
},
|
| 6010 |
+
{
|
| 6011 |
+
"epoch": 0.5596743712748946,
|
| 6012 |
+
"eval_loss": 1.9471417665481567,
|
| 6013 |
+
"eval_runtime": 22.8564,
|
| 6014 |
+
"eval_samples_per_second": 144.423,
|
| 6015 |
+
"eval_steps_per_second": 9.057,
|
| 6016 |
+
"step": 3850
|
| 6017 |
+
},
|
| 6018 |
+
{
|
| 6019 |
+
"epoch": 0.5604012211077192,
|
| 6020 |
+
"grad_norm": 2.9277586936950684,
|
| 6021 |
+
"learning_rate": 6.865899873725354e-05,
|
| 6022 |
+
"loss": 2.1336,
|
| 6023 |
+
"step": 3855
|
| 6024 |
+
},
|
| 6025 |
+
{
|
| 6026 |
+
"epoch": 0.5611280709405437,
|
| 6027 |
+
"grad_norm": 2.224175214767456,
|
| 6028 |
+
"learning_rate": 6.858514271786328e-05,
|
| 6029 |
+
"loss": 1.9701,
|
| 6030 |
+
"step": 3860
|
| 6031 |
+
},
|
| 6032 |
+
{
|
| 6033 |
+
"epoch": 0.5618549207733682,
|
| 6034 |
+
"grad_norm": 2.4121415615081787,
|
| 6035 |
+
"learning_rate": 6.851124015803867e-05,
|
| 6036 |
+
"loss": 2.0505,
|
| 6037 |
+
"step": 3865
|
| 6038 |
+
},
|
| 6039 |
+
{
|
| 6040 |
+
"epoch": 0.5625817706061927,
|
| 6041 |
+
"grad_norm": 2.3348071575164795,
|
| 6042 |
+
"learning_rate": 6.843729124382931e-05,
|
| 6043 |
+
"loss": 2.0927,
|
| 6044 |
+
"step": 3870
|
| 6045 |
+
},
|
| 6046 |
+
{
|
| 6047 |
+
"epoch": 0.5633086204390173,
|
| 6048 |
+
"grad_norm": 2.6254260540008545,
|
| 6049 |
+
"learning_rate": 6.836329616140152e-05,
|
| 6050 |
+
"loss": 2.0223,
|
| 6051 |
+
"step": 3875
|
| 6052 |
+
},
|
| 6053 |
+
{
|
| 6054 |
+
"epoch": 0.5640354702718419,
|
| 6055 |
+
"grad_norm": 2.551982879638672,
|
| 6056 |
+
"learning_rate": 6.82892550970378e-05,
|
| 6057 |
+
"loss": 2.1343,
|
| 6058 |
+
"step": 3880
|
| 6059 |
+
},
|
| 6060 |
+
{
|
| 6061 |
+
"epoch": 0.5647623201046664,
|
| 6062 |
+
"grad_norm": 2.6784307956695557,
|
| 6063 |
+
"learning_rate": 6.821516823713646e-05,
|
| 6064 |
+
"loss": 2.1829,
|
| 6065 |
+
"step": 3885
|
| 6066 |
+
},
|
| 6067 |
+
{
|
| 6068 |
+
"epoch": 0.5654891699374909,
|
| 6069 |
+
"grad_norm": 2.5187387466430664,
|
| 6070 |
+
"learning_rate": 6.81410357682111e-05,
|
| 6071 |
+
"loss": 2.1628,
|
| 6072 |
+
"step": 3890
|
| 6073 |
+
},
|
| 6074 |
+
{
|
| 6075 |
+
"epoch": 0.5662160197703154,
|
| 6076 |
+
"grad_norm": 2.4125635623931885,
|
| 6077 |
+
"learning_rate": 6.806685787689007e-05,
|
| 6078 |
+
"loss": 2.1097,
|
| 6079 |
+
"step": 3895
|
| 6080 |
+
},
|
| 6081 |
+
{
|
| 6082 |
+
"epoch": 0.56694286960314,
|
| 6083 |
+
"grad_norm": 2.3090174198150635,
|
| 6084 |
+
"learning_rate": 6.799263474991618e-05,
|
| 6085 |
+
"loss": 1.9982,
|
| 6086 |
+
"step": 3900
|
| 6087 |
+
},
|
| 6088 |
+
{
|
| 6089 |
+
"epoch": 0.56694286960314,
|
| 6090 |
+
"eval_loss": 1.9406747817993164,
|
| 6091 |
+
"eval_runtime": 19.0434,
|
| 6092 |
+
"eval_samples_per_second": 173.341,
|
| 6093 |
+
"eval_steps_per_second": 10.87,
|
| 6094 |
+
"step": 3900
|
| 6095 |
}
|
| 6096 |
],
|
| 6097 |
"logging_steps": 5,
|
|
|
|
| 6120 |
"attributes": {}
|
| 6121 |
}
|
| 6122 |
},
|
| 6123 |
+
"total_flos": 1.0171604246022062e+18,
|
| 6124 |
"train_batch_size": 4,
|
| 6125 |
"trial_name": null,
|
| 6126 |
"trial_params": null
|