Training in progress, step 420000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c2f3d6b44eb31a5904720c6ef9f3e390e0825c4e9bc44d807b8966079fa39c18
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43bdf8dfd2503753ae90f61b7bb9fe12717af35ea212a47dbf67cd5903f5bff5
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc2e9e51fe4eee7b642ed745674287b01af0553fd6bc1b8f03d1bfb83d45dd88
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d7303688061045aa652b3652b66ace516eee8a80cc0cf60b455e243337822a42
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f87a487e7aba37c8dacc6a7f2504b596187c112ee85cf9f5586b459f5ed62ff7
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c8eef20e1504bbd40c9a4046a6d63e017b73406d2bc77b0cb51859e2e7910822
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ecb3d640c7c2649b2bba280f1d77ce8c7f1955289fab0ee4959a38aa2646819
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 6.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -3040,11 +3040,85 @@
|
|
| 3040 |
"eval_samples_per_second": 926.22,
|
| 3041 |
"eval_steps_per_second": 14.82,
|
| 3042 |
"step": 410000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3043 |
}
|
| 3044 |
],
|
| 3045 |
"max_steps": 1000000,
|
| 3046 |
"num_train_epochs": 16,
|
| 3047 |
-
"total_flos": 2.
|
| 3048 |
"trial_name": null,
|
| 3049 |
"trial_params": null
|
| 3050 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 6.41348664620459,
|
| 5 |
+
"global_step": 420000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 3040 |
"eval_samples_per_second": 926.22,
|
| 3041 |
"eval_steps_per_second": 14.82,
|
| 3042 |
"step": 410000
|
| 3043 |
+
},
|
| 3044 |
+
{
|
| 3045 |
+
"epoch": 6.28,
|
| 3046 |
+
"learning_rate": 0.00010576871868792746,
|
| 3047 |
+
"loss": 0.275,
|
| 3048 |
+
"step": 411000
|
| 3049 |
+
},
|
| 3050 |
+
{
|
| 3051 |
+
"epoch": 6.29,
|
| 3052 |
+
"learning_rate": 0.0001055533481490004,
|
| 3053 |
+
"loss": 0.2746,
|
| 3054 |
+
"step": 412000
|
| 3055 |
+
},
|
| 3056 |
+
{
|
| 3057 |
+
"epoch": 6.31,
|
| 3058 |
+
"learning_rate": 0.000105337698162752,
|
| 3059 |
+
"loss": 0.2741,
|
| 3060 |
+
"step": 413000
|
| 3061 |
+
},
|
| 3062 |
+
{
|
| 3063 |
+
"epoch": 6.32,
|
| 3064 |
+
"learning_rate": 0.00010512177108749594,
|
| 3065 |
+
"loss": 0.2746,
|
| 3066 |
+
"step": 414000
|
| 3067 |
+
},
|
| 3068 |
+
{
|
| 3069 |
+
"epoch": 6.34,
|
| 3070 |
+
"learning_rate": 0.00010490556928457616,
|
| 3071 |
+
"loss": 0.2743,
|
| 3072 |
+
"step": 415000
|
| 3073 |
+
},
|
| 3074 |
+
{
|
| 3075 |
+
"epoch": 6.34,
|
| 3076 |
+
"eval_runtime": 1.0107,
|
| 3077 |
+
"eval_samples_per_second": 989.389,
|
| 3078 |
+
"eval_steps_per_second": 15.83,
|
| 3079 |
+
"step": 415000
|
| 3080 |
+
},
|
| 3081 |
+
{
|
| 3082 |
+
"epoch": 6.35,
|
| 3083 |
+
"learning_rate": 0.00010468909511834088,
|
| 3084 |
+
"loss": 0.2741,
|
| 3085 |
+
"step": 416000
|
| 3086 |
+
},
|
| 3087 |
+
{
|
| 3088 |
+
"epoch": 6.37,
|
| 3089 |
+
"learning_rate": 0.00010447235095611692,
|
| 3090 |
+
"loss": 0.2738,
|
| 3091 |
+
"step": 417000
|
| 3092 |
+
},
|
| 3093 |
+
{
|
| 3094 |
+
"epoch": 6.38,
|
| 3095 |
+
"learning_rate": 0.00010425533916818376,
|
| 3096 |
+
"loss": 0.2738,
|
| 3097 |
+
"step": 418000
|
| 3098 |
+
},
|
| 3099 |
+
{
|
| 3100 |
+
"epoch": 6.4,
|
| 3101 |
+
"learning_rate": 0.00010403806212774747,
|
| 3102 |
+
"loss": 0.2742,
|
| 3103 |
+
"step": 419000
|
| 3104 |
+
},
|
| 3105 |
+
{
|
| 3106 |
+
"epoch": 6.41,
|
| 3107 |
+
"learning_rate": 0.000103820522210915,
|
| 3108 |
+
"loss": 0.2737,
|
| 3109 |
+
"step": 420000
|
| 3110 |
+
},
|
| 3111 |
+
{
|
| 3112 |
+
"epoch": 6.41,
|
| 3113 |
+
"eval_runtime": 1.055,
|
| 3114 |
+
"eval_samples_per_second": 947.861,
|
| 3115 |
+
"eval_steps_per_second": 15.166,
|
| 3116 |
+
"step": 420000
|
| 3117 |
}
|
| 3118 |
],
|
| 3119 |
"max_steps": 1000000,
|
| 3120 |
"num_train_epochs": 16,
|
| 3121 |
+
"total_flos": 2.9442070208656875e+22,
|
| 3122 |
"trial_name": null,
|
| 3123 |
"trial_params": null
|
| 3124 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43bdf8dfd2503753ae90f61b7bb9fe12717af35ea212a47dbf67cd5903f5bff5
|
| 3 |
size 449471589
|