Auto-save flat update: checkpoint-10000
Browse files- model.safetensors +1 -1
- optimizer.pt +1 -1
- scheduler.pt +1 -1
- trainer_state.json +144 -4
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 17315288
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b711109f0ad3f319d1bc6cd06062dae48744a3395a51ba0e6af33cece506c46c
|
| 3 |
size 17315288
|
optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 34640005
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:72fed1de07ba7856e90f74545259c787b689f6231e854dd0b9e5288e17c92f2c
|
| 3 |
size 34640005
|
scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a9f154b1359afdda4d3ed65870a0e8dc4d46259d77c6ba1858bc2095aee8d67
|
| 3 |
size 1465
|
trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -13867,6 +13867,146 @@
|
|
| 13867 |
"learning_rate": 2.002002002002002e-06,
|
| 13868 |
"loss": 2.3372,
|
| 13869 |
"step": 9900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13870 |
}
|
| 13871 |
],
|
| 13872 |
"logging_steps": 5,
|
|
@@ -13881,12 +14021,12 @@
|
|
| 13881 |
"should_evaluate": false,
|
| 13882 |
"should_log": false,
|
| 13883 |
"should_save": true,
|
| 13884 |
-
"should_training_stop":
|
| 13885 |
},
|
| 13886 |
"attributes": {}
|
| 13887 |
}
|
| 13888 |
},
|
| 13889 |
-
"total_flos":
|
| 13890 |
"train_batch_size": 4,
|
| 13891 |
"trial_name": null,
|
| 13892 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 10000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 13867 |
"learning_rate": 2.002002002002002e-06,
|
| 13868 |
"loss": 2.3372,
|
| 13869 |
"step": 9900
|
| 13870 |
+
},
|
| 13871 |
+
{
|
| 13872 |
+
"epoch": 0.9905,
|
| 13873 |
+
"grad_norm": 0.8105862140655518,
|
| 13874 |
+
"learning_rate": 1.9019019019019022e-06,
|
| 13875 |
+
"loss": 2.3333,
|
| 13876 |
+
"step": 9905
|
| 13877 |
+
},
|
| 13878 |
+
{
|
| 13879 |
+
"epoch": 0.991,
|
| 13880 |
+
"grad_norm": 0.8059213757514954,
|
| 13881 |
+
"learning_rate": 1.801801801801802e-06,
|
| 13882 |
+
"loss": 2.3359,
|
| 13883 |
+
"step": 9910
|
| 13884 |
+
},
|
| 13885 |
+
{
|
| 13886 |
+
"epoch": 0.9915,
|
| 13887 |
+
"grad_norm": 0.818318247795105,
|
| 13888 |
+
"learning_rate": 1.7017017017017019e-06,
|
| 13889 |
+
"loss": 2.3476,
|
| 13890 |
+
"step": 9915
|
| 13891 |
+
},
|
| 13892 |
+
{
|
| 13893 |
+
"epoch": 0.992,
|
| 13894 |
+
"grad_norm": 0.7882871031761169,
|
| 13895 |
+
"learning_rate": 1.6016016016016016e-06,
|
| 13896 |
+
"loss": 2.3351,
|
| 13897 |
+
"step": 9920
|
| 13898 |
+
},
|
| 13899 |
+
{
|
| 13900 |
+
"epoch": 0.9925,
|
| 13901 |
+
"grad_norm": 0.817363440990448,
|
| 13902 |
+
"learning_rate": 1.5015015015015015e-06,
|
| 13903 |
+
"loss": 2.3523,
|
| 13904 |
+
"step": 9925
|
| 13905 |
+
},
|
| 13906 |
+
{
|
| 13907 |
+
"epoch": 0.993,
|
| 13908 |
+
"grad_norm": 0.8170154094696045,
|
| 13909 |
+
"learning_rate": 1.4014014014014014e-06,
|
| 13910 |
+
"loss": 2.3567,
|
| 13911 |
+
"step": 9930
|
| 13912 |
+
},
|
| 13913 |
+
{
|
| 13914 |
+
"epoch": 0.9935,
|
| 13915 |
+
"grad_norm": 0.8051609992980957,
|
| 13916 |
+
"learning_rate": 1.3013013013013014e-06,
|
| 13917 |
+
"loss": 2.3376,
|
| 13918 |
+
"step": 9935
|
| 13919 |
+
},
|
| 13920 |
+
{
|
| 13921 |
+
"epoch": 0.994,
|
| 13922 |
+
"grad_norm": 0.8126572370529175,
|
| 13923 |
+
"learning_rate": 1.2012012012012013e-06,
|
| 13924 |
+
"loss": 2.3451,
|
| 13925 |
+
"step": 9940
|
| 13926 |
+
},
|
| 13927 |
+
{
|
| 13928 |
+
"epoch": 0.9945,
|
| 13929 |
+
"grad_norm": 0.8231433629989624,
|
| 13930 |
+
"learning_rate": 1.1011011011011012e-06,
|
| 13931 |
+
"loss": 2.3174,
|
| 13932 |
+
"step": 9945
|
| 13933 |
+
},
|
| 13934 |
+
{
|
| 13935 |
+
"epoch": 0.995,
|
| 13936 |
+
"grad_norm": 0.8126521110534668,
|
| 13937 |
+
"learning_rate": 1.001001001001001e-06,
|
| 13938 |
+
"loss": 2.3333,
|
| 13939 |
+
"step": 9950
|
| 13940 |
+
},
|
| 13941 |
+
{
|
| 13942 |
+
"epoch": 0.9955,
|
| 13943 |
+
"grad_norm": 0.7962733507156372,
|
| 13944 |
+
"learning_rate": 9.00900900900901e-07,
|
| 13945 |
+
"loss": 2.3245,
|
| 13946 |
+
"step": 9955
|
| 13947 |
+
},
|
| 13948 |
+
{
|
| 13949 |
+
"epoch": 0.996,
|
| 13950 |
+
"grad_norm": 0.8092034459114075,
|
| 13951 |
+
"learning_rate": 8.008008008008008e-07,
|
| 13952 |
+
"loss": 2.3472,
|
| 13953 |
+
"step": 9960
|
| 13954 |
+
},
|
| 13955 |
+
{
|
| 13956 |
+
"epoch": 0.9965,
|
| 13957 |
+
"grad_norm": 0.8146346807479858,
|
| 13958 |
+
"learning_rate": 7.007007007007007e-07,
|
| 13959 |
+
"loss": 2.3337,
|
| 13960 |
+
"step": 9965
|
| 13961 |
+
},
|
| 13962 |
+
{
|
| 13963 |
+
"epoch": 0.997,
|
| 13964 |
+
"grad_norm": 0.7841795086860657,
|
| 13965 |
+
"learning_rate": 6.006006006006006e-07,
|
| 13966 |
+
"loss": 2.3382,
|
| 13967 |
+
"step": 9970
|
| 13968 |
+
},
|
| 13969 |
+
{
|
| 13970 |
+
"epoch": 0.9975,
|
| 13971 |
+
"grad_norm": 0.7981340885162354,
|
| 13972 |
+
"learning_rate": 5.005005005005005e-07,
|
| 13973 |
+
"loss": 2.3166,
|
| 13974 |
+
"step": 9975
|
| 13975 |
+
},
|
| 13976 |
+
{
|
| 13977 |
+
"epoch": 0.998,
|
| 13978 |
+
"grad_norm": 0.8195034861564636,
|
| 13979 |
+
"learning_rate": 4.004004004004004e-07,
|
| 13980 |
+
"loss": 2.325,
|
| 13981 |
+
"step": 9980
|
| 13982 |
+
},
|
| 13983 |
+
{
|
| 13984 |
+
"epoch": 0.9985,
|
| 13985 |
+
"grad_norm": 0.8242753148078918,
|
| 13986 |
+
"learning_rate": 3.003003003003003e-07,
|
| 13987 |
+
"loss": 2.3474,
|
| 13988 |
+
"step": 9985
|
| 13989 |
+
},
|
| 13990 |
+
{
|
| 13991 |
+
"epoch": 0.999,
|
| 13992 |
+
"grad_norm": 0.9042975902557373,
|
| 13993 |
+
"learning_rate": 2.002002002002002e-07,
|
| 13994 |
+
"loss": 2.322,
|
| 13995 |
+
"step": 9990
|
| 13996 |
+
},
|
| 13997 |
+
{
|
| 13998 |
+
"epoch": 0.9995,
|
| 13999 |
+
"grad_norm": 0.8336848020553589,
|
| 14000 |
+
"learning_rate": 1.001001001001001e-07,
|
| 14001 |
+
"loss": 2.3474,
|
| 14002 |
+
"step": 9995
|
| 14003 |
+
},
|
| 14004 |
+
{
|
| 14005 |
+
"epoch": 1.0,
|
| 14006 |
+
"grad_norm": 0.8093213438987732,
|
| 14007 |
+
"learning_rate": 0.0,
|
| 14008 |
+
"loss": 2.3473,
|
| 14009 |
+
"step": 10000
|
| 14010 |
}
|
| 14011 |
],
|
| 14012 |
"logging_steps": 5,
|
|
|
|
| 14021 |
"should_evaluate": false,
|
| 14022 |
"should_log": false,
|
| 14023 |
"should_save": true,
|
| 14024 |
+
"should_training_stop": true
|
| 14025 |
},
|
| 14026 |
"attributes": {}
|
| 14027 |
}
|
| 14028 |
},
|
| 14029 |
+
"total_flos": 8252377006080000.0,
|
| 14030 |
"train_batch_size": 4,
|
| 14031 |
"trial_name": null,
|
| 14032 |
"trial_params": null
|