Training in progress, step 75000, checkpoint

- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +353 -3
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a67b2c60c5b42d0ad22d6b38771528b94fc53ceec628d0597d6fa521952a684c
 size 304481530

last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b6916c88bb66f81e6f1308f6aadeffdb932cc73012f17c967d2f81582f0d6ec4
 size 402029570

last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1caa66015d3956d30ec507257de058a8c2fd4bde8e3572a38d393062e23e25fa
 size 14960

last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:43374ebce165dffb63c7f0a02b8a1fb69d9d2182c0805086854a706ff35de8db
 size 14960

last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:23f5704701def73bff9de54ed2bc9c44e464b4fd7bf79cf9e15b571b97700de5
 size 14960

last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d2ec98dcfff897ba38371ec424fd9cb0533d296496a8ad5f5af6ba3e2b631320
 size 14960

last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:83d418122fbb0fa369cfecb2f66848d24fc6c35ef433b91965b2ecce9163409e
 size 1064

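The seven diffs above update Git LFS pointer files rather than the binaries themselves: each tracked artifact is stored as a three-line stub recording the LFS spec version, the sha256 oid of the new blob, and its size in bytes. A minimal sketch of reading those fields back in Python, assuming the repository is checked out without running git lfs pull so the paths still contain pointer text (the helper name is illustrative, not part of this repository):

    from pathlib import Path

    def read_lfs_pointer(path: str) -> dict:
        # Parse a Git LFS pointer file (version / oid / size lines) into a dict.
        fields = {}
        for line in Path(path).read_text().splitlines():
            key, _, value = line.partition(" ")
            if key:
                fields[key] = value
        return fields

    ptr = read_lfs_pointer("last-checkpoint/pytorch_model.bin")
    print(ptr["oid"])        # sha256:b6916c88... as recorded in this commit
    print(int(ptr["size"]))  # 402029570 bytes (~402 MB)
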
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
 "best_global_step": null,
 "best_metric": null,
 "best_model_checkpoint": null,
-"epoch": 0.
+"epoch": 0.11109860223145246,
 "eval_steps": 500,
-"global_step":
+"global_step": 75000,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -25908,6 +25908,356 @@
 "learning_rate": 0.0004818496607939437,
 "loss": 16.3994,
 "step": 74000
+},
+{
+"epoch": 0.10964691382896148,
+"grad_norm": 6.34375,
+"learning_rate": 0.0004818447218589361,
+"loss": 16.3548,
+"step": 74020
+},
+{
+"epoch": 0.10967654012288987,
+"grad_norm": 6.84375,
+"learning_rate": 0.00048183978292392856,
+"loss": 16.4066,
+"step": 74040
+},
+{
+"epoch": 0.10970616641681825,
+"grad_norm": 7.34375,
+"learning_rate": 0.00048183484398892096,
+"loss": 16.4287,
+"step": 74060
+},
+{
+"epoch": 0.10973579271074664,
+"grad_norm": 6.34375,
+"learning_rate": 0.00048182990505391346,
+"loss": 16.4596,
+"step": 74080
+},
+{
+"epoch": 0.10976541900467503,
+"grad_norm": 7.625,
+"learning_rate": 0.00048182496611890585,
+"loss": 16.4221,
+"step": 74100
+},
+{
+"epoch": 0.10979504529860341,
+"grad_norm": 6.9375,
+"learning_rate": 0.0004818200271838983,
+"loss": 16.4392,
+"step": 74120
+},
+{
+"epoch": 0.1098246715925318,
+"grad_norm": 6.5625,
+"learning_rate": 0.0004818150882488907,
+"loss": 16.4181,
+"step": 74140
+},
+{
+"epoch": 0.10985429788646019,
+"grad_norm": 7.40625,
+"learning_rate": 0.0004818101493138832,
+"loss": 16.4593,
+"step": 74160
+},
+{
+"epoch": 0.10988392418038857,
+"grad_norm": 6.8125,
+"learning_rate": 0.0004818052103788756,
+"loss": 16.4166,
+"step": 74180
+},
+{
+"epoch": 0.10991355047431696,
+"grad_norm": 6.90625,
+"learning_rate": 0.00048180027144386804,
+"loss": 16.422,
+"step": 74200
+},
+{
+"epoch": 0.10994317676824536,
+"grad_norm": 6.84375,
+"learning_rate": 0.0004817953325088605,
+"loss": 16.3946,
+"step": 74220
+},
+{
+"epoch": 0.10997280306217375,
+"grad_norm": 6.5,
+"learning_rate": 0.0004817903935738529,
+"loss": 16.4022,
+"step": 74240
+},
+{
+"epoch": 0.11000242935610213,
+"grad_norm": 7.25,
+"learning_rate": 0.0004817854546388453,
+"loss": 16.4411,
+"step": 74260
+},
+{
+"epoch": 0.11003205565003052,
+"grad_norm": 7.28125,
+"learning_rate": 0.0004817805157038377,
+"loss": 16.3874,
+"step": 74280
+},
+{
+"epoch": 0.1100616819439589,
+"grad_norm": 6.40625,
+"learning_rate": 0.0004817755767688302,
+"loss": 16.4294,
+"step": 74300
+},
+{
+"epoch": 0.11009130823788729,
+"grad_norm": 6.8125,
+"learning_rate": 0.0004817706378338226,
+"loss": 16.4261,
+"step": 74320
+},
+{
+"epoch": 0.11012093453181568,
+"grad_norm": 7.34375,
+"learning_rate": 0.00048176569889881506,
+"loss": 16.4238,
+"step": 74340
+},
+{
+"epoch": 0.11015056082574406,
+"grad_norm": 7.125,
+"learning_rate": 0.00048176075996380746,
+"loss": 16.3817,
+"step": 74360
+},
+{
+"epoch": 0.11018018711967245,
+"grad_norm": 6.65625,
+"learning_rate": 0.00048175582102879996,
+"loss": 16.3883,
+"step": 74380
+},
+{
+"epoch": 0.11020981341360084,
+"grad_norm": 6.71875,
+"learning_rate": 0.00048175088209379235,
+"loss": 16.4262,
+"step": 74400
+},
+{
+"epoch": 0.11023943970752922,
+"grad_norm": 7.0,
+"learning_rate": 0.0004817459431587848,
+"loss": 16.4212,
+"step": 74420
+},
+{
+"epoch": 0.11026906600145761,
+"grad_norm": 6.90625,
+"learning_rate": 0.0004817410042237772,
+"loss": 16.4718,
+"step": 74440
+},
+{
+"epoch": 0.110298692295386,
+"grad_norm": 7.8125,
+"learning_rate": 0.0004817360652887697,
+"loss": 16.4202,
+"step": 74460
+},
+{
+"epoch": 0.11032831858931438,
+"grad_norm": 7.28125,
+"learning_rate": 0.0004817311263537621,
+"loss": 16.3904,
+"step": 74480
+},
+{
+"epoch": 0.11035794488324277,
+"grad_norm": 6.75,
+"learning_rate": 0.00048172618741875454,
+"loss": 16.4918,
+"step": 74500
+},
+{
+"epoch": 0.11038757117717116,
+"grad_norm": 7.09375,
+"learning_rate": 0.000481721248483747,
+"loss": 16.3993,
+"step": 74520
+},
+{
+"epoch": 0.11041719747109956,
+"grad_norm": 6.5,
+"learning_rate": 0.00048171630954873943,
+"loss": 16.3485,
+"step": 74540
+},
+{
+"epoch": 0.11044682376502794,
+"grad_norm": 6.59375,
+"learning_rate": 0.0004817113706137318,
+"loss": 16.3389,
+"step": 74560
+},
+{
+"epoch": 0.11047645005895633,
+"grad_norm": 6.9375,
+"learning_rate": 0.0004817064316787242,
+"loss": 16.3738,
+"step": 74580
+},
+{
+"epoch": 0.11050607635288472,
+"grad_norm": 7.5,
+"learning_rate": 0.0004817014927437167,
+"loss": 16.3881,
+"step": 74600
+},
+{
+"epoch": 0.1105357026468131,
+"grad_norm": 6.1875,
+"learning_rate": 0.0004816965538087091,
+"loss": 16.3802,
+"step": 74620
+},
+{
+"epoch": 0.11056532894074149,
+"grad_norm": 7.375,
+"learning_rate": 0.00048169161487370156,
+"loss": 16.4216,
+"step": 74640
+},
+{
+"epoch": 0.11059495523466988,
+"grad_norm": 7.1875,
+"learning_rate": 0.00048168667593869396,
+"loss": 16.4166,
+"step": 74660
+},
+{
+"epoch": 0.11062458152859826,
+"grad_norm": 7.21875,
+"learning_rate": 0.00048168173700368646,
+"loss": 16.358,
+"step": 74680
+},
+{
+"epoch": 0.11065420782252665,
+"grad_norm": 7.59375,
+"learning_rate": 0.00048167679806867885,
+"loss": 16.4844,
+"step": 74700
+},
+{
+"epoch": 0.11068383411645503,
+"grad_norm": 7.59375,
+"learning_rate": 0.0004816718591336713,
+"loss": 16.4061,
+"step": 74720
+},
+{
+"epoch": 0.11071346041038342,
+"grad_norm": 7.09375,
+"learning_rate": 0.0004816669201986637,
+"loss": 16.4073,
+"step": 74740
+},
+{
+"epoch": 0.11074308670431181,
+"grad_norm": 6.28125,
+"learning_rate": 0.0004816619812636562,
+"loss": 16.3988,
+"step": 74760
+},
+{
+"epoch": 0.1107727129982402,
+"grad_norm": 5.96875,
+"learning_rate": 0.0004816570423286486,
+"loss": 16.4417,
+"step": 74780
+},
+{
+"epoch": 0.11080233929216858,
+"grad_norm": 7.15625,
+"learning_rate": 0.00048165210339364104,
+"loss": 16.3517,
+"step": 74800
+},
+{
+"epoch": 0.11083196558609697,
+"grad_norm": 6.40625,
+"learning_rate": 0.0004816471644586335,
+"loss": 16.3409,
+"step": 74820
+},
+{
+"epoch": 0.11086159188002535,
+"grad_norm": 6.65625,
+"learning_rate": 0.00048164222552362593,
+"loss": 16.3664,
+"step": 74840
+},
+{
+"epoch": 0.11089121817395375,
+"grad_norm": 6.75,
+"learning_rate": 0.0004816372865886183,
+"loss": 16.4146,
+"step": 74860
+},
+{
+"epoch": 0.11092084446788214,
+"grad_norm": 6.90625,
+"learning_rate": 0.0004816323476536108,
+"loss": 16.3548,
+"step": 74880
+},
+{
+"epoch": 0.11095047076181053,
+"grad_norm": 6.8125,
+"learning_rate": 0.0004816274087186032,
+"loss": 16.4546,
+"step": 74900
+},
+{
+"epoch": 0.11098009705573891,
+"grad_norm": 6.59375,
+"learning_rate": 0.0004816224697835956,
+"loss": 16.3883,
+"step": 74920
+},
+{
+"epoch": 0.1110097233496673,
+"grad_norm": 7.0,
+"learning_rate": 0.00048161753084858806,
+"loss": 16.4069,
+"step": 74940
+},
+{
+"epoch": 0.11103934964359569,
+"grad_norm": 7.15625,
+"learning_rate": 0.00048161259191358046,
+"loss": 16.3556,
+"step": 74960
+},
+{
+"epoch": 0.11106897593752407,
+"grad_norm": 6.65625,
+"learning_rate": 0.00048160765297857296,
+"loss": 16.4227,
+"step": 74980
+},
+{
+"epoch": 0.11109860223145246,
+"grad_norm": 6.90625,
+"learning_rate": 0.00048160271404356535,
+"loss": 16.4454,
+"step": 75000
 }
 ],
 "logging_steps": 20,
@@ -25927,7 +26277,7 @@
 "attributes": {}
 }
 },
-"total_flos": 5.
+"total_flos": 5.514317317520595e+19,
 "train_batch_size": 48,
 "trial_name": null,
 "trial_params": null
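The trainer_state.json diff above extends the training log with one record every 20 optimizer steps (epoch, grad_norm, learning_rate, loss, step) and advances the checkpoint to global_step 75000 at roughly epoch 0.111. A minimal sketch of inspecting that state once the checkpoint directory has been downloaded, assuming the standard Trainer schema in which these records live under the log_history key (that key name is not shown in this diff):

    import json
    from pathlib import Path

    state = json.loads(Path("last-checkpoint/trainer_state.json").read_text())

    print(state["global_step"])    # 75000
    print(state["epoch"])          # 0.11109860223145246
    print(state["logging_steps"])  # 20

    # The most recent record matches the last entry added in this diff (step 75000).
    last = state["log_history"][-1]
    print(last["step"], last["loss"], last["learning_rate"])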