Training in progress, step 58000, checkpoint
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +353 -3
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 304481530
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55010ca37211cc6b640c88e9f40807107bec277ebcc5b0b118f1cea15eed44f5
|
| 3 |
size 304481530
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 402029570
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc364080893bb423d47b8bfaac6a84d534e79aa0580cf54e20a609c7ac276c5b
|
| 3 |
size 402029570
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c37c5923f3d68f847ed300ddb34aea7ac5e2328c7df69f2be7f755bc9e45036
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b439560e2350d72b3dd331a1a8b64962c6b47e1a1078857e970f6226f8e52122
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d72e4bdd3428ede798be981d61f831c294e4c1f306f292cd3880bbf3dd42566d
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:13a06ce0cb98db3e26ada8fd779ab287dc006dddd3604ca6c762fd20a85c4365
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:703b87170b0696f3b2a83c775117cb6a49f63bf8b6fd7a85d19b5f6decf028d6
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -19958,6 +19958,356 @@
|
|
| 19958 |
"learning_rate": 0.00048604775555037517,
|
| 19959 |
"loss": 16.9712,
|
| 19960 |
"step": 57000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19961 |
}
|
| 19962 |
],
|
| 19963 |
"logging_steps": 20,
|
|
@@ -19977,7 +20327,7 @@
|
|
| 19977 |
"attributes": {}
|
| 19978 |
}
|
| 19979 |
},
|
| 19980 |
-
"total_flos": 4.
|
| 19981 |
"train_batch_size": 48,
|
| 19982 |
"trial_name": null,
|
| 19983 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.08591625239232323,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 58000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 19958 |
"learning_rate": 0.00048604775555037517,
|
| 19959 |
"loss": 16.9712,
|
| 19960 |
"step": 57000
|
| 19961 |
+
},
|
| 19962 |
+
{
|
| 19963 |
+
"epoch": 0.08446456398983225,
|
| 19964 |
+
"grad_norm": 7.34375,
|
| 19965 |
+
"learning_rate": 0.00048604281661536756,
|
| 19966 |
+
"loss": 17.056,
|
| 19967 |
+
"step": 57020
|
| 19968 |
+
},
|
| 19969 |
+
{
|
| 19970 |
+
"epoch": 0.08449419028376064,
|
| 19971 |
+
"grad_norm": 7.4375,
|
| 19972 |
+
"learning_rate": 0.00048603787768036007,
|
| 19973 |
+
"loss": 17.0091,
|
| 19974 |
+
"step": 57040
|
| 19975 |
+
},
|
| 19976 |
+
{
|
| 19977 |
+
"epoch": 0.08452381657768904,
|
| 19978 |
+
"grad_norm": 6.78125,
|
| 19979 |
+
"learning_rate": 0.00048603293874535246,
|
| 19980 |
+
"loss": 16.9849,
|
| 19981 |
+
"step": 57060
|
| 19982 |
+
},
|
| 19983 |
+
{
|
| 19984 |
+
"epoch": 0.08455344287161742,
|
| 19985 |
+
"grad_norm": 7.40625,
|
| 19986 |
+
"learning_rate": 0.0004860279998103449,
|
| 19987 |
+
"loss": 17.0407,
|
| 19988 |
+
"step": 57080
|
| 19989 |
+
},
|
| 19990 |
+
{
|
| 19991 |
+
"epoch": 0.08458306916554581,
|
| 19992 |
+
"grad_norm": 7.21875,
|
| 19993 |
+
"learning_rate": 0.0004860230608753373,
|
| 19994 |
+
"loss": 17.0114,
|
| 19995 |
+
"step": 57100
|
| 19996 |
+
},
|
| 19997 |
+
{
|
| 19998 |
+
"epoch": 0.0846126954594742,
|
| 19999 |
+
"grad_norm": 7.0,
|
| 20000 |
+
"learning_rate": 0.0004860181219403298,
|
| 20001 |
+
"loss": 17.0502,
|
| 20002 |
+
"step": 57120
|
| 20003 |
+
},
|
| 20004 |
+
{
|
| 20005 |
+
"epoch": 0.08464232175340258,
|
| 20006 |
+
"grad_norm": 7.4375,
|
| 20007 |
+
"learning_rate": 0.0004860131830053222,
|
| 20008 |
+
"loss": 17.044,
|
| 20009 |
+
"step": 57140
|
| 20010 |
+
},
|
| 20011 |
+
{
|
| 20012 |
+
"epoch": 0.08467194804733097,
|
| 20013 |
+
"grad_norm": 6.5625,
|
| 20014 |
+
"learning_rate": 0.00048600824407031464,
|
| 20015 |
+
"loss": 17.0582,
|
| 20016 |
+
"step": 57160
|
| 20017 |
+
},
|
| 20018 |
+
{
|
| 20019 |
+
"epoch": 0.08470157434125936,
|
| 20020 |
+
"grad_norm": 7.59375,
|
| 20021 |
+
"learning_rate": 0.00048600330513530704,
|
| 20022 |
+
"loss": 17.0316,
|
| 20023 |
+
"step": 57180
|
| 20024 |
+
},
|
| 20025 |
+
{
|
| 20026 |
+
"epoch": 0.08473120063518774,
|
| 20027 |
+
"grad_norm": 7.25,
|
| 20028 |
+
"learning_rate": 0.00048599836620029954,
|
| 20029 |
+
"loss": 17.0576,
|
| 20030 |
+
"step": 57200
|
| 20031 |
+
},
|
| 20032 |
+
{
|
| 20033 |
+
"epoch": 0.08476082692911613,
|
| 20034 |
+
"grad_norm": 7.6875,
|
| 20035 |
+
"learning_rate": 0.00048599342726529193,
|
| 20036 |
+
"loss": 17.0589,
|
| 20037 |
+
"step": 57220
|
| 20038 |
+
},
|
| 20039 |
+
{
|
| 20040 |
+
"epoch": 0.08479045322304452,
|
| 20041 |
+
"grad_norm": 6.46875,
|
| 20042 |
+
"learning_rate": 0.0004859884883302844,
|
| 20043 |
+
"loss": 16.9831,
|
| 20044 |
+
"step": 57240
|
| 20045 |
+
},
|
| 20046 |
+
{
|
| 20047 |
+
"epoch": 0.0848200795169729,
|
| 20048 |
+
"grad_norm": 6.125,
|
| 20049 |
+
"learning_rate": 0.0004859835493952768,
|
| 20050 |
+
"loss": 16.9896,
|
| 20051 |
+
"step": 57260
|
| 20052 |
+
},
|
| 20053 |
+
{
|
| 20054 |
+
"epoch": 0.08484970581090129,
|
| 20055 |
+
"grad_norm": 6.6875,
|
| 20056 |
+
"learning_rate": 0.0004859786104602693,
|
| 20057 |
+
"loss": 16.9824,
|
| 20058 |
+
"step": 57280
|
| 20059 |
+
},
|
| 20060 |
+
{
|
| 20061 |
+
"epoch": 0.08487933210482967,
|
| 20062 |
+
"grad_norm": 8.25,
|
| 20063 |
+
"learning_rate": 0.00048597367152526167,
|
| 20064 |
+
"loss": 16.9623,
|
| 20065 |
+
"step": 57300
|
| 20066 |
+
},
|
| 20067 |
+
{
|
| 20068 |
+
"epoch": 0.08490895839875806,
|
| 20069 |
+
"grad_norm": 7.03125,
|
| 20070 |
+
"learning_rate": 0.0004859687325902541,
|
| 20071 |
+
"loss": 16.9834,
|
| 20072 |
+
"step": 57320
|
| 20073 |
+
},
|
| 20074 |
+
{
|
| 20075 |
+
"epoch": 0.08493858469268645,
|
| 20076 |
+
"grad_norm": 6.53125,
|
| 20077 |
+
"learning_rate": 0.00048596379365524657,
|
| 20078 |
+
"loss": 16.9729,
|
| 20079 |
+
"step": 57340
|
| 20080 |
+
},
|
| 20081 |
+
{
|
| 20082 |
+
"epoch": 0.08496821098661483,
|
| 20083 |
+
"grad_norm": 6.21875,
|
| 20084 |
+
"learning_rate": 0.00048595885472023896,
|
| 20085 |
+
"loss": 17.0528,
|
| 20086 |
+
"step": 57360
|
| 20087 |
+
},
|
| 20088 |
+
{
|
| 20089 |
+
"epoch": 0.08499783728054323,
|
| 20090 |
+
"grad_norm": 7.15625,
|
| 20091 |
+
"learning_rate": 0.0004859539157852314,
|
| 20092 |
+
"loss": 17.0257,
|
| 20093 |
+
"step": 57380
|
| 20094 |
+
},
|
| 20095 |
+
{
|
| 20096 |
+
"epoch": 0.08502746357447162,
|
| 20097 |
+
"grad_norm": 7.09375,
|
| 20098 |
+
"learning_rate": 0.0004859489768502238,
|
| 20099 |
+
"loss": 16.8887,
|
| 20100 |
+
"step": 57400
|
| 20101 |
+
},
|
| 20102 |
+
{
|
| 20103 |
+
"epoch": 0.08505708986840001,
|
| 20104 |
+
"grad_norm": 7.4375,
|
| 20105 |
+
"learning_rate": 0.0004859440379152163,
|
| 20106 |
+
"loss": 17.0345,
|
| 20107 |
+
"step": 57420
|
| 20108 |
+
},
|
| 20109 |
+
{
|
| 20110 |
+
"epoch": 0.0850867161623284,
|
| 20111 |
+
"grad_norm": 6.84375,
|
| 20112 |
+
"learning_rate": 0.0004859390989802087,
|
| 20113 |
+
"loss": 16.9734,
|
| 20114 |
+
"step": 57440
|
| 20115 |
+
},
|
| 20116 |
+
{
|
| 20117 |
+
"epoch": 0.08511634245625678,
|
| 20118 |
+
"grad_norm": 6.875,
|
| 20119 |
+
"learning_rate": 0.00048593416004520114,
|
| 20120 |
+
"loss": 17.0113,
|
| 20121 |
+
"step": 57460
|
| 20122 |
+
},
|
| 20123 |
+
{
|
| 20124 |
+
"epoch": 0.08514596875018517,
|
| 20125 |
+
"grad_norm": 7.21875,
|
| 20126 |
+
"learning_rate": 0.00048592922111019354,
|
| 20127 |
+
"loss": 16.9691,
|
| 20128 |
+
"step": 57480
|
| 20129 |
+
},
|
| 20130 |
+
{
|
| 20131 |
+
"epoch": 0.08517559504411355,
|
| 20132 |
+
"grad_norm": 7.0625,
|
| 20133 |
+
"learning_rate": 0.00048592428217518604,
|
| 20134 |
+
"loss": 17.0288,
|
| 20135 |
+
"step": 57500
|
| 20136 |
+
},
|
| 20137 |
+
{
|
| 20138 |
+
"epoch": 0.08520522133804194,
|
| 20139 |
+
"grad_norm": 6.78125,
|
| 20140 |
+
"learning_rate": 0.00048591934324017843,
|
| 20141 |
+
"loss": 17.0317,
|
| 20142 |
+
"step": 57520
|
| 20143 |
+
},
|
| 20144 |
+
{
|
| 20145 |
+
"epoch": 0.08523484763197033,
|
| 20146 |
+
"grad_norm": 7.0625,
|
| 20147 |
+
"learning_rate": 0.0004859144043051709,
|
| 20148 |
+
"loss": 17.0653,
|
| 20149 |
+
"step": 57540
|
| 20150 |
+
},
|
| 20151 |
+
{
|
| 20152 |
+
"epoch": 0.08526447392589871,
|
| 20153 |
+
"grad_norm": 6.375,
|
| 20154 |
+
"learning_rate": 0.0004859094653701633,
|
| 20155 |
+
"loss": 17.0473,
|
| 20156 |
+
"step": 57560
|
| 20157 |
+
},
|
| 20158 |
+
{
|
| 20159 |
+
"epoch": 0.0852941002198271,
|
| 20160 |
+
"grad_norm": 7.375,
|
| 20161 |
+
"learning_rate": 0.0004859045264351558,
|
| 20162 |
+
"loss": 16.9975,
|
| 20163 |
+
"step": 57580
|
| 20164 |
+
},
|
| 20165 |
+
{
|
| 20166 |
+
"epoch": 0.08532372651375549,
|
| 20167 |
+
"grad_norm": 7.15625,
|
| 20168 |
+
"learning_rate": 0.00048589958750014817,
|
| 20169 |
+
"loss": 16.9854,
|
| 20170 |
+
"step": 57600
|
| 20171 |
+
},
|
| 20172 |
+
{
|
| 20173 |
+
"epoch": 0.08535335280768387,
|
| 20174 |
+
"grad_norm": 6.4375,
|
| 20175 |
+
"learning_rate": 0.0004858946485651406,
|
| 20176 |
+
"loss": 16.9975,
|
| 20177 |
+
"step": 57620
|
| 20178 |
+
},
|
| 20179 |
+
{
|
| 20180 |
+
"epoch": 0.08538297910161226,
|
| 20181 |
+
"grad_norm": 6.625,
|
| 20182 |
+
"learning_rate": 0.00048588970963013307,
|
| 20183 |
+
"loss": 17.0178,
|
| 20184 |
+
"step": 57640
|
| 20185 |
+
},
|
| 20186 |
+
{
|
| 20187 |
+
"epoch": 0.08541260539554064,
|
| 20188 |
+
"grad_norm": 7.3125,
|
| 20189 |
+
"learning_rate": 0.0004858847706951255,
|
| 20190 |
+
"loss": 16.9128,
|
| 20191 |
+
"step": 57660
|
| 20192 |
+
},
|
| 20193 |
+
{
|
| 20194 |
+
"epoch": 0.08544223168946903,
|
| 20195 |
+
"grad_norm": 7.5,
|
| 20196 |
+
"learning_rate": 0.0004858798317601179,
|
| 20197 |
+
"loss": 17.0329,
|
| 20198 |
+
"step": 57680
|
| 20199 |
+
},
|
| 20200 |
+
{
|
| 20201 |
+
"epoch": 0.08547185798339743,
|
| 20202 |
+
"grad_norm": 7.15625,
|
| 20203 |
+
"learning_rate": 0.0004858748928251103,
|
| 20204 |
+
"loss": 17.0416,
|
| 20205 |
+
"step": 57700
|
| 20206 |
+
},
|
| 20207 |
+
{
|
| 20208 |
+
"epoch": 0.08550148427732582,
|
| 20209 |
+
"grad_norm": 7.34375,
|
| 20210 |
+
"learning_rate": 0.0004858699538901028,
|
| 20211 |
+
"loss": 17.0423,
|
| 20212 |
+
"step": 57720
|
| 20213 |
+
},
|
| 20214 |
+
{
|
| 20215 |
+
"epoch": 0.0855311105712542,
|
| 20216 |
+
"grad_norm": 6.21875,
|
| 20217 |
+
"learning_rate": 0.0004858650149550952,
|
| 20218 |
+
"loss": 16.9665,
|
| 20219 |
+
"step": 57740
|
| 20220 |
+
},
|
| 20221 |
+
{
|
| 20222 |
+
"epoch": 0.08556073686518259,
|
| 20223 |
+
"grad_norm": 6.9375,
|
| 20224 |
+
"learning_rate": 0.00048586007602008765,
|
| 20225 |
+
"loss": 16.9616,
|
| 20226 |
+
"step": 57760
|
| 20227 |
+
},
|
| 20228 |
+
{
|
| 20229 |
+
"epoch": 0.08559036315911098,
|
| 20230 |
+
"grad_norm": 7.3125,
|
| 20231 |
+
"learning_rate": 0.00048585513708508004,
|
| 20232 |
+
"loss": 16.9745,
|
| 20233 |
+
"step": 57780
|
| 20234 |
+
},
|
| 20235 |
+
{
|
| 20236 |
+
"epoch": 0.08561998945303936,
|
| 20237 |
+
"grad_norm": 6.90625,
|
| 20238 |
+
"learning_rate": 0.00048585019815007254,
|
| 20239 |
+
"loss": 16.9302,
|
| 20240 |
+
"step": 57800
|
| 20241 |
+
},
|
| 20242 |
+
{
|
| 20243 |
+
"epoch": 0.08564961574696775,
|
| 20244 |
+
"grad_norm": 7.125,
|
| 20245 |
+
"learning_rate": 0.00048584525921506493,
|
| 20246 |
+
"loss": 16.9713,
|
| 20247 |
+
"step": 57820
|
| 20248 |
+
},
|
| 20249 |
+
{
|
| 20250 |
+
"epoch": 0.08567924204089614,
|
| 20251 |
+
"grad_norm": 6.59375,
|
| 20252 |
+
"learning_rate": 0.0004858403202800574,
|
| 20253 |
+
"loss": 16.9939,
|
| 20254 |
+
"step": 57840
|
| 20255 |
+
},
|
| 20256 |
+
{
|
| 20257 |
+
"epoch": 0.08570886833482452,
|
| 20258 |
+
"grad_norm": 6.78125,
|
| 20259 |
+
"learning_rate": 0.0004858353813450498,
|
| 20260 |
+
"loss": 16.9307,
|
| 20261 |
+
"step": 57860
|
| 20262 |
+
},
|
| 20263 |
+
{
|
| 20264 |
+
"epoch": 0.08573849462875291,
|
| 20265 |
+
"grad_norm": 7.125,
|
| 20266 |
+
"learning_rate": 0.0004858304424100423,
|
| 20267 |
+
"loss": 17.044,
|
| 20268 |
+
"step": 57880
|
| 20269 |
+
},
|
| 20270 |
+
{
|
| 20271 |
+
"epoch": 0.0857681209226813,
|
| 20272 |
+
"grad_norm": 7.25,
|
| 20273 |
+
"learning_rate": 0.00048582550347503467,
|
| 20274 |
+
"loss": 17.0308,
|
| 20275 |
+
"step": 57900
|
| 20276 |
+
},
|
| 20277 |
+
{
|
| 20278 |
+
"epoch": 0.08579774721660968,
|
| 20279 |
+
"grad_norm": 6.96875,
|
| 20280 |
+
"learning_rate": 0.0004858205645400271,
|
| 20281 |
+
"loss": 17.0287,
|
| 20282 |
+
"step": 57920
|
| 20283 |
+
},
|
| 20284 |
+
{
|
| 20285 |
+
"epoch": 0.08582737351053807,
|
| 20286 |
+
"grad_norm": 6.9375,
|
| 20287 |
+
"learning_rate": 0.00048581562560501957,
|
| 20288 |
+
"loss": 17.018,
|
| 20289 |
+
"step": 57940
|
| 20290 |
+
},
|
| 20291 |
+
{
|
| 20292 |
+
"epoch": 0.08585699980446646,
|
| 20293 |
+
"grad_norm": 7.28125,
|
| 20294 |
+
"learning_rate": 0.000485810686670012,
|
| 20295 |
+
"loss": 16.9714,
|
| 20296 |
+
"step": 57960
|
| 20297 |
+
},
|
| 20298 |
+
{
|
| 20299 |
+
"epoch": 0.08588662609839484,
|
| 20300 |
+
"grad_norm": 6.875,
|
| 20301 |
+
"learning_rate": 0.0004858057477350044,
|
| 20302 |
+
"loss": 16.9836,
|
| 20303 |
+
"step": 57980
|
| 20304 |
+
},
|
| 20305 |
+
{
|
| 20306 |
+
"epoch": 0.08591625239232323,
|
| 20307 |
+
"grad_norm": 8.125,
|
| 20308 |
+
"learning_rate": 0.00048580080879999686,
|
| 20309 |
+
"loss": 16.9677,
|
| 20310 |
+
"step": 58000
|
| 20311 |
}
|
| 20312 |
],
|
| 20313 |
"logging_steps": 20,
|
|
|
|
| 20327 |
"attributes": {}
|
| 20328 |
}
|
| 20329 |
},
|
| 20330 |
+
"total_flos": 4.264201071083966e+19,
|
| 20331 |
"train_batch_size": 48,
|
| 20332 |
"trial_name": null,
|
| 20333 |
"trial_params": null
|