Training in progress, step 11500, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 328277848
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bbe911786fb4b3454d02608c237b36cc20b52333d42fa68272921c094a01a632
|
| 3 |
size 328277848
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 318646859
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:83e156617695722c4ccab8876c70abb964581f51616c0cec63d83f236c2f3130
|
| 3 |
size 318646859
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ffe8fac68804a21cd2d55f992832d3e8fb9ed8d46f7a6aafd6debfef9c29633
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:116f0b85bffdc97adeb264e8dbd65d6acc7d514e82a48ea5ea50bd5091784a48
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -7891,6 +7891,364 @@
|
|
| 7891 |
"eval_samples_per_second": 262.842,
|
| 7892 |
"eval_steps_per_second": 5.52,
|
| 7893 |
"step": 11000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7894 |
}
|
| 7895 |
],
|
| 7896 |
"logging_steps": 10,
|
|
@@ -7910,7 +8268,7 @@
|
|
| 7910 |
"attributes": {}
|
| 7911 |
}
|
| 7912 |
},
|
| 7913 |
-
"total_flos": 3.
|
| 7914 |
"train_batch_size": 48,
|
| 7915 |
"trial_name": null,
|
| 7916 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.9428957594188208,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 11500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 7891 |
"eval_samples_per_second": 262.842,
|
| 7892 |
"eval_steps_per_second": 5.52,
|
| 7893 |
"step": 11000
|
| 7894 |
+
},
|
| 7895 |
+
{
|
| 7896 |
+
"epoch": 1.8601115053218449,
|
| 7897 |
+
"grad_norm": 0.455091655254364,
|
| 7898 |
+
"learning_rate": 5.225389455120976e-06,
|
| 7899 |
+
"loss": 4.29366455078125,
|
| 7900 |
+
"step": 11010
|
| 7901 |
+
},
|
| 7902 |
+
{
|
| 7903 |
+
"epoch": 1.8618009798952526,
|
| 7904 |
+
"grad_norm": 0.46570661664009094,
|
| 7905 |
+
"learning_rate": 5.1008000218881576e-06,
|
| 7906 |
+
"loss": 4.286912536621093,
|
| 7907 |
+
"step": 11020
|
| 7908 |
+
},
|
| 7909 |
+
{
|
| 7910 |
+
"epoch": 1.8634904544686601,
|
| 7911 |
+
"grad_norm": 0.4428755044937134,
|
| 7912 |
+
"learning_rate": 4.977688171116923e-06,
|
| 7913 |
+
"loss": 4.300152206420899,
|
| 7914 |
+
"step": 11030
|
| 7915 |
+
},
|
| 7916 |
+
{
|
| 7917 |
+
"epoch": 1.8651799290420679,
|
| 7918 |
+
"grad_norm": 0.4450303316116333,
|
| 7919 |
+
"learning_rate": 4.856055158217298e-06,
|
| 7920 |
+
"loss": 4.289414978027343,
|
| 7921 |
+
"step": 11040
|
| 7922 |
+
},
|
| 7923 |
+
{
|
| 7924 |
+
"epoch": 1.8668694036154756,
|
| 7925 |
+
"grad_norm": 0.45025452971458435,
|
| 7926 |
+
"learning_rate": 4.735902223519173e-06,
|
| 7927 |
+
"loss": 4.273600006103516,
|
| 7928 |
+
"step": 11050
|
| 7929 |
+
},
|
| 7930 |
+
{
|
| 7931 |
+
"epoch": 1.868558878188883,
|
| 7932 |
+
"grad_norm": 0.4395146071910858,
|
| 7933 |
+
"learning_rate": 4.6172305922595746e-06,
|
| 7934 |
+
"loss": 4.288070297241211,
|
| 7935 |
+
"step": 11060
|
| 7936 |
+
},
|
| 7937 |
+
{
|
| 7938 |
+
"epoch": 1.870248352762291,
|
| 7939 |
+
"grad_norm": 0.45017367601394653,
|
| 7940 |
+
"learning_rate": 4.500041474570265e-06,
|
| 7941 |
+
"loss": 4.298558044433594,
|
| 7942 |
+
"step": 11070
|
| 7943 |
+
},
|
| 7944 |
+
{
|
| 7945 |
+
"epoch": 1.8719378273356986,
|
| 7946 |
+
"grad_norm": 0.45083948969841003,
|
| 7947 |
+
"learning_rate": 4.384336065465349e-06,
|
| 7948 |
+
"loss": 4.278664398193359,
|
| 7949 |
+
"step": 11080
|
| 7950 |
+
},
|
| 7951 |
+
{
|
| 7952 |
+
"epoch": 1.8736273019091063,
|
| 7953 |
+
"grad_norm": 0.4492949843406677,
|
| 7954 |
+
"learning_rate": 4.270115544829017e-06,
|
| 7955 |
+
"loss": 4.304440307617187,
|
| 7956 |
+
"step": 11090
|
| 7957 |
+
},
|
| 7958 |
+
{
|
| 7959 |
+
"epoch": 1.875316776482514,
|
| 7960 |
+
"grad_norm": 0.4543094336986542,
|
| 7961 |
+
"learning_rate": 4.1573810774037044e-06,
|
| 7962 |
+
"loss": 4.284811401367188,
|
| 7963 |
+
"step": 11100
|
| 7964 |
+
},
|
| 7965 |
+
{
|
| 7966 |
+
"epoch": 1.8770062510559216,
|
| 7967 |
+
"grad_norm": 0.44232332706451416,
|
| 7968 |
+
"learning_rate": 4.046133812777985e-06,
|
| 7969 |
+
"loss": 4.275522232055664,
|
| 7970 |
+
"step": 11110
|
| 7971 |
+
},
|
| 7972 |
+
{
|
| 7973 |
+
"epoch": 1.8786957256293293,
|
| 7974 |
+
"grad_norm": 0.4532018005847931,
|
| 7975 |
+
"learning_rate": 3.936374885375049e-06,
|
| 7976 |
+
"loss": 4.325132751464844,
|
| 7977 |
+
"step": 11120
|
| 7978 |
+
},
|
| 7979 |
+
{
|
| 7980 |
+
"epoch": 1.880385200202737,
|
| 7981 |
+
"grad_norm": 0.44870230555534363,
|
| 7982 |
+
"learning_rate": 3.828105414440974e-06,
|
| 7983 |
+
"loss": 4.293384170532226,
|
| 7984 |
+
"step": 11130
|
| 7985 |
+
},
|
| 7986 |
+
{
|
| 7987 |
+
"epoch": 1.8820746747761445,
|
| 7988 |
+
"grad_norm": 0.4408150017261505,
|
| 7989 |
+
"learning_rate": 3.7213265040334394e-06,
|
| 7990 |
+
"loss": 4.296081161499023,
|
| 7991 |
+
"step": 11140
|
| 7992 |
+
},
|
| 7993 |
+
{
|
| 7994 |
+
"epoch": 1.8837641493495523,
|
| 7995 |
+
"grad_norm": 0.44336998462677,
|
| 7996 |
+
"learning_rate": 3.616039243010399e-06,
|
| 7997 |
+
"loss": 4.299095916748047,
|
| 7998 |
+
"step": 11150
|
| 7999 |
+
},
|
| 8000 |
+
{
|
| 8001 |
+
"epoch": 1.88545362392296,
|
| 8002 |
+
"grad_norm": 0.4412024915218353,
|
| 8003 |
+
"learning_rate": 3.5122447050189573e-06,
|
| 8004 |
+
"loss": 4.285486221313477,
|
| 8005 |
+
"step": 11160
|
| 8006 |
+
},
|
| 8007 |
+
{
|
| 8008 |
+
"epoch": 1.8871430984963675,
|
| 8009 |
+
"grad_norm": 0.45283156633377075,
|
| 8010 |
+
"learning_rate": 3.4099439484844947e-06,
|
| 8011 |
+
"loss": 4.294749069213867,
|
| 8012 |
+
"step": 11170
|
| 8013 |
+
},
|
| 8014 |
+
{
|
| 8015 |
+
"epoch": 1.8888325730697753,
|
| 8016 |
+
"grad_norm": 0.4460100829601288,
|
| 8017 |
+
"learning_rate": 3.3091380165998103e-06,
|
| 8018 |
+
"loss": 4.310376739501953,
|
| 8019 |
+
"step": 11180
|
| 8020 |
+
},
|
| 8021 |
+
{
|
| 8022 |
+
"epoch": 1.890522047643183,
|
| 8023 |
+
"grad_norm": 0.44468414783477783,
|
| 8024 |
+
"learning_rate": 3.2098279373145463e-06,
|
| 8025 |
+
"loss": 4.327771377563477,
|
| 8026 |
+
"step": 11190
|
| 8027 |
+
},
|
| 8028 |
+
{
|
| 8029 |
+
"epoch": 1.8922115222165905,
|
| 8030 |
+
"grad_norm": 0.4378024637699127,
|
| 8031 |
+
"learning_rate": 3.1120147233246463e-06,
|
| 8032 |
+
"loss": 4.273694610595703,
|
| 8033 |
+
"step": 11200
|
| 8034 |
+
},
|
| 8035 |
+
{
|
| 8036 |
+
"epoch": 1.8939009967899985,
|
| 8037 |
+
"grad_norm": 0.44141000509262085,
|
| 8038 |
+
"learning_rate": 3.0156993720619804e-06,
|
| 8039 |
+
"loss": 4.287034606933593,
|
| 8040 |
+
"step": 11210
|
| 8041 |
+
},
|
| 8042 |
+
{
|
| 8043 |
+
"epoch": 1.895590471363406,
|
| 8044 |
+
"grad_norm": 0.4455374479293823,
|
| 8045 |
+
"learning_rate": 2.9208828656843876e-06,
|
| 8046 |
+
"loss": 4.320920181274414,
|
| 8047 |
+
"step": 11220
|
| 8048 |
+
},
|
| 8049 |
+
{
|
| 8050 |
+
"epoch": 1.8972799459368137,
|
| 8051 |
+
"grad_norm": 0.45368343591690063,
|
| 8052 |
+
"learning_rate": 2.827566171065415e-06,
|
| 8053 |
+
"loss": 4.285198974609375,
|
| 8054 |
+
"step": 11230
|
| 8055 |
+
},
|
| 8056 |
+
{
|
| 8057 |
+
"epoch": 1.8989694205102214,
|
| 8058 |
+
"grad_norm": 0.44222062826156616,
|
| 8059 |
+
"learning_rate": 2.7357502397845454e-06,
|
| 8060 |
+
"loss": 4.296764755249024,
|
| 8061 |
+
"step": 11240
|
| 8062 |
+
},
|
| 8063 |
+
{
|
| 8064 |
+
"epoch": 1.900658895083629,
|
| 8065 |
+
"grad_norm": 0.45191657543182373,
|
| 8066 |
+
"learning_rate": 2.645436008117602e-06,
|
| 8067 |
+
"loss": 4.27384033203125,
|
| 8068 |
+
"step": 11250
|
| 8069 |
+
},
|
| 8070 |
+
{
|
| 8071 |
+
"epoch": 1.9023483696570367,
|
| 8072 |
+
"grad_norm": 0.4548667371273041,
|
| 8073 |
+
"learning_rate": 2.5566243970270073e-06,
|
| 8074 |
+
"loss": 4.297956085205078,
|
| 8075 |
+
"step": 11260
|
| 8076 |
+
},
|
| 8077 |
+
{
|
| 8078 |
+
"epoch": 1.9040378442304444,
|
| 8079 |
+
"grad_norm": 0.4443969130516052,
|
| 8080 |
+
"learning_rate": 2.469316312152575e-06,
|
| 8081 |
+
"loss": 4.291641616821289,
|
| 8082 |
+
"step": 11270
|
| 8083 |
+
},
|
| 8084 |
+
{
|
| 8085 |
+
"epoch": 1.905727318803852,
|
| 8086 |
+
"grad_norm": 0.4367770850658417,
|
| 8087 |
+
"learning_rate": 2.3835126438021156e-06,
|
| 8088 |
+
"loss": 4.266088485717773,
|
| 8089 |
+
"step": 11280
|
| 8090 |
+
},
|
| 8091 |
+
{
|
| 8092 |
+
"epoch": 1.9074167933772597,
|
| 8093 |
+
"grad_norm": 0.4365804195404053,
|
| 8094 |
+
"learning_rate": 2.299214266942495e-06,
|
| 8095 |
+
"loss": 4.263021850585938,
|
| 8096 |
+
"step": 11290
|
| 8097 |
+
},
|
| 8098 |
+
{
|
| 8099 |
+
"epoch": 1.9091062679506674,
|
| 8100 |
+
"grad_norm": 0.4369988441467285,
|
| 8101 |
+
"learning_rate": 2.2164220411906407e-06,
|
| 8102 |
+
"loss": 4.288222122192383,
|
| 8103 |
+
"step": 11300
|
| 8104 |
+
},
|
| 8105 |
+
{
|
| 8106 |
+
"epoch": 1.910795742524075,
|
| 8107 |
+
"grad_norm": 0.44547080993652344,
|
| 8108 |
+
"learning_rate": 2.1351368108047495e-06,
|
| 8109 |
+
"loss": 4.26991081237793,
|
| 8110 |
+
"step": 11310
|
| 8111 |
+
},
|
| 8112 |
+
{
|
| 8113 |
+
"epoch": 1.9124852170974826,
|
| 8114 |
+
"grad_norm": 0.45165297389030457,
|
| 8115 |
+
"learning_rate": 2.0553594046757438e-06,
|
| 8116 |
+
"loss": 4.2671764373779295,
|
| 8117 |
+
"step": 11320
|
| 8118 |
+
},
|
| 8119 |
+
{
|
| 8120 |
+
"epoch": 1.9141746916708904,
|
| 8121 |
+
"grad_norm": 0.4523044526576996,
|
| 8122 |
+
"learning_rate": 1.9770906363187787e-06,
|
| 8123 |
+
"loss": 4.28791618347168,
|
| 8124 |
+
"step": 11330
|
| 8125 |
+
},
|
| 8126 |
+
{
|
| 8127 |
+
"epoch": 1.915864166244298,
|
| 8128 |
+
"grad_norm": 0.43898409605026245,
|
| 8129 |
+
"learning_rate": 1.9003313038649826e-06,
|
| 8130 |
+
"loss": 4.301726150512695,
|
| 8131 |
+
"step": 11340
|
| 8132 |
+
},
|
| 8133 |
+
{
|
| 8134 |
+
"epoch": 1.9175536408177059,
|
| 8135 |
+
"grad_norm": 0.44454851746559143,
|
| 8136 |
+
"learning_rate": 1.825082190053262e-06,
|
| 8137 |
+
"loss": 4.280124664306641,
|
| 8138 |
+
"step": 11350
|
| 8139 |
+
},
|
| 8140 |
+
{
|
| 8141 |
+
"epoch": 1.9192431153911134,
|
| 8142 |
+
"grad_norm": 0.44016656279563904,
|
| 8143 |
+
"learning_rate": 1.7513440622223762e-06,
|
| 8144 |
+
"loss": 4.312954330444336,
|
| 8145 |
+
"step": 11360
|
| 8146 |
+
},
|
| 8147 |
+
{
|
| 8148 |
+
"epoch": 1.9209325899645209,
|
| 8149 |
+
"grad_norm": 0.439481645822525,
|
| 8150 |
+
"learning_rate": 1.6791176723030763e-06,
|
| 8151 |
+
"loss": 4.291484069824219,
|
| 8152 |
+
"step": 11370
|
| 8153 |
+
},
|
| 8154 |
+
{
|
| 8155 |
+
"epoch": 1.9226220645379288,
|
| 8156 |
+
"grad_norm": 0.44403141736984253,
|
| 8157 |
+
"learning_rate": 1.608403756810428e-06,
|
| 8158 |
+
"loss": 4.297753524780274,
|
| 8159 |
+
"step": 11380
|
| 8160 |
+
},
|
| 8161 |
+
{
|
| 8162 |
+
"epoch": 1.9243115391113363,
|
| 8163 |
+
"grad_norm": 0.4380677342414856,
|
| 8164 |
+
"learning_rate": 1.5392030368363839e-06,
|
| 8165 |
+
"loss": 4.311534881591797,
|
| 8166 |
+
"step": 11390
|
| 8167 |
+
},
|
| 8168 |
+
{
|
| 8169 |
+
"epoch": 1.926001013684744,
|
| 8170 |
+
"grad_norm": 0.4598468542098999,
|
| 8171 |
+
"learning_rate": 1.4715162180422902e-06,
|
| 8172 |
+
"loss": 4.272250747680664,
|
| 8173 |
+
"step": 11400
|
| 8174 |
+
},
|
| 8175 |
+
{
|
| 8176 |
+
"epoch": 1.9276904882581518,
|
| 8177 |
+
"grad_norm": 0.44107797741889954,
|
| 8178 |
+
"learning_rate": 1.405343990651825e-06,
|
| 8179 |
+
"loss": 4.283835601806641,
|
| 8180 |
+
"step": 11410
|
| 8181 |
+
},
|
| 8182 |
+
{
|
| 8183 |
+
"epoch": 1.9293799628315593,
|
| 8184 |
+
"grad_norm": 0.43588972091674805,
|
| 8185 |
+
"learning_rate": 1.3406870294438876e-06,
|
| 8186 |
+
"loss": 4.273925399780273,
|
| 8187 |
+
"step": 11420
|
| 8188 |
+
},
|
| 8189 |
+
{
|
| 8190 |
+
"epoch": 1.931069437404967,
|
| 8191 |
+
"grad_norm": 0.4393414258956909,
|
| 8192 |
+
"learning_rate": 1.2775459937457544e-06,
|
| 8193 |
+
"loss": 4.295301055908203,
|
| 8194 |
+
"step": 11430
|
| 8195 |
+
},
|
| 8196 |
+
{
|
| 8197 |
+
"epoch": 1.9327589119783748,
|
| 8198 |
+
"grad_norm": 0.44228672981262207,
|
| 8199 |
+
"learning_rate": 1.2159215274262834e-06,
|
| 8200 |
+
"loss": 4.273171997070312,
|
| 8201 |
+
"step": 11440
|
| 8202 |
+
},
|
| 8203 |
+
{
|
| 8204 |
+
"epoch": 1.9344483865517823,
|
| 8205 |
+
"grad_norm": 0.4422619640827179,
|
| 8206 |
+
"learning_rate": 1.155814258889437e-06,
|
| 8207 |
+
"loss": 4.285517883300781,
|
| 8208 |
+
"step": 11450
|
| 8209 |
+
},
|
| 8210 |
+
{
|
| 8211 |
+
"epoch": 1.93613786112519,
|
| 8212 |
+
"grad_norm": 0.4427924156188965,
|
| 8213 |
+
"learning_rate": 1.0972248010678365e-06,
|
| 8214 |
+
"loss": 4.312974548339843,
|
| 8215 |
+
"step": 11460
|
| 8216 |
+
},
|
| 8217 |
+
{
|
| 8218 |
+
"epoch": 1.9378273356985978,
|
| 8219 |
+
"grad_norm": 0.44030192494392395,
|
| 8220 |
+
"learning_rate": 1.040153751416517e-06,
|
| 8221 |
+
"loss": 4.302379989624024,
|
| 8222 |
+
"step": 11470
|
| 8223 |
+
},
|
| 8224 |
+
{
|
| 8225 |
+
"epoch": 1.9395168102720053,
|
| 8226 |
+
"grad_norm": 0.44400596618652344,
|
| 8227 |
+
"learning_rate": 9.846016919068167e-07,
|
| 8228 |
+
"loss": 4.280198287963867,
|
| 8229 |
+
"step": 11480
|
| 8230 |
+
},
|
| 8231 |
+
{
|
| 8232 |
+
"epoch": 1.941206284845413,
|
| 8233 |
+
"grad_norm": 0.4478650689125061,
|
| 8234 |
+
"learning_rate": 9.305691890204469e-07,
|
| 8235 |
+
"loss": 4.281633758544922,
|
| 8236 |
+
"step": 11490
|
| 8237 |
+
},
|
| 8238 |
+
{
|
| 8239 |
+
"epoch": 1.9428957594188208,
|
| 8240 |
+
"grad_norm": 0.4458984434604645,
|
| 8241 |
+
"learning_rate": 8.780567937437644e-07,
|
| 8242 |
+
"loss": 4.278944396972657,
|
| 8243 |
+
"step": 11500
|
| 8244 |
+
},
|
| 8245 |
+
{
|
| 8246 |
+
"epoch": 1.9428957594188208,
|
| 8247 |
+
"eval_loss": 4.265942573547363,
|
| 8248 |
+
"eval_runtime": 4.0629,
|
| 8249 |
+
"eval_samples_per_second": 246.128,
|
| 8250 |
+
"eval_steps_per_second": 5.169,
|
| 8251 |
+
"step": 11500
|
| 8252 |
}
|
| 8253 |
],
|
| 8254 |
"logging_steps": 10,
|
|
|
|
| 8268 |
"attributes": {}
|
| 8269 |
}
|
| 8270 |
},
|
| 8271 |
+
"total_flos": 3.846232536325816e+17,
|
| 8272 |
"train_batch_size": 48,
|
| 8273 |
"trial_name": null,
|
| 8274 |
"trial_params": null
|