Training in progress, step 52728, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ba6684257e283f99e4654fa6c675e02d7147bd1823d28457e21845ce8ee1211c
 size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d5105fef59c6f148a4647a6d396eee790d7ef1df177c316936d04fa3cb36d6d8
 size 168150738
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0349d2fe2a8c07a922fcdd7b869bf30ef13bdfa2a6c39cdb10a1e22ecb0cae68
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:195e1c218824e864593415afe68e9e8127e01aea101782826593ecf3daaaf683
 size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
+  "epoch": 4.0,
   "eval_steps": 5853,
-  "global_step":
+  "global_step": 52728,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -368926,6 +368926,363 @@
       "eval_test_samples_per_second": 12.628,
       "eval_test_steps_per_second": 0.789,
       "step": 52677
+    },
+    {
+      "epoch": 3.9962069488696708,
+      "grad_norm": 1.1055113077163696,
+      "learning_rate": 3.0008135267917343e-06,
+      "loss": 2.2484,
+      "step": 52678
+    },
+    {
+      "epoch": 3.9962828098922776,
+      "grad_norm": 1.020114541053772,
+      "learning_rate": 3.000781311159022e-06,
+      "loss": 2.2629,
+      "step": 52679
+    },
+    {
+      "epoch": 3.996358670914884,
+      "grad_norm": 0.8700923323631287,
+      "learning_rate": 3.0007497463449255e-06,
+      "loss": 2.1114,
+      "step": 52680
+    },
+    {
+      "epoch": 3.9964345319374903,
+      "grad_norm": 1.25503671169281,
+      "learning_rate": 3.0007188323495756e-06,
+      "loss": 2.1506,
+      "step": 52681
+    },
+    {
+      "epoch": 3.996510392960097,
+      "grad_norm": 1.2695883512496948,
+      "learning_rate": 3.0006885691730872e-06,
+      "loss": 2.226,
+      "step": 52682
+    },
+    {
+      "epoch": 3.996586253982704,
+      "grad_norm": 1.0582082271575928,
+      "learning_rate": 3.0006589568156268e-06,
+      "loss": 2.2531,
+      "step": 52683
+    },
+    {
+      "epoch": 3.9966621150053103,
+      "grad_norm": 0.9807246327400208,
+      "learning_rate": 3.000629995277292e-06,
+      "loss": 2.1907,
+      "step": 52684
+    },
+    {
+      "epoch": 3.9967379760279167,
+      "grad_norm": 0.9967033863067627,
+      "learning_rate": 3.0006016845582158e-06,
+      "loss": 2.0615,
+      "step": 52685
+    },
+    {
+      "epoch": 3.9968138370505235,
+      "grad_norm": 1.1809074878692627,
+      "learning_rate": 3.0005740246585456e-06,
+      "loss": 2.1231,
+      "step": 52686
+    },
+    {
+      "epoch": 3.99688969807313,
+      "grad_norm": 0.9519857168197632,
+      "learning_rate": 3.0005470155783807e-06,
+      "loss": 2.2006,
+      "step": 52687
+    },
+    {
+      "epoch": 3.9969655590957367,
+      "grad_norm": 1.0005104541778564,
+      "learning_rate": 3.000520657317853e-06,
+      "loss": 2.2421,
+      "step": 52688
+    },
+    {
+      "epoch": 3.997041420118343,
+      "grad_norm": 0.9390363097190857,
+      "learning_rate": 3.0004949498770612e-06,
+      "loss": 2.2009,
+      "step": 52689
+    },
+    {
+      "epoch": 3.99711728114095,
+      "grad_norm": 1.043871521949768,
+      "learning_rate": 3.0004698932561214e-06,
+      "loss": 2.0931,
+      "step": 52690
+    },
+    {
+      "epoch": 3.9971931421635563,
+      "grad_norm": 0.9061468839645386,
+      "learning_rate": 3.0004454874551646e-06,
+      "loss": 2.0976,
+      "step": 52691
+    },
+    {
+      "epoch": 3.997269003186163,
+      "grad_norm": 1.4057188034057617,
+      "learning_rate": 3.000421732474274e-06,
+      "loss": 2.3907,
+      "step": 52692
+    },
+    {
+      "epoch": 3.9973448642087694,
+      "grad_norm": 1.1326065063476562,
+      "learning_rate": 3.0003986283135803e-06,
+      "loss": 2.2332,
+      "step": 52693
+    },
+    {
+      "epoch": 3.9974207252313763,
+      "grad_norm": 1.048019528388977,
+      "learning_rate": 3.0003761749731514e-06,
+      "loss": 2.0218,
+      "step": 52694
+    },
+    {
+      "epoch": 3.9974965862539826,
+      "grad_norm": 0.9427198767662048,
+      "learning_rate": 3.0003543724531006e-06,
+      "loss": 2.0542,
+      "step": 52695
+    },
+    {
+      "epoch": 3.9975724472765894,
+      "grad_norm": 1.1140172481536865,
+      "learning_rate": 3.0003332207535282e-06,
+      "loss": 2.2512,
+      "step": 52696
+    },
+    {
+      "epoch": 3.997648308299196,
+      "grad_norm": 1.0869436264038086,
+      "learning_rate": 3.000312719874516e-06,
+      "loss": 2.262,
+      "step": 52697
+    },
+    {
+      "epoch": 3.9977241693218026,
+      "grad_norm": 0.9387947916984558,
+      "learning_rate": 3.000292869816164e-06,
+      "loss": 2.1539,
+      "step": 52698
+    },
+    {
+      "epoch": 3.997800030344409,
+      "grad_norm": 0.8832263946533203,
+      "learning_rate": 3.0002736705785535e-06,
+      "loss": 2.4051,
+      "step": 52699
+    },
+    {
+      "epoch": 3.9978758913670154,
+      "grad_norm": 1.0139085054397583,
+      "learning_rate": 3.0002551221617832e-06,
+      "loss": 1.9968,
+      "step": 52700
+    },
+    {
+      "epoch": 3.997951752389622,
+      "grad_norm": 1.092558741569519,
+      "learning_rate": 3.000237224565903e-06,
+      "loss": 2.2382,
+      "step": 52701
+    },
+    {
+      "epoch": 3.998027613412229,
+      "grad_norm": 1.0027174949645996,
+      "learning_rate": 3.000219977791029e-06,
+      "loss": 2.0828,
+      "step": 52702
+    },
+    {
+      "epoch": 3.9981034744348354,
+      "grad_norm": 0.9388405084609985,
+      "learning_rate": 3.00020338183721e-06,
+      "loss": 2.0502,
+      "step": 52703
+    },
+    {
+      "epoch": 3.9981793354574418,
+      "grad_norm": 0.9701755046844482,
+      "learning_rate": 3.000187436704528e-06,
+      "loss": 2.109,
+      "step": 52704
+    },
+    {
+      "epoch": 3.9982551964800486,
+      "grad_norm": 0.9503781795501709,
+      "learning_rate": 3.0001721423930496e-06,
+      "loss": 2.169,
+      "step": 52705
+    },
+    {
+      "epoch": 3.9983310575026554,
+      "grad_norm": 1.0426063537597656,
+      "learning_rate": 3.000157498902841e-06,
+      "loss": 2.1868,
+      "step": 52706
+    },
+    {
+      "epoch": 3.9984069185252618,
+      "grad_norm": 0.9852115511894226,
+      "learning_rate": 3.000143506233984e-06,
+      "loss": 2.25,
+      "step": 52707
+    },
+    {
+      "epoch": 3.998482779547868,
+      "grad_norm": 0.8936397433280945,
+      "learning_rate": 3.0001301643865117e-06,
+      "loss": 2.0757,
+      "step": 52708
+    },
+    {
+      "epoch": 3.998558640570475,
+      "grad_norm": 0.9924890398979187,
+      "learning_rate": 3.00011747336049e-06,
+      "loss": 2.2185,
+      "step": 52709
+    },
+    {
+      "epoch": 3.9986345015930813,
+      "grad_norm": 1.0129131078720093,
+      "learning_rate": 3.0001054331560014e-06,
+      "loss": 2.1824,
+      "step": 52710
+    },
+    {
+      "epoch": 3.998710362615688,
+      "grad_norm": 0.9240451455116272,
+      "learning_rate": 3.0000940437730624e-06,
+      "loss": 2.0595,
+      "step": 52711
+    },
+    {
+      "epoch": 3.9987862236382945,
+      "grad_norm": 1.037165641784668,
+      "learning_rate": 3.0000833052117394e-06,
+      "loss": 2.1509,
+      "step": 52712
+    },
+    {
+      "epoch": 3.9988620846609013,
+      "grad_norm": 0.9835069179534912,
+      "learning_rate": 3.000073217472098e-06,
+      "loss": 2.0827,
+      "step": 52713
+    },
+    {
+      "epoch": 3.9989379456835077,
+      "grad_norm": 1.0959041118621826,
+      "learning_rate": 3.000063780554138e-06,
+      "loss": 2.1468,
+      "step": 52714
+    },
+    {
+      "epoch": 3.9990138067061145,
+      "grad_norm": 1.2777659893035889,
+      "learning_rate": 3.000054994457942e-06,
+      "loss": 2.3319,
+      "step": 52715
+    },
+    {
+      "epoch": 3.999089667728721,
+      "grad_norm": 0.9920614957809448,
+      "learning_rate": 3.0000468591835265e-06,
+      "loss": 2.1972,
+      "step": 52716
+    },
+    {
+      "epoch": 3.9991655287513277,
+      "grad_norm": 1.0651792287826538,
+      "learning_rate": 3.000039374730924e-06,
+      "loss": 2.0964,
+      "step": 52717
+    },
+    {
+      "epoch": 3.999241389773934,
+      "grad_norm": 1.1940739154815674,
+      "learning_rate": 3.000032541100185e-06,
+      "loss": 1.9985,
+      "step": 52718
+    },
+    {
+      "epoch": 3.999317250796541,
+      "grad_norm": 0.9705497026443481,
+      "learning_rate": 3.0000263582913414e-06,
+      "loss": 2.008,
+      "step": 52719
+    },
+    {
+      "epoch": 3.9993931118191473,
+      "grad_norm": 1.0034539699554443,
+      "learning_rate": 3.000020826304394e-06,
+      "loss": 2.0824,
+      "step": 52720
+    },
+    {
+      "epoch": 3.999468972841754,
+      "grad_norm": 1.047110915184021,
+      "learning_rate": 3.0000159451393913e-06,
+      "loss": 2.0688,
+      "step": 52721
+    },
+    {
+      "epoch": 3.9995448338643604,
+      "grad_norm": 0.9343622922897339,
+      "learning_rate": 3.000011714796335e-06,
+      "loss": 2.2584,
+      "step": 52722
+    },
+    {
+      "epoch": 3.999620694886967,
+      "grad_norm": 1.0422899723052979,
+      "learning_rate": 3.0000081352752726e-06,
+      "loss": 2.1829,
+      "step": 52723
+    },
+    {
+      "epoch": 3.9996965559095736,
+      "grad_norm": 1.077273964881897,
+      "learning_rate": 3.0000052065761888e-06,
+      "loss": 2.0929,
+      "step": 52724
+    },
+    {
+      "epoch": 3.9997724169321804,
+      "grad_norm": 0.9545094966888428,
+      "learning_rate": 3.0000029286991165e-06,
+      "loss": 2.1265,
+      "step": 52725
+    },
+    {
+      "epoch": 3.999848277954787,
+      "grad_norm": 0.9401389956474304,
+      "learning_rate": 3.0000013016440555e-06,
+      "loss": 1.9835,
+      "step": 52726
+    },
+    {
+      "epoch": 3.999924138977393,
+      "grad_norm": 0.9990488290786743,
+      "learning_rate": 3.0000003254110053e-06,
+      "loss": 2.2038,
+      "step": 52727
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 1.4670413732528687,
+      "learning_rate": 2.9999999999999997e-06,
+      "loss": 1.9777,
+      "step": 52728
     }
   ],
   "logging_steps": 1,
@@ -368940,12 +369297,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.
+  "total_flos": 1.3325515079398982e+19,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null