Training in progress, step 29560, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 83945296
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:44d8b9c5f0b247c388d0afbe8907171a0d81b141a933b1e9361f9bbf2d35e204
|
| 3 |
size 83945296
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 168150738
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f82ac87fc2dd1538493c584b6bfaa34aacaf4f8dd89f4d5c843f7d1407dde93d
|
| 3 |
size 168150738
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef6ee5c960556f49ab0201cef6ec598647c83cf5eebbbd5fbf9582e268f90127
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a7c7fd32361ba3725f77c3361e30c891765755fcbe92da53fa4ea9fbbad5a7c8
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 3282,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -206953,6 +206953,160 @@
|
|
| 206953 |
"eval_test_samples_per_second": 12.934,
|
| 206954 |
"eval_test_steps_per_second": 0.809,
|
| 206955 |
"step": 29538
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206956 |
}
|
| 206957 |
],
|
| 206958 |
"logging_steps": 1,
|
|
@@ -206967,12 +207121,12 @@
|
|
| 206967 |
"should_evaluate": false,
|
| 206968 |
"should_log": false,
|
| 206969 |
"should_save": true,
|
| 206970 |
-
"should_training_stop":
|
| 206971 |
},
|
| 206972 |
"attributes": {}
|
| 206973 |
}
|
| 206974 |
},
|
| 206975 |
-
"total_flos": 7.
|
| 206976 |
"train_batch_size": 16,
|
| 206977 |
"trial_name": null,
|
| 206978 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 4.0,
|
| 5 |
"eval_steps": 3282,
|
| 6 |
+
"global_step": 29560,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 206953 |
"eval_test_samples_per_second": 12.934,
|
| 206954 |
"eval_test_steps_per_second": 0.809,
|
| 206955 |
"step": 29538
|
| 206956 |
+
},
|
| 206957 |
+
{
|
| 206958 |
+
"epoch": 3.9971583220568334,
|
| 206959 |
+
"grad_norm": 1.4479291439056396,
|
| 206960 |
+
"learning_rate": 3.0004566050305867e-06,
|
| 206961 |
+
"loss": 1.7956,
|
| 206962 |
+
"step": 29539
|
| 206963 |
+
},
|
| 206964 |
+
{
|
| 206965 |
+
"epoch": 3.997293640054127,
|
| 206966 |
+
"grad_norm": 1.3905441761016846,
|
| 206967 |
+
"learning_rate": 3.000414154242476e-06,
|
| 206968 |
+
"loss": 1.8403,
|
| 206969 |
+
"step": 29540
|
| 206970 |
+
},
|
| 206971 |
+
{
|
| 206972 |
+
"epoch": 3.9974289580514206,
|
| 206973 |
+
"grad_norm": 1.563751220703125,
|
| 206974 |
+
"learning_rate": 3.0003737742207762e-06,
|
| 206975 |
+
"loss": 1.9486,
|
| 206976 |
+
"step": 29541
|
| 206977 |
+
},
|
| 206978 |
+
{
|
| 206979 |
+
"epoch": 3.9975642760487142,
|
| 206980 |
+
"grad_norm": 1.3970868587493896,
|
| 206981 |
+
"learning_rate": 3.000335464966031e-06,
|
| 206982 |
+
"loss": 1.9456,
|
| 206983 |
+
"step": 29542
|
| 206984 |
+
},
|
| 206985 |
+
{
|
| 206986 |
+
"epoch": 3.9976995940460083,
|
| 206987 |
+
"grad_norm": 1.3398752212524414,
|
| 206988 |
+
"learning_rate": 3.000299226478785e-06,
|
| 206989 |
+
"loss": 1.8753,
|
| 206990 |
+
"step": 29543
|
| 206991 |
+
},
|
| 206992 |
+
{
|
| 206993 |
+
"epoch": 3.997834912043302,
|
| 206994 |
+
"grad_norm": 1.3150476217269897,
|
| 206995 |
+
"learning_rate": 3.0002650587595492e-06,
|
| 206996 |
+
"loss": 1.9388,
|
| 206997 |
+
"step": 29544
|
| 206998 |
+
},
|
| 206999 |
+
{
|
| 207000 |
+
"epoch": 3.9979702300405955,
|
| 207001 |
+
"grad_norm": 1.5918689966201782,
|
| 207002 |
+
"learning_rate": 3.0002329618087684e-06,
|
| 207003 |
+
"loss": 1.917,
|
| 207004 |
+
"step": 29545
|
| 207005 |
+
},
|
| 207006 |
+
{
|
| 207007 |
+
"epoch": 3.998105548037889,
|
| 207008 |
+
"grad_norm": 1.2473838329315186,
|
| 207009 |
+
"learning_rate": 3.0002029356269215e-06,
|
| 207010 |
+
"loss": 1.9181,
|
| 207011 |
+
"step": 29546
|
| 207012 |
+
},
|
| 207013 |
+
{
|
| 207014 |
+
"epoch": 3.9982408660351827,
|
| 207015 |
+
"grad_norm": 1.546493649482727,
|
| 207016 |
+
"learning_rate": 3.000174980214419e-06,
|
| 207017 |
+
"loss": 2.154,
|
| 207018 |
+
"step": 29547
|
| 207019 |
+
},
|
| 207020 |
+
{
|
| 207021 |
+
"epoch": 3.9983761840324763,
|
| 207022 |
+
"grad_norm": 1.1951613426208496,
|
| 207023 |
+
"learning_rate": 3.0001490955716424e-06,
|
| 207024 |
+
"loss": 1.7965,
|
| 207025 |
+
"step": 29548
|
| 207026 |
+
},
|
| 207027 |
+
{
|
| 207028 |
+
"epoch": 3.99851150202977,
|
| 207029 |
+
"grad_norm": 1.4026210308074951,
|
| 207030 |
+
"learning_rate": 3.000125281698969e-06,
|
| 207031 |
+
"loss": 1.9001,
|
| 207032 |
+
"step": 29549
|
| 207033 |
+
},
|
| 207034 |
+
{
|
| 207035 |
+
"epoch": 3.9986468200270635,
|
| 207036 |
+
"grad_norm": 1.2616246938705444,
|
| 207037 |
+
"learning_rate": 3.000103538596713e-06,
|
| 207038 |
+
"loss": 1.9108,
|
| 207039 |
+
"step": 29550
|
| 207040 |
+
},
|
| 207041 |
+
{
|
| 207042 |
+
"epoch": 3.998782138024357,
|
| 207043 |
+
"grad_norm": 1.3047972917556763,
|
| 207044 |
+
"learning_rate": 3.000083866265187e-06,
|
| 207045 |
+
"loss": 1.9664,
|
| 207046 |
+
"step": 29551
|
| 207047 |
+
},
|
| 207048 |
+
{
|
| 207049 |
+
"epoch": 3.9989174560216507,
|
| 207050 |
+
"grad_norm": 1.5460106134414673,
|
| 207051 |
+
"learning_rate": 3.0000662647046716e-06,
|
| 207052 |
+
"loss": 2.0249,
|
| 207053 |
+
"step": 29552
|
| 207054 |
+
},
|
| 207055 |
+
{
|
| 207056 |
+
"epoch": 3.9990527740189448,
|
| 207057 |
+
"grad_norm": 1.267030119895935,
|
| 207058 |
+
"learning_rate": 3.0000507339153984e-06,
|
| 207059 |
+
"loss": 1.9507,
|
| 207060 |
+
"step": 29553
|
| 207061 |
+
},
|
| 207062 |
+
{
|
| 207063 |
+
"epoch": 3.9991880920162384,
|
| 207064 |
+
"grad_norm": 1.4216227531433105,
|
| 207065 |
+
"learning_rate": 3.000037273897597e-06,
|
| 207066 |
+
"loss": 1.9319,
|
| 207067 |
+
"step": 29554
|
| 207068 |
+
},
|
| 207069 |
+
{
|
| 207070 |
+
"epoch": 3.999323410013532,
|
| 207071 |
+
"grad_norm": 1.4820796251296997,
|
| 207072 |
+
"learning_rate": 3.0000258846514327e-06,
|
| 207073 |
+
"loss": 2.0095,
|
| 207074 |
+
"step": 29555
|
| 207075 |
+
},
|
| 207076 |
+
{
|
| 207077 |
+
"epoch": 3.9994587280108256,
|
| 207078 |
+
"grad_norm": 1.5175132751464844,
|
| 207079 |
+
"learning_rate": 3.000016566177087e-06,
|
| 207080 |
+
"loss": 1.853,
|
| 207081 |
+
"step": 29556
|
| 207082 |
+
},
|
| 207083 |
+
{
|
| 207084 |
+
"epoch": 3.999594046008119,
|
| 207085 |
+
"grad_norm": 1.2719131708145142,
|
| 207086 |
+
"learning_rate": 3.000009318474692e-06,
|
| 207087 |
+
"loss": 1.7825,
|
| 207088 |
+
"step": 29557
|
| 207089 |
+
},
|
| 207090 |
+
{
|
| 207091 |
+
"epoch": 3.999729364005413,
|
| 207092 |
+
"grad_norm": 1.266430377960205,
|
| 207093 |
+
"learning_rate": 3.000004141544329e-06,
|
| 207094 |
+
"loss": 1.7838,
|
| 207095 |
+
"step": 29558
|
| 207096 |
+
},
|
| 207097 |
+
{
|
| 207098 |
+
"epoch": 3.9998646820027064,
|
| 207099 |
+
"grad_norm": 1.3060740232467651,
|
| 207100 |
+
"learning_rate": 3.0000010353860824e-06,
|
| 207101 |
+
"loss": 1.8332,
|
| 207102 |
+
"step": 29559
|
| 207103 |
+
},
|
| 207104 |
+
{
|
| 207105 |
+
"epoch": 4.0,
|
| 207106 |
+
"grad_norm": 3.18343448638916,
|
| 207107 |
+
"learning_rate": 2.9999999999999997e-06,
|
| 207108 |
+
"loss": 1.9557,
|
| 207109 |
+
"step": 29560
|
| 207110 |
}
|
| 207111 |
],
|
| 207112 |
"logging_steps": 1,
|
|
|
|
| 207121 |
"should_evaluate": false,
|
| 207122 |
"should_log": false,
|
| 207123 |
"should_save": true,
|
| 207124 |
+
"should_training_stop": true
|
| 207125 |
},
|
| 207126 |
"attributes": {}
|
| 207127 |
}
|
| 207128 |
},
|
| 207129 |
+
"total_flos": 7.215024419447243e+18,
|
| 207130 |
"train_batch_size": 16,
|
| 207131 |
"trial_name": null,
|
| 207132 |
"trial_params": null
|