Training in progress, step 5250, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 527048968
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce8cb0c1636a06b1e76c546c4b0282f02b71ccad43c283d3d33d43185c64edec
|
| 3 |
size 527048968
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1054135994
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:70afac3958025bed818f692a236ab6bf6b28db45140796294f475309149762a5
|
| 3 |
size 1054135994
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a7de188e422eb0da886da3c865f1df00995a0a219ebff0d43a41d74c3b9d38d5
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7cc08ef7615af7896731786745ca416272561837649d6bc1ff644d72a48c9b0d
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7166205048561096,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 150,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -4937,6 +4937,151 @@
|
|
| 4937 |
"EMA_steps_per_second": 22.619,
|
| 4938 |
"epoch": 221.7391304347826,
|
| 4939 |
"step": 5100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4940 |
}
|
| 4941 |
],
|
| 4942 |
"logging_steps": 10,
|
|
@@ -4956,7 +5101,7 @@
|
|
| 4956 |
"attributes": {}
|
| 4957 |
}
|
| 4958 |
},
|
| 4959 |
-
"total_flos": 1.
|
| 4960 |
"train_batch_size": 4,
|
| 4961 |
"trial_name": null,
|
| 4962 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7166205048561096,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
+
"epoch": 228.2608695652174,
|
| 5 |
"eval_steps": 150,
|
| 6 |
+
"global_step": 5250,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 4937 |
"EMA_steps_per_second": 22.619,
|
| 4938 |
"epoch": 221.7391304347826,
|
| 4939 |
"step": 5100
|
| 4940 |
+
},
|
| 4941 |
+
{
|
| 4942 |
+
"epoch": 222.17391304347825,
|
| 4943 |
+
"grad_norm": 2.351990222930908,
|
| 4944 |
+
"learning_rate": 1.528980386974772e-06,
|
| 4945 |
+
"loss": 0.2432,
|
| 4946 |
+
"step": 5110
|
| 4947 |
+
},
|
| 4948 |
+
{
|
| 4949 |
+
"epoch": 222.6086956521739,
|
| 4950 |
+
"grad_norm": 2.001265287399292,
|
| 4951 |
+
"learning_rate": 1.5289556856653922e-06,
|
| 4952 |
+
"loss": 0.2269,
|
| 4953 |
+
"step": 5120
|
| 4954 |
+
},
|
| 4955 |
+
{
|
| 4956 |
+
"epoch": 223.04347826086956,
|
| 4957 |
+
"grad_norm": 2.3239283561706543,
|
| 4958 |
+
"learning_rate": 1.5289306815434077e-06,
|
| 4959 |
+
"loss": 0.2179,
|
| 4960 |
+
"step": 5130
|
| 4961 |
+
},
|
| 4962 |
+
{
|
| 4963 |
+
"epoch": 223.47826086956522,
|
| 4964 |
+
"grad_norm": 2.3948585987091064,
|
| 4965 |
+
"learning_rate": 1.528905374618729e-06,
|
| 4966 |
+
"loss": 0.2354,
|
| 4967 |
+
"step": 5140
|
| 4968 |
+
},
|
| 4969 |
+
{
|
| 4970 |
+
"epoch": 223.91304347826087,
|
| 4971 |
+
"grad_norm": 1.9040113687515259,
|
| 4972 |
+
"learning_rate": 1.5288797649013872e-06,
|
| 4973 |
+
"loss": 0.2289,
|
| 4974 |
+
"step": 5150
|
| 4975 |
+
},
|
| 4976 |
+
{
|
| 4977 |
+
"epoch": 224.34782608695653,
|
| 4978 |
+
"grad_norm": 2.0223822593688965,
|
| 4979 |
+
"learning_rate": 1.5288538524015332e-06,
|
| 4980 |
+
"loss": 0.2192,
|
| 4981 |
+
"step": 5160
|
| 4982 |
+
},
|
| 4983 |
+
{
|
| 4984 |
+
"epoch": 224.7826086956522,
|
| 4985 |
+
"grad_norm": 1.7613234519958496,
|
| 4986 |
+
"learning_rate": 1.5288276371294373e-06,
|
| 4987 |
+
"loss": 0.2122,
|
| 4988 |
+
"step": 5170
|
| 4989 |
+
},
|
| 4990 |
+
{
|
| 4991 |
+
"epoch": 225.2173913043478,
|
| 4992 |
+
"grad_norm": 1.8648544549942017,
|
| 4993 |
+
"learning_rate": 1.5288011190954913e-06,
|
| 4994 |
+
"loss": 0.2038,
|
| 4995 |
+
"step": 5180
|
| 4996 |
+
},
|
| 4997 |
+
{
|
| 4998 |
+
"epoch": 225.65217391304347,
|
| 4999 |
+
"grad_norm": 1.9111838340759277,
|
| 5000 |
+
"learning_rate": 1.5287742983102055e-06,
|
| 5001 |
+
"loss": 0.1973,
|
| 5002 |
+
"step": 5190
|
| 5003 |
+
},
|
| 5004 |
+
{
|
| 5005 |
+
"epoch": 226.08695652173913,
|
| 5006 |
+
"grad_norm": 2.3617441654205322,
|
| 5007 |
+
"learning_rate": 1.5287471747842112e-06,
|
| 5008 |
+
"loss": 0.2629,
|
| 5009 |
+
"step": 5200
|
| 5010 |
+
},
|
| 5011 |
+
{
|
| 5012 |
+
"epoch": 226.52173913043478,
|
| 5013 |
+
"grad_norm": 1.7532709836959839,
|
| 5014 |
+
"learning_rate": 1.5287197485282586e-06,
|
| 5015 |
+
"loss": 0.2078,
|
| 5016 |
+
"step": 5210
|
| 5017 |
+
},
|
| 5018 |
+
{
|
| 5019 |
+
"epoch": 226.95652173913044,
|
| 5020 |
+
"grad_norm": 2.7147057056427,
|
| 5021 |
+
"learning_rate": 1.5286920195532195e-06,
|
| 5022 |
+
"loss": 0.2379,
|
| 5023 |
+
"step": 5220
|
| 5024 |
+
},
|
| 5025 |
+
{
|
| 5026 |
+
"epoch": 227.3913043478261,
|
| 5027 |
+
"grad_norm": 1.9578685760498047,
|
| 5028 |
+
"learning_rate": 1.528663987870084e-06,
|
| 5029 |
+
"loss": 0.2432,
|
| 5030 |
+
"step": 5230
|
| 5031 |
+
},
|
| 5032 |
+
{
|
| 5033 |
+
"epoch": 227.82608695652175,
|
| 5034 |
+
"grad_norm": 3.356194019317627,
|
| 5035 |
+
"learning_rate": 1.5286356534899634e-06,
|
| 5036 |
+
"loss": 0.2264,
|
| 5037 |
+
"step": 5240
|
| 5038 |
+
},
|
| 5039 |
+
{
|
| 5040 |
+
"epoch": 228.2608695652174,
|
| 5041 |
+
"grad_norm": 1.8712825775146484,
|
| 5042 |
+
"learning_rate": 1.5286070164240883e-06,
|
| 5043 |
+
"loss": 0.2185,
|
| 5044 |
+
"step": 5250
|
| 5045 |
+
},
|
| 5046 |
+
{
|
| 5047 |
+
"epoch": 228.2608695652174,
|
| 5048 |
+
"eval_loss": 0.9829781651496887,
|
| 5049 |
+
"eval_runtime": 0.3975,
|
| 5050 |
+
"eval_samples_per_second": 25.156,
|
| 5051 |
+
"eval_steps_per_second": 25.156,
|
| 5052 |
+
"step": 5250
|
| 5053 |
+
},
|
| 5054 |
+
{
|
| 5055 |
+
"Start_State_loss": 0.8609819412231445,
|
| 5056 |
+
"Start_State_runtime": 0.3894,
|
| 5057 |
+
"Start_State_samples_per_second": 25.682,
|
| 5058 |
+
"Start_State_steps_per_second": 25.682,
|
| 5059 |
+
"epoch": 228.2608695652174,
|
| 5060 |
+
"step": 5250
|
| 5061 |
+
},
|
| 5062 |
+
{
|
| 5063 |
+
"Raw_Model_loss": 0.9829781651496887,
|
| 5064 |
+
"Raw_Model_runtime": 0.391,
|
| 5065 |
+
"Raw_Model_samples_per_second": 25.574,
|
| 5066 |
+
"Raw_Model_steps_per_second": 25.574,
|
| 5067 |
+
"epoch": 228.2608695652174,
|
| 5068 |
+
"step": 5250
|
| 5069 |
+
},
|
| 5070 |
+
{
|
| 5071 |
+
"SWA_loss": 0.8260501027107239,
|
| 5072 |
+
"SWA_runtime": 0.4158,
|
| 5073 |
+
"SWA_samples_per_second": 24.051,
|
| 5074 |
+
"SWA_steps_per_second": 24.051,
|
| 5075 |
+
"epoch": 228.2608695652174,
|
| 5076 |
+
"step": 5250
|
| 5077 |
+
},
|
| 5078 |
+
{
|
| 5079 |
+
"EMA_loss": 0.8600662350654602,
|
| 5080 |
+
"EMA_runtime": 0.3878,
|
| 5081 |
+
"EMA_samples_per_second": 25.783,
|
| 5082 |
+
"EMA_steps_per_second": 25.783,
|
| 5083 |
+
"epoch": 228.2608695652174,
|
| 5084 |
+
"step": 5250
|
| 5085 |
}
|
| 5086 |
],
|
| 5087 |
"logging_steps": 10,
|
|
|
|
| 5101 |
"attributes": {}
|
| 5102 |
}
|
| 5103 |
},
|
| 5104 |
+
"total_flos": 1.3495580841170534e+17,
|
| 5105 |
"train_batch_size": 4,
|
| 5106 |
"trial_name": null,
|
| 5107 |
"trial_params": null
|