Training in progress, epoch 0, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step2750/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2750/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2750/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2750/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step2750/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +160 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1037269336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:41eba1008d873044192a020b36381b7428ff6627186a0aebca3a9191aa95e154
|
| 3 |
size 1037269336
|
last-checkpoint/global_step2750/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f3bccecb96b6ad79d4682b57ef76ca7727c7c5ff4811d53c75235fcd04f30e5
|
| 3 |
+
size 781993445
|
last-checkpoint/global_step2750/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:296d772ecd529ffba299a0c219a97e1c0c778ffeac847c946de62f6e7977f9b4
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step2750/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a85dff21ff59e775869cc5540252bef200b4d8f3230db33de3b3b9eb8984ec8
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step2750/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:769cbbfcebb8af82d2baa12d1b6609a0082a4b0d8eebb63f7bf23b80b070e634
|
| 3 |
+
size 781993509
|
last-checkpoint/global_step2750/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bc9194cda2bd009b0d5e328ae988b1121ee5bf25708ccbb4f03dbec7c9743576
|
| 3 |
+
size 2610290277
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step2750
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:128632a5a6c84ed29b2d92372ca3a3444bcf9664bddeca70dc2e8dc1479dc9ba
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1fa70d56fe2828a4932eca6e89391bb5eb3e6023a9d392081c741ffddcb48e32
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e377c5adc37c7bb27de7eb961e8e335dc1a990c6e6cc4c52dff5bfce37db4bdb
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3309b2ba0e1ededbc01d8d5e756621af5edc2dd27c0c494a5dba8227a208c2b
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f0c5c6d0d23fe4659a54b886e17b4a28c437d0eb3c1002e3af25db23c5751106
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 2.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -4142,6 +4142,162 @@
|
|
| 4142 |
"eval_samples_per_second": 171.033,
|
| 4143 |
"eval_steps_per_second": 10.725,
|
| 4144 |
"step": 2650
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4145 |
}
|
| 4146 |
],
|
| 4147 |
"logging_steps": 5,
|
|
@@ -4170,7 +4326,7 @@
|
|
| 4170 |
"attributes": {}
|
| 4171 |
}
|
| 4172 |
},
|
| 4173 |
-
"total_flos":
|
| 4174 |
"train_batch_size": 4,
|
| 4175 |
"trial_name": null,
|
| 4176 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 2.048673152923584,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.39976740805349614,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 2750,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 4142 |
"eval_samples_per_second": 171.033,
|
| 4143 |
"eval_steps_per_second": 10.725,
|
| 4144 |
"step": 2650
|
| 4145 |
+
},
|
| 4146 |
+
{
|
| 4147 |
+
"epoch": 0.38595726122982993,
|
| 4148 |
+
"grad_norm": 2.482224464416504,
|
| 4149 |
+
"learning_rate": 8.462414849593505e-05,
|
| 4150 |
+
"loss": 2.3298,
|
| 4151 |
+
"step": 2655
|
| 4152 |
+
},
|
| 4153 |
+
{
|
| 4154 |
+
"epoch": 0.38668411106265443,
|
| 4155 |
+
"grad_norm": 2.928178071975708,
|
| 4156 |
+
"learning_rate": 8.4566506422412e-05,
|
| 4157 |
+
"loss": 2.1993,
|
| 4158 |
+
"step": 2660
|
| 4159 |
+
},
|
| 4160 |
+
{
|
| 4161 |
+
"epoch": 0.387410960895479,
|
| 4162 |
+
"grad_norm": 2.753080129623413,
|
| 4163 |
+
"learning_rate": 8.450877757539051e-05,
|
| 4164 |
+
"loss": 2.1442,
|
| 4165 |
+
"step": 2665
|
| 4166 |
+
},
|
| 4167 |
+
{
|
| 4168 |
+
"epoch": 0.38813781072830356,
|
| 4169 |
+
"grad_norm": 2.788785934448242,
|
| 4170 |
+
"learning_rate": 8.445096210020286e-05,
|
| 4171 |
+
"loss": 2.2659,
|
| 4172 |
+
"step": 2670
|
| 4173 |
+
},
|
| 4174 |
+
{
|
| 4175 |
+
"epoch": 0.38886466056112806,
|
| 4176 |
+
"grad_norm": 2.5485122203826904,
|
| 4177 |
+
"learning_rate": 8.439306014239947e-05,
|
| 4178 |
+
"loss": 2.0677,
|
| 4179 |
+
"step": 2675
|
| 4180 |
+
},
|
| 4181 |
+
{
|
| 4182 |
+
"epoch": 0.3895915103939526,
|
| 4183 |
+
"grad_norm": 2.8225951194763184,
|
| 4184 |
+
"learning_rate": 8.433507184774844e-05,
|
| 4185 |
+
"loss": 2.1661,
|
| 4186 |
+
"step": 2680
|
| 4187 |
+
},
|
| 4188 |
+
{
|
| 4189 |
+
"epoch": 0.39031836022677713,
|
| 4190 |
+
"grad_norm": 2.5097239017486572,
|
| 4191 |
+
"learning_rate": 8.427699736223522e-05,
|
| 4192 |
+
"loss": 2.2583,
|
| 4193 |
+
"step": 2685
|
| 4194 |
+
},
|
| 4195 |
+
{
|
| 4196 |
+
"epoch": 0.3910452100596017,
|
| 4197 |
+
"grad_norm": 2.8999710083007812,
|
| 4198 |
+
"learning_rate": 8.421883683206229e-05,
|
| 4199 |
+
"loss": 1.9918,
|
| 4200 |
+
"step": 2690
|
| 4201 |
+
},
|
| 4202 |
+
{
|
| 4203 |
+
"epoch": 0.39177205989242625,
|
| 4204 |
+
"grad_norm": 2.4386322498321533,
|
| 4205 |
+
"learning_rate": 8.416059040364867e-05,
|
| 4206 |
+
"loss": 2.1094,
|
| 4207 |
+
"step": 2695
|
| 4208 |
+
},
|
| 4209 |
+
{
|
| 4210 |
+
"epoch": 0.39249890972525076,
|
| 4211 |
+
"grad_norm": 2.6135191917419434,
|
| 4212 |
+
"learning_rate": 8.41022582236297e-05,
|
| 4213 |
+
"loss": 2.2167,
|
| 4214 |
+
"step": 2700
|
| 4215 |
+
},
|
| 4216 |
+
{
|
| 4217 |
+
"epoch": 0.39249890972525076,
|
| 4218 |
+
"eval_loss": 2.0557267665863037,
|
| 4219 |
+
"eval_runtime": 20.6155,
|
| 4220 |
+
"eval_samples_per_second": 160.122,
|
| 4221 |
+
"eval_steps_per_second": 10.041,
|
| 4222 |
+
"step": 2700
|
| 4223 |
+
},
|
| 4224 |
+
{
|
| 4225 |
+
"epoch": 0.3932257595580753,
|
| 4226 |
+
"grad_norm": 2.4323067665100098,
|
| 4227 |
+
"learning_rate": 8.404384043885659e-05,
|
| 4228 |
+
"loss": 2.1704,
|
| 4229 |
+
"step": 2705
|
| 4230 |
+
},
|
| 4231 |
+
{
|
| 4232 |
+
"epoch": 0.3939526093908998,
|
| 4233 |
+
"grad_norm": 3.0446643829345703,
|
| 4234 |
+
"learning_rate": 8.3985337196396e-05,
|
| 4235 |
+
"loss": 2.2533,
|
| 4236 |
+
"step": 2710
|
| 4237 |
+
},
|
| 4238 |
+
{
|
| 4239 |
+
"epoch": 0.3946794592237244,
|
| 4240 |
+
"grad_norm": 2.5526018142700195,
|
| 4241 |
+
"learning_rate": 8.392674864352983e-05,
|
| 4242 |
+
"loss": 1.9887,
|
| 4243 |
+
"step": 2715
|
| 4244 |
+
},
|
| 4245 |
+
{
|
| 4246 |
+
"epoch": 0.39540630905654894,
|
| 4247 |
+
"grad_norm": 2.2026188373565674,
|
| 4248 |
+
"learning_rate": 8.386807492775464e-05,
|
| 4249 |
+
"loss": 2.0637,
|
| 4250 |
+
"step": 2720
|
| 4251 |
+
},
|
| 4252 |
+
{
|
| 4253 |
+
"epoch": 0.39613315888937345,
|
| 4254 |
+
"grad_norm": 2.6651768684387207,
|
| 4255 |
+
"learning_rate": 8.380931619678144e-05,
|
| 4256 |
+
"loss": 2.0884,
|
| 4257 |
+
"step": 2725
|
| 4258 |
+
},
|
| 4259 |
+
{
|
| 4260 |
+
"epoch": 0.396860008722198,
|
| 4261 |
+
"grad_norm": 2.4840121269226074,
|
| 4262 |
+
"learning_rate": 8.375047259853531e-05,
|
| 4263 |
+
"loss": 2.2356,
|
| 4264 |
+
"step": 2730
|
| 4265 |
+
},
|
| 4266 |
+
{
|
| 4267 |
+
"epoch": 0.3975868585550225,
|
| 4268 |
+
"grad_norm": 2.4644320011138916,
|
| 4269 |
+
"learning_rate": 8.369154428115492e-05,
|
| 4270 |
+
"loss": 2.1208,
|
| 4271 |
+
"step": 2735
|
| 4272 |
+
},
|
| 4273 |
+
{
|
| 4274 |
+
"epoch": 0.3983137083878471,
|
| 4275 |
+
"grad_norm": 2.8018391132354736,
|
| 4276 |
+
"learning_rate": 8.36325313929922e-05,
|
| 4277 |
+
"loss": 2.1674,
|
| 4278 |
+
"step": 2740
|
| 4279 |
+
},
|
| 4280 |
+
{
|
| 4281 |
+
"epoch": 0.3990405582206716,
|
| 4282 |
+
"grad_norm": 2.1455233097076416,
|
| 4283 |
+
"learning_rate": 8.35734340826121e-05,
|
| 4284 |
+
"loss": 2.0756,
|
| 4285 |
+
"step": 2745
|
| 4286 |
+
},
|
| 4287 |
+
{
|
| 4288 |
+
"epoch": 0.39976740805349614,
|
| 4289 |
+
"grad_norm": 2.5568268299102783,
|
| 4290 |
+
"learning_rate": 8.351425249879196e-05,
|
| 4291 |
+
"loss": 2.2644,
|
| 4292 |
+
"step": 2750
|
| 4293 |
+
},
|
| 4294 |
+
{
|
| 4295 |
+
"epoch": 0.39976740805349614,
|
| 4296 |
+
"eval_loss": 2.048673152923584,
|
| 4297 |
+
"eval_runtime": 19.1036,
|
| 4298 |
+
"eval_samples_per_second": 172.795,
|
| 4299 |
+
"eval_steps_per_second": 10.836,
|
| 4300 |
+
"step": 2750
|
| 4301 |
}
|
| 4302 |
],
|
| 4303 |
"logging_steps": 5,
|
|
|
|
| 4326 |
"attributes": {}
|
| 4327 |
}
|
| 4328 |
},
|
| 4329 |
+
"total_flos": 7.154438808702812e+17,
|
| 4330 |
"train_batch_size": 4,
|
| 4331 |
"trial_name": null,
|
| 4332 |
"trial_params": null
|