FormlessAI commited on
Commit
8a9b2be
·
verified ·
1 Parent(s): aea1964

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ed35d9e9a53737611af966cbe93a83870961407619ae9b764d5a71481806bd1
3
  size 1037269336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41eba1008d873044192a020b36381b7428ff6627186a0aebca3a9191aa95e154
3
  size 1037269336
last-checkpoint/global_step2750/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f3bccecb96b6ad79d4682b57ef76ca7727c7c5ff4811d53c75235fcd04f30e5
3
+ size 781993445
last-checkpoint/global_step2750/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:296d772ecd529ffba299a0c219a97e1c0c778ffeac847c946de62f6e7977f9b4
3
+ size 781993509
last-checkpoint/global_step2750/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a85dff21ff59e775869cc5540252bef200b4d8f3230db33de3b3b9eb8984ec8
3
+ size 781993509
last-checkpoint/global_step2750/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:769cbbfcebb8af82d2baa12d1b6609a0082a4b0d8eebb63f7bf23b80b070e634
3
+ size 781993509
last-checkpoint/global_step2750/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc9194cda2bd009b0d5e328ae988b1121ee5bf25708ccbb4f03dbec7c9743576
3
+ size 2610290277
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step2650
 
1
+ global_step2750
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a555f29dd0266ec849af7131cdeb25395cc2774915812c98f84cc74f010f652f
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:128632a5a6c84ed29b2d92372ca3a3444bcf9664bddeca70dc2e8dc1479dc9ba
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7866dce40dfefc3ceb6ee4b58857dca32c5179d5176488037d421cdde3785292
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fa70d56fe2828a4932eca6e89391bb5eb3e6023a9d392081c741ffddcb48e32
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9297cc95cd9db2e8b723a119df7dc41e3e6ca3438b896056f5bdda90472c78d
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e377c5adc37c7bb27de7eb961e8e335dc1a990c6e6cc4c52dff5bfce37db4bdb
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:777bd3cda62a757b45065141f038d5fde7382e3b6d33030210d22127a50b78ac
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3309b2ba0e1ededbc01d8d5e756621af5edc2dd27c0c494a5dba8227a208c2b
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fdc06a2ad366cb343cdc321bf1e8e054e7fe10a6844e2f229b0f7a2a714c549
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0c5c6d0d23fe4659a54b886e17b4a28c437d0eb3c1002e3af25db23c5751106
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 2.050044298171997,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.38523041139700537,
6
  "eval_steps": 50,
7
- "global_step": 2650,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -4142,6 +4142,162 @@
4142
  "eval_samples_per_second": 171.033,
4143
  "eval_steps_per_second": 10.725,
4144
  "step": 2650
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4145
  }
4146
  ],
4147
  "logging_steps": 5,
@@ -4170,7 +4326,7 @@
4170
  "attributes": {}
4171
  }
4172
  },
4173
- "total_flos": 6.89848308616659e+17,
4174
  "train_batch_size": 4,
4175
  "trial_name": null,
4176
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 2.048673152923584,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.39976740805349614,
6
  "eval_steps": 50,
7
+ "global_step": 2750,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
4142
  "eval_samples_per_second": 171.033,
4143
  "eval_steps_per_second": 10.725,
4144
  "step": 2650
4145
+ },
4146
+ {
4147
+ "epoch": 0.38595726122982993,
4148
+ "grad_norm": 2.482224464416504,
4149
+ "learning_rate": 8.462414849593505e-05,
4150
+ "loss": 2.3298,
4151
+ "step": 2655
4152
+ },
4153
+ {
4154
+ "epoch": 0.38668411106265443,
4155
+ "grad_norm": 2.928178071975708,
4156
+ "learning_rate": 8.4566506422412e-05,
4157
+ "loss": 2.1993,
4158
+ "step": 2660
4159
+ },
4160
+ {
4161
+ "epoch": 0.387410960895479,
4162
+ "grad_norm": 2.753080129623413,
4163
+ "learning_rate": 8.450877757539051e-05,
4164
+ "loss": 2.1442,
4165
+ "step": 2665
4166
+ },
4167
+ {
4168
+ "epoch": 0.38813781072830356,
4169
+ "grad_norm": 2.788785934448242,
4170
+ "learning_rate": 8.445096210020286e-05,
4171
+ "loss": 2.2659,
4172
+ "step": 2670
4173
+ },
4174
+ {
4175
+ "epoch": 0.38886466056112806,
4176
+ "grad_norm": 2.5485122203826904,
4177
+ "learning_rate": 8.439306014239947e-05,
4178
+ "loss": 2.0677,
4179
+ "step": 2675
4180
+ },
4181
+ {
4182
+ "epoch": 0.3895915103939526,
4183
+ "grad_norm": 2.8225951194763184,
4184
+ "learning_rate": 8.433507184774844e-05,
4185
+ "loss": 2.1661,
4186
+ "step": 2680
4187
+ },
4188
+ {
4189
+ "epoch": 0.39031836022677713,
4190
+ "grad_norm": 2.5097239017486572,
4191
+ "learning_rate": 8.427699736223522e-05,
4192
+ "loss": 2.2583,
4193
+ "step": 2685
4194
+ },
4195
+ {
4196
+ "epoch": 0.3910452100596017,
4197
+ "grad_norm": 2.8999710083007812,
4198
+ "learning_rate": 8.421883683206229e-05,
4199
+ "loss": 1.9918,
4200
+ "step": 2690
4201
+ },
4202
+ {
4203
+ "epoch": 0.39177205989242625,
4204
+ "grad_norm": 2.4386322498321533,
4205
+ "learning_rate": 8.416059040364867e-05,
4206
+ "loss": 2.1094,
4207
+ "step": 2695
4208
+ },
4209
+ {
4210
+ "epoch": 0.39249890972525076,
4211
+ "grad_norm": 2.6135191917419434,
4212
+ "learning_rate": 8.41022582236297e-05,
4213
+ "loss": 2.2167,
4214
+ "step": 2700
4215
+ },
4216
+ {
4217
+ "epoch": 0.39249890972525076,
4218
+ "eval_loss": 2.0557267665863037,
4219
+ "eval_runtime": 20.6155,
4220
+ "eval_samples_per_second": 160.122,
4221
+ "eval_steps_per_second": 10.041,
4222
+ "step": 2700
4223
+ },
4224
+ {
4225
+ "epoch": 0.3932257595580753,
4226
+ "grad_norm": 2.4323067665100098,
4227
+ "learning_rate": 8.404384043885659e-05,
4228
+ "loss": 2.1704,
4229
+ "step": 2705
4230
+ },
4231
+ {
4232
+ "epoch": 0.3939526093908998,
4233
+ "grad_norm": 3.0446643829345703,
4234
+ "learning_rate": 8.3985337196396e-05,
4235
+ "loss": 2.2533,
4236
+ "step": 2710
4237
+ },
4238
+ {
4239
+ "epoch": 0.3946794592237244,
4240
+ "grad_norm": 2.5526018142700195,
4241
+ "learning_rate": 8.392674864352983e-05,
4242
+ "loss": 1.9887,
4243
+ "step": 2715
4244
+ },
4245
+ {
4246
+ "epoch": 0.39540630905654894,
4247
+ "grad_norm": 2.2026188373565674,
4248
+ "learning_rate": 8.386807492775464e-05,
4249
+ "loss": 2.0637,
4250
+ "step": 2720
4251
+ },
4252
+ {
4253
+ "epoch": 0.39613315888937345,
4254
+ "grad_norm": 2.6651768684387207,
4255
+ "learning_rate": 8.380931619678144e-05,
4256
+ "loss": 2.0884,
4257
+ "step": 2725
4258
+ },
4259
+ {
4260
+ "epoch": 0.396860008722198,
4261
+ "grad_norm": 2.4840121269226074,
4262
+ "learning_rate": 8.375047259853531e-05,
4263
+ "loss": 2.2356,
4264
+ "step": 2730
4265
+ },
4266
+ {
4267
+ "epoch": 0.3975868585550225,
4268
+ "grad_norm": 2.4644320011138916,
4269
+ "learning_rate": 8.369154428115492e-05,
4270
+ "loss": 2.1208,
4271
+ "step": 2735
4272
+ },
4273
+ {
4274
+ "epoch": 0.3983137083878471,
4275
+ "grad_norm": 2.8018391132354736,
4276
+ "learning_rate": 8.36325313929922e-05,
4277
+ "loss": 2.1674,
4278
+ "step": 2740
4279
+ },
4280
+ {
4281
+ "epoch": 0.3990405582206716,
4282
+ "grad_norm": 2.1455233097076416,
4283
+ "learning_rate": 8.35734340826121e-05,
4284
+ "loss": 2.0756,
4285
+ "step": 2745
4286
+ },
4287
+ {
4288
+ "epoch": 0.39976740805349614,
4289
+ "grad_norm": 2.5568268299102783,
4290
+ "learning_rate": 8.351425249879196e-05,
4291
+ "loss": 2.2644,
4292
+ "step": 2750
4293
+ },
4294
+ {
4295
+ "epoch": 0.39976740805349614,
4296
+ "eval_loss": 2.048673152923584,
4297
+ "eval_runtime": 19.1036,
4298
+ "eval_samples_per_second": 172.795,
4299
+ "eval_steps_per_second": 10.836,
4300
+ "step": 2750
4301
  }
4302
  ],
4303
  "logging_steps": 5,
 
4326
  "attributes": {}
4327
  }
4328
  },
4329
+ "total_flos": 7.154438808702812e+17,
4330
  "train_batch_size": 4,
4331
  "trial_name": null,
4332
  "trial_params": null