QuantaSparkLabs committed on
Commit
bfb043c
·
verified ·
1 Parent(s): f7e7f75

checkpoint-4455

Browse files
Files changed (6) hide show
  1. model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +1 -1
  4. scaler.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +32 -4
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:421b1c3566897b50359cc94a1212bb9f4a46e09806ff05c974a4d25863395330
3
  size 2225188480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfe87e2f6d6074d8d86b668507a39edce3e7e72b9348c1748c61c17b5edcdb2c
3
  size 2225188480
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:010fd42cc33bfa044c7c52d0b6996c0334b0b753ff77599a9987f1177a2ca3ec
3
  size 4450498267
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ffeb7ace51550edaa0d04986ab4cb9c1fcaed2503bc0e748aaf54df166ad8e6
3
  size 4450498267
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d12912a41eb6d74668aa9acfb4ff7cd99f55b1aa02423c599a95b3ebc3006011
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6538b00e91d4c4de4a0c17d2634d57eabc754fc7df85477b8c6ecad7f0cf41a3
3
  size 14645
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0033c7745b46bdca3ecab5787678834ca68f7f7e1288869dceeb38812abc253
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4bcb7279143a7253dbdae3618e4ac776d6e4a1395b9ed9fcb9bc00d72d1520b
3
  size 1383
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7cf6a257871b46c5a112aa215608ac0c00c2d1cd54b28ecf333a6131a03c71f6
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a24e34950d39b6dd56b12cce4107aa557148473c4972111b61f5d0bfe94716d
3
  size 1465
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 2.6936026936026938,
6
  "eval_steps": 500,
7
- "global_step": 4000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -352,6 +352,34 @@
352
  "eval_samples_per_second": 13.431,
353
  "eval_steps_per_second": 3.358,
354
  "step": 4000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
355
  }
356
  ],
357
  "logging_steps": 100,
@@ -366,12 +394,12 @@
366
  "should_evaluate": false,
367
  "should_log": false,
368
  "should_save": true,
369
- "should_training_stop": false
370
  },
371
  "attributes": {}
372
  }
373
  },
374
- "total_flos": 1.541159116578816e+16,
375
  "train_batch_size": 8,
376
  "trial_name": null,
377
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 3.0,
6
  "eval_steps": 500,
7
+ "global_step": 4455,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
352
  "eval_samples_per_second": 13.431,
353
  "eval_steps_per_second": 3.358,
354
  "step": 4000
355
+ },
356
+ {
357
+ "epoch": 2.760942760942761,
358
+ "grad_norm": 21.673240661621094,
359
+ "learning_rate": 2.5099882491186838e-05,
360
+ "loss": 50.46708984375,
361
+ "step": 4100
362
+ },
363
+ {
364
+ "epoch": 2.8282828282828283,
365
+ "grad_norm": 21.410390853881836,
366
+ "learning_rate": 1.8049353701527613e-05,
367
+ "loss": 50.4184521484375,
368
+ "step": 4200
369
+ },
370
+ {
371
+ "epoch": 2.8956228956228958,
372
+ "grad_norm": 21.75411033630371,
373
+ "learning_rate": 1.0998824911868389e-05,
374
+ "loss": 50.68787109375,
375
+ "step": 4300
376
+ },
377
+ {
378
+ "epoch": 2.962962962962963,
379
+ "grad_norm": 21.319902420043945,
380
+ "learning_rate": 3.948296122209165e-06,
381
+ "loss": 50.4838671875,
382
+ "step": 4400
383
  }
384
  ],
385
  "logging_steps": 100,
 
394
  "should_evaluate": false,
395
  "should_log": false,
396
  "should_save": true,
397
+ "should_training_stop": true
398
  },
399
  "attributes": {}
400
  }
401
  },
402
+ "total_flos": 1.714151970521088e+16,
403
  "train_batch_size": 8,
404
  "trial_name": null,
405
  "trial_params": null