mgh6 committed on
Commit
c79973a
·
verified ·
1 Parent(s): ad05660

Training in progress, step 500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb9d670069f5e1c0ce2785196918d606a858de0cbe420f28be2ee827f071c481
3
  size 2611614300
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe3339bbe444834d964d18dcdba107a588b72b643d0efbec85b6451eab84dc13
3
  size 2611614300
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7b35a1b319a7955d1cd726be6d2136954618d7589e16b936cfcb01809852964
3
  size 5213028466
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bb2d2afabcba63a06fbc795b476d6ea07db6d592c0123b971f0e20e6022f4d7
3
  size 5213028466
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3aa3697ff1b2ae74cc2afbb785f0e442fac632ef2e496ff49793437a27b21de1
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35597508a6f05cf5bbe64bbeb8123b65a03a9070967439b41c9f640a3f5b8fef
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:42629e0f223924c91cbbab007a8c6a2ef4e19563ca34c56935f96f1f89aa50d2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e15a2bef98421551121722ff5a1a1bf1ea7fd4b41a8ff180068d46d2588d0fae
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.09997481103393871,
5
  "eval_steps": 500,
6
- "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -35,6 +35,21 @@
35
  "learning_rate": 9.50012496875781e-05,
36
  "loss": 3199597936.64,
37
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  }
39
  ],
40
  "logging_steps": 100,
@@ -54,7 +69,7 @@
54
  "attributes": {}
55
  }
56
  },
57
- "total_flos": 7.779518930904678e+16,
58
  "train_batch_size": 1,
59
  "trial_name": null,
60
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.1249685137924234,
5
  "eval_steps": 500,
6
+ "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
35
  "learning_rate": 9.50012496875781e-05,
36
  "loss": 3199597936.64,
37
  "step": 400
38
+ },
39
+ {
40
+ "epoch": 0.1249685137924234,
41
+ "grad_norm": 1.2106974124908447,
42
+ "learning_rate": 9.375156210947263e-05,
43
+ "loss": 5333758443.52,
44
+ "step": 500
45
+ },
46
+ {
47
+ "epoch": 0.1249685137924234,
48
+ "eval_loss": NaN,
49
+ "eval_runtime": 904.9111,
50
+ "eval_samples_per_second": 17.153,
51
+ "eval_steps_per_second": 17.153,
52
+ "step": 500
53
  }
54
  ],
55
  "logging_steps": 100,
 
69
  "attributes": {}
70
  }
71
  },
72
+ "total_flos": 9.724398663630848e+16,
73
  "train_batch_size": 1,
74
  "trial_name": null,
75
  "trial_params": null