SystemAdmin123 commited on
Commit
50c744a
·
verified ·
1 Parent(s): 093b78d

Training in progress, step 40, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbc29a0fbddbc8f2aee33509d8b46ffb148b09e99f87205a9339cd29be7a4c9f
3
  size 250490408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ff04ffbfa8bc5bb3e988c8bb45003fcadd064e85f411987ddd44c15d8d57974
3
  size 250490408
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:011384e0bf4163d9eaa823c6126a17099909c178bcbae1f784e3255e34352ad7
3
  size 255265850
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d243f90d71e2db4783c193c779aa1f22097b9f05a55dcf25053864cf43dd5dfa
3
  size 255265850
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7f1384a95c7886d45389c40a6a90ac8328081e721eba74d7efd9c04824bd482
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d0bc5c92e4fdea01d3fa32e0069da72f87be35dfa69240211a2193fb469268f
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7c49e91ee5c51b4cb45c0ea51ec7a52a5fe9832d924e46824fac4bed28529c7
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fed34d3582eb7589bf4ae83ba37735e55c58c1dcea5c7b347cc4732a41c1fa84
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3d7496cd5cad5fcd343f51f06f864ca525a833da3ba71e9c4d90915510423ac
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:572996e9dc190ff3f3a6efdf58de929f593fa2dd2f97d74fed7646828651b17f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.6666666666666665,
5
  "eval_steps": 40,
6
- "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -29,6 +29,28 @@
29
  "learning_rate": 0.00013333333333333334,
30
  "loss": 5.8022,
31
  "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  }
33
  ],
34
  "logging_steps": 10,
@@ -48,7 +70,7 @@
48
  "attributes": {}
49
  }
50
  },
51
- "total_flos": 1311685964988416.0,
52
  "train_batch_size": 32,
53
  "trial_name": null,
54
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.3333333333333335,
5
  "eval_steps": 40,
6
+ "global_step": 40,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
29
  "learning_rate": 0.00013333333333333334,
30
  "loss": 5.8022,
31
  "step": 20
32
+ },
33
+ {
34
+ "epoch": 2.5,
35
+ "grad_norm": 4.03125,
36
+ "learning_rate": 0.0002,
37
+ "loss": 5.51,
38
+ "step": 30
39
+ },
40
+ {
41
+ "epoch": 3.3333333333333335,
42
+ "grad_norm": 4.3125,
43
+ "learning_rate": 0.00019984815164333163,
44
+ "loss": 5.3024,
45
+ "step": 40
46
+ },
47
+ {
48
+ "epoch": 3.3333333333333335,
49
+ "eval_loss": 3.1405746936798096,
50
+ "eval_runtime": 10.6366,
51
+ "eval_samples_per_second": 141.117,
52
+ "eval_steps_per_second": 2.256,
53
+ "step": 40
54
  }
55
  ],
56
  "logging_steps": 10,
 
70
  "attributes": {}
71
  }
72
  },
73
+ "total_flos": 2598287911682048.0,
74
  "train_batch_size": 32,
75
  "trial_name": null,
76
  "trial_params": null