shorecode committed on
Commit
d5a242b
·
verified ·
1 Parent(s): e85fc7e

Upload folder using huggingface_hub

Browse files
checkpoint-1400/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67267547f1d2b6c7640f9b9a6d678612bb187e133080a876f715c2e22b848454
3
  size 402763
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7471e7503097821eb7cedd03029765d8c6d340f64a530fcacf1b70d8280cc43e
3
  size 402763
checkpoint-1400/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fe14eea3a51c61a652dc1a0c3a843c1a5310626d7b42f10aad4470d4d42bc84
3
  size 62314258
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7f4a4bed170f65dac3b0aeae3d2a5f812fa425e4f7fc45556f7fdc55987af2d
3
  size 62314258
checkpoint-1400/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2cdf1a1a5aa1d6ce60e2b75c3ae17e1a0d7a15339cd844971d4798625dd644a2
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:543a6952dfe815e824a9d4b10abd2434f4f2a650c28d1aabc5602d81b60d9a50
3
  size 14645
checkpoint-1400/tokenizer_config.json CHANGED
@@ -935,6 +935,6 @@
935
  "model_max_length": 1000000000000000019884624838656,
936
  "pad_token": "<pad>",
937
  "sp_model_kwargs": {},
938
- "tokenizer_class": "T5Tokenizer",
939
  "unk_token": "<unk>"
940
  }
 
935
  "model_max_length": 1000000000000000019884624838656,
936
  "pad_token": "<pad>",
937
  "sp_model_kwargs": {},
938
+ "tokenizer_class": "T5TokenizerFast",
939
  "unk_token": "<unk>"
940
  }
checkpoint-1400/trainer_state.json CHANGED
@@ -11,53 +11,53 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.05752416014726185,
14
- "grad_norm": 0.5827537775039673,
15
  "learning_rate": 0.00016513000460193283,
16
- "loss": 5.0544,
17
  "step": 250
18
  },
19
  {
20
  "epoch": 0.1150483202945237,
21
- "grad_norm": 0.5847251415252686,
22
  "learning_rate": 0.00016024045098941557,
23
- "loss": 4.2362,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 0.1150483202945237,
28
- "eval_loss": 3.4322922229766846,
29
- "eval_runtime": 35.9031,
30
- "eval_samples_per_second": 109.74,
31
- "eval_steps_per_second": 3.231,
32
  "step": 500
33
  },
34
  {
35
  "epoch": 0.17257248044178555,
36
- "grad_norm": 0.6557937264442444,
37
  "learning_rate": 0.0001553508973768983,
38
- "loss": 4.0566,
39
  "step": 750
40
  },
41
  {
42
  "epoch": 0.2300966405890474,
43
- "grad_norm": 0.6734243631362915,
44
  "learning_rate": 0.00015046134376438104,
45
- "loss": 3.9349,
46
  "step": 1000
47
  },
48
  {
49
  "epoch": 0.2300966405890474,
50
- "eval_loss": 3.1976470947265625,
51
- "eval_runtime": 35.3562,
52
- "eval_samples_per_second": 111.437,
53
- "eval_steps_per_second": 3.281,
54
  "step": 1000
55
  },
56
  {
57
  "epoch": 0.28762080073630925,
58
- "grad_norm": 0.6826881766319275,
59
  "learning_rate": 0.0001455717901518638,
60
- "loss": 3.856,
61
  "step": 1250
62
  }
63
  ],
 
11
  "log_history": [
12
  {
13
  "epoch": 0.05752416014726185,
14
+ "grad_norm": 0.6941567659378052,
15
  "learning_rate": 0.00016513000460193283,
16
+ "loss": 3.417,
17
  "step": 250
18
  },
19
  {
20
  "epoch": 0.1150483202945237,
21
+ "grad_norm": 0.739392101764679,
22
  "learning_rate": 0.00016024045098941557,
23
+ "loss": 3.3994,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 0.1150483202945237,
28
+ "eval_loss": 2.832693576812744,
29
+ "eval_runtime": 34.8026,
30
+ "eval_samples_per_second": 113.21,
31
+ "eval_steps_per_second": 3.333,
32
  "step": 500
33
  },
34
  {
35
  "epoch": 0.17257248044178555,
36
+ "grad_norm": 0.7630258202552795,
37
  "learning_rate": 0.0001553508973768983,
38
+ "loss": 3.3881,
39
  "step": 750
40
  },
41
  {
42
  "epoch": 0.2300966405890474,
43
+ "grad_norm": 0.745369553565979,
44
  "learning_rate": 0.00015046134376438104,
45
+ "loss": 3.3655,
46
  "step": 1000
47
  },
48
  {
49
  "epoch": 0.2300966405890474,
50
+ "eval_loss": 2.821577548980713,
51
+ "eval_runtime": 34.6743,
52
+ "eval_samples_per_second": 113.629,
53
+ "eval_steps_per_second": 3.345,
54
  "step": 1000
55
  },
56
  {
57
  "epoch": 0.28762080073630925,
58
+ "grad_norm": 0.7992149591445923,
59
  "learning_rate": 0.0001455717901518638,
60
+ "loss": 3.3575,
61
  "step": 1250
62
  }
63
  ],