shorecode commited on
Commit
e85fc7e
·
verified ·
1 Parent(s): 180d696

Upload folder using huggingface_hub

Browse files
checkpoint-1050/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2efb683d1596318bd5d460ab10fe133d2d265accfad9800db712dc09aa179f60
3
  size 402763
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98fd5044314627e437dc2b7cdda294e48e3c4963a768af47d181b6e425a41dcc
3
  size 402763
checkpoint-1050/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d22db40534b825bfb6e2e3a1be52f795fedaf4e7a7685d37a2181fdd6d4a304d
3
  size 62314258
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd8a29d89acc6af99d387344d32386020a6920163d2db254b78b02b7a902ed79
3
  size 62314258
checkpoint-1050/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:321ac564783ecdca53102cf6b6549daa0a313bec83709a52ea07029f85b930f5
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:879d73f7b526f4773e67571faa23d0381f777afe5786d66898ad1914dcb91306
3
  size 14645
checkpoint-1050/tokenizer_config.json CHANGED
@@ -935,6 +935,6 @@
935
  "model_max_length": 1000000000000000019884624838656,
936
  "pad_token": "<pad>",
937
  "sp_model_kwargs": {},
938
- "tokenizer_class": "T5Tokenizer",
939
  "unk_token": "<unk>"
940
  }
 
935
  "model_max_length": 1000000000000000019884624838656,
936
  "pad_token": "<pad>",
937
  "sp_model_kwargs": {},
938
+ "tokenizer_class": "T5TokenizerFast",
939
  "unk_token": "<unk>"
940
  }
checkpoint-1050/trainer_state.json CHANGED
@@ -11,46 +11,46 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.05752416014726185,
14
- "grad_norm": 0.5827537775039673,
15
  "learning_rate": 0.00016513000460193283,
16
- "loss": 5.0544,
17
  "step": 250
18
  },
19
  {
20
  "epoch": 0.1150483202945237,
21
- "grad_norm": 0.5847251415252686,
22
  "learning_rate": 0.00016024045098941557,
23
- "loss": 4.2362,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 0.1150483202945237,
28
- "eval_loss": 3.4322922229766846,
29
- "eval_runtime": 35.9031,
30
- "eval_samples_per_second": 109.74,
31
- "eval_steps_per_second": 3.231,
32
  "step": 500
33
  },
34
  {
35
  "epoch": 0.17257248044178555,
36
- "grad_norm": 0.6557937264442444,
37
  "learning_rate": 0.0001553508973768983,
38
- "loss": 4.0566,
39
  "step": 750
40
  },
41
  {
42
  "epoch": 0.2300966405890474,
43
- "grad_norm": 0.6734243631362915,
44
  "learning_rate": 0.00015046134376438104,
45
- "loss": 3.9349,
46
  "step": 1000
47
  },
48
  {
49
  "epoch": 0.2300966405890474,
50
- "eval_loss": 3.1976470947265625,
51
- "eval_runtime": 35.3562,
52
- "eval_samples_per_second": 111.437,
53
- "eval_steps_per_second": 3.281,
54
  "step": 1000
55
  }
56
  ],
 
11
  "log_history": [
12
  {
13
  "epoch": 0.05752416014726185,
14
+ "grad_norm": 0.6941567659378052,
15
  "learning_rate": 0.00016513000460193283,
16
+ "loss": 3.417,
17
  "step": 250
18
  },
19
  {
20
  "epoch": 0.1150483202945237,
21
+ "grad_norm": 0.739392101764679,
22
  "learning_rate": 0.00016024045098941557,
23
+ "loss": 3.3994,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 0.1150483202945237,
28
+ "eval_loss": 2.832693576812744,
29
+ "eval_runtime": 34.8026,
30
+ "eval_samples_per_second": 113.21,
31
+ "eval_steps_per_second": 3.333,
32
  "step": 500
33
  },
34
  {
35
  "epoch": 0.17257248044178555,
36
+ "grad_norm": 0.7630258202552795,
37
  "learning_rate": 0.0001553508973768983,
38
+ "loss": 3.3881,
39
  "step": 750
40
  },
41
  {
42
  "epoch": 0.2300966405890474,
43
+ "grad_norm": 0.745369553565979,
44
  "learning_rate": 0.00015046134376438104,
45
+ "loss": 3.3655,
46
  "step": 1000
47
  },
48
  {
49
  "epoch": 0.2300966405890474,
50
+ "eval_loss": 2.821577548980713,
51
+ "eval_runtime": 34.6743,
52
+ "eval_samples_per_second": 113.629,
53
+ "eval_steps_per_second": 3.345,
54
  "step": 1000
55
  }
56
  ],