MatLumber committed on
Commit
0fc2b4f
·
1 Parent(s): 52bb323

Upload 10 files

Browse files
config.json CHANGED
@@ -32,7 +32,7 @@
32
  }
33
  },
34
  "torch_dtype": "float32",
35
- "transformers_version": "4.24.0",
36
  "use_cache": true,
37
  "vocab_size": 50257
38
  }
 
32
  }
33
  },
34
  "torch_dtype": "float32",
35
+ "transformers_version": "4.28.1",
36
  "use_cache": true,
37
  "vocab_size": 50257
38
  }
eval_results.txt CHANGED
@@ -1 +1 @@
1
- perplexity = tensor(1.4605)
 
1
+ perplexity = tensor(1.5243)
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.28.1"
6
+ }
merges.txt CHANGED
@@ -1,4 +1,4 @@
1
- #version: 0.2 - Trained by `huggingface/tokenizers`
2
  Ġ t
3
  Ġ a
4
  h e
 
1
+ #version: 0.2
2
  Ġ t
3
  Ġ a
4
  h e
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a84703ec3dec0dd27a4d01aff4c9f4c1d6a0b9c605bba1bd57df8428cf112bdd
3
- size 510396521
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:142dcf76d97dc77f993dc86af5a0f17aac6e437e901db4bae6190bb441239f01
3
+ size 510398013
tokenizer.json CHANGED
@@ -5,29 +5,32 @@
5
  "added_tokens": [
6
  {
7
  "id": 50256,
8
- "special": true,
9
  "content": "<|endoftext|>",
10
  "single_word": false,
11
  "lstrip": false,
12
  "rstrip": false,
13
- "normalized": true
 
14
  }
15
  ],
16
  "normalizer": null,
17
  "pre_tokenizer": {
18
  "type": "ByteLevel",
19
  "add_prefix_space": false,
20
- "trim_offsets": true
 
21
  },
22
  "post_processor": {
23
  "type": "ByteLevel",
24
  "add_prefix_space": true,
25
- "trim_offsets": false
 
26
  },
27
  "decoder": {
28
  "type": "ByteLevel",
29
  "add_prefix_space": true,
30
- "trim_offsets": true
 
31
  },
32
  "model": {
33
  "type": "BPE",
@@ -36,6 +39,7 @@
36
  "continuing_subword_prefix": "",
37
  "end_of_word_suffix": "",
38
  "fuse_unk": false,
 
39
  "vocab": {
40
  "!": 0,
41
  "\"": 1,
 
5
  "added_tokens": [
6
  {
7
  "id": 50256,
 
8
  "content": "<|endoftext|>",
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
12
+ "normalized": true,
13
+ "special": true
14
  }
15
  ],
16
  "normalizer": null,
17
  "pre_tokenizer": {
18
  "type": "ByteLevel",
19
  "add_prefix_space": false,
20
+ "trim_offsets": true,
21
+ "use_regex": true
22
  },
23
  "post_processor": {
24
  "type": "ByteLevel",
25
  "add_prefix_space": true,
26
+ "trim_offsets": false,
27
+ "use_regex": true
28
  },
29
  "decoder": {
30
  "type": "ByteLevel",
31
  "add_prefix_space": true,
32
+ "trim_offsets": true,
33
+ "use_regex": true
34
  },
35
  "model": {
36
  "type": "BPE",
 
39
  "continuing_subword_prefix": "",
40
  "end_of_word_suffix": "",
41
  "fuse_unk": false,
42
+ "byte_fallback": false,
43
  "vocab": {
44
  "!": 0,
45
  "\"": 1,
tokenizer_config.json CHANGED
@@ -9,6 +9,7 @@
9
  "rstrip": false,
10
  "single_word": false
11
  },
 
12
  "eos_token": {
13
  "__type": "AddedToken",
14
  "content": "<|endoftext|>",
@@ -19,9 +20,7 @@
19
  },
20
  "errors": "replace",
21
  "model_max_length": 1024,
22
- "name_or_path": "microsoft/DialoGPT-small",
23
  "pad_token": null,
24
- "special_tokens_map_file": null,
25
  "tokenizer_class": "GPT2Tokenizer",
26
  "unk_token": {
27
  "__type": "AddedToken",
 
9
  "rstrip": false,
10
  "single_word": false
11
  },
12
+ "clean_up_tokenization_spaces": true,
13
  "eos_token": {
14
  "__type": "AddedToken",
15
  "content": "<|endoftext|>",
 
20
  },
21
  "errors": "replace",
22
  "model_max_length": 1024,
 
23
  "pad_token": null,
 
24
  "tokenizer_class": "GPT2Tokenizer",
25
  "unk_token": {
26
  "__type": "AddedToken",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55a3334a1d6a6a14967951c36e9007636922bd7362f61f7e6a7731ba460aa791
3
- size 1327
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be6afa05e21f4092da7064719386bca54623aa11f600c3b96ac8abeb360b19d4
3
+ size 1339