Kasdeja23 committed on
Commit
d95000c
·
verified ·
1 Parent(s): 665ce6b

End of training

Browse files
Files changed (32) hide show
  1. .gitattributes +1 -0
  2. .locks/models--Kasdeja23--GPT2WaP/09e1dcf445848af2bc2447e031d425d111aed958.lock +0 -0
  3. .locks/models--Kasdeja23--GPT2WaP/226b0752cac7789c48f0cb3ec53eda48b7be36cc.lock +0 -0
  4. .locks/models--Kasdeja23--GPT2WaP/3cf9478e853077a3f5788610a7d1339473cb0761.lock +0 -0
  5. .locks/models--Kasdeja23--GPT2WaP/4a4aaf657aa4ac448fe2d1a35f600a7d28d5dbce.lock +0 -0
  6. .locks/models--Kasdeja23--GPT2WaP/5636a72629bc4ec8f857b8778c1f1d342138520b.lock +0 -0
  7. .locks/models--Kasdeja23--GPT2WaP/7433646544cc332d7eb43c85199b5ce98e2cc0ed.lock +0 -0
  8. .locks/models--Kasdeja23--GPT2WaP/84ef7fb594b5c0979e48bdeddb60a0adef33df0b.lock +0 -0
  9. .locks/models--Kasdeja23--GPT2WaP/d3dd48f142331df1755dc7e2233db8b4148e661ad6ca54881a45c6a91d16cc3a.lock +0 -0
  10. README.md +10 -14
  11. config.json +1 -1
  12. model.safetensors +1 -1
  13. models--Kasdeja23--GPT2WaP/.no_exist/665ce6b153bc886847d7ed029034eced5ce2567e/added_tokens.json +0 -0
  14. models--Kasdeja23--GPT2WaP/blobs/09e1dcf445848af2bc2447e031d425d111aed958 +39 -0
  15. models--Kasdeja23--GPT2WaP/blobs/226b0752cac7789c48f0cb3ec53eda48b7be36cc +0 -0
  16. models--Kasdeja23--GPT2WaP/blobs/3cf9478e853077a3f5788610a7d1339473cb0761 +0 -0
  17. models--Kasdeja23--GPT2WaP/blobs/4a4aaf657aa4ac448fe2d1a35f600a7d28d5dbce +20 -0
  18. models--Kasdeja23--GPT2WaP/blobs/5636a72629bc4ec8f857b8778c1f1d342138520b +6 -0
  19. models--Kasdeja23--GPT2WaP/blobs/7433646544cc332d7eb43c85199b5ce98e2cc0ed +6 -0
  20. models--Kasdeja23--GPT2WaP/blobs/84ef7fb594b5c0979e48bdeddb60a0adef33df0b +0 -0
  21. models--Kasdeja23--GPT2WaP/blobs/d3dd48f142331df1755dc7e2233db8b4148e661ad6ca54881a45c6a91d16cc3a +3 -0
  22. models--Kasdeja23--GPT2WaP/refs/main +1 -0
  23. models--Kasdeja23--GPT2WaP/snapshots/665ce6b153bc886847d7ed029034eced5ce2567e/config.json +39 -0
  24. models--Kasdeja23--GPT2WaP/snapshots/665ce6b153bc886847d7ed029034eced5ce2567e/generation_config.json +6 -0
  25. models--Kasdeja23--GPT2WaP/snapshots/665ce6b153bc886847d7ed029034eced5ce2567e/merges.txt +0 -0
  26. models--Kasdeja23--GPT2WaP/snapshots/665ce6b153bc886847d7ed029034eced5ce2567e/model.safetensors +3 -0
  27. models--Kasdeja23--GPT2WaP/snapshots/665ce6b153bc886847d7ed029034eced5ce2567e/special_tokens_map.json +6 -0
  28. models--Kasdeja23--GPT2WaP/snapshots/665ce6b153bc886847d7ed029034eced5ce2567e/tokenizer.json +0 -0
  29. models--Kasdeja23--GPT2WaP/snapshots/665ce6b153bc886847d7ed029034eced5ce2567e/tokenizer_config.json +20 -0
  30. models--Kasdeja23--GPT2WaP/snapshots/665ce6b153bc886847d7ed029034eced5ce2567e/vocab.json +0 -0
  31. tokenizer.json +1 -1
  32. training_args.bin +2 -2
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ models--Kasdeja23--GPT2WaP/blobs/d3dd48f142331df1755dc7e2233db8b4148e661ad6ca54881a45c6a91d16cc3a filter=lfs diff=lfs merge=lfs -text
.locks/models--Kasdeja23--GPT2WaP/09e1dcf445848af2bc2447e031d425d111aed958.lock ADDED
File without changes
.locks/models--Kasdeja23--GPT2WaP/226b0752cac7789c48f0cb3ec53eda48b7be36cc.lock ADDED
File without changes
.locks/models--Kasdeja23--GPT2WaP/3cf9478e853077a3f5788610a7d1339473cb0761.lock ADDED
File without changes
.locks/models--Kasdeja23--GPT2WaP/4a4aaf657aa4ac448fe2d1a35f600a7d28d5dbce.lock ADDED
File without changes
.locks/models--Kasdeja23--GPT2WaP/5636a72629bc4ec8f857b8778c1f1d342138520b.lock ADDED
File without changes
.locks/models--Kasdeja23--GPT2WaP/7433646544cc332d7eb43c85199b5ce98e2cc0ed.lock ADDED
File without changes
.locks/models--Kasdeja23--GPT2WaP/84ef7fb594b5c0979e48bdeddb60a0adef33df0b.lock ADDED
File without changes
.locks/models--Kasdeja23--GPT2WaP/d3dd48f142331df1755dc7e2233db8b4148e661ad6ca54881a45c6a91d16cc3a.lock ADDED
File without changes
README.md CHANGED
@@ -15,8 +15,8 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 8.6671
19
- - Perplexity: 5808.7070
20
 
21
  ## Model description
22
 
@@ -46,7 +46,7 @@ The following hyperparameters were used during training:
46
  - total_eval_batch_size: 128
47
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
48
  - lr_scheduler_type: cosine
49
- - lr_scheduler_warmup_steps: 1000
50
  - num_epochs: 20
51
  - mixed_precision_training: Native AMP
52
 
@@ -54,17 +54,13 @@ The following hyperparameters were used during training:
54
 
55
  | Training Loss | Epoch | Step | Validation Loss | Perplexity |
56
  |:-------------:|:-------:|:----:|:---------------:|:----------:|
57
- | 9.5757 | 1.7978 | 20 | 8.8741 | 7144.6958 |
58
- | 8.6138 | 3.5955 | 40 | 9.2959 | 10892.8018 |
59
- | 7.8094 | 5.3933 | 60 | 9.2828 | 10751.5010 |
60
- | 7.0822 | 7.1910 | 80 | 8.9766 | 7915.5801 |
61
- | 6.4369 | 8.9888 | 100 | 8.5389 | 5109.8931 |
62
- | 5.9333 | 10.7865 | 120 | 8.4375 | 4616.9585 |
63
- | 5.5611 | 12.5843 | 140 | 8.4776 | 4806.0811 |
64
- | 5.3067 | 14.3820 | 160 | 8.3995 | 4444.7969 |
65
- | 5.1255 | 16.1798 | 180 | 8.6841 | 5908.0918 |
66
- | 4.9194 | 17.9775 | 200 | 8.5597 | 5216.9863 |
67
- | 4.7863 | 19.7753 | 220 | 8.6671 | 5808.7070 |
68
 
69
 
70
  ### Framework versions
 
15
 
16
  This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 8.3931
19
+ - Perplexity: 4416.4487
20
 
21
  ## Model description
22
 
 
46
  - total_eval_batch_size: 128
47
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
48
  - lr_scheduler_type: cosine
49
+ - lr_scheduler_warmup_steps: 750
50
  - num_epochs: 20
51
  - mixed_precision_training: Native AMP
52
 
 
54
 
55
  | Training Loss | Epoch | Step | Validation Loss | Perplexity |
56
  |:-------------:|:-------:|:----:|:---------------:|:----------:|
57
+ | 9.431 | 2.7586 | 20 | 9.0219 | 8282.6191 |
58
+ | 8.5038 | 5.5172 | 40 | 9.5490 | 14030.7480 |
59
+ | 7.5941 | 8.2759 | 60 | 9.3572 | 11582.0645 |
60
+ | 6.8765 | 11.0345 | 80 | 8.8650 | 7079.6768 |
61
+ | 6.1713 | 13.7931 | 100 | 8.4353 | 4607.0444 |
62
+ | 5.6932 | 16.5517 | 120 | 8.2600 | 3866.2166 |
63
+ | 5.3371 | 19.3103 | 140 | 8.3931 | 4416.4487 |
 
 
 
 
64
 
65
 
66
  ### Framework versions
config.json CHANGED
@@ -11,7 +11,7 @@
11
  "initializer_range": 0.02,
12
  "layer_norm_epsilon": 1e-05,
13
  "model_type": "gpt2",
14
- "n_ctx": 48,
15
  "n_embd": 768,
16
  "n_head": 12,
17
  "n_inner": null,
 
11
  "initializer_range": 0.02,
12
  "layer_norm_epsilon": 1e-05,
13
  "model_type": "gpt2",
14
+ "n_ctx": 64,
15
  "n_embd": 768,
16
  "n_head": 12,
17
  "n_inner": null,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3dd48f142331df1755dc7e2233db8b4148e661ad6ca54881a45c6a91d16cc3a
3
  size 497774208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cda1653126c8feb3784ad04468d842c74c8a2ea96d73b5db083602b4ddfed2f7
3
  size 497774208
models--Kasdeja23--GPT2WaP/.no_exist/665ce6b153bc886847d7ed029034eced5ce2567e/added_tokens.json ADDED
File without changes
models--Kasdeja23--GPT2WaP/blobs/09e1dcf445848af2bc2447e031d425d111aed958 ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 48,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.40.1",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
models--Kasdeja23--GPT2WaP/blobs/226b0752cac7789c48f0cb3ec53eda48b7be36cc ADDED
The diff for this file is too large to render. See raw diff
 
models--Kasdeja23--GPT2WaP/blobs/3cf9478e853077a3f5788610a7d1339473cb0761 ADDED
The diff for this file is too large to render. See raw diff
 
models--Kasdeja23--GPT2WaP/blobs/4a4aaf657aa4ac448fe2d1a35f600a7d28d5dbce ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ }
12
+ },
13
+ "bos_token": "<|endoftext|>",
14
+ "clean_up_tokenization_spaces": true,
15
+ "eos_token": "<|endoftext|>",
16
+ "model_max_length": 1024,
17
+ "pad_token": "<|endoftext|>",
18
+ "tokenizer_class": "GPT2Tokenizer",
19
+ "unk_token": "<|endoftext|>"
20
+ }
models--Kasdeja23--GPT2WaP/blobs/5636a72629bc4ec8f857b8778c1f1d342138520b ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.40.1"
6
+ }
models--Kasdeja23--GPT2WaP/blobs/7433646544cc332d7eb43c85199b5ce98e2cc0ed ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
6
+ }
models--Kasdeja23--GPT2WaP/blobs/84ef7fb594b5c0979e48bdeddb60a0adef33df0b ADDED
The diff for this file is too large to render. See raw diff
 
models--Kasdeja23--GPT2WaP/blobs/d3dd48f142331df1755dc7e2233db8b4148e661ad6ca54881a45c6a91d16cc3a ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3dd48f142331df1755dc7e2233db8b4148e661ad6ca54881a45c6a91d16cc3a
3
+ size 497774208
models--Kasdeja23--GPT2WaP/refs/main ADDED
@@ -0,0 +1 @@
 
 
1
+ 665ce6b153bc886847d7ed029034eced5ce2567e
models--Kasdeja23--GPT2WaP/snapshots/665ce6b153bc886847d7ed029034eced5ce2567e/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 48,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.40.1",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
models--Kasdeja23--GPT2WaP/snapshots/665ce6b153bc886847d7ed029034eced5ce2567e/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.40.1"
6
+ }
models--Kasdeja23--GPT2WaP/snapshots/665ce6b153bc886847d7ed029034eced5ce2567e/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
models--Kasdeja23--GPT2WaP/snapshots/665ce6b153bc886847d7ed029034eced5ce2567e/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3dd48f142331df1755dc7e2233db8b4148e661ad6ca54881a45c6a91d16cc3a
3
+ size 497774208
models--Kasdeja23--GPT2WaP/snapshots/665ce6b153bc886847d7ed029034eced5ce2567e/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
6
+ }
models--Kasdeja23--GPT2WaP/snapshots/665ce6b153bc886847d7ed029034eced5ce2567e/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models--Kasdeja23--GPT2WaP/snapshots/665ce6b153bc886847d7ed029034eced5ce2567e/tokenizer_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ }
12
+ },
13
+ "bos_token": "<|endoftext|>",
14
+ "clean_up_tokenization_spaces": true,
15
+ "eos_token": "<|endoftext|>",
16
+ "model_max_length": 1024,
17
+ "pad_token": "<|endoftext|>",
18
+ "tokenizer_class": "GPT2Tokenizer",
19
+ "unk_token": "<|endoftext|>"
20
+ }
models--Kasdeja23--GPT2WaP/snapshots/665ce6b153bc886847d7ed029034eced5ce2567e/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.json CHANGED
@@ -2,7 +2,7 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 48,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 64,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7db1fd1ec3b8cb6efbbe80b37ff02d8452294fb0c2195ad03ca891ef195834ad
3
- size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8f8460c3bcda64677c7ebeede8050cd9a4b298bc1b17b654c3e513ddc3aa3c0
3
+ size 4920