SM committed on
Commit
90db77c
·
1 Parent(s): ba705df

Retrain with the proper data file.

Browse files
This view is limited to 50 files because the commit contains too many changes. See raw diff
Files changed (50) hide show
  1. README.md +2 -2
  2. all_results.json +12 -12
  3. checkpoint-1000/config.json +39 -0
  4. checkpoint-1000/generation_config.json +6 -0
  5. checkpoint-1000/merges.txt +0 -0
  6. checkpoint-1000/model.safetensors +3 -0
  7. checkpoint-1000/optimizer.pt +3 -0
  8. checkpoint-1000/rng_state.pth +3 -0
  9. checkpoint-1000/scheduler.pt +3 -0
  10. checkpoint-1000/special_tokens_map.json +5 -0
  11. checkpoint-1000/tokenizer.json +0 -0
  12. checkpoint-1000/tokenizer_config.json +19 -0
  13. checkpoint-1000/trainer_state.json +33 -0
  14. checkpoint-1000/training_args.bin +3 -0
  15. checkpoint-1000/vocab.json +0 -0
  16. checkpoint-1500/config.json +39 -0
  17. checkpoint-1500/generation_config.json +6 -0
  18. checkpoint-1500/merges.txt +0 -0
  19. checkpoint-1500/model.safetensors +3 -0
  20. checkpoint-1500/optimizer.pt +3 -0
  21. checkpoint-1500/rng_state.pth +3 -0
  22. checkpoint-1500/scheduler.pt +3 -0
  23. checkpoint-1500/special_tokens_map.json +5 -0
  24. checkpoint-1500/tokenizer.json +0 -0
  25. checkpoint-1500/tokenizer_config.json +19 -0
  26. checkpoint-1500/trainer_state.json +39 -0
  27. checkpoint-1500/training_args.bin +3 -0
  28. checkpoint-1500/vocab.json +0 -0
  29. checkpoint-2000/config.json +39 -0
  30. checkpoint-2000/generation_config.json +6 -0
  31. checkpoint-2000/merges.txt +0 -0
  32. checkpoint-2000/model.safetensors +3 -0
  33. checkpoint-2000/optimizer.pt +3 -0
  34. checkpoint-2000/rng_state.pth +3 -0
  35. checkpoint-2000/scheduler.pt +3 -0
  36. checkpoint-2000/special_tokens_map.json +5 -0
  37. checkpoint-2000/tokenizer.json +0 -0
  38. checkpoint-2000/tokenizer_config.json +19 -0
  39. checkpoint-2000/trainer_state.json +45 -0
  40. checkpoint-2000/training_args.bin +3 -0
  41. checkpoint-2000/vocab.json +0 -0
  42. checkpoint-2500/config.json +39 -0
  43. checkpoint-2500/generation_config.json +6 -0
  44. checkpoint-2500/merges.txt +0 -0
  45. checkpoint-2500/model.safetensors +3 -0
  46. checkpoint-2500/optimizer.pt +3 -0
  47. checkpoint-2500/rng_state.pth +3 -0
  48. checkpoint-2500/scheduler.pt +3 -0
  49. checkpoint-2500/special_tokens_map.json +5 -0
  50. checkpoint-2500/tokenizer.json +0 -0
README.md CHANGED
@@ -17,8 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 60.5072
21
- - Accuracy: 0.0
22
 
23
  ## Model description
24
 
 
17
 
18
  This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 52.0337
21
+ - Accuracy: 0.1243
22
 
23
  ## Model description
24
 
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "epoch": 40.0,
3
- "eval_accuracy": 0.0,
4
- "eval_loss": 60.507171630859375,
5
- "eval_runtime": 2.1604,
6
- "eval_samples": 4,
7
- "eval_samples_per_second": 1.851,
8
- "eval_steps_per_second": 0.926,
9
- "perplexity": 1.8964035291436836e+26,
10
- "train_loss": 58.570675893930286,
11
- "train_runtime": 5757.0891,
12
- "train_samples": 78,
13
- "train_samples_per_second": 0.542,
14
- "train_steps_per_second": 0.271
15
  }
 
1
  {
2
  "epoch": 40.0,
3
+ "eval_accuracy": 0.12425328554360812,
4
+ "eval_loss": 52.03367233276367,
5
+ "eval_runtime": 4.1042,
6
+ "eval_samples": 9,
7
+ "eval_samples_per_second": 2.193,
8
+ "eval_steps_per_second": 1.218,
9
+ "perplexity": 3.962203408827054e+22,
10
+ "train_loss": 57.43311643738677,
11
+ "train_runtime": 10482.6781,
12
+ "train_samples": 138,
13
+ "train_samples_per_second": 0.527,
14
+ "train_steps_per_second": 0.263
15
  }
checkpoint-1000/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.37.0.dev0",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
checkpoint-1000/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.37.0.dev0"
6
+ }
checkpoint-1000/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfcdd32060421fc062c6972b23088021b78ee341a6ba56ac82f86eaea8a9be39
3
+ size 497774208
checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40792add400940242337cb4f1c1ded33fc53932d579e2aafc1ad92e26b9120ad
3
+ size 995638202
checkpoint-1000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2248774053cf007b7093c6e0bb2c3b3dd6eaa25d185fd835bab801482da4e4b0
3
+ size 13990
checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3898258d676f040a88d5e204cd4b72f355d3dc5e6acf2f9d957635fad24937e8
3
+ size 1064
checkpoint-1000/special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "unk_token": "<|endoftext|>"
5
+ }
checkpoint-1000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1000/tokenizer_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ }
12
+ },
13
+ "bos_token": "<|endoftext|>",
14
+ "clean_up_tokenization_spaces": true,
15
+ "eos_token": "<|endoftext|>",
16
+ "model_max_length": 1024,
17
+ "tokenizer_class": "GPT2Tokenizer",
18
+ "unk_token": "<|endoftext|>"
19
+ }
checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 14.492753623188406,
5
+ "eval_steps": 500,
6
+ "global_step": 1000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 7.25,
13
+ "learning_rate": 4.094202898550725e-05,
14
+ "loss": 52.964,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 14.49,
19
+ "learning_rate": 3.188405797101449e-05,
20
+ "loss": 63.81,
21
+ "step": 1000
22
+ }
23
+ ],
24
+ "logging_steps": 500,
25
+ "max_steps": 2760,
26
+ "num_input_tokens_seen": 0,
27
+ "num_train_epochs": 40,
28
+ "save_steps": 500,
29
+ "total_flos": 1045168128000000.0,
30
+ "train_batch_size": 2,
31
+ "trial_name": null,
32
+ "trial_params": null
33
+ }
checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3901907ca8b14655a382a70720bd9e1bb2f76f1edb2679dd829e743bc3f6bc3e
3
+ size 4664
checkpoint-1000/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1500/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.37.0.dev0",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
checkpoint-1500/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.37.0.dev0"
6
+ }
checkpoint-1500/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1500/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42f5e565cdb79f9110a6d84d8389311e50392871d64a8891dbde0a227a8788dc
3
+ size 497774208
checkpoint-1500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e601a8de001ab43374799bb279945ab8304ecc9cb6457dd39819746e3509e5a
3
+ size 995638202
checkpoint-1500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13fd47b12859b8841c4b8248c9b246be3d9ced25781b423c40d0b3a010fa7653
3
+ size 13990
checkpoint-1500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d8150471eaa0602abf5ca49129f5d5e1a49fbee7998e0a72bf6f710952d97a1
3
+ size 1064
checkpoint-1500/special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "unk_token": "<|endoftext|>"
5
+ }
checkpoint-1500/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1500/tokenizer_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ }
12
+ },
13
+ "bos_token": "<|endoftext|>",
14
+ "clean_up_tokenization_spaces": true,
15
+ "eos_token": "<|endoftext|>",
16
+ "model_max_length": 1024,
17
+ "tokenizer_class": "GPT2Tokenizer",
18
+ "unk_token": "<|endoftext|>"
19
+ }
checkpoint-1500/trainer_state.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 21.73913043478261,
5
+ "eval_steps": 500,
6
+ "global_step": 1500,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 7.25,
13
+ "learning_rate": 4.094202898550725e-05,
14
+ "loss": 52.964,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 14.49,
19
+ "learning_rate": 3.188405797101449e-05,
20
+ "loss": 63.81,
21
+ "step": 1000
22
+ },
23
+ {
24
+ "epoch": 21.74,
25
+ "learning_rate": 2.282608695652174e-05,
26
+ "loss": 62.5429,
27
+ "step": 1500
28
+ }
29
+ ],
30
+ "logging_steps": 500,
31
+ "max_steps": 2760,
32
+ "num_input_tokens_seen": 0,
33
+ "num_train_epochs": 40,
34
+ "save_steps": 500,
35
+ "total_flos": 1567752192000000.0,
36
+ "train_batch_size": 2,
37
+ "trial_name": null,
38
+ "trial_params": null
39
+ }
checkpoint-1500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3901907ca8b14655a382a70720bd9e1bb2f76f1edb2679dd829e743bc3f6bc3e
3
+ size 4664
checkpoint-1500/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2000/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.37.0.dev0",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
checkpoint-2000/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.37.0.dev0"
6
+ }
checkpoint-2000/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcbe070b82059badc3cff1bfc0bcae3f883ada68f07a60fa8da20273ad31d041
3
+ size 497774208
checkpoint-2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52b6e90b1598b433558c8544104af14d2e9899a893662f3665492f6a88cfb7e1
3
+ size 995638202
checkpoint-2000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8af998d92b14891eae8da6a02f34398e26c284418aafc0720f904f72ebc45e9b
3
+ size 13990
checkpoint-2000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6dd30ada5b40093c7c92eee80875a56bbece06a0cd26cc8b5c5b15dca76defd
3
+ size 1064
checkpoint-2000/special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "unk_token": "<|endoftext|>"
5
+ }
checkpoint-2000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2000/tokenizer_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ }
12
+ },
13
+ "bos_token": "<|endoftext|>",
14
+ "clean_up_tokenization_spaces": true,
15
+ "eos_token": "<|endoftext|>",
16
+ "model_max_length": 1024,
17
+ "tokenizer_class": "GPT2Tokenizer",
18
+ "unk_token": "<|endoftext|>"
19
+ }
checkpoint-2000/trainer_state.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 28.985507246376812,
5
+ "eval_steps": 500,
6
+ "global_step": 2000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 7.25,
13
+ "learning_rate": 4.094202898550725e-05,
14
+ "loss": 52.964,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 14.49,
19
+ "learning_rate": 3.188405797101449e-05,
20
+ "loss": 63.81,
21
+ "step": 1000
22
+ },
23
+ {
24
+ "epoch": 21.74,
25
+ "learning_rate": 2.282608695652174e-05,
26
+ "loss": 62.5429,
27
+ "step": 1500
28
+ },
29
+ {
30
+ "epoch": 28.99,
31
+ "learning_rate": 1.3768115942028985e-05,
32
+ "loss": 57.5548,
33
+ "step": 2000
34
+ }
35
+ ],
36
+ "logging_steps": 500,
37
+ "max_steps": 2760,
38
+ "num_input_tokens_seen": 0,
39
+ "num_train_epochs": 40,
40
+ "save_steps": 500,
41
+ "total_flos": 2090336256000000.0,
42
+ "train_batch_size": 2,
43
+ "trial_name": null,
44
+ "trial_params": null
45
+ }
checkpoint-2000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3901907ca8b14655a382a70720bd9e1bb2f76f1edb2679dd829e743bc3f6bc3e
3
+ size 4664
checkpoint-2000/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2500/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.37.0.dev0",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
checkpoint-2500/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.37.0.dev0"
6
+ }
checkpoint-2500/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2500/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3aa463b901dfd0ccc9e380c213fc921aba26e9b195485279f61c6347750b2e53
3
+ size 497774208
checkpoint-2500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d96874b37b1d821dce4c73d15ae5f0eea658e9e6e88f84ea553de7a4ba33fe3
3
+ size 995638202
checkpoint-2500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18f5998416d05c29029657954be610c8d756da442ed5608203ce274ddf272c03
3
+ size 13990
checkpoint-2500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c29bd4bf2aa870a026e9382e55e9e41abab36f126edf6f29461d731f77bcc9f
3
+ size 1064
checkpoint-2500/special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "unk_token": "<|endoftext|>"
5
+ }
checkpoint-2500/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff