cagrigungor commited on
Commit
5210858
·
verified ·
1 Parent(s): 61ad595

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +6 -0
  2. checkpoint-1464/config.json +32 -0
  3. checkpoint-1464/generation_config.json +9 -0
  4. checkpoint-1464/model.safetensors +3 -0
  5. checkpoint-1464/optimizer.pt +3 -0
  6. checkpoint-1464/rng_state.pth +3 -0
  7. checkpoint-1464/scheduler.pt +3 -0
  8. checkpoint-1464/special_tokens_map.json +23 -0
  9. checkpoint-1464/spiece.model +3 -0
  10. checkpoint-1464/tokenizer.json +3 -0
  11. checkpoint-1464/tokenizer_config.json +39 -0
  12. checkpoint-1464/trainer_state.json +72 -0
  13. checkpoint-1464/training_args.bin +3 -0
  14. checkpoint-1952/config.json +32 -0
  15. checkpoint-1952/generation_config.json +9 -0
  16. checkpoint-1952/model.safetensors +3 -0
  17. checkpoint-1952/optimizer.pt +3 -0
  18. checkpoint-1952/rng_state.pth +3 -0
  19. checkpoint-1952/scheduler.pt +3 -0
  20. checkpoint-1952/special_tokens_map.json +23 -0
  21. checkpoint-1952/spiece.model +3 -0
  22. checkpoint-1952/tokenizer.json +3 -0
  23. checkpoint-1952/tokenizer_config.json +39 -0
  24. checkpoint-1952/trainer_state.json +87 -0
  25. checkpoint-1952/training_args.bin +3 -0
  26. checkpoint-2440/config.json +32 -0
  27. checkpoint-2440/generation_config.json +9 -0
  28. checkpoint-2440/model.safetensors +3 -0
  29. checkpoint-2440/optimizer.pt +3 -0
  30. checkpoint-2440/rng_state.pth +3 -0
  31. checkpoint-2440/scheduler.pt +3 -0
  32. checkpoint-2440/special_tokens_map.json +23 -0
  33. checkpoint-2440/spiece.model +3 -0
  34. checkpoint-2440/tokenizer.json +3 -0
  35. checkpoint-2440/tokenizer_config.json +39 -0
  36. checkpoint-2440/trainer_state.json +102 -0
  37. checkpoint-2440/training_args.bin +3 -0
  38. checkpoint-488/config.json +32 -0
  39. checkpoint-488/generation_config.json +9 -0
  40. checkpoint-488/model.safetensors +3 -0
  41. checkpoint-488/optimizer.pt +3 -0
  42. checkpoint-488/rng_state.pth +3 -0
  43. checkpoint-488/scheduler.pt +3 -0
  44. checkpoint-488/special_tokens_map.json +23 -0
  45. checkpoint-488/spiece.model +3 -0
  46. checkpoint-488/tokenizer.json +3 -0
  47. checkpoint-488/tokenizer_config.json +39 -0
  48. checkpoint-488/trainer_state.json +42 -0
  49. checkpoint-488/training_args.bin +3 -0
  50. checkpoint-976/config.json +32 -0
.gitattributes CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ checkpoint-1464/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ checkpoint-1952/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ checkpoint-2440/tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
+ checkpoint-488/tokenizer.json filter=lfs diff=lfs merge=lfs -text
40
+ checkpoint-976/tokenizer.json filter=lfs diff=lfs merge=lfs -text
41
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
checkpoint-1464/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "T5ForConditionalGeneration"
4
+ ],
5
+ "classifier_dropout": 0.0,
6
+ "d_ff": 2816,
7
+ "d_kv": 64,
8
+ "d_model": 1024,
9
+ "decoder_start_token_id": 0,
10
+ "dense_act_fn": "gelu_new",
11
+ "dropout_rate": 0.1,
12
+ "dtype": "float32",
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "gated-gelu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": true,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "num_decoder_layers": 24,
21
+ "num_heads": 16,
22
+ "num_layers": 24,
23
+ "output_past": true,
24
+ "pad_token_id": 0,
25
+ "relative_attention_max_distance": 128,
26
+ "relative_attention_num_buckets": 32,
27
+ "tie_word_embeddings": false,
28
+ "tokenizer_class": "T5Tokenizer",
29
+ "transformers_version": "4.57.3",
30
+ "use_cache": true,
31
+ "vocab_size": 250112
32
+ }
checkpoint-1464/generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "decoder_start_token_id": 0,
4
+ "eos_token_id": [
5
+ 1
6
+ ],
7
+ "pad_token_id": 0,
8
+ "transformers_version": "4.57.3"
9
+ }
checkpoint-1464/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17e5dbf209582bbc423fb5a5c64f561e5120f44d596ae8ffcc3a1146ab3ba3f8
3
+ size 4918393736
checkpoint-1464/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f48f80ff7d610859426809d5076e377e14254aaa4fa923519a40fd9350bdcc0
3
+ size 9837144773
checkpoint-1464/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec2e48fdef59fa424ab4115a1f31fc06ffd46a7921c5a59b049338e878926652
3
+ size 14645
checkpoint-1464/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14e33b0696093a82d59a6b127c20fa68bd129297a52484f0e6cb84faff6e1698
3
+ size 1465
checkpoint-1464/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eos_token": {
3
+ "content": "</s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "pad_token": {
10
+ "content": "<pad>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "unk_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
checkpoint-1464/spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
3
+ size 4309802
checkpoint-1464/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2172c2b5ca792ef398c3357421498eca5b0ae7e5deb50f00f8515316e1fd8f3
3
+ size 15998308
checkpoint-1464/tokenizer_config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": null,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<pad>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "</s>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "<unk>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ }
28
+ },
29
+ "additional_special_tokens": [],
30
+ "clean_up_tokenization_spaces": false,
31
+ "eos_token": "</s>",
32
+ "extra_ids": 0,
33
+ "extra_special_tokens": {},
34
+ "model_max_length": 1000000000000000019884624838656,
35
+ "pad_token": "<pad>",
36
+ "sp_model_kwargs": {},
37
+ "tokenizer_class": "T5Tokenizer",
38
+ "unk_token": "<unk>"
39
+ }
checkpoint-1464/trainer_state.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 3.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1464,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "eval_loss": 0.06042954698204994,
15
+ "eval_runtime": 10.063,
16
+ "eval_samples_per_second": 688.86,
17
+ "eval_steps_per_second": 5.466,
18
+ "step": 488
19
+ },
20
+ {
21
+ "epoch": 1.0245901639344261,
22
+ "grad_norm": 0.09357130527496338,
23
+ "learning_rate": 0.001590983606557377,
24
+ "loss": 0.225,
25
+ "step": 500
26
+ },
27
+ {
28
+ "epoch": 2.0,
29
+ "eval_loss": 0.025667887181043625,
30
+ "eval_runtime": 10.0911,
31
+ "eval_samples_per_second": 686.939,
32
+ "eval_steps_per_second": 5.45,
33
+ "step": 976
34
+ },
35
+ {
36
+ "epoch": 2.0491803278688523,
37
+ "grad_norm": 0.0897439494729042,
38
+ "learning_rate": 0.0011811475409836064,
39
+ "loss": 0.0442,
40
+ "step": 1000
41
+ },
42
+ {
43
+ "epoch": 3.0,
44
+ "eval_loss": 0.019025476649403572,
45
+ "eval_runtime": 10.0796,
46
+ "eval_samples_per_second": 687.727,
47
+ "eval_steps_per_second": 5.457,
48
+ "step": 1464
49
+ }
50
+ ],
51
+ "logging_steps": 500,
52
+ "max_steps": 2440,
53
+ "num_input_tokens_seen": 0,
54
+ "num_train_epochs": 5,
55
+ "save_steps": 500,
56
+ "stateful_callbacks": {
57
+ "TrainerControl": {
58
+ "args": {
59
+ "should_epoch_stop": false,
60
+ "should_evaluate": false,
61
+ "should_log": false,
62
+ "should_save": true,
63
+ "should_training_stop": false
64
+ },
65
+ "attributes": {}
66
+ }
67
+ },
68
+ "total_flos": 4.8634769700864e+16,
69
+ "train_batch_size": 128,
70
+ "trial_name": null,
71
+ "trial_params": null
72
+ }
checkpoint-1464/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3a5d2402b146148a8cfae628d87d12f831be6c1b6520063a29e602f61aa29fb
3
+ size 5969
checkpoint-1952/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "T5ForConditionalGeneration"
4
+ ],
5
+ "classifier_dropout": 0.0,
6
+ "d_ff": 2816,
7
+ "d_kv": 64,
8
+ "d_model": 1024,
9
+ "decoder_start_token_id": 0,
10
+ "dense_act_fn": "gelu_new",
11
+ "dropout_rate": 0.1,
12
+ "dtype": "float32",
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "gated-gelu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": true,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "num_decoder_layers": 24,
21
+ "num_heads": 16,
22
+ "num_layers": 24,
23
+ "output_past": true,
24
+ "pad_token_id": 0,
25
+ "relative_attention_max_distance": 128,
26
+ "relative_attention_num_buckets": 32,
27
+ "tie_word_embeddings": false,
28
+ "tokenizer_class": "T5Tokenizer",
29
+ "transformers_version": "4.57.3",
30
+ "use_cache": true,
31
+ "vocab_size": 250112
32
+ }
checkpoint-1952/generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "decoder_start_token_id": 0,
4
+ "eos_token_id": [
5
+ 1
6
+ ],
7
+ "pad_token_id": 0,
8
+ "transformers_version": "4.57.3"
9
+ }
checkpoint-1952/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c9de8decdc6d1ce9ba2768a6037002fb87cdae8f43730df1cfe549cffc170c5
3
+ size 4918393736
checkpoint-1952/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28619e9dff96ac422fbcb57b959f64b2b461583b30ff66f9301b39c1a7f1997a
3
+ size 9837144773
checkpoint-1952/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9c053db2a6c0fbf439270a4e66ca38d969c061f9853a6bd5693094cf5238660
3
+ size 14645
checkpoint-1952/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa774ee297e24df97fe1a164ba2b251a10808fd8c9a8b85f0e1e00dac18920a6
3
+ size 1465
checkpoint-1952/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eos_token": {
3
+ "content": "</s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "pad_token": {
10
+ "content": "<pad>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "unk_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
checkpoint-1952/spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
3
+ size 4309802
checkpoint-1952/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2172c2b5ca792ef398c3357421498eca5b0ae7e5deb50f00f8515316e1fd8f3
3
+ size 15998308
checkpoint-1952/tokenizer_config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": null,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<pad>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "</s>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "<unk>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ }
28
+ },
29
+ "additional_special_tokens": [],
30
+ "clean_up_tokenization_spaces": false,
31
+ "eos_token": "</s>",
32
+ "extra_ids": 0,
33
+ "extra_special_tokens": {},
34
+ "model_max_length": 1000000000000000019884624838656,
35
+ "pad_token": "<pad>",
36
+ "sp_model_kwargs": {},
37
+ "tokenizer_class": "T5Tokenizer",
38
+ "unk_token": "<unk>"
39
+ }
checkpoint-1952/trainer_state.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 4.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1952,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "eval_loss": 0.06042954698204994,
15
+ "eval_runtime": 10.063,
16
+ "eval_samples_per_second": 688.86,
17
+ "eval_steps_per_second": 5.466,
18
+ "step": 488
19
+ },
20
+ {
21
+ "epoch": 1.0245901639344261,
22
+ "grad_norm": 0.09357130527496338,
23
+ "learning_rate": 0.001590983606557377,
24
+ "loss": 0.225,
25
+ "step": 500
26
+ },
27
+ {
28
+ "epoch": 2.0,
29
+ "eval_loss": 0.025667887181043625,
30
+ "eval_runtime": 10.0911,
31
+ "eval_samples_per_second": 686.939,
32
+ "eval_steps_per_second": 5.45,
33
+ "step": 976
34
+ },
35
+ {
36
+ "epoch": 2.0491803278688523,
37
+ "grad_norm": 0.0897439494729042,
38
+ "learning_rate": 0.0011811475409836064,
39
+ "loss": 0.0442,
40
+ "step": 1000
41
+ },
42
+ {
43
+ "epoch": 3.0,
44
+ "eval_loss": 0.019025476649403572,
45
+ "eval_runtime": 10.0796,
46
+ "eval_samples_per_second": 687.727,
47
+ "eval_steps_per_second": 5.457,
48
+ "step": 1464
49
+ },
50
+ {
51
+ "epoch": 3.0737704918032787,
52
+ "grad_norm": 0.0849112719297409,
53
+ "learning_rate": 0.0007713114754098361,
54
+ "loss": 0.0156,
55
+ "step": 1500
56
+ },
57
+ {
58
+ "epoch": 4.0,
59
+ "eval_loss": 0.01647171936929226,
60
+ "eval_runtime": 10.1303,
61
+ "eval_samples_per_second": 684.284,
62
+ "eval_steps_per_second": 5.429,
63
+ "step": 1952
64
+ }
65
+ ],
66
+ "logging_steps": 500,
67
+ "max_steps": 2440,
68
+ "num_input_tokens_seen": 0,
69
+ "num_train_epochs": 5,
70
+ "save_steps": 500,
71
+ "stateful_callbacks": {
72
+ "TrainerControl": {
73
+ "args": {
74
+ "should_epoch_stop": false,
75
+ "should_evaluate": false,
76
+ "should_log": false,
77
+ "should_save": true,
78
+ "should_training_stop": false
79
+ },
80
+ "attributes": {}
81
+ }
82
+ },
83
+ "total_flos": 6.48170329764864e+16,
84
+ "train_batch_size": 128,
85
+ "trial_name": null,
86
+ "trial_params": null
87
+ }
checkpoint-1952/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3a5d2402b146148a8cfae628d87d12f831be6c1b6520063a29e602f61aa29fb
3
+ size 5969
checkpoint-2440/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "T5ForConditionalGeneration"
4
+ ],
5
+ "classifier_dropout": 0.0,
6
+ "d_ff": 2816,
7
+ "d_kv": 64,
8
+ "d_model": 1024,
9
+ "decoder_start_token_id": 0,
10
+ "dense_act_fn": "gelu_new",
11
+ "dropout_rate": 0.1,
12
+ "dtype": "float32",
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "gated-gelu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": true,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "num_decoder_layers": 24,
21
+ "num_heads": 16,
22
+ "num_layers": 24,
23
+ "output_past": true,
24
+ "pad_token_id": 0,
25
+ "relative_attention_max_distance": 128,
26
+ "relative_attention_num_buckets": 32,
27
+ "tie_word_embeddings": false,
28
+ "tokenizer_class": "T5Tokenizer",
29
+ "transformers_version": "4.57.3",
30
+ "use_cache": true,
31
+ "vocab_size": 250112
32
+ }
checkpoint-2440/generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "decoder_start_token_id": 0,
4
+ "eos_token_id": [
5
+ 1
6
+ ],
7
+ "pad_token_id": 0,
8
+ "transformers_version": "4.57.3"
9
+ }
checkpoint-2440/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:726447629d0530500262ad0e155f899feaa07f25fe62f257facb2d60af4cdc89
3
+ size 4918393736
checkpoint-2440/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4751fc82c43849abb7796deb3c6acdd453bfde95155523982f8af42e592cf664
3
+ size 9837144773
checkpoint-2440/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f16247903338e1066458ab38d04958c959e7a57ff7c209a53e01ee6b454e3dcb
3
+ size 14645
checkpoint-2440/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5457d07c41a49fcaaedc3e20d6b4f102efd664698b52007623f016a1412e0111
3
+ size 1465
checkpoint-2440/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eos_token": {
3
+ "content": "</s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "pad_token": {
10
+ "content": "<pad>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "unk_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
checkpoint-2440/spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
3
+ size 4309802
checkpoint-2440/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2172c2b5ca792ef398c3357421498eca5b0ae7e5deb50f00f8515316e1fd8f3
3
+ size 15998308
checkpoint-2440/tokenizer_config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": null,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<pad>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "</s>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "<unk>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ }
28
+ },
29
+ "additional_special_tokens": [],
30
+ "clean_up_tokenization_spaces": false,
31
+ "eos_token": "</s>",
32
+ "extra_ids": 0,
33
+ "extra_special_tokens": {},
34
+ "model_max_length": 1000000000000000019884624838656,
35
+ "pad_token": "<pad>",
36
+ "sp_model_kwargs": {},
37
+ "tokenizer_class": "T5Tokenizer",
38
+ "unk_token": "<unk>"
39
+ }
checkpoint-2440/trainer_state.json ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 5.0,
6
+ "eval_steps": 500,
7
+ "global_step": 2440,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "eval_loss": 0.06042954698204994,
15
+ "eval_runtime": 10.063,
16
+ "eval_samples_per_second": 688.86,
17
+ "eval_steps_per_second": 5.466,
18
+ "step": 488
19
+ },
20
+ {
21
+ "epoch": 1.0245901639344261,
22
+ "grad_norm": 0.09357130527496338,
23
+ "learning_rate": 0.001590983606557377,
24
+ "loss": 0.225,
25
+ "step": 500
26
+ },
27
+ {
28
+ "epoch": 2.0,
29
+ "eval_loss": 0.025667887181043625,
30
+ "eval_runtime": 10.0911,
31
+ "eval_samples_per_second": 686.939,
32
+ "eval_steps_per_second": 5.45,
33
+ "step": 976
34
+ },
35
+ {
36
+ "epoch": 2.0491803278688523,
37
+ "grad_norm": 0.0897439494729042,
38
+ "learning_rate": 0.0011811475409836064,
39
+ "loss": 0.0442,
40
+ "step": 1000
41
+ },
42
+ {
43
+ "epoch": 3.0,
44
+ "eval_loss": 0.019025476649403572,
45
+ "eval_runtime": 10.0796,
46
+ "eval_samples_per_second": 687.727,
47
+ "eval_steps_per_second": 5.457,
48
+ "step": 1464
49
+ },
50
+ {
51
+ "epoch": 3.0737704918032787,
52
+ "grad_norm": 0.0849112719297409,
53
+ "learning_rate": 0.0007713114754098361,
54
+ "loss": 0.0156,
55
+ "step": 1500
56
+ },
57
+ {
58
+ "epoch": 4.0,
59
+ "eval_loss": 0.01647171936929226,
60
+ "eval_runtime": 10.1303,
61
+ "eval_samples_per_second": 684.284,
62
+ "eval_steps_per_second": 5.429,
63
+ "step": 1952
64
+ },
65
+ {
66
+ "epoch": 4.098360655737705,
67
+ "grad_norm": 0.012470896355807781,
68
+ "learning_rate": 0.0003614754098360656,
69
+ "loss": 0.0055,
70
+ "step": 2000
71
+ },
72
+ {
73
+ "epoch": 5.0,
74
+ "eval_loss": 0.018309397622942924,
75
+ "eval_runtime": 10.0913,
76
+ "eval_samples_per_second": 686.926,
77
+ "eval_steps_per_second": 5.45,
78
+ "step": 2440
79
+ }
80
+ ],
81
+ "logging_steps": 500,
82
+ "max_steps": 2440,
83
+ "num_input_tokens_seen": 0,
84
+ "num_train_epochs": 5,
85
+ "save_steps": 500,
86
+ "stateful_callbacks": {
87
+ "TrainerControl": {
88
+ "args": {
89
+ "should_epoch_stop": false,
90
+ "should_evaluate": false,
91
+ "should_log": false,
92
+ "should_save": true,
93
+ "should_training_stop": true
94
+ },
95
+ "attributes": {}
96
+ }
97
+ },
98
+ "total_flos": 8.10375476093952e+16,
99
+ "train_batch_size": 128,
100
+ "trial_name": null,
101
+ "trial_params": null
102
+ }
checkpoint-2440/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3a5d2402b146148a8cfae628d87d12f831be6c1b6520063a29e602f61aa29fb
3
+ size 5969
checkpoint-488/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "T5ForConditionalGeneration"
4
+ ],
5
+ "classifier_dropout": 0.0,
6
+ "d_ff": 2816,
7
+ "d_kv": 64,
8
+ "d_model": 1024,
9
+ "decoder_start_token_id": 0,
10
+ "dense_act_fn": "gelu_new",
11
+ "dropout_rate": 0.1,
12
+ "dtype": "float32",
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "gated-gelu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": true,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "num_decoder_layers": 24,
21
+ "num_heads": 16,
22
+ "num_layers": 24,
23
+ "output_past": true,
24
+ "pad_token_id": 0,
25
+ "relative_attention_max_distance": 128,
26
+ "relative_attention_num_buckets": 32,
27
+ "tie_word_embeddings": false,
28
+ "tokenizer_class": "T5Tokenizer",
29
+ "transformers_version": "4.57.3",
30
+ "use_cache": true,
31
+ "vocab_size": 250112
32
+ }
checkpoint-488/generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "decoder_start_token_id": 0,
4
+ "eos_token_id": [
5
+ 1
6
+ ],
7
+ "pad_token_id": 0,
8
+ "transformers_version": "4.57.3"
9
+ }
checkpoint-488/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3c3cb82efd0a48e8b6eade1b01a29c47a41a8d06589501a7a5d11466ea16df9
3
+ size 4918393736
checkpoint-488/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb4df4fc7ed44c6a74ddeae554490586d61683b5742f29ccaba03654590e415b
3
+ size 9837144773
checkpoint-488/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8b8c15f920d9c62c0c04649564c403ad0810336c8a50c12fc596eafb3a62b80
3
+ size 14645
checkpoint-488/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91df1dd2dbd60145e7136cace4a1c835b853e066b101a2f07a83164b66abda9a
3
+ size 1465
checkpoint-488/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eos_token": {
3
+ "content": "</s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "pad_token": {
10
+ "content": "<pad>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "unk_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
checkpoint-488/spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
3
+ size 4309802
checkpoint-488/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2172c2b5ca792ef398c3357421498eca5b0ae7e5deb50f00f8515316e1fd8f3
3
+ size 15998308
checkpoint-488/tokenizer_config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": null,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<pad>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "</s>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "<unk>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ }
28
+ },
29
+ "additional_special_tokens": [],
30
+ "clean_up_tokenization_spaces": false,
31
+ "eos_token": "</s>",
32
+ "extra_ids": 0,
33
+ "extra_special_tokens": {},
34
+ "model_max_length": 1000000000000000019884624838656,
35
+ "pad_token": "<pad>",
36
+ "sp_model_kwargs": {},
37
+ "tokenizer_class": "T5Tokenizer",
38
+ "unk_token": "<unk>"
39
+ }
checkpoint-488/trainer_state.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 488,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "eval_loss": 0.06042954698204994,
15
+ "eval_runtime": 10.063,
16
+ "eval_samples_per_second": 688.86,
17
+ "eval_steps_per_second": 5.466,
18
+ "step": 488
19
+ }
20
+ ],
21
+ "logging_steps": 500,
22
+ "max_steps": 2440,
23
+ "num_input_tokens_seen": 0,
24
+ "num_train_epochs": 5,
25
+ "save_steps": 500,
26
+ "stateful_callbacks": {
27
+ "TrainerControl": {
28
+ "args": {
29
+ "should_epoch_stop": false,
30
+ "should_evaluate": false,
31
+ "should_log": false,
32
+ "should_save": true,
33
+ "should_training_stop": false
34
+ },
35
+ "attributes": {}
36
+ }
37
+ },
38
+ "total_flos": 1.61884136312832e+16,
39
+ "train_batch_size": 128,
40
+ "trial_name": null,
41
+ "trial_params": null
42
+ }
checkpoint-488/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3a5d2402b146148a8cfae628d87d12f831be6c1b6520063a29e602f61aa29fb
3
+ size 5969
checkpoint-976/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "T5ForConditionalGeneration"
4
+ ],
5
+ "classifier_dropout": 0.0,
6
+ "d_ff": 2816,
7
+ "d_kv": 64,
8
+ "d_model": 1024,
9
+ "decoder_start_token_id": 0,
10
+ "dense_act_fn": "gelu_new",
11
+ "dropout_rate": 0.1,
12
+ "dtype": "float32",
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "gated-gelu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": true,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "num_decoder_layers": 24,
21
+ "num_heads": 16,
22
+ "num_layers": 24,
23
+ "output_past": true,
24
+ "pad_token_id": 0,
25
+ "relative_attention_max_distance": 128,
26
+ "relative_attention_num_buckets": 32,
27
+ "tie_word_embeddings": false,
28
+ "tokenizer_class": "T5Tokenizer",
29
+ "transformers_version": "4.57.3",
30
+ "use_cache": true,
31
+ "vocab_size": 250112
32
+ }