khulnasoft committed on
Commit
8eed7ac
·
verified ·
1 Parent(s): 3809580

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. checkpoint-2/added_tokens.json +4 -0
  2. checkpoint-2/chat_template.jinja +1 -0
  3. checkpoint-2/config.json +38 -0
  4. checkpoint-2/generation_config.json +9 -0
  5. checkpoint-2/merges.txt +0 -0
  6. checkpoint-2/model.safetensors +3 -0
  7. checkpoint-2/optimizer.pt +3 -0
  8. checkpoint-2/rng_state.pth +3 -0
  9. checkpoint-2/scheduler.pt +3 -0
  10. checkpoint-2/special_tokens_map.json +24 -0
  11. checkpoint-2/tokenizer.json +0 -0
  12. checkpoint-2/tokenizer_config.json +39 -0
  13. checkpoint-2/trainer_state.json +42 -0
  14. checkpoint-2/training_args.bin +3 -0
  15. checkpoint-2/vocab.json +0 -0
  16. checkpoint-4/added_tokens.json +4 -0
  17. checkpoint-4/chat_template.jinja +1 -0
  18. checkpoint-4/config.json +38 -0
  19. checkpoint-4/generation_config.json +9 -0
  20. checkpoint-4/merges.txt +0 -0
  21. checkpoint-4/model.safetensors +3 -0
  22. checkpoint-4/optimizer.pt +3 -0
  23. checkpoint-4/rng_state.pth +3 -0
  24. checkpoint-4/scheduler.pt +3 -0
  25. checkpoint-4/special_tokens_map.json +24 -0
  26. checkpoint-4/tokenizer.json +0 -0
  27. checkpoint-4/tokenizer_config.json +39 -0
  28. checkpoint-4/trainer_state.json +50 -0
  29. checkpoint-4/training_args.bin +3 -0
  30. checkpoint-4/vocab.json +0 -0
  31. checkpoint-6/config.json +1 -1
  32. checkpoint-6/generation_config.json +1 -1
  33. checkpoint-6/model.safetensors +1 -1
  34. checkpoint-6/optimizer.pt +1 -1
  35. checkpoint-6/rng_state.pth +1 -1
  36. checkpoint-6/trainer_state.json +30 -5
  37. checkpoint-6/training_args.bin +1 -1
  38. config.json +1 -1
  39. eval_metrics.json +4 -4
  40. generation_config.json +1 -1
  41. logs/events.out.tfevents.1768999213.AS-AND-MacBook.8780.0 +3 -0
  42. logs/events.out.tfevents.1768999594.AS-AND-MacBook.8780.1 +3 -0
  43. logs/events.out.tfevents.1769001694.AS-AND-MacBook.37002.0 +3 -0
  44. logs/events.out.tfevents.1769001757.AS-AND-MacBook.37729.0 +3 -0
  45. logs/events.out.tfevents.1769001814.AS-AND-MacBook.38338.0 +3 -0
  46. logs/events.out.tfevents.1769001929.AS-AND-MacBook.39826.0 +3 -0
  47. logs/events.out.tfevents.1769002150.AS-AND-MacBook.39826.1 +3 -0
  48. model.safetensors +1 -1
  49. training_args.bin +1 -1
  50. training_args.json +1 -1
checkpoint-2/added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "<|bn|>": 50257,
3
+ "<|en|>": 50258
4
+ }
checkpoint-2/chat_template.jinja ADDED
@@ -0,0 +1 @@
 
 
1
+ {% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}
checkpoint-2/config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu_new",
3
+ "architectures": [
4
+ "GPT2LMHeadModel"
5
+ ],
6
+ "attn_pdrop": 0.1,
7
+ "bos_token_id": 50256,
8
+ "dtype": "float32",
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "pad_token_id": 50256,
21
+ "reorder_and_upcast_attn": false,
22
+ "resid_pdrop": 0.1,
23
+ "scale_attn_by_inverse_layer_idx": false,
24
+ "scale_attn_weights": true,
25
+ "summary_activation": null,
26
+ "summary_first_dropout": 0.1,
27
+ "summary_proj_to_labels": true,
28
+ "summary_type": "cls_index",
29
+ "summary_use_proj": true,
30
+ "task_specific_params": {
31
+ "conversational": {
32
+ "max_length": 1000
33
+ }
34
+ },
35
+ "transformers_version": "4.57.6",
36
+ "use_cache": true,
37
+ "vocab_size": 50259
38
+ }
checkpoint-2/generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": [
5
+ 50256
6
+ ],
7
+ "pad_token_id": 50256,
8
+ "transformers_version": "4.57.6"
9
+ }
checkpoint-2/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77894fb9f618c8e644851e39bbde86b79843993095a1efc4722c9a746056629d
3
+ size 497780352
checkpoint-2/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53b73626f9b8292d953ae605b42dc6a5663c0be1a2f7c9363be5051988a17ef6
3
+ size 995650490
checkpoint-2/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8098b3ea1099554a17a75b08e08b0801f551831d4783349401defb321e1c1c1d
3
+ size 13990
checkpoint-2/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33a6983f84829231430d88eed74853dfe0246012f38503c30699c06a68a4bb52
3
+ size 1064
checkpoint-2/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|endoftext|>",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
checkpoint-2/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2/tokenizer_config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "50256": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "50257": {
14
+ "content": "<|bn|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "50258": {
22
+ "content": "<|en|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ }
29
+ },
30
+ "bos_token": "<|endoftext|>",
31
+ "clean_up_tokenization_spaces": true,
32
+ "eos_token": "<|endoftext|>",
33
+ "errors": "replace",
34
+ "extra_special_tokens": {},
35
+ "model_max_length": 1024,
36
+ "pad_token": "<|endoftext|>",
37
+ "tokenizer_class": "GPT2Tokenizer",
38
+ "unk_token": "<|endoftext|>"
39
+ }
checkpoint-2/trainer_state.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 2,
3
+ "best_metric": 27.421092987060547,
4
+ "best_model_checkpoint": "models/bilingual-lm/checkpoint-2",
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 2,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "eval_loss": 27.421092987060547,
15
+ "eval_runtime": 0.4037,
16
+ "eval_samples_per_second": 2.477,
17
+ "eval_steps_per_second": 2.477,
18
+ "step": 2
19
+ }
20
+ ],
21
+ "logging_steps": 100,
22
+ "max_steps": 6,
23
+ "num_input_tokens_seen": 0,
24
+ "num_train_epochs": 3,
25
+ "save_steps": 500,
26
+ "stateful_callbacks": {
27
+ "TrainerControl": {
28
+ "args": {
29
+ "should_epoch_stop": false,
30
+ "should_evaluate": false,
31
+ "should_log": false,
32
+ "should_save": true,
33
+ "should_training_stop": false
34
+ },
35
+ "attributes": {}
36
+ }
37
+ },
38
+ "total_flos": 849199104000.0,
39
+ "train_batch_size": 8,
40
+ "trial_name": null,
41
+ "trial_params": null
42
+ }
checkpoint-2/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1bd4bfaaf6642cd6696a118c7e7a88112d0d9f6de0c75de51da006a6506a412
3
+ size 5368
checkpoint-2/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-4/added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "<|bn|>": 50257,
3
+ "<|en|>": 50258
4
+ }
checkpoint-4/chat_template.jinja ADDED
@@ -0,0 +1 @@
 
 
1
+ {% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}
checkpoint-4/config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu_new",
3
+ "architectures": [
4
+ "GPT2LMHeadModel"
5
+ ],
6
+ "attn_pdrop": 0.1,
7
+ "bos_token_id": 50256,
8
+ "dtype": "float32",
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "pad_token_id": 50256,
21
+ "reorder_and_upcast_attn": false,
22
+ "resid_pdrop": 0.1,
23
+ "scale_attn_by_inverse_layer_idx": false,
24
+ "scale_attn_weights": true,
25
+ "summary_activation": null,
26
+ "summary_first_dropout": 0.1,
27
+ "summary_proj_to_labels": true,
28
+ "summary_type": "cls_index",
29
+ "summary_use_proj": true,
30
+ "task_specific_params": {
31
+ "conversational": {
32
+ "max_length": 1000
33
+ }
34
+ },
35
+ "transformers_version": "4.57.6",
36
+ "use_cache": true,
37
+ "vocab_size": 50259
38
+ }
checkpoint-4/generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": [
5
+ 50256
6
+ ],
7
+ "pad_token_id": 50256,
8
+ "transformers_version": "4.57.6"
9
+ }
checkpoint-4/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-4/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52f2275b25c60ddad8cb9d8b94639a8df851f5f81ce0692145c4f073dadbca94
3
+ size 497780352
checkpoint-4/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f22720dc533c40ad036267aec816c07dfe2990d98b5847ffe5c2e09be1e679cb
3
+ size 995650490
checkpoint-4/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7595a3ced8366277d7ad0053a9aded9630cbddc783ac367e6e5a7ffacb5d5952
3
+ size 13990
checkpoint-4/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:341adb49042e078ddce5027f92ef183e43fbf9be31fd5090698ceed1356edab7
3
+ size 1064
checkpoint-4/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|endoftext|>",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
checkpoint-4/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-4/tokenizer_config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "50256": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "50257": {
14
+ "content": "<|bn|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "50258": {
22
+ "content": "<|en|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ }
29
+ },
30
+ "bos_token": "<|endoftext|>",
31
+ "clean_up_tokenization_spaces": true,
32
+ "eos_token": "<|endoftext|>",
33
+ "errors": "replace",
34
+ "extra_special_tokens": {},
35
+ "model_max_length": 1024,
36
+ "pad_token": "<|endoftext|>",
37
+ "tokenizer_class": "GPT2Tokenizer",
38
+ "unk_token": "<|endoftext|>"
39
+ }
checkpoint-4/trainer_state.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 4,
3
+ "best_metric": 27.379554748535156,
4
+ "best_model_checkpoint": "models/bilingual-lm/checkpoint-4",
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 4,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "eval_loss": 27.421092987060547,
15
+ "eval_runtime": 0.4037,
16
+ "eval_samples_per_second": 2.477,
17
+ "eval_steps_per_second": 2.477,
18
+ "step": 2
19
+ },
20
+ {
21
+ "epoch": 2.0,
22
+ "eval_loss": 27.379554748535156,
23
+ "eval_runtime": 0.2619,
24
+ "eval_samples_per_second": 3.818,
25
+ "eval_steps_per_second": 3.818,
26
+ "step": 4
27
+ }
28
+ ],
29
+ "logging_steps": 100,
30
+ "max_steps": 6,
31
+ "num_input_tokens_seen": 0,
32
+ "num_train_epochs": 3,
33
+ "save_steps": 500,
34
+ "stateful_callbacks": {
35
+ "TrainerControl": {
36
+ "args": {
37
+ "should_epoch_stop": false,
38
+ "should_evaluate": false,
39
+ "should_log": false,
40
+ "should_save": true,
41
+ "should_training_stop": false
42
+ },
43
+ "attributes": {}
44
+ }
45
+ },
46
+ "total_flos": 1698398208000.0,
47
+ "train_batch_size": 8,
48
+ "trial_name": null,
49
+ "trial_params": null
50
+ }
checkpoint-4/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1bd4bfaaf6642cd6696a118c7e7a88112d0d9f6de0c75de51da006a6506a412
3
+ size 5368
checkpoint-4/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-6/config.json CHANGED
@@ -32,7 +32,7 @@
32
  "max_length": 1000
33
  }
34
  },
35
- "transformers_version": "4.57.3",
36
  "use_cache": true,
37
  "vocab_size": 50259
38
  }
 
32
  "max_length": 1000
33
  }
34
  },
35
+ "transformers_version": "4.57.6",
36
  "use_cache": true,
37
  "vocab_size": 50259
38
  }
checkpoint-6/generation_config.json CHANGED
@@ -5,5 +5,5 @@
5
  50256
6
  ],
7
  "pad_token_id": 50256,
8
- "transformers_version": "4.57.3"
9
  }
 
5
  50256
6
  ],
7
  "pad_token_id": 50256,
8
+ "transformers_version": "4.57.6"
9
  }
checkpoint-6/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9e315c2eb6cd582ed3518caa8d01f377c1f423cd2c09be62743143f50203582
3
  size 497780352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c1a9a14bc1ef061d8b59e369e21366dc4fd3ec15ae426cac5ee613fb925ca7b
3
  size 497780352
checkpoint-6/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e502242ed60a36d0ab6c6e6021dcab65f5e5c543abc6d12c5df1468aa5ec3e10
3
  size 995650490
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9cb6d004d297bebf3fdfc26367064c1f4521016ee6e60cc04f897dfd6442d54
3
  size 995650490
checkpoint-6/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbf11243288a5c23a3ff457eb797018d98049d17a92086a1b2f6131f32cc6fa6
3
  size 13990
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:798ad3a9609f671228656df957989c4063e2027a31382d6440a1bdc1f3f29c8d
3
  size 13990
checkpoint-6/trainer_state.json CHANGED
@@ -1,19 +1,44 @@
1
  {
2
- "best_global_step": null,
3
- "best_metric": null,
4
- "best_model_checkpoint": null,
5
  "epoch": 3.0,
6
  "eval_steps": 500,
7
  "global_step": 6,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
- "log_history": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  "logging_steps": 100,
13
  "max_steps": 6,
14
  "num_input_tokens_seen": 0,
15
  "num_train_epochs": 3,
16
- "save_steps": 1000,
17
  "stateful_callbacks": {
18
  "TrainerControl": {
19
  "args": {
 
1
  {
2
+ "best_global_step": 6,
3
+ "best_metric": 27.304058074951172,
4
+ "best_model_checkpoint": "models/bilingual-lm/checkpoint-6",
5
  "epoch": 3.0,
6
  "eval_steps": 500,
7
  "global_step": 6,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "eval_loss": 27.421092987060547,
15
+ "eval_runtime": 0.4037,
16
+ "eval_samples_per_second": 2.477,
17
+ "eval_steps_per_second": 2.477,
18
+ "step": 2
19
+ },
20
+ {
21
+ "epoch": 2.0,
22
+ "eval_loss": 27.379554748535156,
23
+ "eval_runtime": 0.2619,
24
+ "eval_samples_per_second": 3.818,
25
+ "eval_steps_per_second": 3.818,
26
+ "step": 4
27
+ },
28
+ {
29
+ "epoch": 3.0,
30
+ "eval_loss": 27.304058074951172,
31
+ "eval_runtime": 0.2679,
32
+ "eval_samples_per_second": 3.732,
33
+ "eval_steps_per_second": 3.732,
34
+ "step": 6
35
+ }
36
+ ],
37
  "logging_steps": 100,
38
  "max_steps": 6,
39
  "num_input_tokens_seen": 0,
40
  "num_train_epochs": 3,
41
+ "save_steps": 500,
42
  "stateful_callbacks": {
43
  "TrainerControl": {
44
  "args": {
checkpoint-6/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94a81f867659ac5799522b5174cccbcb92f30688e80e2d6642edf31c7578466f
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1bd4bfaaf6642cd6696a118c7e7a88112d0d9f6de0c75de51da006a6506a412
3
  size 5368
config.json CHANGED
@@ -32,7 +32,7 @@
32
  "max_length": 1000
33
  }
34
  },
35
- "transformers_version": "4.57.3",
36
  "use_cache": true,
37
  "vocab_size": 50259
38
  }
 
32
  "max_length": 1000
33
  }
34
  },
35
+ "transformers_version": "4.57.6",
36
  "use_cache": true,
37
  "vocab_size": 50259
38
  }
eval_metrics.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "eval_loss": 27.3120174407959,
3
- "eval_runtime": 3.2068,
4
- "eval_samples_per_second": 0.312,
5
- "eval_steps_per_second": 0.312,
6
  "epoch": 3.0
7
  }
 
1
  {
2
+ "eval_loss": 27.304058074951172,
3
+ "eval_runtime": 0.2446,
4
+ "eval_samples_per_second": 4.089,
5
+ "eval_steps_per_second": 4.089,
6
  "epoch": 3.0
7
  }
generation_config.json CHANGED
@@ -5,5 +5,5 @@
5
  50256
6
  ],
7
  "pad_token_id": 50256,
8
- "transformers_version": "4.57.3"
9
  }
 
5
  50256
6
  ],
7
  "pad_token_id": 50256,
8
+ "transformers_version": "4.57.6"
9
  }
logs/events.out.tfevents.1768999213.AS-AND-MacBook.8780.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8d941646a3dbae2774cc426839c9b0f80dc63ab67858fc345cb820581903c18
3
+ size 6381
logs/events.out.tfevents.1768999594.AS-AND-MacBook.8780.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86ea30b3704ffc6c1eb6a3fcfe320379ac4c95be3b8a713eecce27b27c909b3b
3
+ size 354
logs/events.out.tfevents.1769001694.AS-AND-MacBook.37002.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8195c7629f3a9a783cb1f2f94a4b35b8610b3edfdedb5b9bcd8b946e97aee3c4
3
+ size 5237
logs/events.out.tfevents.1769001757.AS-AND-MacBook.37729.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52e88ccf740f2df2f79f8ab4f94b8b4e7aaa27f5dc7e1c5d8dba15eab8aaaef2
3
+ size 5237
logs/events.out.tfevents.1769001814.AS-AND-MacBook.38338.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efb756f963d774c8dca3721e750e7090d470d42269d8a8b0c0e2143b2041bfbf
3
+ size 5237
logs/events.out.tfevents.1769001929.AS-AND-MacBook.39826.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:656ee5721de0e6802a00fbb2c81469bad7fb8ec71c0795b7a2e4aeac24459642
3
+ size 6382
logs/events.out.tfevents.1769002150.AS-AND-MacBook.39826.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c25c7e5594e3be8465f98c7dde909fb5972712d9cc9c478291a944d3acaf3993
3
+ size 354
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9e315c2eb6cd582ed3518caa8d01f377c1f423cd2c09be62743143f50203582
3
  size 497780352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c1a9a14bc1ef061d8b59e369e21366dc4fd3ec15ae426cac5ee613fb925ca7b
3
  size 497780352
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94a81f867659ac5799522b5174cccbcb92f30688e80e2d6642edf31c7578466f
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1bd4bfaaf6642cd6696a118c7e7a88112d0d9f6de0c75de51da006a6506a412
3
  size 5368
training_args.json CHANGED
@@ -14,5 +14,5 @@
14
  "seed": 42,
15
  "max_train_samples": null,
16
  "max_val_samples": null,
17
- "fp16": true
18
  }
 
14
  "seed": 42,
15
  "max_train_samples": null,
16
  "max_val_samples": null,
17
+ "fp16": false
18
  }