mrtuandao committed
Commit 5da7993 · verified · 1 Parent(s): a5859a3

Upload folder using huggingface_hub

experiments/sft_gpt2-120m/20251209_070038/checkpoints/epoch_15/config.json ADDED
@@ -0,0 +1,39 @@
+ {
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 50256,
+   "dtype": "float32",
+   "embd_pdrop": 0.1,
+   "eos_token_id": 50256,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 1024,
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 12,
+   "n_positions": 1024,
+   "pad_token_id": 50256,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "task_specific_params": {
+     "text-generation": {
+       "do_sample": true,
+       "max_length": 50
+     }
+   },
+   "transformers_version": "4.56.0",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
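
The config above is a standard 12-layer GPT-2 setup (n_embd 768, n_head 12, context 1024). As a minimal sketch of how such a checkpoint could be loaded, assuming the directory has been downloaded locally (the path mirrors this repo's layout and is illustrative):

```python
# Minimal sketch: loading this checkpoint with transformers.
# Assumes the checkpoint directory was downloaded locally; the path
# below mirrors this repo's layout and is illustrative.
from transformers import AutoModelForCausalLM, AutoTokenizer

ckpt = "experiments/sft_gpt2-120m/20251209_070038/checkpoints/epoch_15"
model = AutoModelForCausalLM.from_pretrained(ckpt)
tokenizer = AutoTokenizer.from_pretrained(ckpt)
```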
experiments/sft_gpt2-120m/20251209_070038/checkpoints/epoch_15/generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 50256,
+   "eos_token_id": 50256,
+   "transformers_version": "4.56.0"
+ }
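
The generation config only pins the bos/eos ids; sampling settings live under task_specific_params in config.json (do_sample, max_length 50). A hedged usage sketch, reusing `model` and `tokenizer` from the snippet above; the prompt is made up:

```python
# Minimal sketch: generation with the sampling settings recorded in config.json.
# `model` and `tokenizer` come from the previous snippet; the prompt is illustrative.
import torch

inputs = tokenizer("Hello, world", return_tensors="pt")
with torch.no_grad():
    out = model.generate(**inputs, do_sample=True, max_length=50)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```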
experiments/sft_gpt2-120m/20251209_070038/checkpoints/epoch_15/lr_scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0c31f813ce39e2274fa22f25c097f1cc39f1e4d536cbdd13d13c70c1a50611df
+ size 1483
experiments/sft_gpt2-120m/20251209_070038/checkpoints/epoch_15/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
experiments/sft_gpt2-120m/20251209_070038/checkpoints/epoch_15/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9d7985c191bc5a3a339bfa87c8e65473b71342ec086da6c94be7cbe70c49a4a0
+ size 497774208
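
The .pt and .safetensors files are checked in as Git LFS pointers: each pointer records the blob's SHA-256 and byte size rather than the data itself. A minimal sketch of verifying a downloaded weight file against its pointer; the local `path` is illustrative, while the hash and size are copied from the model.safetensors pointer above:

```python
# Minimal sketch: checking a downloaded file against its Git LFS pointer.
# `path` is illustrative; `expected` and the size are copied from the pointer above.
import hashlib
import os

path = "model.safetensors"
expected = "9d7985c191bc5a3a339bfa87c8e65473b71342ec086da6c94be7cbe70c49a4a0"

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
        h.update(chunk)
assert os.path.getsize(path) == 497774208, "size mismatch"
assert h.hexdigest() == expected, "checksum mismatch"
```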
experiments/sft_gpt2-120m/20251209_070038/checkpoints/epoch_15/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dd23ae57e99a6fdc828cb826841cb2970f6bb4b3a25401cba73dc573c431f929
+ size 995642763
experiments/sft_gpt2-120m/20251209_070038/checkpoints/epoch_15/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "bos_token": "<|endoftext|>",
+   "eos_token": "<|endoftext|>",
+   "pad_token": "<|endoftext|>",
+   "unk_token": "<|endoftext|>"
+ }
experiments/sft_gpt2-120m/20251209_070038/checkpoints/epoch_15/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
experiments/sft_gpt2-120m/20251209_070038/checkpoints/epoch_15/tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
+ {
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "50256": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<|endoftext|>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|endoftext|>",
+   "extra_special_tokens": {},
+   "model_max_length": 1024,
+   "pad_token": "<|endoftext|>",
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": "<|endoftext|>"
+ }
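
Per the two files above, every special token maps to <|endoftext|> (id 50256), the usual GPT-2 arrangement that lets padded batches work without resizing embeddings. A short sketch, reusing the tokenizer loaded in the first snippet; the prompts are illustrative:

```python
# Minimal sketch: GPT-2 reuses <|endoftext|> (id 50256) as bos/eos/pad/unk,
# so batched encoding pads with 50256. `tokenizer` is from the first snippet.
batch = tokenizer(
    ["short prompt", "a somewhat longer illustrative prompt"],
    padding=True,
    return_tensors="pt",
)
print(batch["input_ids"].shape, tokenizer.pad_token_id)  # pad_token_id == 50256
```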
experiments/sft_gpt2-120m/20251209_070038/checkpoints/epoch_15/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
experiments/sft_gpt2-120m/20251209_070038/sft_gpt2-120m.log CHANGED
@@ -800,3 +800,57 @@
2025-12-09 07:52:20,370 - root - INFO - Step 21633/28600 train rougeL: 0.9489695747408912
2025-12-09 07:52:20,436 - root - INFO - Step 21633/28600 loss: 0.05737420544028282, nll_loss: 0.05737420544028282
2025-12-09 07:52:31,618 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:52:33,225 - root - INFO - Step 21761/28600 train rougeL: 0.9880952380952381
+ 2025-12-09 07:52:33,282 - root - INFO - Step 21761/28600 loss: 0.028945179656147957, nll_loss: 0.028945179656147957
+ 2025-12-09 07:52:44,383 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:52:46,632 - root - INFO - Step 21889/28600 train rougeL: 0.9605263157894737
+ 2025-12-09 07:52:46,696 - root - INFO - Step 21889/28600 loss: 0.05335050821304321, nll_loss: 0.05335050821304321
+ 2025-12-09 07:52:57,787 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:53:00,397 - root - INFO - Step 22017/28600 train rougeL: 0.798895914819469
+ 2025-12-09 07:53:00,460 - root - INFO - Step 22017/28600 loss: 0.047433000057935715, nll_loss: 0.047433000057935715
+ 2025-12-09 07:53:11,554 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:53:14,661 - root - INFO - Step 22145/28600 train rougeL: 0.9652647369850268
+ 2025-12-09 07:53:14,725 - root - INFO - Step 22145/28600 loss: 0.043389417231082916, nll_loss: 0.043389417231082916
+ 2025-12-09 07:53:25,812 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:53:28,380 - root - INFO - Step 22273/28600 train rougeL: 1.0
+ 2025-12-09 07:53:28,443 - root - INFO - Step 22273/28600 loss: 0.039842940866947174, nll_loss: 0.039842940866947174
+ 2025-12-09 07:53:39,501 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:53:42,573 - root - INFO - Step 22401/28600 train rougeL: 0.9713541666666666
+ 2025-12-09 07:53:42,636 - root - INFO - Step 22401/28600 loss: 0.021895578131079674, nll_loss: 0.021895578131079674
+ 2025-12-09 07:53:53,695 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:53:56,791 - root - INFO - Step 22529/28600 train rougeL: 0.9427278401997503
+ 2025-12-09 07:53:56,854 - root - INFO - Step 22529/28600 loss: 0.020404642447829247, nll_loss: 0.020404642447829247
+ 2025-12-09 07:54:07,915 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:54:10,315 - root - INFO - Step 22657/28600 train rougeL: 0.9157062672367521
+ 2025-12-09 07:54:10,378 - root - INFO - Step 22657/28600 loss: 0.019845489412546158, nll_loss: 0.019845489412546158
+ 2025-12-09 07:54:21,429 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:54:23,401 - root - INFO - Step 22785/28600 train rougeL: 0.9723895582329317
+ 2025-12-09 07:54:23,464 - root - INFO - Step 22785/28600 loss: 0.06588123738765717, nll_loss: 0.06588123738765717
+ 2025-12-09 07:54:31,682 - root - INFO - Epoch 16/20 finished
+ 2025-12-09 07:54:31,713 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:54:35,083 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:54:38,466 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:54:41,546 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:54:44,875 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:54:48,242 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:54:51,597 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:54:54,956 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:54:58,381 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:55:01,028 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:55:04,356 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:55:07,723 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:55:10,619 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:55:13,983 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:55:17,312 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:55:20,959 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:55:24,124 - root - INFO - Epoch 16/20 eval rougeL: 0.24059569246404464
+ 2025-12-09 07:55:26,146 - root - INFO - Epoch 17/20
+ 2025-12-09 07:55:28,972 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:55:31,511 - root - INFO - Step 22913/28600 train rougeL: 0.9788781163434903
+ 2025-12-09 07:55:31,574 - root - INFO - Step 22913/28600 loss: 0.04002169147133827, nll_loss: 0.04002169147133827
+ 2025-12-09 07:55:42,626 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:55:45,222 - root - INFO - Step 23041/28600 train rougeL: 0.9503865979381443
+ 2025-12-09 07:55:45,285 - root - INFO - Step 23041/28600 loss: 0.025748349726200104, nll_loss: 0.025748349726200104
+ 2025-12-09 07:55:56,448 - absl - INFO - Using default tokenizer.
+ 2025-12-09 07:55:57,754 - root - INFO - Step 23169/28600 train rougeL: 0.979368932038835
+ 2025-12-09 07:55:57,810 - root - INFO - Step 23169/28600 loss: 0.017524462193250656, nll_loss: 0.017524462193250656
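
The log interleaves per-step train rougeL/loss with per-epoch eval rougeL. A minimal sketch for extracting the training-loss curve from this log; the regex is inferred from the lines above, and the path mirrors the repo layout:

```python
# Minimal sketch: extracting (step, loss) pairs from the training log.
# The regex is inferred from the log lines above; the path is illustrative.
import re

pattern = re.compile(r"Step (\d+)/\d+ loss: ([0-9.]+)")
points = []
with open("experiments/sft_gpt2-120m/20251209_070038/sft_gpt2-120m.log") as f:
    for line in f:
        m = pattern.search(line)
        if m:
            points.append((int(m.group(1)), float(m.group(2))))
print(points[-3:])  # most recent logged training losses
```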
experiments/sft_gpt2-120m/20251209_070038/sft_gpt2-120m_metrics.jsonl CHANGED
The diff for this file is too large to render. See raw diff