deskull committed on
Commit
77a3b16
·
verified ·
1 Parent(s): 1efc4fe

Update compounds GPT-2 small (retokenized, fix #12)

Browse files
Files changed (3) hide show
  1. config.json +23 -32
  2. pytorch_model.bin +1 -1
  3. training_args.json +2 -2
config.json CHANGED
@@ -1,43 +1,34 @@
1
  {
2
- "_name_or_path": "learning_source_20260316/compounds/gpt2-output/compounds-small/checkpoint-6000",
3
- "_riken_bias": false,
4
- "_riken_model_args": {
5
- "bias": false,
6
- "block_size": 1024,
7
- "dropout": 0.0,
8
- "n_embd": 768,
9
- "n_head": 12,
10
- "n_layer": 12,
11
- "vocab_size": 612
12
- },
13
- "activation_function": "gelu_new",
14
  "architectures": [
15
  "GPT2LMHeadModel"
16
  ],
17
- "attn_pdrop": 0.0,
18
- "bos_token_id": 0,
19
- "embd_pdrop": 0.0,
20
- "eos_token_id": 0,
21
- "initializer_range": 0.02,
22
- "layer_norm_epsilon": 1e-05,
23
  "model_type": "gpt2",
 
 
24
  "n_ctx": 1024,
25
  "n_embd": 768,
 
26
  "n_head": 12,
27
  "n_inner": 3072,
28
- "n_layer": 12,
29
- "n_positions": 1024,
30
- "reorder_and_upcast_attn": false,
31
  "resid_pdrop": 0.0,
32
- "scale_attn_by_inverse_layer_idx": false,
33
- "scale_attn_weights": true,
34
- "summary_activation": null,
35
- "summary_first_dropout": 0.1,
36
- "summary_proj_to_labels": true,
37
- "summary_type": "cls_index",
38
- "summary_use_proj": true,
39
- "torch_dtype": "float32",
40
- "transformers_version": "4.45.1",
41
  "use_cache": true,
42
- "vocab_size": 612
43
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
2
  "architectures": [
3
  "GPT2LMHeadModel"
4
  ],
 
 
 
 
 
 
5
  "model_type": "gpt2",
6
+ "vocab_size": 612,
7
+ "n_positions": 1024,
8
  "n_ctx": 1024,
9
  "n_embd": 768,
10
+ "n_layer": 12,
11
  "n_head": 12,
12
  "n_inner": 3072,
13
+ "activation_function": "gelu_new",
 
 
14
  "resid_pdrop": 0.0,
15
+ "embd_pdrop": 0.0,
16
+ "attn_pdrop": 0.0,
17
+ "layer_norm_epsilon": 1e-05,
18
+ "initializer_range": 0.02,
 
 
 
 
 
19
  "use_cache": true,
20
+ "bos_token_id": 0,
21
+ "eos_token_id": 0,
22
+ "transformers_version": "4.0.0",
23
+ "_name_or_path": "riken-gpt2",
24
+ "_riken_model_args": {
25
+ "n_layer": 12,
26
+ "n_head": 12,
27
+ "n_embd": 768,
28
+ "block_size": 1024,
29
+ "bias": false,
30
+ "vocab_size": 612,
31
+ "dropout": 0.0
32
+ },
33
+ "_riken_bias": false
34
+ }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d56393096242d67d18968e31823147a9494aed1d383b2dce0535ccbbcf5ff55
3
  size 344866536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6672efa2908532b33521405cf67139238c56f00800366d0b67facaf5974768d6
3
  size 344866536
training_args.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "iteration": 6000,
3
- "best_val_loss": 0.030465448275208473,
4
- "early_stopping_counter": 1,
5
  "learning_rate": 6e-06,
6
  "batch_size": 8,
7
  "block_size": 1024,
 
1
  {
2
  "iteration": 6000,
3
+ "best_val_loss": 0.030528374016284943,
4
+ "early_stopping_counter": 10,
5
  "learning_rate": 6e-06,
6
  "batch_size": 8,
7
  "block_size": 1024,