Sharjeelbaig committed on
Commit
b0895b3
·
verified ·
1 Parent(s): a1cf22f

Upload NeuroThinker custom checkpoint (63MB safetensors)

Browse files
added_tokens.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "<|answer|>": 50259,
3
+ "<|end|>": 50260,
4
+ "<|prompt|>": 50257,
5
+ "<|think|>": 50258
6
+ }
config.json CHANGED
@@ -1,33 +1,19 @@
1
  {
2
- "activation_function": "gelu_new",
3
- "architectures": [
4
- "GPT2LMHeadModel"
5
- ],
6
- "attn_pdrop": 0.1,
7
- "bos_token_id": 50256,
8
- "dtype": "float32",
9
- "embd_pdrop": 0.1,
10
- "eos_token_id": 50256,
11
- "initializer_range": 0.02,
12
- "layer_norm_epsilon": 1e-05,
13
- "model_type": "gpt2",
14
- "n_ctx": 256,
15
- "n_embd": 192,
16
- "n_head": 3,
17
- "n_inner": null,
18
- "n_layer": 3,
19
- "n_positions": 256,
20
  "pad_token_id": 50256,
21
- "reorder_and_upcast_attn": false,
22
- "resid_pdrop": 0.1,
23
- "scale_attn_by_inverse_layer_idx": false,
24
- "scale_attn_weights": true,
25
- "summary_activation": null,
26
- "summary_first_dropout": 0.1,
27
- "summary_proj_to_labels": true,
28
- "summary_type": "cls_index",
29
- "summary_use_proj": true,
30
- "transformers_version": "4.57.3",
31
- "use_cache": true,
32
- "vocab_size": 50257
33
- }
 
1
  {
2
+ "vocab_size": 50261,
3
+ "d_model": 384,
4
+ "n_layers": 6,
5
+ "n_heads": 6,
6
+ "d_head": 64,
7
+ "d_ff": 720,
8
+ "d_memory": 192,
9
+ "max_seq_len": 256,
10
+ "dropout": 0.1,
11
+ "rope_theta": 10000.0,
12
+ "memory_decay_init": 0.99,
13
+ "surprise_threshold": 0.1,
14
+ "rms_norm_eps": 1e-06,
 
 
 
 
 
15
  "pad_token_id": 50256,
16
+ "bos_token_id": 50256,
17
+ "eos_token_id": 50260,
18
+ "model_type": "neurothinker"
19
+ }
 
 
 
 
 
 
 
 
 
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4822217a8aa396213d653cc022d234dd0588dd26f895ed676b559ecaa1e3d615
3
- size 44137768
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c06d8248fbe1c975b80159d76b1b43ef6465b396467c8cb23739ea350478cf5b
3
+ size 60979648
special_tokens_map.json CHANGED
@@ -1,4 +1,34 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "bos_token": "<|endoftext|>",
3
  "eos_token": "<|endoftext|>",
4
  "pad_token": "<|endoftext|>",
 
1
  {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<|prompt|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<|think|>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ {
18
+ "content": "<|answer|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ {
25
+ "content": "<|end|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ ],
32
  "bos_token": "<|endoftext|>",
33
  "eos_token": "<|endoftext|>",
34
  "pad_token": "<|endoftext|>",
tokenizer.json CHANGED
@@ -1,11 +1,6 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 256,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
  "padding": null,
10
  "added_tokens": [
11
  {
@@ -16,6 +11,42 @@
16
  "rstrip": false,
17
  "normalized": true,
18
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
20
  ],
21
  "normalizer": null,
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
 
11
  "rstrip": false,
12
  "normalized": true,
13
  "special": true
14
+ },
15
+ {
16
+ "id": 50257,
17
+ "content": "<|prompt|>",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 50258,
26
+ "content": "<|think|>",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 50259,
35
+ "content": "<|answer|>",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 50260,
44
+ "content": "<|end|>",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
  }
51
  ],
52
  "normalizer": null,
tokenizer_config.json CHANGED
@@ -8,8 +8,46 @@
8
  "rstrip": false,
9
  "single_word": false,
10
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  }
12
  },
 
 
 
 
 
 
13
  "bos_token": "<|endoftext|>",
14
  "clean_up_tokenization_spaces": false,
15
  "eos_token": "<|endoftext|>",
 
8
  "rstrip": false,
9
  "single_word": false,
10
  "special": true
11
+ },
12
+ "50257": {
13
+ "content": "<|prompt|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "50258": {
21
+ "content": "<|think|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "50259": {
29
+ "content": "<|answer|>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50260": {
37
+ "content": "<|end|>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
  }
44
  },
45
+ "additional_special_tokens": [
46
+ "<|prompt|>",
47
+ "<|think|>",
48
+ "<|answer|>",
49
+ "<|end|>"
50
+ ],
51
  "bos_token": "<|endoftext|>",
52
  "clean_up_tokenization_spaces": false,
53
  "eos_token": "<|endoftext|>",