C10X committed on
Commit
ffb28a0
·
verified ·
1 Parent(s): b6d8fea

Upload 7 files

Browse files
config.json CHANGED
@@ -1,39 +1,65 @@
1
- {
2
- "architectures": [
3
- "Qwen3ForCausalLM"
4
- ],
5
- "attention_bias": false,
6
- "attention_dropout": 0.0,
7
- "bos_token_id": 0,
8
- "dtype": "bfloat16",
9
- "eos_token_id": 4,
10
- "head_dim": 32,
11
- "hidden_act": "silu",
12
- "hidden_size": 64,
13
- "initializer_range": 0.02,
14
- "intermediate_size": 256,
15
- "layer_types": [
16
- "full_attention",
17
- "full_attention",
18
- "full_attention",
19
- "full_attention"
20
- ],
21
- "max_position_embeddings": 2048,
22
- "max_window_layers": 4,
23
- "model_type": "qwen3",
24
- "num_attention_heads": 4,
25
- "num_hidden_layers": 4,
26
- "num_key_value_heads": 2,
27
- "pad_token_id": 2,
28
- "rms_norm_eps": 1e-06,
29
- "rope_parameters": {
30
- "rope_theta": 10000,
31
- "rope_type": "default"
32
- },
33
- "sliding_window": null,
34
- "tie_word_embeddings": true,
35
- "transformers_version": "5.8.0.dev0",
36
- "use_cache": false,
37
- "use_sliding_window": false,
38
- "vocab_size": 2048
39
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 0,
8
+ "dtype": "float32",
9
+ "eos_token_id": 4,
10
+ "head_dim": 32,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 64,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 256,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention"
48
+ ],
49
+ "max_position_embeddings": 2048,
50
+ "max_window_layers": 32,
51
+ "model_type": "qwen3",
52
+ "num_attention_heads": 4,
53
+ "num_hidden_layers": 32,
54
+ "num_key_value_heads": 2,
55
+ "pad_token_id": 1,
56
+ "rms_norm_eps": 1e-06,
57
+ "rope_scaling": null,
58
+ "rope_theta": 10000.0,
59
+ "sliding_window": null,
60
+ "tie_word_embeddings": true,
61
+ "transformers_version": "4.57.6",
62
+ "use_cache": true,
63
+ "use_sliding_window": false,
64
+ "vocab_size": 2048
65
+ }
generation_config.json CHANGED
@@ -1,7 +1,7 @@
1
- {
2
- "_from_model_config": true,
3
- "bos_token_id": 0,
4
- "eos_token_id": 4,
5
- "pad_token_id": 1,
6
- "transformers_version": "5.8.0.dev0"
7
- }
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": 4,
5
+ "pad_token_id": 1,
6
+ "use_cache": true
7
+ }
metadata.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "preset": "qwen3-32layer-c10x",
3
+ "family": "qwen3",
4
+ "source": "inline-preset",
5
+ "output_dir": "D:\\Qwen3-80m-tinystories-A\\workspace\\outputs\\qwen3-32layer-c10x",
6
+ "parameters": 2496576,
7
+ "vocab_size": 2048,
8
+ "hidden_size": 64,
9
+ "num_hidden_layers": 32,
10
+ "num_attention_heads": 4,
11
+ "num_key_value_heads": 2,
12
+ "rope_theta": 10000.0
13
+ }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b9637ecf9fac807c5c1d081e5de541fad0a49d5ed626df78e9120d519e130ed
3
- size 858424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e223f0d607fdc649ddfa5bca27b1675e6ee75bdd65198b00450abe76cba4d283
3
+ size 10023688
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|start_story|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|end_story|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer.json CHANGED
@@ -31,13 +31,23 @@
31
  "special": true
32
  }
33
  ],
34
- "normalizer": null,
35
- "pre_tokenizer": {
36
- "type": "Metaspace",
37
- "replacement": "▁",
38
- "prepend_scheme": "first",
39
- "split": false
 
 
 
 
 
 
 
 
 
40
  },
 
41
  "post_processor": {
42
  "type": "TemplateProcessing",
43
  "single": [
@@ -119,7 +129,7 @@
119
  "model": {
120
  "type": "BPE",
121
  "dropout": null,
122
- "unk_token": null,
123
  "continuing_subword_prefix": null,
124
  "end_of_word_suffix": null,
125
  "fuse_unk": true,
 
31
  "special": true
32
  }
33
  ],
34
+ "normalizer": {
35
+ "type": "Sequence",
36
+ "normalizers": [
37
+ {
38
+ "type": "Prepend",
39
+ "prepend": "▁"
40
+ },
41
+ {
42
+ "type": "Replace",
43
+ "pattern": {
44
+ "String": " "
45
+ },
46
+ "content": "▁"
47
+ }
48
+ ]
49
  },
50
+ "pre_tokenizer": null,
51
  "post_processor": {
52
  "type": "TemplateProcessing",
53
  "single": [
 
129
  "model": {
130
  "type": "BPE",
131
  "dropout": null,
132
+ "unk_token": "<unk>",
133
  "continuing_subword_prefix": null,
134
  "end_of_word_suffix": null,
135
  "fuse_unk": true,
tokenizer_config.json CHANGED
@@ -1,15 +1,42 @@
1
- {
2
- "add_prefix_space": null,
3
- "backend": "tokenizers",
4
- "bos_token": "<|start_story|>",
5
- "clean_up_tokenization_spaces": false,
6
- "eos_token": "<|end_story|>",
7
- "is_local": false,
8
- "local_files_only": false,
9
- "model_max_length": 1000000000000000019884624838656,
10
- "pad_token": "<|end_story|>",
11
- "sp_model_kwargs": {},
12
- "tokenizer_class": "LlamaTokenizer",
13
- "unk_token": "<unk>",
14
- "use_default_system_prompt": false
15
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": true,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<|start_story|>",
16
+ "lstrip": false,
17
+ "normalized": true,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "<|end_story|>",
24
+ "lstrip": false,
25
+ "normalized": true,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ }
30
+ },
31
+ "bos_token": "<|start_story|>",
32
+ "clean_up_tokenization_spaces": false,
33
+ "eos_token": "<|end_story|>",
34
+ "extra_special_tokens": {},
35
+ "legacy": false,
36
+ "model_max_length": 1000000000000000019884624838656,
37
+ "pad_token": "<unk>",
38
+ "sp_model_kwargs": {},
39
+ "tokenizer_class": "LlamaTokenizerFast",
40
+ "unk_token": "<unk>",
41
+ "use_default_system_prompt": false
42
+ }