ntsmarkv committed · verified · Commit dc83daa · 1 parent: f4428d8

Upload Qwen3ForCausalLM
Files changed (3):
1. config.json (+37 −11)
2. generation_config.json (+1 −1)
3. model.safetensors (+2 −2)
config.json CHANGED
@@ -4,13 +4,14 @@
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
-  "dtype": "bfloat16",
+  "bos_token_id": null,
+  "dtype": "float32",
   "eos_token_id": 151645,
   "head_dim": 128,
   "hidden_act": "silu",
-  "hidden_size": 2048,
+  "hidden_size": 4096,
   "initializer_range": 0.02,
-  "intermediate_size": 6144,
+  "intermediate_size": 12288,
   "layer_types": [
     "full_attention",
     "full_attention",
@@ -39,22 +40,47 @@
     "full_attention",
     "full_attention",
     "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
     "full_attention"
   ],
   "max_position_embeddings": 40960,
-  "max_window_layers": 28,
+  "max_window_layers": 36,
   "model_type": "qwen3",
-  "num_attention_heads": 16,
-  "num_hidden_layers": 28,
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
   "num_key_value_heads": 8,
   "pad_token_id": 151643,
+  "quantization_config": {
+    "_load_in_4bit": true,
+    "_load_in_8bit": false,
+    "bnb_4bit_compute_dtype": "bfloat16",
+    "bnb_4bit_quant_storage": "uint8",
+    "bnb_4bit_quant_type": "nf4",
+    "bnb_4bit_use_double_quant": true,
+    "llm_int8_enable_fp32_cpu_offload": false,
+    "llm_int8_has_fp16_weight": false,
+    "llm_int8_skip_modules": null,
+    "llm_int8_threshold": 6.0,
+    "load_in_4bit": true,
+    "load_in_8bit": false,
+    "quant_method": "bitsandbytes"
+  },
   "rms_norm_eps": 1e-06,
-  "rope_scaling": null,
-  "rope_theta": 1000000,
+  "rope_parameters": {
+    "rope_theta": 1000000,
+    "rope_type": "default"
+  },
   "sliding_window": null,
-  "tie_word_embeddings": true,
-  "transformers_version": "4.57.3",
-  "use_cache": true,
+  "tie_word_embeddings": false,
+  "transformers_version": "5.2.0",
+  "use_cache": false,
   "use_sliding_window": false,
   "vocab_size": 151936
 }
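The quantization_config block added above is a bitsandbytes 4-bit NF4 setup serialized into config.json, so from_pretrained applies it automatically. A minimal sketch of the equivalent load-time configuration, assuming a placeholder repo id (the actual repository path is not shown in this diff):

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Mirrors the quantization_config baked into config.json by this commit:
# 4-bit NF4 weights, double quantization, bfloat16 compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    "ntsmarkv/model-repo",           # placeholder repo id (assumption)
    quantization_config=bnb_config,  # redundant here: config.json already carries it
    device_map="auto",
)
```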
generation_config.json CHANGED
@@ -8,5 +8,5 @@
   "temperature": 0.6,
   "top_k": 20,
   "top_p": 0.95,
-  "transformers_version": "4.57.3"
+  "transformers_version": "5.2.0"
 }
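Only the transformers_version field changes here; the sampling defaults (temperature 0.6, top_k 20, top_p 0.95) carry over and are picked up automatically at load time. A minimal sketch that passes them explicitly, again with a placeholder repo id:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "ntsmarkv/model-repo"  # placeholder repo id (assumption)
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id)

inputs = tokenizer("Give me a short introduction to large language models.",
                   return_tensors="pt")
out = model.generate(
    **inputs,
    do_sample=True,   # sampling must be enabled for these knobs to apply
    temperature=0.6,  # values from generation_config.json
    top_k=20,
    top_p=0.95,
    max_new_tokens=64,
)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```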
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8598f470bf7e568c8623450ea15d68b5f532dc8c379bf29212411b3c6cdec7f2
-size 3441185608
+oid sha256:7e0b7f48f05f0d40a0b805958f21fdc306327a2a66fa22b710336a1ef21dfdfa
+size 8563499179
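What is versioned here is a git-lfs pointer, not the weights themselves: the pointer records only the sha256 oid and the byte size of the new ~8.6 GB file. A minimal sketch for checking a downloaded copy against this pointer, assuming a local file path:

```python
import hashlib
import os

# Values from the new LFS pointer above.
EXPECTED_OID = "7e0b7f48f05f0d40a0b805958f21fdc306327a2a66fa22b710336a1ef21dfdfa"
EXPECTED_SIZE = 8563499179

path = "model.safetensors"  # assumed local download path
assert os.path.getsize(path) == EXPECTED_SIZE, "size mismatch with LFS pointer"

# Stream the file in 1 MiB chunks to avoid loading 8.6 GB into memory.
h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)
assert h.hexdigest() == EXPECTED_OID, "sha256 mismatch with LFS pointer"
print("model.safetensors matches the LFS pointer")
```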