aarabil committed
Commit 3a8569b · verified · 1 Parent(s): dc1dc8e

Upload model

Files changed (2):
  1. config.json +33 -3
  2. model.safetensors +2 -2
config.json CHANGED
@@ -1,16 +1,47 @@
 {
   "architectures": [
-    "Qwen3ForCausalLM"
+    "Qwen3Model"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 151643,
+  "dtype": "float32",
   "eos_token_id": 151643,
   "head_dim": 128,
   "hidden_act": "silu",
   "hidden_size": 1024,
   "initializer_range": 0.02,
   "intermediate_size": 3072,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
   "max_position_embeddings": 32768,
   "max_window_layers": 28,
   "model_type": "qwen3",
@@ -22,8 +53,7 @@
   "rope_theta": 1000000,
   "sliding_window": null,
   "tie_word_embeddings": true,
-  "torch_dtype": "float32",
-  "transformers_version": "4.52.4",
+  "transformers_version": "4.56.0",
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 151669
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b37cc0fe306b402a94e75ebe116c60e9feef0ec2405d28fb2d29b2f805abb64e
-size 2383141336
+oid sha256:fea1a806ad4460c25fe92c57617d33d3e92499ebd024cb102bb7052cb91dd509
+size 2383139480
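
The weight file was re-uploaded under a new LFS pointer; the size drops by only 1,856 bytes, plausibly just the safetensors header (tensor names) changing when the weights were re-saved under the base-model class. A sketch for checking a downloaded copy against this pointer; the local path is an assumption:

```python
# Sketch: verify a downloaded model.safetensors against this LFS pointer.
import hashlib
import os

path = "model.safetensors"  # assumed local path to the downloaded file
expected_oid = "fea1a806ad4460c25fe92c57617d33d3e92499ebd024cb102bb7052cb91dd509"
expected_size = 2383139480

assert os.path.getsize(path) == expected_size, "size does not match pointer"

sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)

assert sha.hexdigest() == expected_oid, "oid does not match pointer"
print("model.safetensors matches the LFS pointer")
```
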