typeof committed on
Commit 2a44b36 · verified · 1 parent: a94bf8f

Upload folder using huggingface_hub

Files changed (3)
  1. config.json +72 -6
  2. generation_config.json +1 -1
  3. model.safetensors +2 -2
config.json CHANGED
@@ -5,13 +5,81 @@
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 1,
+  "dtype": "float32",
   "eos_token_id": 2,
   "head_dim": 64,
   "hidden_act": "silu",
   "hidden_size": 256,
   "initializer_range": 0.02,
   "intermediate_size": 768,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
   "max_position_embeddings": 8192,
+  "max_window_layers": 64,
   "mlp_bias": false,
   "model_type": "qwen2",
   "num_attention_heads": 4,
@@ -22,16 +90,14 @@
   "rope_scaling": {
     "factor": 4.0,
     "original_max_position_embeddings": 2048,
+    "rope_type": "yarn",
     "type": "yarn"
   },
   "rope_theta": 10000,
+  "sliding_window": null,
   "tie_word_embeddings": true,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.51.3",
+  "transformers_version": "4.57.1",
   "use_cache": true,
-  "vocab_size": 8192,
-
-  "max_window_layers": 64,
   "use_sliding_window": false,
-  "sliding_window": 8192
+  "vocab_size": 8192
 }
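The net effect of this change: the checkpoint is now stamped float32 (the old torch_dtype key is replaced by dtype), all 64 entries in layer_types declare full_attention, the YaRN block gains the newer rope_type key alongside the legacy type, and sliding-window attention stays disabled. A minimal sketch for sanity-checking those fields by reading config.json directly; "./checkpoint" is a placeholder path, since the actual repo id does not appear in this diff.

import json

# "./checkpoint/config.json" is a placeholder path; the repo id is not
# shown in this diff.
with open("./checkpoint/config.json") as f:
    cfg = json.load(f)

# Fields this commit adds or renames.
print(cfg["dtype"])                      # "float32" (replaces "torch_dtype": "bfloat16")
print(len(cfg["layer_types"]))           # 64 entries, all "full_attention"
print(cfg["rope_scaling"]["rope_type"])  # "yarn" (new key, kept alongside legacy "type")
print(cfg["sliding_window"])             # None (was 8192; use_sliding_window stays false)

# YaRN consistency check: scaled context = factor * original context.
rs = cfg["rope_scaling"]
assert rs["factor"] * rs["original_max_position_embeddings"] == cfg["max_position_embeddings"]  # 4.0 * 2048 == 8192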
generation_config.json CHANGED
@@ -2,5 +2,5 @@
   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 2,
-  "transformers_version": "4.51.3"
+  "transformers_version": "4.57.1"
 }
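The only substantive change here is the transformers_version stamp, which save_pretrained rewrites automatically when the generation config is re-serialized. A quick check, reusing the same placeholder path as above:

from transformers import GenerationConfig

# "./checkpoint" is the same placeholder path as above.
gen_cfg = GenerationConfig.from_pretrained("./checkpoint")
print(gen_cfg.transformers_version)                # "4.57.1"
print(gen_cfg.bos_token_id, gen_cfg.eos_token_id)  # 1 2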
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d99bffef2d388cfbd0b39b8a0e1665e64afd7e0f7b055f7abebadc756a7227bf
-size 113376216
+oid sha256:6ab3ac0d21c4fd8fa0ef36ea4e56a858018778600c2ddd695edbe88a99f73297
+size 226905440
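The new weight file is almost exactly twice the old one, which lines up with the dtype change in config.json: bfloat16 stores 2 bytes per parameter, float32 stores 4. A back-of-the-envelope check using the sizes from the two LFS pointers above (safetensors files also carry a small JSON header, so the ratio is only approximately 2):

old_size = 113_376_216   # bytes, bfloat16 checkpoint (old pointer above)
new_size = 226_905_440   # bytes, float32 checkpoint (new pointer above)

print(new_size / old_size)   # ~2.001, consistent with 2-byte -> 4-byte weights
print(new_size // 4)         # ~56.7M float32 values, an upper bound on parameter count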