Taksu committed on
Commit
010bb0f
·
verified ·
1 Parent(s): 5b2d9b0

Upload updated model files trained with a different version of Tirex

Browse files
config.json CHANGED
@@ -4,50 +4,11 @@
4
  ],
5
  "attention_dropout": 0.0,
6
  "bos_token_id": 151643,
7
- "dtype": "bfloat16",
8
  "eos_token_id": 151645,
9
  "hidden_act": "silu",
10
  "hidden_size": 2048,
11
  "initializer_range": 0.02,
12
  "intermediate_size": 11008,
13
- "layer_types": [
14
- "full_attention",
15
- "full_attention",
16
- "full_attention",
17
- "full_attention",
18
- "full_attention",
19
- "full_attention",
20
- "full_attention",
21
- "full_attention",
22
- "full_attention",
23
- "full_attention",
24
- "full_attention",
25
- "full_attention",
26
- "full_attention",
27
- "full_attention",
28
- "full_attention",
29
- "full_attention",
30
- "full_attention",
31
- "full_attention",
32
- "full_attention",
33
- "full_attention",
34
- "full_attention",
35
- "full_attention",
36
- "full_attention",
37
- "full_attention",
38
- "full_attention",
39
- "full_attention",
40
- "full_attention",
41
- "full_attention",
42
- "full_attention",
43
- "full_attention",
44
- "full_attention",
45
- "full_attention",
46
- "full_attention",
47
- "full_attention",
48
- "full_attention",
49
- "full_attention"
50
- ],
51
  "max_position_embeddings": 32768,
52
  "max_window_layers": 70,
53
  "model_type": "qwen2",
@@ -57,9 +18,10 @@
57
  "rms_norm_eps": 1e-06,
58
  "rope_scaling": null,
59
  "rope_theta": 1000000.0,
60
- "sliding_window": null,
61
  "tie_word_embeddings": true,
62
- "transformers_version": "4.57.0",
 
63
  "use_cache": true,
64
  "use_sliding_window": false,
65
  "vocab_size": 151936
 
4
  ],
5
  "attention_dropout": 0.0,
6
  "bos_token_id": 151643,
 
7
  "eos_token_id": 151645,
8
  "hidden_act": "silu",
9
  "hidden_size": 2048,
10
  "initializer_range": 0.02,
11
  "intermediate_size": 11008,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  "max_position_embeddings": 32768,
13
  "max_window_layers": 70,
14
  "model_type": "qwen2",
 
18
  "rms_norm_eps": 1e-06,
19
  "rope_scaling": null,
20
  "rope_theta": 1000000.0,
21
+ "sliding_window": 32768,
22
  "tie_word_embeddings": true,
23
+ "torch_dtype": "bfloat16",
24
+ "transformers_version": "4.52.4",
25
  "use_cache": true,
26
  "use_sliding_window": false,
27
  "vocab_size": 151936
generation_config.json CHANGED
@@ -1,11 +1,14 @@
1
  {
2
  "bos_token_id": 151643,
 
3
  "eos_token_id": [
4
  151645,
5
  151643
6
  ],
7
- "max_new_tokens": 128,
8
  "pad_token_id": 151643,
9
  "repetition_penalty": 1.05,
10
- "transformers_version": "4.57.0"
 
 
 
11
  }
 
1
  {
2
  "bos_token_id": 151643,
3
+ "do_sample": true,
4
  "eos_token_id": [
5
  151645,
6
  151643
7
  ],
 
8
  "pad_token_id": 151643,
9
  "repetition_penalty": 1.05,
10
+ "temperature": 0.7,
11
+ "top_k": 20,
12
+ "top_p": 0.8,
13
+ "transformers_version": "4.52.4"
14
  }
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7eb8c061fa689757265acae1d09f3ff124cfd2ef9194d7bbffeccf700a821130
3
  size 4957560304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c52ddb3452f55a95a6403c9328fc2e2bdd6f20318d1dbd09370b19bba6695af2
3
  size 4957560304
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8d55d25116be14dfb5cd3e4db7765b50ccea05fae638e108112da6a56c34289
3
- size 1214366696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:988045146c92d892e255d7aa938d6ab2f637b05c12f9e25ca89c51af2a44fa38
3
+ size 1836696752
model.safetensors.index.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "metadata": {
3
- "total_parameters": 3085938688,
4
- "total_size": 6171877376
5
  },
6
  "weight_map": {
 
7
  "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
8
  "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
9
  "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 6794207232
 
4
  },
5
  "weight_map": {
6
+ "lm_head.weight": "model-00002-of-00002.safetensors",
7
  "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
8
  "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
9
  "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",