Upload updated model files trained with a different version of Tirex

Files changed (5) hide show

config.json CHANGED Viewed

@@ -4,50 +4,11 @@
   ],
   "attention_dropout": 0.0,
   "bos_token_id": 151643,
-  "dtype": "bfloat16",
   "eos_token_id": 151645,
   "hidden_act": "silu",
   "hidden_size": 2048,
   "initializer_range": 0.02,
   "intermediate_size": 11008,
-  "layer_types": [
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention"
-  ],
   "max_position_embeddings": 32768,
   "max_window_layers": 70,
   "model_type": "qwen2",
@@ -57,9 +18,10 @@
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,
   "rope_theta": 1000000.0,
-  "sliding_window": null,
   "tie_word_embeddings": true,
-  "transformers_version": "4.57.0",
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 151936

   ],
   "attention_dropout": 0.0,
   "bos_token_id": 151643,
   "eos_token_id": 151645,
   "hidden_act": "silu",
   "hidden_size": 2048,
   "initializer_range": 0.02,
   "intermediate_size": 11008,
   "max_position_embeddings": 32768,
   "max_window_layers": 70,
   "model_type": "qwen2",
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,
   "rope_theta": 1000000.0,
+  "sliding_window": 32768,
   "tie_word_embeddings": true,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.52.4",
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 151936

generation_config.json CHANGED Viewed

@@ -1,11 +1,14 @@
 {
   "bos_token_id": 151643,
   "eos_token_id": [
     151645,
     151643
   ],
-  "max_new_tokens": 128,
   "pad_token_id": 151643,
   "repetition_penalty": 1.05,
-  "transformers_version": "4.57.0"
 }

 {
   "bos_token_id": 151643,
+  "do_sample": true,
   "eos_token_id": [
     151645,
     151643
   ],
   "pad_token_id": 151643,
   "repetition_penalty": 1.05,
+  "temperature": 0.7,
+  "top_k": 20,
+  "top_p": 0.8,
+  "transformers_version": "4.52.4"
 }

model-00001-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7eb8c061fa689757265acae1d09f3ff124cfd2ef9194d7bbffeccf700a821130
 size 4957560304

 version https://git-lfs.github.com/spec/v1
+oid sha256:c52ddb3452f55a95a6403c9328fc2e2bdd6f20318d1dbd09370b19bba6695af2
 size 4957560304

model-00002-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8d55d25116be14dfb5cd3e4db7765b50ccea05fae638e108112da6a56c34289
-size 1214366696

 version https://git-lfs.github.com/spec/v1
+oid sha256:988045146c92d892e255d7aa938d6ab2f637b05c12f9e25ca89c51af2a44fa38
+size 1836696752

model.safetensors.index.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "metadata": {
-    "total_parameters": 3085938688,
-    "total_size": 6171877376
   },
   "weight_map": {
     "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
     "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
     "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",

 {
   "metadata": {
+    "total_size": 6794207232
   },
   "weight_map": {
+    "lm_head.weight": "model-00002-of-00002.safetensors",
     "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
     "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
     "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",