Upload folder using huggingface_hub

Files changed (5) hide show

README.md CHANGED Viewed

@@ -5,7 +5,7 @@ license_name: youtu-llm
 license_link: https://huggingface.co/tencent/Youtu-LLM-2B-Base/LICENSE.txt
 pipeline_tag: text-generation
 instruct_model:
-  - tencent/Youtu-LLM-2B
 ---
 <div align="center">
@@ -38,6 +38,7 @@ instruct_model:
 | Youtu-LLM-2B-GGUF | Instruct model of Youtu-LLM-2B, in GGUF format | 🤗 [Model](https://huggingface.co/tencent/Youtu-LLM-2B-GGUF)|
 ## 📰 News
 - [2026.01.07] You can now fine-tune Youtu-LLM with [ModelScope](https://mp.weixin.qq.com/s/JJtQWSYEjnE7GnPkaJ7UNA).
 - [2026.01.04] You can now fine-tune Youtu-LLM with [LlamaFactory](https://github.com/hiyouga/LlamaFactory/pull/9707).

 license_link: https://huggingface.co/tencent/Youtu-LLM-2B-Base/LICENSE.txt
 pipeline_tag: text-generation
 instruct_model:
+- tencent/Youtu-LLM-2B
 ---
 <div align="center">
 | Youtu-LLM-2B-GGUF | Instruct model of Youtu-LLM-2B, in GGUF format | 🤗 [Model](https://huggingface.co/tencent/Youtu-LLM-2B-GGUF)|
 ## 📰 News
+- [2026.01.28] You can now directly use Youtu-LLM with [Transformers](https://github.com/huggingface/transformers/pull/43166).
 - [2026.01.07] You can now fine-tune Youtu-LLM with [ModelScope](https://mp.weixin.qq.com/s/JJtQWSYEjnE7GnPkaJ7UNA).
 - [2026.01.04] You can now fine-tune Youtu-LLM with [LlamaFactory](https://github.com/hiyouga/LlamaFactory/pull/9707).

config.json CHANGED Viewed

@@ -1,39 +1,38 @@
 {
-    "architectures": [
-      "YoutuForCausalLM"
-    ],
-    "attention_bias": false,
-    "attention_dropout": 0.0,
-    "auto_map": {
-      "AutoConfig": "configuration_youtu.YoutuConfig",
-      "AutoModel": "modeling_youtu.YoutuModel",
-      "AutoModelForCausalLM": "modeling_youtu.YoutuForCausalLM"
-    },
-    "bos_token_id": 128000,
-    "eos_token_id": 128001,
-    "hidden_act": "silu",
-    "hidden_size": 2048,
-    "initializer_range": null,
-    "embedding_initializer_range": null,
-    "intermediate_size": 6144,
-    "kv_lora_rank": 512,
-    "max_position_embeddings": 131072,
-    "mlp_bias": false,
-    "model_type": "youtu_llm",
-    "num_attention_heads": 16,
-    "num_hidden_layers": 32,
-    "num_key_value_heads": 16,
-    "q_lora_rank": 1536,
-    "qk_nope_head_dim": 128,
-    "qk_rope_head_dim": 64,
-    "rms_norm_eps": 1e-06,
-    "rope_interleave": true,
-    "rope_scaling": null,
     "rope_theta": 1600000,
-    "tie_word_embeddings": true,
-    "torch_dtype": "bfloat16",
-    "transformers_version": "4.56.0",
-    "use_cache": true,
-    "v_head_dim": 128,
-    "vocab_size": 128256
 }

 {
+  "architectures": [
+    "YoutuForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 128000,
+  "dtype": "bfloat16",
+  "embedding_initializer_range": null,
+  "eos_token_id": 128001,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": null,
+  "intermediate_size": 6144,
+  "kv_lora_rank": 512,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "youtu",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 16,
+  "pad_token_id": null,
+  "q_lora_rank": 1536,
+  "qk_head_dim": 192,
+  "qk_nope_head_dim": 128,
+  "qk_rope_head_dim": 64,
+  "rms_norm_eps": 1e-06,
+  "rope_interleave": true,
+  "rope_parameters": {
     "rope_theta": 1600000,
+    "rope_type": "default"
+  },
+  "tie_word_embeddings": true,
+  "transformers_version": "5.0.0.dev0",
+  "use_cache": true,
+  "v_head_dim": 128,
+  "vocab_size": 128256
 }

generation_config.json CHANGED Viewed

@@ -2,6 +2,6 @@
     "_from_model_config": true,
     "bos_token_id": 128000,
     "eos_token_id": 128001,
-    "transformers_version": "4.56.0",
     "use_cache": false
 }

     "_from_model_config": true,
     "bos_token_id": 128000,
     "eos_token_id": 128001,
+    "transformers_version": "5.0.0.dev0",
     "use_cache": false
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:76879a571c8e3b3407668bb63331f78600a99d4d9ec607d0694511b166bc7bc3
-size 4448502448

 version https://git-lfs.github.com/spec/v1
+oid sha256:a3e92f7df7618bab5f22b90528fa692c41c3bfb8fabc016e04bf6f2403b05a79
+size 3923165640

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff