wangjazz committed on
Commit
24cd02b
·
verified ·
1 Parent(s): 5f00475

Upload 8 files

Browse files
Files changed (9) hide show
  1. .gitattributes +1 -0
  2. README.md +122 -3
  3. config.json +22 -0
  4. export_args.json +33 -0
  5. llm.mnn +3 -0
  6. llm.mnn.json +0 -0
  7. llm_config.json +30 -0
  8. model_info.json +13 -0
  9. tokenizer.txt +0 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ llm.mnn filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,122 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hunyuan-MT-7B-MNN
2
+
3
+ 這是 [Tencent/Hunyuan-MT-7B](https://huggingface.co/Tencent/Hunyuan-MT-7B) 的 MNN 格式版本,針對移動端和桌面端推理優化。
4
+
5
+ ## 模型信息
6
+
7
+ - **原始模型**: Tencent/Hunyuan-MT-7B
8
+ - **框架**: MNN (Mobile Neural Network)
9
+ - **量化**: 4-bit 量化
10
+ - **模型大小**: ~4.6 GB (相比原始 FP16 模型顯著減小)
11
+ - **轉換日期**: 2025-11-30
12
+
13
+ ## 文件說明
14
+
15
+ - `llm.mnn`: MNN 模型結構文件 (~510 KB)
16
+ - `llm.mnn.weight`: 量化後的模型權重 (~4.6 GB)
17
+ - `llm.mnn.json`: 模型結構的 JSON 表示 (~1.0 MB)
18
+ - `tokenizer.txt`: 分詞器文件 (~1.3 MB)
19
+ - `config.json`: 模型配置文件
20
+ - `llm_config.json`: LLM 特定配置
21
+ - `export_args.json`: 導出參數記錄
22
+ - `model_info.json`: 模型元信息
23
+
24
+ ## 使用方法
25
+
26
+ ### 1. 使用 MNN C++ API
27
+
28
+ ```cpp
29
+ #include <MNN/expr/Executor.hpp>
30
+ #include <MNN/AutoTime.hpp>
31
+ #include "llm/llm.hpp"
32
+
33
+ // 創建 LLM 實例
34
+ std::unique_ptr<Llm> llm(Llm::createLLM(config));
35
+
36
+ // 加載模型
37
+ llm->load("path/to/llm.mnn");
38
+
39
+ // 生成文本
40
+ auto response = llm->response("翻譯這段文字");
41
+ ```
42
+
43
+ ### 2. 在 iOS/macOS 應用中使用
44
+
45
+ ```swift
46
+ import MNN
47
+
48
+ // 配置 MNN 後端
49
+ let config = MNNLLMConfig()
50
+ config.backend = .metal // 使用 Metal GPU 加速
51
+
52
+ // 創建 LLM wrapper
53
+ let wrapper = MNNLLMWrapper(backendType: .metal)
54
+
55
+ // 加載模型
56
+ wrapper.loadModel(
57
+ atPath: modelPath,
58
+ modelType: "llm"
59
+ ) { success, error in
60
+ if success {
61
+ // 生成翻譯
62
+ let result = wrapper.generateResponse(
63
+ forPrompt: sourceText,
64
+ useChatTemplate: true
65
+ )
66
+ }
67
+ }
68
+ ```
69
+
70
+ ## 性能特點
71
+
72
+ - **量化優勢**: 4-bit 量化將模型大小從 ~14GB 減少到 ~4.6GB
73
+ - **推理速度**: 在 Apple Silicon (M1/M2/M3) 上利用 Metal 加速
74
+ - **內存占用**: 較原始模型顯著降低
75
+ - **精度損失**: 4-bit 量化保持了良好的翻譯質量
76
+
77
+ ## 系統要求
78
+
79
+ - **macOS**: 10.15+ (推薦 12.0+ 以支持 Metal 3)
80
+ - **iOS**: 13.0+
81
+ - **架構**: arm64 (Apple Silicon 或 iOS 設備)
82
+ - **內存**: 至少 8GB RAM 推薦
83
+
84
+ ## 轉換命令
85
+
86
+ 此模型使用以下命令轉換:
87
+
88
+ ```bash
89
+ python llmexport.py \
90
+ --path Tencent/Hunyuan-MT-7B \
91
+ --export mnn \
92
+ --quant_bit 4 \
93
+ --dst_path ./hunyuan-mt-7b-mnn
94
+ ```
95
+
96
+ ## License
97
+
98
+ 本模型繼承自原始 Tencent/Hunyuan-MT-7B 的許可協議。請參閱原始模型倉庫了解詳細許可信息。
99
+
100
+ ## 引用
101
+
102
+ 如果您使用此模型,請引用原始 Hunyuan-MT 模型:
103
+
104
+ ```bibtex
105
+ @misc{hunyuan-mt-7b,
106
+ title={Hunyuan-MT-7B: A Multilingual Translation Model},
107
+ author={Tencent AI Lab},
108
+ year={2024},
109
+ publisher={Hugging Face},
110
+ howpublished={\url{https://huggingface.co/Tencent/Hunyuan-MT-7B}}
111
+ }
112
+ ```
113
+
114
+ ## 相關資源
115
+
116
+ - [原始模型](https://huggingface.co/Tencent/Hunyuan-MT-7B)
117
+ - [MNN 框架](https://github.com/alibaba/MNN)
118
+ - [MNN Transformers](https://github.com/alibaba/MNN/tree/master/transformers)
119
+
120
+ ## 問題反饋
121
+
122
+ 如有問題或建議,請在此倉庫提交 Issue。
config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "llm_model": "llm.mnn",
3
+ "llm_weight": "llm.mnn.weight",
4
+ "backend_type": "auto",
5
+ "thread_num": 8,
6
+ "precision": "low",
7
+ "memory": "low",
8
+ "max_new_tokens": 2048,
9
+ "sampler_type": "mixed",
10
+ "mixed_samplers": [
11
+ "penalty",
12
+ "topK",
13
+ "topP",
14
+ "min_p",
15
+ "temperature"
16
+ ],
17
+ "penalty": 1.05,
18
+ "temperature": 0.7,
19
+ "topP": 0.6,
20
+ "topK": 20,
21
+ "min_p": 0
22
+ }
export_args.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "path": "/Users/jazzwang/Documents/kindle-ai-export/models/huggingface/Hunyuan-MT-7B",
3
+ "type": null,
4
+ "tokenizer_path": "/Users/jazzwang/Documents/kindle-ai-export/models/huggingface/Hunyuan-MT-7B",
5
+ "eagle_path": null,
6
+ "lora_path": null,
7
+ "gptq_path": null,
8
+ "dst_path": "/Users/jazzwang/Documents/kindle-ai-export/models/mnn/translation/hunyuan-mt-7b-mnn",
9
+ "verbose": false,
10
+ "test": null,
11
+ "export": "mnn",
12
+ "onnx_slim": false,
13
+ "quant_bit": 4,
14
+ "quant_block": 64,
15
+ "visual_quant_bit": null,
16
+ "visual_quant_block": null,
17
+ "lm_quant_bit": 8,
18
+ "mnnconvert": "../../../build/MNNConvert",
19
+ "ppl": false,
20
+ "awq": false,
21
+ "hqq": false,
22
+ "transformer_fuse": false,
23
+ "group_conv_native": false,
24
+ "smooth": false,
25
+ "sym": false,
26
+ "visual_sym": false,
27
+ "seperate_embed": false,
28
+ "lora_split": false,
29
+ "calib_data": null,
30
+ "act_bit": 16,
31
+ "embed_bit": 16,
32
+ "act_sym": false
33
+ }
llm.mnn ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9007e986c7cc677644afc4de03e1ee429fb4a908f38ce1fad02d890bbb91501e
3
+ size 521888
llm.mnn.json ADDED
The diff for this file is too large to render. See raw diff
 
llm_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hidden_size": 4096,
3
+ "layer_nums": 32,
4
+ "attention_mask": "float",
5
+ "key_value_shape": [
6
+ 2,
7
+ 1,
8
+ 0,
9
+ 8,
10
+ 128
11
+ ],
12
+ "bos": "",
13
+ "system_prompt_template": "%s",
14
+ "user_prompt_template": "%s",
15
+ "assistant_prompt_template": "%s",
16
+ "is_visual": false,
17
+ "attention_type": "full",
18
+ "jinja": {
19
+ "chat_template": "{% set ns = namespace(has_head=true) %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = message['content'] %}{% if loop.index0 == 0 %}{% if content == '' %}{% set ns.has_head = false %}{% elif message['role'] == 'system' %}{% set content = '<|startoftext|>' + content + '<|extra_4|>' %}{% endif %}{% endif %}{% if message['role'] == 'user' %}{% if loop.index0 == 1 and ns.has_head %}{% set content = content + '<|extra_0|>' %}{% else %}{% set content = '<|startoftext|>' + content + '<|extra_0|>' %}{% endif %}{% elif message['role'] == 'assistant' %}{% set content = content + '<|eos|>' %}{% endif %}{{ content }}{% endfor %}",
20
+ "bos": "<|startoftext|>",
21
+ "eos": "<|eos|>"
22
+ },
23
+ "tie_embeddings": [
24
+ 4364276810,
25
+ 4889613386,
26
+ 65667072,
27
+ 8,
28
+ 64
29
+ ]
30
+ }
model_info.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "Hunyuan-MT-7B",
3
+ "display_name": "Hunyuan-MT-7B (Translation)",
4
+ "description": "Tencent Hunyuan translation model, supports 33 languages, no thinking mode, optimized for translation tasks",
5
+ "version": "1.0",
6
+ "type": "translation",
7
+ "languages": 33,
8
+ "source": "tencent/Hunyuan-MT-7B",
9
+ "quantization": "4-bit",
10
+ "prompt_format": "Translate the following text into {target_language}, without any additional explanation.\n{source_text}",
11
+ "has_thinking_mode": false,
12
+ "recommended_max_tokens": 2048
13
+ }
tokenizer.txt ADDED
The diff for this file is too large to render. See raw diff