wangjazz committed on
Commit
24cd02b
·
verified ·
1 Parent(s): 5f00475

Upload 8 files

Browse files
Files changed (9) hide show
  1. .gitattributes +1 -0
  2. README.md +122 -3
  3. config.json +22 -0
  4. export_args.json +33 -0
  5. llm.mnn +3 -0
  6. llm.mnn.json +0 -0
  7. llm_config.json +30 -0
  8. model_info.json +13 -0
  9. tokenizer.txt +0 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ llm.mnn filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,122 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hunyuan-MT-7B-MNN
2
+
3
+ 這是 [Tencent/Hunyuan-MT-7B](https://huggingface.co/Tencent/Hunyuan-MT-7B) 的 MNN 格式版本,針對移動端和桌面端推理優化。
4
+
5
+ ## 模型信息
6
+
7
+ - **原始模型**: Tencent/Hunyuan-MT-7B
8
+ - **框架**: MNN (Mobile Neural Network)
9
+ - **量化**: 4-bit 量化
10
+ - **模型大小**: ~4.6 GB (相比原始 FP16 模型顯著減小)
11
+ - **轉換日期**: 2025-11-30
12
+
13
+ ## 文件說明
14
+
15
+ - `llm.mnn`: MNN 模型結構文件 (~510 KB)
16
+ - `llm.mnn.weight`: 量化後的模型權重 (~4.6 GB)
17
+ - `llm.mnn.json`: 模型結構的 JSON 表示 (~1.0 MB)
18
+ - `tokenizer.txt`: 分詞器文件 (~1.3 MB)
19
+ - `config.json`: 模型配置文件
20
+ - `llm_config.json`: LLM 特定配置
21
+ - `export_args.json`: 導出參數記錄
22
+ - `model_info.json`: 模型元信息
23
+
24
+ ## 使用方法
25
+
26
+ ### 1. 使用 MNN C++ API
27
+
28
+ ```cpp
29
+ #include <MNN/expr/Executor.hpp>
30
+ #include <MNN/AutoTime.hpp>
31
+ #include "llm/llm.hpp"
32
+
33
+ // 創建 LLM 實例
34
+ std::unique_ptr<Llm> llm(Llm::createLLM(config));
35
+
36
+ // 加載模型
37
+ llm->load("path/to/llm.mnn");
38
+
39
+ // 生成文本
40
+ auto response = llm->response("翻譯這段文字");
41
+ ```
42
+
43
+ ### 2. 在 iOS/macOS 應用中使用
44
+
45
+ ```swift
46
+ import MNN
47
+
48
+ // 配置 MNN 後端
49
+ let config = MNNLLMConfig()
50
+ config.backend = .metal // 使用 Metal GPU 加速
51
+
52
+ // 創建 LLM wrapper
53
+ let wrapper = MNNLLMWrapper(backendType: .metal)
54
+
55
+ // 加載模型
56
+ wrapper.loadModel(
57
+ atPath: modelPath,
58
+ modelType: "llm"
59
+ ) { success, error in
60
+ if success {
61
+ // 生成翻譯
62
+ let result = wrapper.generateResponse(
63
+ forPrompt: sourceText,
64
+ useChatTemplate: true
65
+ )
66
+ }
67
+ }
68
+ ```
69
+
70
+ ## 性能特點
71
+
72
+ - **量化優勢**: 4-bit 量化將模型大小從 ~14GB 減少到 ~4.6GB
73
+ - **推理速度**: 在 Apple Silicon (M1/M2/M3) 上利用 Metal 加速
74
+ - **內存占用**: 較原始模型顯著降低
75
+ - **精度損失**: 4-bit 量化保持了良好的翻譯質量
76
+
77
+ ## 系統要求
78
+
79
+ - **macOS**: 10.15+ (推薦 12.0+ 以支持 Metal 3)
80
+ - **iOS**: 13.0+
81
+ - **架構**: arm64 (Apple Silicon 或 iOS 設備)
82
+ - **內存**: 至少 8GB RAM 推薦
83
+
84
+ ## 轉換命令
85
+
86
+ 此模型使用以下命令轉換:
87
+
88
+ ```bash
89
+ python llmexport.py \
90
+ --path Tencent/Hunyuan-MT-7B \
91
+ --export mnn \
92
+ --quant_bit 4 \
93
+ --dst_path ./hunyuan-mt-7b-mnn
94
+ ```
95
+
96
+ ## License
97
+
98
+ 本模型繼承自原始 Tencent/Hunyuan-MT-7B 的許可協議。請參閱原始模型倉庫了解詳細許可信息。
99
+
100
+ ## 引用
101
+
102
+ 如果您使用此模型,請引用原始 Hunyuan-MT 模型:
103
+
104
+ ```bibtex
105
+ @misc{hunyuan-mt-7b,
106
+ title={Hunyuan-MT-7B: A Multilingual Translation Model},
107
+ author={Tencent AI Lab},
108
+ year={2024},
109
+ publisher={Hugging Face},
110
+ howpublished={\url{https://huggingface.co/Tencent/Hunyuan-MT-7B}}
111
+ }
112
+ ```
113
+
114
+ ## 相關資源
115
+
116
+ - [原始模型](https://huggingface.co/Tencent/Hunyuan-MT-7B)
117
+ - [MNN 框架](https://github.com/alibaba/MNN)
118
+ - [MNN Transformers](https://github.com/alibaba/MNN/tree/master/transformers)
119
+
120
+ ## 問題反饋
121
+
122
+ 如有問題或建議,請在此倉庫提交 Issue。
config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "llm_model": "llm.mnn",
3
+ "llm_weight": "llm.mnn.weight",
4
+ "backend_type": "auto",
5
+ "thread_num": 8,
6
+ "precision": "low",
7
+ "memory": "low",
8
+ "max_new_tokens": 2048,
9
+ "sampler_type": "mixed",
10
+ "mixed_samplers": [
11
+ "penalty",
12
+ "topK",
13
+ "topP",
14
+ "min_p",
15
+ "temperature"
16
+ ],
17
+ "penalty": 1.05,
18
+ "temperature": 0.7,
19
+ "topP": 0.6,
20
+ "topK": 20,
21
+ "min_p": 0
22
+ }
export_args.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "path": "/Users/jazzwang/Documents/kindle-ai-export/models/huggingface/Hunyuan-MT-7B",
3
+ "type": null,
4
+ "tokenizer_path": "/Users/jazzwang/Documents/kindle-ai-export/models/huggingface/Hunyuan-MT-7B",
5
+ "eagle_path": null,
6
+ "lora_path": null,
7
+ "gptq_path": null,
8
+ "dst_path": "/Users/jazzwang/Documents/kindle-ai-export/models/mnn/translation/hunyuan-mt-7b-mnn",
9
+ "verbose": false,
10
+ "test": null,
11
+ "export": "mnn",
12
+ "onnx_slim": false,
13
+ "quant_bit": 4,
14
+ "quant_block": 64,
15
+ "visual_quant_bit": null,
16
+ "visual_quant_block": null,
17
+ "lm_quant_bit": 8,
18
+ "mnnconvert": "../../../build/MNNConvert",
19
+ "ppl": false,
20
+ "awq": false,
21
+ "hqq": false,
22
+ "transformer_fuse": false,
23
+ "group_conv_native": false,
24
+ "smooth": false,
25
+ "sym": false,
26
+ "visual_sym": false,
27
+ "seperate_embed": false,
28
+ "lora_split": false,
29
+ "calib_data": null,
30
+ "act_bit": 16,
31
+ "embed_bit": 16,
32
+ "act_sym": false
33
+ }
llm.mnn ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9007e986c7cc677644afc4de03e1ee429fb4a908f38ce1fad02d890bbb91501e
3
+ size 521888
llm.mnn.json ADDED
The diff for this file is too large to render. See raw diff
 
llm_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hidden_size": 4096,
3
+ "layer_nums": 32,
4
+ "attention_mask": "float",
5
+ "key_value_shape": [
6
+ 2,
7
+ 1,
8
+ 0,
9
+ 8,
10
+ 128
11
+ ],
12
+ "bos": "",
13
+ "system_prompt_template": "%s",
14
+ "user_prompt_template": "%s",
15
+ "assistant_prompt_template": "%s",
16
+ "is_visual": false,
17
+ "attention_type": "full",
18
+ "jinja": {
19
+ "chat_template": "{% set ns = namespace(has_head=true) %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = message['content'] %}{% if loop.index0 == 0 %}{% if content == '' %}{% set ns.has_head = false %}{% elif message['role'] == 'system' %}{% set content = '<|startoftext|>' + content + '<|extra_4|>' %}{% endif %}{% endif %}{% if message['role'] == 'user' %}{% if loop.index0 == 1 and ns.has_head %}{% set content = content + '<|extra_0|>' %}{% else %}{% set content = '<|startoftext|>' + content + '<|extra_0|>' %}{% endif %}{% elif message['role'] == 'assistant' %}{% set content = content + '<|eos|>' %}{% endif %}{{ content }}{% endfor %}",
20
+ "bos": "<|startoftext|>",
21
+ "eos": "<|eos|>"
22
+ },
23
+ "tie_embeddings": [
24
+ 4364276810,
25
+ 4889613386,
26
+ 65667072,
27
+ 8,
28
+ 64
29
+ ]
30
+ }
model_info.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "Hunyuan-MT-7B",
3
+ "display_name": "Hunyuan-MT-7B (Translation)",
4
+ "description": "Tencent Hunyuan translation model, supports 33 languages, no thinking mode, optimized for translation tasks",
5
+ "version": "1.0",
6
+ "type": "translation",
7
+ "languages": 33,
8
+ "source": "tencent/Hunyuan-MT-7B",
9
+ "quantization": "4-bit",
10
+ "prompt_format": "Translate the following text into {target_language}, without any additional explanation.\n{source_text}",
11
+ "has_thinking_mode": false,
12
+ "recommended_max_tokens": 2048
13
+ }
tokenizer.txt ADDED
The diff for this file is too large to render. See raw diff