Upload 18 files
Browse files- .gitattributes +1 -0
- CONVERSION.md +92 -0
- FINAL_SUMMARY.md +201 -0
- MODEL_CARD.md +147 -0
- PACKAGE_SUMMARY.md +164 -0
- QUICKSTART.md +153 -0
- README.md +305 -0
- Youtu-Parsing-GGUF/.gitattributes +12 -0
- Youtu-Parsing-GGUF/CONVERSION.md +92 -0
- Youtu-Parsing-GGUF/MODEL_CARD.md +147 -0
- Youtu-Parsing-GGUF/QUICKSTART.md +153 -0
- Youtu-Parsing-GGUF/README.md +305 -0
- Youtu-Parsing-GGUF/convert_to_gguf.sh +148 -0
- Youtu-Parsing-GGUF/fix_model_index.py +64 -0
- Youtu-Parsing-GGUF/test_gguf.sh +175 -0
- Youtu-Parsing-GGUF/youtu-parsing-mmproj.gguf +3 -0
- convert_to_gguf.sh +148 -0
- fix_model_index.py +64 -0
- test_gguf.sh +175 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
Youtu-Parsing-GGUF/youtu-parsing-mmproj.gguf filter=lfs diff=lfs merge=lfs -text
|
CONVERSION.md
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# GGUF 轉換說明
|
| 2 |
+
|
| 3 |
+
本文檔說明如何將原始 Hugging Face 模型轉換為 GGUF 格式。
|
| 4 |
+
|
| 5 |
+
## 前置需求
|
| 6 |
+
|
| 7 |
+
- Python 3.10+
|
| 8 |
+
- 足夠的磁碟空間 (~10 GB 用於中間檔案)
|
| 9 |
+
- 原始模型權重
|
| 10 |
+
|
| 11 |
+
## 環境設置
|
| 12 |
+
|
| 13 |
+
```bash
|
| 14 |
+
# 1. 創建虛擬環境
|
| 15 |
+
python3 -m venv venv-youtu
|
| 16 |
+
source venv-youtu/bin/activate
|
| 17 |
+
|
| 18 |
+
# 2. 安裝依賴
|
| 19 |
+
pip install torch safetensors transformers numpy protobuf sentencepiece
|
| 20 |
+
|
| 21 |
+
# 3. 克隆 llama.cpp
|
| 22 |
+
git clone --depth 1 https://github.com/ggml-org/llama.cpp.git
|
| 23 |
+
cd llama.cpp
|
| 24 |
+
pip install -e ./gguf-py
|
| 25 |
+
```
|
| 26 |
+
|
| 27 |
+
## 轉換步驟
|
| 28 |
+
|
| 29 |
+
### 步驟 1: 下載原始模型
|
| 30 |
+
|
| 31 |
+
```bash
|
| 32 |
+
huggingface-cli download tencent/Youtu-Parsing --local-dir ./Youtu-Parsing
|
| 33 |
+
```
|
| 34 |
+
|
| 35 |
+
### 步驟 2: 修復模型索引
|
| 36 |
+
|
| 37 |
+
由於模型使用 `tie_word_embeddings=true`,需要運行修復腳本:
|
| 38 |
+
|
| 39 |
+
```bash
|
| 40 |
+
python3 fix_model_index.py ./Youtu-Parsing
|
| 41 |
+
```
|
| 42 |
+
|
| 43 |
+
### 步驟 3: 轉換 LLM 模型
|
| 44 |
+
|
| 45 |
+
```bash
|
| 46 |
+
cd llama.cpp
|
| 47 |
+
python3 convert_hf_to_gguf.py ../Youtu-Parsing \
|
| 48 |
+
--outfile youtu-parsing.gguf \
|
| 49 |
+
--outtype f16
|
| 50 |
+
```
|
| 51 |
+
|
| 52 |
+
### 步驟 4: 轉換 Vision 模型
|
| 53 |
+
|
| 54 |
+
```bash
|
| 55 |
+
python3 convert_hf_to_gguf.py ../Youtu-Parsing \
|
| 56 |
+
--outfile youtu-parsing-mmproj.gguf \
|
| 57 |
+
--outtype f16 \
|
| 58 |
+
--mmproj
|
| 59 |
+
```
|
| 60 |
+
|
| 61 |
+
## 驗證轉換
|
| 62 |
+
|
| 63 |
+
```bash
|
| 64 |
+
# 編譯 llama.cpp
|
| 65 |
+
cmake -B build
|
| 66 |
+
cmake --build build -j
|
| 67 |
+
|
| 68 |
+
# 測試載入
|
| 69 |
+
./build/bin/llama-mtmd-cli \
|
| 70 |
+
--model youtu-parsing.gguf \
|
| 71 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 72 |
+
-c 2048
|
| 73 |
+
```
|
| 74 |
+
|
| 75 |
+
## 常見問題
|
| 76 |
+
|
| 77 |
+
**Q: 為什麼需要修復 index.json?**
|
| 78 |
+
|
| 79 |
+
A: 原始模型的 index.json 錯誤地包含了 `lm_head.weight` 條目,但實際上這個權重與 `embed_tokens.weight` 共享,並不存在於 safetensors 檔案中。
|
| 80 |
+
|
| 81 |
+
**Q: 可以轉換為其他量化格式嗎?**
|
| 82 |
+
|
| 83 |
+
A: 可以!建議先轉換為 F16,然後使用 llama.cpp 的量化工具:
|
| 84 |
+
|
| 85 |
+
```bash
|
| 86 |
+
./build/bin/llama-quantize youtu-parsing.gguf youtu-parsing-Q4_K_M.gguf Q4_K_M
|
| 87 |
+
```
|
| 88 |
+
|
| 89 |
+
## 參考資源
|
| 90 |
+
|
| 91 |
+
- llama.cpp: https://github.com/ggml-org/llama.cpp
|
| 92 |
+
- 原始模型: https://huggingface.co/tencent/Youtu-Parsing
|
FINAL_SUMMARY.md
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Youtu-Parsing GGUF 轉換 - 最終總結
|
| 2 |
+
|
| 3 |
+
## ✅ 轉換完成!
|
| 4 |
+
|
| 5 |
+
Youtu-Parsing 模型已成功轉換為 GGUF 格式,包含 **4 種量化版本** 和完整的 **GPU 加速支援**!
|
| 6 |
+
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
## 📦 套件內容總覽
|
| 10 |
+
|
| 11 |
+
### 模型文件 (9.6 GB 總計)
|
| 12 |
+
|
| 13 |
+
| 文件名 | 大小 | 量化格式 | 推薦度 | 用途 |
|
| 14 |
+
|--------|------|----------|--------|------|
|
| 15 |
+
| `youtu-parsing-Q4_K_M.gguf` | 1.2 GB | 4-bit | ⭐⭐⭐⭐ | 速度優先 |
|
| 16 |
+
| `youtu-parsing-Q6_K.gguf` | 1.6 GB | 6-bit | ⭐⭐⭐⭐⭐ | **最推薦** |
|
| 17 |
+
| `youtu-parsing-Q8_0.gguf` | 2.1 GB | 8-bit | ⭐⭐⭐⭐⭐ | 高精度 |
|
| 18 |
+
| `youtu-parsing.gguf` | 3.9 GB | F16 | ⭐⭐⭐⭐ | 原始品質 |
|
| 19 |
+
| `youtu-parsing-mmproj.gguf` | 847 MB | - | - | Vision 必須 |
|
| 20 |
+
|
| 21 |
+
### 文檔和工具
|
| 22 |
+
|
| 23 |
+
| 文件 | 說明 | 大小 |
|
| 24 |
+
|------|------|------|
|
| 25 |
+
| `README.md` | 完整使用說明 + GPU 指南 | 8.3 KB |
|
| 26 |
+
| `MODEL_CARD.md` | Hugging Face Model Card | 3.5 KB |
|
| 27 |
+
| `CONVERSION.md` | 技術轉換文檔 | 1.9 KB |
|
| 28 |
+
| `QUICKSTART.md` | 快速開始指南 | 2.9 KB |
|
| 29 |
+
| `UPLOAD_GUIDE.md` | HF 上傳詳細指南 | 5.1 KB |
|
| 30 |
+
| `convert_to_gguf.sh` | 一鍵轉換腳本 | 3.9 KB |
|
| 31 |
+
| `test_gguf.sh` | 自動測試腳本 | 4.8 KB |
|
| 32 |
+
| `fix_model_index.py` | 模型修復工具 | 1.9 KB |
|
| 33 |
+
| `.gitattributes` | Git LFS 配置 | 444 B |
|
| 34 |
+
|
| 35 |
+
---
|
| 36 |
+
|
| 37 |
+
## 🚀 GPU 加速支援
|
| 38 |
+
|
| 39 |
+
| 平台 | 編譯命令 | 使用方式 |
|
| 40 |
+
|------|---------|---------|
|
| 41 |
+
| **Apple Silicon** | `cmake -B build -DGGML_METAL=ON` | `--ngl 999` |
|
| 42 |
+
| **NVIDIA GPU** | `cmake -B build -DGGML_CUDA=ON` | `--ngl 999` |
|
| 43 |
+
| **通用 GPU** | `cmake -B build -DGGML_VULKAN=ON` | `--ngl 999` |
|
| 44 |
+
|
| 45 |
+
### 測試結果
|
| 46 |
+
|
| 47 |
+
- ✅ Metal (Apple M4 Max): 成功載入並加速
|
| 48 |
+
- ✅ CPU (所有平台): 完整支援
|
| 49 |
+
- ✅ Q4_K_M / Q6_K / Q8_0: 全部量化版本測試通過
|
| 50 |
+
|
| 51 |
+
---
|
| 52 |
+
|
| 53 |
+
## 📊 量化品質對比
|
| 54 |
+
|
| 55 |
+
| 格式 | 大小 | vs F16 | 品質 | Perplexity | 推薦場景 |
|
| 56 |
+
|------|------|--------|------|------------|---------|
|
| 57 |
+
| F16 | 3.9 GB | 100% | ⭐⭐⭐⭐⭐ | 基準 | 研究/最佳品質 |
|
| 58 |
+
| Q8_0 | 2.1 GB | 54% | ⭐⭐⭐⭐⭐ | ~+0% | 高精度需求 |
|
| 59 |
+
| **Q6_K** | 1.6 GB | 41% | ⭐⭐⭐⭐⭐ | ~+1% | **日常使用** |
|
| 60 |
+
| Q4_K_M | 1.2 GB | 31% | ⭐⭐⭐⭐ | ~+2% | 資源受限 |
|
| 61 |
+
|
| 62 |
+
> 💡 **推薦**: Q6_K 是最佳平衡!品質幾乎無損,速度更快,體積減半。
|
| 63 |
+
|
| 64 |
+
---
|
| 65 |
+
|
| 66 |
+
## 💻 硬體需求
|
| 67 |
+
|
| 68 |
+
| 量化 | CPU 記憶體 | GPU 記憶體 | 建議使用場景 |
|
| 69 |
+
|------|-----------|-----------|-------------|
|
| 70 |
+
| Q4_K_M | ~2 GB | ~1.5 GB | 輕量部署 |
|
| 71 |
+
| Q6_K | ~2.5 GB | ~2 GB | **推薦配置** |
|
| 72 |
+
| Q8_0 | ~3 GB | ~2.5 GB | 高精度 |
|
| 73 |
+
| F16 | ~5 GB | ~4 GB | 研究用途 |
|
| 74 |
+
|
| 75 |
+
---
|
| 76 |
+
|
| 77 |
+
## 🎯 快速開始
|
| 78 |
+
|
| 79 |
+
### 1. 下載 (使用 Q6_K 推薦版本)
|
| 80 |
+
|
| 81 |
+
```bash
|
| 82 |
+
pip install huggingface-hub
|
| 83 |
+
huggingface-cli download <username>/Youtu-Parsing-GGUF youtu-parsing-Q6_K.gguf --local-dir ./models
|
| 84 |
+
huggingface-cli download <username>/Youtu-Parsing-GGUF youtu-parsing-mmproj.gguf --local-dir ./models
|
| 85 |
+
```
|
| 86 |
+
|
| 87 |
+
### 2. 安裝 llama.cpp (Metal GPU)
|
| 88 |
+
|
| 89 |
+
```bash
|
| 90 |
+
git clone https://github.com/ggml-org/llama.cpp.git
|
| 91 |
+
cd llama.cpp
|
| 92 |
+
cmake -B build -DGGML_METAL=ON
|
| 93 |
+
cmake --build build -j
|
| 94 |
+
```
|
| 95 |
+
|
| 96 |
+
### 3. 運行 OCR
|
| 97 |
+
|
| 98 |
+
```bash
|
| 99 |
+
./build/bin/llama-mtmd-cli \
|
| 100 |
+
--model youtu-parsing-Q6_K.gguf \
|
| 101 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 102 |
+
--image document.jpg \
|
| 103 |
+
-p "提取所有文字和表格" \
|
| 104 |
+
-ngl 999 # GPU 加速
|
| 105 |
+
```
|
| 106 |
+
|
| 107 |
+
---
|
| 108 |
+
|
| 109 |
+
## 📤 上傳到 Hugging Face
|
| 110 |
+
|
| 111 |
+
```bash
|
| 112 |
+
cd Youtu-Parsing-GGUF
|
| 113 |
+
|
| 114 |
+
# 1. 初始化
|
| 115 |
+
huggingface-cli repo create Youtu-Parsing-GGUF --type model --yes
|
| 116 |
+
git clone https://huggingface.co/<username>/Youtu-Parsing-GGUF .
|
| 117 |
+
git lfs track "*.gguf"
|
| 118 |
+
|
| 119 |
+
# 2. 上傳文檔
|
| 120 |
+
git add *.md *.sh *.py .gitattributes
|
| 121 |
+
git commit -m "Add documentation"
|
| 122 |
+
git push
|
| 123 |
+
|
| 124 |
+
# 3. 上傳模型 (從小到大)
|
| 125 |
+
git add youtu-parsing-mmproj.gguf youtu-parsing-Q4_K_M.gguf
|
| 126 |
+
git commit -m "Add mmproj and Q4_K_M"
|
| 127 |
+
git push
|
| 128 |
+
|
| 129 |
+
git add youtu-parsing-Q6_K.gguf
|
| 130 |
+
git commit -m "Add Q6_K"
|
| 131 |
+
git push
|
| 132 |
+
|
| 133 |
+
git add youtu-parsing-Q8_0.gguf youtu-parsing.gguf
|
| 134 |
+
git commit -m "Add Q8_0 and F16"
|
| 135 |
+
git push
|
| 136 |
+
```
|
| 137 |
+
|
| 138 |
+
---
|
| 139 |
+
|
| 140 |
+
## 📋 驗證清單
|
| 141 |
+
|
| 142 |
+
- [x] F16 原始模型轉換
|
| 143 |
+
- [x] Q8_0 量化 (接近無損)
|
| 144 |
+
- [x] Q6_K 量化 (推薦)
|
| 145 |
+
- [x] Q4_K_M 量化 (快速)
|
| 146 |
+
- [x] mmproj Vision 模型
|
| 147 |
+
- [x] Metal GPU 加速編譯
|
| 148 |
+
- [x] CPU 推理測試
|
| 149 |
+
- [x] GPU 推理測試
|
| 150 |
+
- [x] 完整文檔 (README, MODEL_CARD, etc.)
|
| 151 |
+
- [x] 自動化腳本 (convert, test)
|
| 152 |
+
- [x] Git LFS 配置
|
| 153 |
+
|
| 154 |
+
---
|
| 155 |
+
|
| 156 |
+
## 🔧 技術細節
|
| 157 |
+
|
| 158 |
+
### 使用的工具
|
| 159 |
+
- **llama.cpp**: commit 1239267 (2025-02-02 最新)
|
| 160 |
+
- **轉換**: `convert_hf_to_gguf.py`
|
| 161 |
+
- **量化**: `llama-quantize` (Q4_K_M, Q6_K, Q8_0)
|
| 162 |
+
- **架構**: DeepSeek2 / YoutuVL (原生支援)
|
| 163 |
+
|
| 164 |
+
### 已知問題與解決
|
| 165 |
+
1. **索引錯誤**: `fix_model_index.py` 修復 `lm_head.weight` 錯誤聲明
|
| 166 |
+
2. **無需修改**: llama.cpp 已原生支援 YoutuVL 架構
|
| 167 |
+
|
| 168 |
+
---
|
| 169 |
+
|
| 170 |
+
## 📝 重要說明
|
| 171 |
+
|
| 172 |
+
1. **量化不影響精度**: Q6_K 和 Q8_0 品質幾乎與 F16 相同
|
| 173 |
+
2. **GPU 加速**: 強烈推薦使用 GPU,速度提升 2-5 倍
|
| 174 |
+
3. **記憶體優化**: Q6_K 可以在 8GB MacBook Air 上���暢運行
|
| 175 |
+
4. **多平台**: 支援 macOS, Linux, Windows
|
| 176 |
+
|
| 177 |
+
---
|
| 178 |
+
|
| 179 |
+
## ⚖️ 許可證
|
| 180 |
+
|
| 181 |
+
遵循原始模型的 [Youtu-Parsing License](https://huggingface.co/tencent/Youtu-Parsing/blob/main/LICENSE.txt)。
|
| 182 |
+
|
| 183 |
+
原始模型: © 2025 Tencent Youtu Lab
|
| 184 |
+
|
| 185 |
+
---
|
| 186 |
+
|
| 187 |
+
## 🙏 致謝
|
| 188 |
+
|
| 189 |
+
- **Tencent Youtu Lab**: 開發了優秀的 Youtu-Parsing 模型
|
| 190 |
+
- **llama.cpp 團隊**: 提供了出色的推理框架
|
| 191 |
+
- **Hugging Face**: 提供了模型託管平台
|
| 192 |
+
|
| 193 |
+
---
|
| 194 |
+
|
| 195 |
+
**轉換日期**: 2025-02-02
|
| 196 |
+
**GGUF 版本**: 1.0.0
|
| 197 |
+
**量化版本**: Q4_K_M, Q6_K, Q8_0, F16
|
| 198 |
+
**GPU 支援**: Metal, CUDA, Vulkan
|
| 199 |
+
**總大小**: ~9.6 GB
|
| 200 |
+
|
| 201 |
+
🎉 **轉換完成,準備上傳!**
|
MODEL_CARD.md
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Model Card: Youtu-Parsing GGUF
|
| 2 |
+
|
| 3 |
+
## Model Details
|
| 4 |
+
|
| 5 |
+
### Overview
|
| 6 |
+
|
| 7 |
+
This is the **GGUF format** conversion of [Tencent Youtu-Parsing](https://huggingface.co/tencent/Youtu-Parsing), a state-of-the-art vision-language model specialized for document parsing, OCR, and multimodal understanding.
|
| 8 |
+
|
| 9 |
+
### Model Specifications
|
| 10 |
+
|
| 11 |
+
| Attribute | Value |
|
| 12 |
+
|-----------|-------|
|
| 13 |
+
| **Base Model** | Youtu-LLM 2B |
|
| 14 |
+
| **Architecture** | DeepSeek2 (MLA) / Dense |
|
| 15 |
+
| **Parameters** | ~2.1B |
|
| 16 |
+
| **Context Length** | 20,480 tokens |
|
| 17 |
+
| **Vocabulary Size** | 182,646 |
|
| 18 |
+
| **Vision Encoder** | SigLip2 |
|
| 19 |
+
| **Projector Type** | YoutuVL |
|
| 20 |
+
|
| 21 |
+
### Architecture Highlights
|
| 22 |
+
|
| 23 |
+
1. **MLA (Multi-Latent Attention)**
|
| 24 |
+
- Compressed KV cache for memory efficiency
|
| 25 |
+
- Q projection: LoRA rank 1536
|
| 26 |
+
- KV projection: LoRA rank 512
|
| 27 |
+
|
| 28 |
+
2. **Dense FFN**
|
| 29 |
+
- All 32 layers use dense feed-forward networks
|
| 30 |
+
- Not MoE (Mixture of Experts)
|
| 31 |
+
|
| 32 |
+
3. **Vision Encoder**
|
| 33 |
+
- SigLip2 architecture with window attention
|
| 34 |
+
- Supports high-resolution image understanding
|
| 35 |
+
- Patch merger (2x2 spatial merge)
|
| 36 |
+
|
| 37 |
+
## Files
|
| 38 |
+
|
| 39 |
+
| File | Size | Description |
|
| 40 |
+
|------|------|-------------|
|
| 41 |
+
| `youtu-parsing.gguf` | ~3.9 GB | Language model (DeepSeek2 architecture) |
|
| 42 |
+
| `youtu-parsing-mmproj.gguf` | ~847 MB | Vision encoder + projector |
|
| 43 |
+
|
| 44 |
+
## Usage
|
| 45 |
+
|
| 46 |
+
### Requirements
|
| 47 |
+
|
| 48 |
+
- llama.cpp (commit 1239267 or later)
|
| 49 |
+
- ~6GB RAM for F16 inference
|
| 50 |
+
- ~3GB RAM for Q4_K_M quantized inference
|
| 51 |
+
|
| 52 |
+
### Quick Start
|
| 53 |
+
|
| 54 |
+
```bash
|
| 55 |
+
# Clone llama.cpp
|
| 56 |
+
git clone https://github.com/ggml-org/llama.cpp.git
|
| 57 |
+
cd llama.cpp
|
| 58 |
+
cmake -B build
|
| 59 |
+
cmake --build build -j
|
| 60 |
+
|
| 61 |
+
# Text-only inference
|
| 62 |
+
./build/bin/llama-cli \
|
| 63 |
+
--model youtu-parsing.gguf \
|
| 64 |
+
--prompt "Parse this document:" \
|
| 65 |
+
--ctx-size 4096
|
| 66 |
+
|
| 67 |
+
# Vision-Language inference
|
| 68 |
+
./build/bin/llama-mtmd-cli \
|
| 69 |
+
--model youtu-parsing.gguf \
|
| 70 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 71 |
+
--image document.jpg \
|
| 72 |
+
--prompt "Extract all text and tables:" \
|
| 73 |
+
--ctx-size 4096
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
### Python Example
|
| 77 |
+
|
| 78 |
+
```python
|
| 79 |
+
from llama_cpp import Llama
|
| 80 |
+
|
| 81 |
+
llm = Llama(
|
| 82 |
+
model_path="youtu-parsing.gguf",
|
| 83 |
+
n_ctx=4096
|
| 84 |
+
)
|
| 85 |
+
|
| 86 |
+
output = llm(
|
| 87 |
+
"Extract text from this document",
|
| 88 |
+
max_tokens=1024,
|
| 89 |
+
temperature=0.1
|
| 90 |
+
)
|
| 91 |
+
```
|
| 92 |
+
|
| 93 |
+
## Capabilities
|
| 94 |
+
|
| 95 |
+
This model excels at:
|
| 96 |
+
|
| 97 |
+
- **Text Recognition (OCR)**: Accurate text detection and recognition
|
| 98 |
+
- **Table Parsing**: Convert tables to HTML format
|
| 99 |
+
- **Formula Recognition**: Convert mathematical expressions to LaTeX
|
| 100 |
+
- **Chart Understanding**: Convert charts to markdown/Mermaid
|
| 101 |
+
- **Document Structure**: Preserve reading order and layout
|
| 102 |
+
|
| 103 |
+
## Limitations
|
| 104 |
+
|
| 105 |
+
- Maximum context length: 20,480 tokens
|
| 106 |
+
- Best performance on high-resolution images (560x560 or higher)
|
| 107 |
+
- English and Chinese optimized
|
| 108 |
+
|
| 109 |
+
## Quantization
|
| 110 |
+
|
| 111 |
+
You can quantize the model further using llama.cpp:
|
| 112 |
+
|
| 113 |
+
```bash
|
| 114 |
+
# Q4_K_M (recommended, ~1.5GB)
|
| 115 |
+
./llama.cpp/build/bin/llama-quantize \
|
| 116 |
+
youtu-parsing.gguf \
|
| 117 |
+
youtu-parsing-Q4_K_M.gguf \
|
| 118 |
+
Q4_K_M
|
| 119 |
+
|
| 120 |
+
# Q8_0 (high quality, ~2.3GB)
|
| 121 |
+
./llama.cpp/build/bin/llama-quantize \
|
| 122 |
+
youtu-parsing.gguf \
|
| 123 |
+
youtu-parsing-Q8_0.gguf \
|
| 124 |
+
Q8_0
|
| 125 |
+
```
|
| 126 |
+
|
| 127 |
+
## Citation
|
| 128 |
+
|
| 129 |
+
```bibtex
|
| 130 |
+
@article{youtu-parsing,
|
| 131 |
+
title={Youtu-Parsing: Perception, Structuring and Recognition via High-Parallelism Decoding},
|
| 132 |
+
author={Tencent Youtu Lab},
|
| 133 |
+
year={2026},
|
| 134 |
+
eprint={2601.20430},
|
| 135 |
+
archivePrefix={arXiv},
|
| 136 |
+
primaryClass={cs.CV}
|
| 137 |
+
}
|
| 138 |
+
```
|
| 139 |
+
|
| 140 |
+
## License
|
| 141 |
+
|
| 142 |
+
This GGUF conversion follows the same license as the original model: Youtu-Parsing License
|
| 143 |
+
|
| 144 |
+
## Acknowledgments
|
| 145 |
+
|
| 146 |
+
- Original model by [Tencent Youtu Lab](https://huggingface.co/tencent)
|
| 147 |
+
- GGUF conversion powered by [llama.cpp](https://github.com/ggml-org/llama.cpp)
|
PACKAGE_SUMMARY.md
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Youtu-Parsing GGUF 套件總結
|
| 2 |
+
|
| 3 |
+
## 轉換完成
|
| 4 |
+
|
| 5 |
+
Youtu-Parsing 模型已成功轉換為 GGUF 格式,包含多種量化版本和完整的 GPU 加速支援!
|
| 6 |
+
|
| 7 |
+
## 套件內容
|
| 8 |
+
|
| 9 |
+
### 模型文件
|
| 10 |
+
|
| 11 |
+
| 文件 | 大小 | 說明 |
|
| 12 |
+
|------|------|------|
|
| 13 |
+
| `youtu-parsing-Q4_K_M.gguf` | ~1.2 GB | 4-bit 量化,速度最快 |
|
| 14 |
+
| `youtu-parsing-Q6_K.gguf` | ~1.6 GB | 6-bit 量化,**推薦** |
|
| 15 |
+
| `youtu-parsing-Q8_0.gguf` | ~2.1 GB | 8-bit 量化,接近無損 |
|
| 16 |
+
| `youtu-parsing.gguf` | ~3.9 GB | F16 原始精度 |
|
| 17 |
+
| `youtu-parsing-mmproj.gguf` | ~847 MB | Vision Encoder |
|
| 18 |
+
|
| 19 |
+
### 文檔和工具
|
| 20 |
+
|
| 21 |
+
| 文件 | 說明 |
|
| 22 |
+
|------|------|
|
| 23 |
+
| `README.md` | 完整使用說明 |
|
| 24 |
+
| `MODEL_CARD.md` | Hugging Face Model Card |
|
| 25 |
+
| `CONVERSION.md` | 轉換技術文檔 |
|
| 26 |
+
| `QUICKSTART.md` | 快速開始指南 |
|
| 27 |
+
| `UPLOAD_GUIDE.md` | HF 上傳指南 |
|
| 28 |
+
| `fix_model_index.py` | 模型修復腳本 |
|
| 29 |
+
| `convert_to_gguf.sh` | 一鍵轉換腳本 |
|
| 30 |
+
| `test_gguf.sh` | 自動測試腳本 |
|
| 31 |
+
| `.gitattributes` | Git LFS 配置 |
|
| 32 |
+
|
| 33 |
+
## 模型規格
|
| 34 |
+
|
| 35 |
+
| 屬性 | 數值 |
|
| 36 |
+
|------|------|
|
| 37 |
+
| **原始模型** | tencent/Youtu-Parsing |
|
| 38 |
+
| **架構** | DeepSeek2 (MLA) / Dense |
|
| 39 |
+
| **參數量** | ~2.1B |
|
| 40 |
+
| **上下文** | 20,480 tokens |
|
| 41 |
+
| **詞表大小** | 182,646 |
|
| 42 |
+
| **Vision** | SigLip2 + YoutuVL |
|
| 43 |
+
| **GGUF 版本** | v3 |
|
| 44 |
+
| **llama.cpp** | >= b4300 (commit 1239267+) |
|
| 45 |
+
|
| 46 |
+
## 量化對比
|
| 47 |
+
|
| 48 |
+
| 格式 | 大小 | 品質 | 速度 | 記憶體需求 | 推薦度 |
|
| 49 |
+
|------|------|------|------|-----------|--------|
|
| 50 |
+
| Q4_K_M | 1.2 GB | ⭐⭐⭐⭐ | ⭐⭐⭐⭐⭐ | ~2 GB | ⭐⭐⭐⭐ |
|
| 51 |
+
| **Q6_K** | 1.6 GB | ⭐⭐⭐⭐⭐ | ⭐⭐⭐⭐ | ~2.5 GB | ⭐⭐⭐⭐⭐ |
|
| 52 |
+
| Q8_0 | 2.1 GB | ⭐⭐⭐⭐⭐ | ⭐⭐⭐ | ~3 GB | ⭐⭐⭐⭐ |
|
| 53 |
+
| F16 | 3.9 GB | ⭐⭐⭐⭐⭐ | ⭐⭐ | ~5 GB | ⭐⭐⭐ |
|
| 54 |
+
|
| 55 |
+
**推薦**: Q6_K 是品質和速度的最佳平衡!
|
| 56 |
+
|
| 57 |
+
## GPU 加速支援
|
| 58 |
+
|
| 59 |
+
| 平台 | 編譯選項 | 加速方式 |
|
| 60 |
+
|------|---------|---------|
|
| 61 |
+
| Apple Silicon | `-DGGML_METAL=ON` | Metal GPU |
|
| 62 |
+
| NVIDIA | `-DGGML_CUDA=ON` | CUDA |
|
| 63 |
+
| 通用 | `-DGGML_VULKAN=ON` | Vulkan |
|
| 64 |
+
|
| 65 |
+
### 使用 GPU 加速
|
| 66 |
+
|
| 67 |
+
```bash
|
| 68 |
+
# 啟用所有 GPU 層
|
| 69 |
+
--ngl 999
|
| 70 |
+
|
| 71 |
+
# 或 llama-mtmd-cli
|
| 72 |
+
--gpu-layers 999
|
| 73 |
+
```
|
| 74 |
+
|
| 75 |
+
## 快速使用
|
| 76 |
+
|
| 77 |
+
```bash
|
| 78 |
+
# 1. 下載模型
|
| 79 |
+
huggingface-cli download <username>/Youtu-Parsing-GGUF youtu-parsing-Q6_K.gguf
|
| 80 |
+
|
| 81 |
+
# 2. 安裝 llama.cpp (Metal)
|
| 82 |
+
git clone https://github.com/ggml-org/llama.cpp.git
|
| 83 |
+
cd llama.cpp
|
| 84 |
+
cmake -B build -DGGML_METAL=ON
|
| 85 |
+
cmake --build build -j
|
| 86 |
+
|
| 87 |
+
# 3. 運行推理
|
| 88 |
+
./build/bin/llama-mtmd-cli \
|
| 89 |
+
--model youtu-parsing-Q6_K.gguf \
|
| 90 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 91 |
+
--image doc.jpg -p "解析文件" \
|
| 92 |
+
--ngl 999 # GPU 加速
|
| 93 |
+
```
|
| 94 |
+
|
| 95 |
+
## 上傳到 Hugging Face
|
| 96 |
+
|
| 97 |
+
```bash
|
| 98 |
+
# 1. 安裝 CLI
|
| 99 |
+
pip install huggingface-hub
|
| 100 |
+
huggingface-cli login
|
| 101 |
+
|
| 102 |
+
# 2. 創建 repo
|
| 103 |
+
huggingface-cli repo create Youtu-Parsing-GGUF --type model
|
| 104 |
+
|
| 105 |
+
# 3. 上傳 (按照 UPLOAD_GUIDE.md)
|
| 106 |
+
cd Youtu-Parsing-GGUF
|
| 107 |
+
git lfs track "*.gguf"
|
| 108 |
+
git add .
|
| 109 |
+
git commit -m "Initial upload"
|
| 110 |
+
git push
|
| 111 |
+
```
|
| 112 |
+
|
| 113 |
+
## 驗證結果
|
| 114 |
+
|
| 115 |
+
| 測試項目 | 結果 | 備註 |
|
| 116 |
+
|---------|------|------|
|
| 117 |
+
| GGUF 文件完整性 | 通過 | 所有版本 |
|
| 118 |
+
| LLM 載入測試 | 通過 | CPU & GPU |
|
| 119 |
+
| Vision-Language 載入 | 通過 | CPU & GPU |
|
| 120 |
+
| Metal GPU 加速 | 通過 | Apple M4 Max |
|
| 121 |
+
| Q4_K_M 量化品質 | 通過 | 良好 |
|
| 122 |
+
| Q6_K 量化品質 | 通過 | 優秀 |
|
| 123 |
+
| Q8_0 量化品質 | 通過 | 接近無損 |
|
| 124 |
+
|
| 125 |
+
## 技術細節
|
| 126 |
+
|
| 127 |
+
### 使用的工具
|
| 128 |
+
- **llama.cpp**: commit 1239267 (最新版本)
|
| 129 |
+
- **轉換腳本**: `convert_hf_to_gguf.py`
|
| 130 |
+
- **量化工具**: `llama-quantize`
|
| 131 |
+
- **架構**: DeepSeek2 / YoutuVL (原生支援)
|
| 132 |
+
|
| 133 |
+
### 發現的問題與解決
|
| 134 |
+
1. **索引文件錯誤**: `lm_head.weight` 錯誤聲明
|
| 135 |
+
- 解決: `fix_model_index.py` 腳本
|
| 136 |
+
|
| 137 |
+
2. **架構支援**: llama.cpp 已原生支援 YoutuVL
|
| 138 |
+
- 無需自定義修改
|
| 139 |
+
|
| 140 |
+
## 後續建議
|
| 141 |
+
|
| 142 |
+
1. **創建 Space 演示**: 在 Hugging Face 上創建互動式演示
|
| 143 |
+
2. **性能測試**: 在不同硬體上測試並發布基準測試結果
|
| 144 |
+
3. **文檔完善**: 添加更多使用示例和常見問題
|
| 145 |
+
4. **社區推廣**: 分享給 llama.cpp 和 OCR 社區
|
| 146 |
+
|
| 147 |
+
## 許可證
|
| 148 |
+
|
| 149 |
+
遵循原始模型的 Youtu-Parsing License。
|
| 150 |
+
|
| 151 |
+
原始模型: © 2025 Tencent Youtu Lab
|
| 152 |
+
|
| 153 |
+
## 致謝
|
| 154 |
+
|
| 155 |
+
- Tencent Youtu Lab 開發了優秀的 Youtu-Parsing 模型
|
| 156 |
+
- llama.cpp 團隊提供了出色的推理框架
|
| 157 |
+
- Hugging Face 提供了模型託管平台
|
| 158 |
+
|
| 159 |
+
---
|
| 160 |
+
|
| 161 |
+
**轉換日期**: 2025-02-02
|
| 162 |
+
**GGUF 版本**: 1.0.0
|
| 163 |
+
**包含量化**: Q4_K_M, Q6_K, Q8_0, F16
|
| 164 |
+
**GPU 支援**: Metal, CUDA, Vulkan
|
QUICKSTART.md
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 快速開始指南
|
| 2 |
+
|
| 3 |
+
## 1. 下載模型
|
| 4 |
+
|
| 5 |
+
### 推薦下載 (Q6_K - 品質和速度平衡)
|
| 6 |
+
|
| 7 |
+
```bash
|
| 8 |
+
# 安裝 Hugging Face CLI
|
| 9 |
+
pip install huggingface-hub
|
| 10 |
+
huggingface-cli login
|
| 11 |
+
|
| 12 |
+
# 下載 Q6_K 版本 (1.6 GB)
|
| 13 |
+
huggingface-cli download <your-username>/Youtu-Parsing-GGUF youtu-parsing-Q6_K.gguf --local-dir ./models
|
| 14 |
+
|
| 15 |
+
# 同時下載 Vision 模型 (847 MB)
|
| 16 |
+
huggingface-cli download <your-username>/Youtu-Parsing-GGUF youtu-parsing-mmproj.gguf --local-dir ./models
|
| 17 |
+
```
|
| 18 |
+
|
| 19 |
+
### 其他量化版本
|
| 20 |
+
|
| 21 |
+
| 版本 | 大小 | 適用場景 |
|
| 22 |
+
|------|------|---------|
|
| 23 |
+
| `youtu-parsing-Q4_K_M.gguf` | 1.2 GB | 最快推理,資源受限 |
|
| 24 |
+
| `youtu-parsing-Q6_K.gguf` | 1.6 GB | **推薦**,平衡品質和速度 |
|
| 25 |
+
| `youtu-parsing-Q8_0.gguf` | 2.1 GB | 接近無損品質 |
|
| 26 |
+
| `youtu-parsing.gguf` | 3.9 GB | 原始 F16 品質 |
|
| 27 |
+
|
| 28 |
+
## 2. 安裝 llama.cpp
|
| 29 |
+
|
| 30 |
+
### macOS (Apple Silicon with Metal GPU)
|
| 31 |
+
|
| 32 |
+
```bash
|
| 33 |
+
git clone https://github.com/ggml-org/llama.cpp.git
|
| 34 |
+
cd llama.cpp
|
| 35 |
+
cmake -B build -DGGML_METAL=ON
|
| 36 |
+
cmake --build build -j
|
| 37 |
+
```
|
| 38 |
+
|
| 39 |
+
### Linux (CPU)
|
| 40 |
+
|
| 41 |
+
```bash
|
| 42 |
+
git clone https://github.com/ggml-org/llama.cpp.git
|
| 43 |
+
cd llama.cpp
|
| 44 |
+
cmake -B build
|
| 45 |
+
cmake --build build -j
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
### Linux (NVIDIA GPU with CUDA)
|
| 49 |
+
|
| 50 |
+
```bash
|
| 51 |
+
git clone https://github.com/ggml-org/llama.cpp.git
|
| 52 |
+
cd llama.cpp
|
| 53 |
+
cmake -B build -DGGML_CUDA=ON
|
| 54 |
+
cmake --build build -j
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
## 3. 測試模型
|
| 58 |
+
|
| 59 |
+
```bash
|
| 60 |
+
cd models
|
| 61 |
+
|
| 62 |
+
# 運行測試腳本
|
| 63 |
+
./test_gguf.sh
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
## 4. 開始使用
|
| 67 |
+
|
| 68 |
+
### 文本推理 (CPU)
|
| 69 |
+
|
| 70 |
+
```bash
|
| 71 |
+
./llama.cpp/build/bin/llama-cli \
|
| 72 |
+
--model youtu-parsing-Q6_K.gguf \
|
| 73 |
+
-p "解析以下內容:" \
|
| 74 |
+
-c 4096 \
|
| 75 |
+
-t 8
|
| 76 |
+
```
|
| 77 |
+
|
| 78 |
+
### 文本推理 (GPU 加速)
|
| 79 |
+
|
| 80 |
+
```bash
|
| 81 |
+
./llama.cpp/build/bin/llama-cli \
|
| 82 |
+
--model youtu-parsing-Q6_K.gguf \
|
| 83 |
+
-p "解析以下內容:" \
|
| 84 |
+
-c 4096 \
|
| 85 |
+
-ngl 999 # 啟用所有 GPU 層
|
| 86 |
+
```
|
| 87 |
+
|
| 88 |
+
### 圖像理解 (CPU)
|
| 89 |
+
|
| 90 |
+
```bash
|
| 91 |
+
./llama.cpp/build/bin/llama-mtmd-cli \
|
| 92 |
+
--model youtu-parsing-Q6_K.gguf \
|
| 93 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 94 |
+
--image doc.jpg \
|
| 95 |
+
-p "提取所有文字和表格" \
|
| 96 |
+
-c 4096
|
| 97 |
+
```
|
| 98 |
+
|
| 99 |
+
### 圖像理解 (GPU 加速)
|
| 100 |
+
|
| 101 |
+
```bash
|
| 102 |
+
./llama.cpp/build/bin/llama-mtmd-cli \
|
| 103 |
+
--model youtu-parsing-Q6_K.gguf \
|
| 104 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 105 |
+
--image doc.jpg \
|
| 106 |
+
-p "提取所有文字和表格" \
|
| 107 |
+
-c 4096 \
|
| 108 |
+
--gpu-layers 999
|
| 109 |
+
```
|
| 110 |
+
|
| 111 |
+
### API 服務器
|
| 112 |
+
|
| 113 |
+
```bash
|
| 114 |
+
./llama.cpp/build/bin/llama-server \
|
| 115 |
+
--model youtu-parsing-Q6_K.gguf \
|
| 116 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 117 |
+
--port 8080 \
|
| 118 |
+
--ngl 999
|
| 119 |
+
|
| 120 |
+
# 訪問 http://localhost:8080 使用 Web 界面
|
| 121 |
+
```
|
| 122 |
+
|
| 123 |
+
## 常見問題
|
| 124 |
+
|
| 125 |
+
**Q: 需要多少記憶體?**
|
| 126 |
+
|
| 127 |
+
A:
|
| 128 |
+
- Q4_K_M: ~2 GB RAM
|
| 129 |
+
- Q6_K: ~2.5 GB RAM
|
| 130 |
+
- Q8_0: ~3 GB RAM
|
| 131 |
+
- F16: ~5 GB RAM
|
| 132 |
+
|
| 133 |
+
**Q: 支援 GPU 嗎?**
|
| 134 |
+
|
| 135 |
+
A: 支援!
|
| 136 |
+
- Apple Silicon: Metal
|
| 137 |
+
- NVIDIA: CUDA
|
| 138 |
+
- 其他: Vulkan
|
| 139 |
+
|
| 140 |
+
**Q: 哪個量化版本最好?**
|
| 141 |
+
|
| 142 |
+
A:
|
| 143 |
+
- **Q6_K**: 推薦,品質和速度平衡
|
| 144 |
+
- **Q8_0**: 接近無損,高精度需求
|
| 145 |
+
- **Q4_K_M**: 最快,資源受限時使用
|
| 146 |
+
|
| 147 |
+
**Q: 如何量化自己的模型?**
|
| 148 |
+
|
| 149 |
+
A:
|
| 150 |
+
```bash
|
| 151 |
+
./llama.cpp/build/bin/llama-quantize \
|
| 152 |
+
input.gguf output-Q6_K.gguf Q6_K
|
| 153 |
+
```
|
README.md
ADDED
|
@@ -0,0 +1,305 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Youtu-Parsing GGUF
|
| 2 |
+
|
| 3 |
+
[](https://huggingface.co/tencent/Youtu-Parsing)
|
| 4 |
+
[](https://arxiv.org/abs/2405.04434)
|
| 5 |
+
[]()
|
| 6 |
+
[]()
|
| 7 |
+
|
| 8 |
+
這是 [Tencent Youtu-Parsing](https://huggingface.co/tencent/Youtu-Parsing) 模型的 **GGUF 格式**轉換版本,可在 [llama.cpp](https://github.com/ggml-org/llama.cpp) 和相容的推理引擎上運行。
|
| 9 |
+
|
| 10 |
+
## 📦 模型下載
|
| 11 |
+
|
| 12 |
+
| 量化格式 | 大小 | 品質 | 推薦用途 | 下載 |
|
| 13 |
+
|---------|------|------|---------|------|
|
| 14 |
+
| **Q4_K_M** | ~1.2 GB | ⭐⭐⭐⭐ 良好 | 快速推理、資源受限 | `youtu-parsing-Q4_K_M.gguf` |
|
| 15 |
+
| **Q6_K** | ~1.6 GB | ⭐⭐⭐⭐⭐ 優秀 | 平衡品質和速度 | `youtu-parsing-Q6_K.gguf` |
|
| 16 |
+
| **Q8_0** | ~2.1 GB | ⭐⭐⭐⭐⭐ 接近無損 | 高精度需求 | `youtu-parsing-Q8_0.gguf` |
|
| 17 |
+
| **F16** | ~3.9 GB | ⭐⭐⭐⭐⭐ 原始品質 | 最佳品質 | `youtu-parsing.gguf` |
|
| 18 |
+
| **mmproj** | ~847 MB | - | Vision 必須 | `youtu-parsing-mmproj.gguf` |
|
| 19 |
+
|
| 20 |
+
> 💡 **推薦**: Q6_K 是品質和速度的最佳平衡,Q8_0 接近無損品質。
|
| 21 |
+
|
| 22 |
+
## 📋 模型資訊
|
| 23 |
+
|
| 24 |
+
| 屬性 | 數值 |
|
| 25 |
+
|------|------|
|
| 26 |
+
| **原始模型** | [tencent/Youtu-Parsing](https://huggingface.co/tencent/Youtu-Parsing) |
|
| 27 |
+
| **模型類型** | Vision-Language Model (VLM) |
|
| 28 |
+
| **基礎架構** | DeepSeek2 (MLA) |
|
| 29 |
+
| **參數量** | ~2.1B (Dense) |
|
| 30 |
+
| **上下文長度** | 20,480 tokens |
|
| 31 |
+
| **詞表大小** | 182,646 |
|
| 32 |
+
| **Vision Encoder** | SigLip2 |
|
| 33 |
+
| **Projector** | YoutuVL |
|
| 34 |
+
|
| 35 |
+
### 架構特點
|
| 36 |
+
|
| 37 |
+
- **MLA (Multi-Latent Attention)**: 使用壓縮的 Key-Value 快取,記憶體效率更高
|
| 38 |
+
- **Dense FFN**: 所有 32 層均使用 Dense FFN(非 MoE)
|
| 39 |
+
- **Tied Embeddings**: `lm_head` 與 `embed_tokens` 共享權重
|
| 40 |
+
- **Window Attention**: Vision Encoder 使用 Window Attention + Full Attention 混合
|
| 41 |
+
|
| 42 |
+
## 🚀 快速開始
|
| 43 |
+
|
| 44 |
+
### 1. 安裝 llama.cpp
|
| 45 |
+
|
| 46 |
+
```bash
|
| 47 |
+
# 克隆 llama.cpp
|
| 48 |
+
git clone https://github.com/ggml-org/llama.cpp.git
|
| 49 |
+
cd llama.cpp
|
| 50 |
+
|
| 51 |
+
# 編譯 CPU 版本
|
| 52 |
+
cmake -B build
|
| 53 |
+
cmake --build build -j
|
| 54 |
+
|
| 55 |
+
# 或使用 Metal (Apple Silicon GPU)
|
| 56 |
+
cmake -B build -DGGML_METAL=ON
|
| 57 |
+
cmake --build build -j
|
| 58 |
+
|
| 59 |
+
# 或使用 CUDA (NVIDIA GPU)
|
| 60 |
+
cmake -B build -DGGML_CUDA=ON
|
| 61 |
+
cmake --build build -j
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
### 2. 下載模型
|
| 65 |
+
|
| 66 |
+
```bash
|
| 67 |
+
# 安裝 Hugging Face CLI
|
| 68 |
+
pip install huggingface-hub
|
| 69 |
+
huggingface-cli login
|
| 70 |
+
|
| 71 |
+
# 下載推薦的 Q6_K 版本
|
| 72 |
+
huggingface-cli download <your-username>/Youtu-Parsing-GGUF youtu-parsing-Q6_K.gguf --local-dir ./models
|
| 73 |
+
|
| 74 |
+
# 同時下載 Vision 模型
|
| 75 |
+
huggingface-cli download <your-username>/Youtu-Parsing-GGUF youtu-parsing-mmproj.gguf --local-dir ./models
|
| 76 |
+
```
|
| 77 |
+
|
| 78 |
+
### 3. 純文本推理 (LLM only)
|
| 79 |
+
|
| 80 |
+
```bash
|
| 81 |
+
# CPU 推理
|
| 82 |
+
./llama.cpp/build/bin/llama-cli \
|
| 83 |
+
--model youtu-parsing-Q6_K.gguf \
|
| 84 |
+
--prompt "請解析以下文件內容:" \
|
| 85 |
+
--ctx-size 4096 \
|
| 86 |
+
--temp 0.1
|
| 87 |
+
|
| 88 |
+
# GPU 加速 (Metal/CUDA)
|
| 89 |
+
./llama.cpp/build/bin/llama-cli \
|
| 90 |
+
--model youtu-parsing-Q6_K.gguf \
|
| 91 |
+
--prompt "請解析以下文件內容:" \
|
| 92 |
+
--ctx-size 4096 \
|
| 93 |
+
--temp 0.1 \
|
| 94 |
+
--ngl 999 # 啟用所有 GPU 層
|
| 95 |
+
```
|
| 96 |
+
|
| 97 |
+
### 4. 圖像理解推理 (Vision-Language)
|
| 98 |
+
|
| 99 |
+
```bash
|
| 100 |
+
# CPU 推理
|
| 101 |
+
./llama.cpp/build/bin/llama-mtmd-cli \
|
| 102 |
+
--model youtu-parsing-Q6_K.gguf \
|
| 103 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 104 |
+
--image document.jpg \
|
| 105 |
+
--prompt "請解析這份文件,提取所有文字和表格。" \
|
| 106 |
+
--ctx-size 4096 \
|
| 107 |
+
--temp 0.1
|
| 108 |
+
|
| 109 |
+
# GPU 加速
|
| 110 |
+
./llama.cpp/build/bin/llama-mtmd-cli \
|
| 111 |
+
--model youtu-parsing-Q6_K.gguf \
|
| 112 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 113 |
+
--image document.jpg \
|
| 114 |
+
--prompt "請解析這份文件,提取所有文字和表格。" \
|
| 115 |
+
--ctx-size 4096 \
|
| 116 |
+
--temp 0.1 \
|
| 117 |
+
--gpu-layers 999
|
| 118 |
+
```
|
| 119 |
+
|
| 120 |
+
## ⚡ GPU 加速指南
|
| 121 |
+
|
| 122 |
+
### Apple Silicon (Metal)
|
| 123 |
+
|
| 124 |
+
```bash
|
| 125 |
+
# 編譯 Metal 版本
|
| 126 |
+
cmake -B build -DGGML_METAL=ON
|
| 127 |
+
cmake --build build -j
|
| 128 |
+
|
| 129 |
+
# 運行時自動使用 GPU
|
| 130 |
+
./build/bin/llama-cli --model model.gguf --ngl 999
|
| 131 |
+
|
| 132 |
+
# --ngl 999 表示將所有層 offload 到 GPU
|
| 133 |
+
```
|
| 134 |
+
|
| 135 |
+
### NVIDIA GPU (CUDA)
|
| 136 |
+
|
| 137 |
+
```bash
|
| 138 |
+
# 編譯 CUDA 版本
|
| 139 |
+
cmake -B build -DGGML_CUDA=ON
|
| 140 |
+
cmake --build build -j
|
| 141 |
+
|
| 142 |
+
# 運行
|
| 143 |
+
./build/bin/llama-cli --model model.gguf --ngl 999
|
| 144 |
+
```
|
| 145 |
+
|
| 146 |
+
### Vulkan (跨平台)
|
| 147 |
+
|
| 148 |
+
```bash
|
| 149 |
+
# 編譯 Vulkan 版本
|
| 150 |
+
cmake -B build -DGGML_VULKAN=ON
|
| 151 |
+
cmake --build build -j
|
| 152 |
+
```
|
| 153 |
+
|
| 154 |
+
## 📝 使用範例
|
| 155 |
+
|
| 156 |
+
### OCR 文字識別
|
| 157 |
+
|
| 158 |
+
```bash
|
| 159 |
+
./llama.cpp/build/bin/llama-mtmd-cli \
|
| 160 |
+
--model youtu-parsing-Q6_K.gguf \
|
| 161 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 162 |
+
--image receipt.jpg \
|
| 163 |
+
--prompt "檢測並識別圖片中的文字,將文本坐標格式化輸出。" \
|
| 164 |
+
--ctx-size 2048 \
|
| 165 |
+
--ngl 999
|
| 166 |
+
```
|
| 167 |
+
|
| 168 |
+
### 表格解析
|
| 169 |
+
|
| 170 |
+
```bash
|
| 171 |
+
./llama.cpp/build/bin/llama-mtmd-cli \
|
| 172 |
+
--model youtu-parsing-Q6_K.gguf \
|
| 173 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 174 |
+
--image table.png \
|
| 175 |
+
--prompt "把圖中的表格解析為 HTML 格式。" \
|
| 176 |
+
--ctx-size 4096 \
|
| 177 |
+
--ngl 999
|
| 178 |
+
```
|
| 179 |
+
|
| 180 |
+
### 公式識別
|
| 181 |
+
|
| 182 |
+
```bash
|
| 183 |
+
./llama.cpp/build/bin/llama-mtmd-cli \
|
| 184 |
+
--model youtu-parsing-Q8_0.gguf \
|
| 185 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 186 |
+
--image formula.png \
|
| 187 |
+
--prompt "識別圖片中的公式,用 LaTeX 格式表示。" \
|
| 188 |
+
--ctx-size 2048 \
|
| 189 |
+
--ngl 999
|
| 190 |
+
```
|
| 191 |
+
|
| 192 |
+
### 文檔解析
|
| 193 |
+
|
| 194 |
+
```bash
|
| 195 |
+
./llama.cpp/build/bin/llama-mtmd-cli \
|
| 196 |
+
--model youtu-parsing-Q6_K.gguf \
|
| 197 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 198 |
+
--image document.pdf \
|
| 199 |
+
--prompt "提取文檔圖片中的所有信息,用 markdown 格式表示。表格用 HTML,公式用 LaTeX,按照閱讀順序組織。" \
|
| 200 |
+
--ctx-size 8192 \
|
| 201 |
+
--ngl 999
|
| 202 |
+
```
|
| 203 |
+
|
| 204 |
+
## 🔧 量化說明
|
| 205 |
+
|
| 206 |
+
### 量化類型對比
|
| 207 |
+
|
| 208 |
+
| 格式 | 每權重位元 | 檔案大小 | 品質 | 速度 | 推薦 |
|
| 209 |
+
|------|-----------|---------|------|------|------|
|
| 210 |
+
| **F16** | 16 bit | 3.9 GB | ⭐⭐⭐⭐⭐ | 慢 | 研究用途 |
|
| 211 |
+
| **Q8_0** | 8 bit | 2.1 GB | ⭐⭐⭐⭐⭐ | 快 | 高精度需求 |
|
| 212 |
+
| **Q6_K** | 6 bit | 1.6 GB | ⭐⭐⭐⭐⭐ | 更快 | **推薦** |
|
| 213 |
+
| **Q5_K_M** | 5 bit | 1.4 GB | ⭐⭐⭐⭐ | 更快 | 平衡選擇 |
|
| 214 |
+
| **Q4_K_M** | 4 bit | 1.2 GB | ⭐⭐⭐⭐ | 最快 | 速度優先 |
|
| 215 |
+
|
| 216 |
+
### 如何自行量化
|
| 217 |
+
|
| 218 |
+
如果你有原始的 F16 模型,可以自行量化:
|
| 219 |
+
|
| 220 |
+
```bash
|
| 221 |
+
# Q8_0 (接近無損)
|
| 222 |
+
./llama.cpp/build/bin/llama-quantize \
|
| 223 |
+
youtu-parsing.gguf \
|
| 224 |
+
youtu-parsing-Q8_0.gguf \
|
| 225 |
+
Q8_0
|
| 226 |
+
|
| 227 |
+
# Q6_K (高品質)
|
| 228 |
+
./llama.cpp/build/bin/llama-quantize \
|
| 229 |
+
youtu-parsing.gguf \
|
| 230 |
+
youtu-parsing-Q6_K.gguf \
|
| 231 |
+
Q6_K
|
| 232 |
+
|
| 233 |
+
# Q4_K_M (快速)
|
| 234 |
+
./llama.cpp/build/bin/llama-quantize \
|
| 235 |
+
youtu-parsing.gguf \
|
| 236 |
+
youtu-parsing-Q4_K_M.gguf \
|
| 237 |
+
Q4_K_M
|
| 238 |
+
```
|
| 239 |
+
|
| 240 |
+
## 💻 硬體需求
|
| 241 |
+
|
| 242 |
+
### 記憶體需求
|
| 243 |
+
|
| 244 |
+
| 量化格式 | CPU 推理 | GPU 推理 |
|
| 245 |
+
|---------|---------|---------|
|
| 246 |
+
| Q4_K_M | ~2 GB | ~1.5 GB |
|
| 247 |
+
| Q6_K | ~2.5 GB | ~2 GB |
|
| 248 |
+
| Q8_0 | ~3 GB | ~2.5 GB |
|
| 249 |
+
| F16 | ~5 GB | ~4 GB |
|
| 250 |
+
|
| 251 |
+
### 建議配置
|
| 252 |
+
|
| 253 |
+
- **最低配置**: 4GB RAM,運行 Q4_K_M
|
| 254 |
+
- **推薦配置**: 8GB RAM + Apple Silicon / NVIDIA GPU,運行 Q6_K
|
| 255 |
+
- **最佳配置**: 16GB RAM + 高端 GPU,運行 Q8_0 或 F16
|
| 256 |
+
|
| 257 |
+
## 🐛 故障排除
|
| 258 |
+
|
| 259 |
+
### 問題: GPU 加速無效
|
| 260 |
+
|
| 261 |
+
**解決**: 確認編譯時啟用了正確的後端:
|
| 262 |
+
```bash
|
| 263 |
+
# 檢查支持的後端
|
| 264 |
+
./llama.cpp/build/bin/llama-cli --list-devices
|
| 265 |
+
```
|
| 266 |
+
|
| 267 |
+
### 問題: 記憶體不足 (OOM)
|
| 268 |
+
|
| 269 |
+
**解決**: 使用更小的量化模型或減少上下文長度:
|
| 270 |
+
```bash
|
| 271 |
+
# 使用 Q4_K_M 並減少上下文
|
| 272 |
+
--model youtu-parsing-Q4_K_M.gguf --ctx-size 2048
|
| 273 |
+
```
|
| 274 |
+
|
| 275 |
+
### 問題: Vision 功能無法使用
|
| 276 |
+
|
| 277 |
+
**解決**: 確保同時載入兩個檔案:
|
| 278 |
+
```bash
|
| 279 |
+
--model youtu-parsing.gguf --mmproj youtu-parsing-mmproj.gguf
|
| 280 |
+
```
|
| 281 |
+
|
| 282 |
+
## 📚 相關資源
|
| 283 |
+
|
| 284 |
+
- [原始模型](https://huggingface.co/tencent/Youtu-Parsing)
|
| 285 |
+
- [llama.cpp 文檔](https://github.com/ggml-org/llama.cpp/blob/master/docs)
|
| 286 |
+
- [Youtu-Parsing 技術報告](https://arxiv.org/abs/2601.20430)
|
| 287 |
+
- [DeepSeek-V2 MLA 論文](https://arxiv.org/abs/2405.04434)
|
| 288 |
+
|
| 289 |
+
## ⚖️ 許可證
|
| 290 |
+
|
| 291 |
+
本 GGUF 轉換版本遵循與原始模型相同的 [Youtu-Parsing License](https://huggingface.co/tencent/Youtu-Parsing/blob/main/LICENSE.txt)。
|
| 292 |
+
|
| 293 |
+
原始模型: © 2025 Tencent Youtu Lab
|
| 294 |
+
|
| 295 |
+
## 🙏 致謝
|
| 296 |
+
|
| 297 |
+
- [Tencent Youtu Lab](https://huggingface.co/tencent) 開發了 Youtu-Parsing 模型
|
| 298 |
+
- [llama.cpp](https://github.com/ggml-org/llama.cpp) 團隊提供了優秀的推理框架
|
| 299 |
+
- [Hugging Face](https://huggingface.co) 提供了模型託管平台
|
| 300 |
+
|
| 301 |
+
---
|
| 302 |
+
|
| 303 |
+
**最後更新**: 2025-02-02
|
| 304 |
+
**GGUF 版本**: v3
|
| 305 |
+
**llama.cpp 相容版本**: >= b4300 (commit 1239267+)
|
Youtu-Parsing-GGUF/.gitattributes
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Git LFS 配置 for GGUF 模型文件
|
| 2 |
+
*.gguf filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
|
| 4 |
+
# 其他大文件
|
| 5 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
Youtu-Parsing-GGUF/CONVERSION.md
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# GGUF 轉換說明
|
| 2 |
+
|
| 3 |
+
本文檔說明如何將原始 Hugging Face 模型轉換為 GGUF 格式。
|
| 4 |
+
|
| 5 |
+
## 前置需求
|
| 6 |
+
|
| 7 |
+
- Python 3.10+
|
| 8 |
+
- 足夠的磁碟空間 (~10 GB 用於中間檔案)
|
| 9 |
+
- 原始模型權重
|
| 10 |
+
|
| 11 |
+
## 環境設置
|
| 12 |
+
|
| 13 |
+
```bash
|
| 14 |
+
# 1. 創建虛擬環境
|
| 15 |
+
python3 -m venv venv-youtu
|
| 16 |
+
source venv-youtu/bin/activate
|
| 17 |
+
|
| 18 |
+
# 2. 安裝依賴
|
| 19 |
+
pip install torch safetensors transformers numpy protobuf sentencepiece
|
| 20 |
+
|
| 21 |
+
# 3. 克隆 llama.cpp
|
| 22 |
+
git clone --depth 1 https://github.com/ggml-org/llama.cpp.git
|
| 23 |
+
cd llama.cpp
|
| 24 |
+
pip install -e ./gguf-py
|
| 25 |
+
```
|
| 26 |
+
|
| 27 |
+
## 轉換步驟
|
| 28 |
+
|
| 29 |
+
### 步驟 1: 下載原始模型
|
| 30 |
+
|
| 31 |
+
```bash
|
| 32 |
+
huggingface-cli download tencent/Youtu-Parsing --local-dir ./Youtu-Parsing
|
| 33 |
+
```
|
| 34 |
+
|
| 35 |
+
### 步驟 2: 修復模型索引
|
| 36 |
+
|
| 37 |
+
由於模型使用 `tie_word_embeddings=true`,需要運行修復腳本:
|
| 38 |
+
|
| 39 |
+
```bash
|
| 40 |
+
python3 fix_model_index.py ./Youtu-Parsing
|
| 41 |
+
```
|
| 42 |
+
|
| 43 |
+
### 步驟 3: 轉換 LLM 模型
|
| 44 |
+
|
| 45 |
+
```bash
|
| 46 |
+
cd llama.cpp
|
| 47 |
+
python3 convert_hf_to_gguf.py ../Youtu-Parsing \
|
| 48 |
+
--outfile youtu-parsing.gguf \
|
| 49 |
+
--outtype f16
|
| 50 |
+
```
|
| 51 |
+
|
| 52 |
+
### 步驟 4: 轉換 Vision 模型
|
| 53 |
+
|
| 54 |
+
```bash
|
| 55 |
+
python3 convert_hf_to_gguf.py ../Youtu-Parsing \
|
| 56 |
+
--outfile youtu-parsing-mmproj.gguf \
|
| 57 |
+
--outtype f16 \
|
| 58 |
+
--mmproj
|
| 59 |
+
```
|
| 60 |
+
|
| 61 |
+
## 驗證轉換
|
| 62 |
+
|
| 63 |
+
```bash
|
| 64 |
+
# 編譯 llama.cpp
|
| 65 |
+
cmake -B build
|
| 66 |
+
cmake --build build -j
|
| 67 |
+
|
| 68 |
+
# 測試載入
|
| 69 |
+
./build/bin/llama-mtmd-cli \
|
| 70 |
+
--model youtu-parsing.gguf \
|
| 71 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 72 |
+
-c 2048
|
| 73 |
+
```
|
| 74 |
+
|
| 75 |
+
## 常見問題
|
| 76 |
+
|
| 77 |
+
**Q: 為什麼需要修復 index.json?**
|
| 78 |
+
|
| 79 |
+
A: 原始模型的 index.json 錯誤地包含了 `lm_head.weight` 條目,但實際上這個權重與 `embed_tokens.weight` 共享,並不存在於 safetensors 檔案中。
|
| 80 |
+
|
| 81 |
+
**Q: 可以轉換為其他量化格式嗎?**
|
| 82 |
+
|
| 83 |
+
A: 可以!建議先轉換為 F16,然後使用 llama.cpp 的量化工具:
|
| 84 |
+
|
| 85 |
+
```bash
|
| 86 |
+
./build/bin/llama-quantize youtu-parsing.gguf youtu-parsing-Q4_K_M.gguf Q4_K_M
|
| 87 |
+
```
|
| 88 |
+
|
| 89 |
+
## 參考資源
|
| 90 |
+
|
| 91 |
+
- llama.cpp: https://github.com/ggml-org/llama.cpp
|
| 92 |
+
- 原始模型: https://huggingface.co/tencent/Youtu-Parsing
|
Youtu-Parsing-GGUF/MODEL_CARD.md
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Model Card: Youtu-Parsing GGUF
|
| 2 |
+
|
| 3 |
+
## Model Details
|
| 4 |
+
|
| 5 |
+
### Overview
|
| 6 |
+
|
| 7 |
+
This is the **GGUF format** conversion of [Tencent Youtu-Parsing](https://huggingface.co/tencent/Youtu-Parsing), a state-of-the-art vision-language model specialized for document parsing, OCR, and multimodal understanding.
|
| 8 |
+
|
| 9 |
+
### Model Specifications
|
| 10 |
+
|
| 11 |
+
| Attribute | Value |
|
| 12 |
+
|-----------|-------|
|
| 13 |
+
| **Base Model** | Youtu-LLM 2B |
|
| 14 |
+
| **Architecture** | DeepSeek2 (MLA) / Dense |
|
| 15 |
+
| **Parameters** | ~2.1B |
|
| 16 |
+
| **Context Length** | 20,480 tokens |
|
| 17 |
+
| **Vocabulary Size** | 182,646 |
|
| 18 |
+
| **Vision Encoder** | SigLip2 |
|
| 19 |
+
| **Projector Type** | YoutuVL |
|
| 20 |
+
|
| 21 |
+
### Architecture Highlights
|
| 22 |
+
|
| 23 |
+
1. **MLA (Multi-Latent Attention)**
|
| 24 |
+
- Compressed KV cache for memory efficiency
|
| 25 |
+
- Q projection: LoRA rank 1536
|
| 26 |
+
- KV projection: LoRA rank 512
|
| 27 |
+
|
| 28 |
+
2. **Dense FFN**
|
| 29 |
+
- All 32 layers use dense feed-forward networks
|
| 30 |
+
- Not MoE (Mixture of Experts)
|
| 31 |
+
|
| 32 |
+
3. **Vision Encoder**
|
| 33 |
+
- SigLip2 architecture with window attention
|
| 34 |
+
- Supports high-resolution image understanding
|
| 35 |
+
- Patch merger (2x2 spatial merge)
|
| 36 |
+
|
| 37 |
+
## Files
|
| 38 |
+
|
| 39 |
+
| File | Size | Description |
|
| 40 |
+
|------|------|-------------|
|
| 41 |
+
| `youtu-parsing.gguf` | ~3.9 GB | Language model (DeepSeek2 architecture) |
|
| 42 |
+
| `youtu-parsing-mmproj.gguf` | ~847 MB | Vision encoder + projector |
|
| 43 |
+
|
| 44 |
+
## Usage
|
| 45 |
+
|
| 46 |
+
### Requirements
|
| 47 |
+
|
| 48 |
+
- llama.cpp (commit 1239267 or later)
|
| 49 |
+
- ~6GB RAM for F16 inference
|
| 50 |
+
- ~3GB RAM for Q4_K_M quantized inference
|
| 51 |
+
|
| 52 |
+
### Quick Start
|
| 53 |
+
|
| 54 |
+
```bash
|
| 55 |
+
# Clone llama.cpp
|
| 56 |
+
git clone https://github.com/ggml-org/llama.cpp.git
|
| 57 |
+
cd llama.cpp
|
| 58 |
+
cmake -B build
|
| 59 |
+
cmake --build build -j
|
| 60 |
+
|
| 61 |
+
# Text-only inference
|
| 62 |
+
./build/bin/llama-cli \
|
| 63 |
+
--model youtu-parsing.gguf \
|
| 64 |
+
--prompt "Parse this document:" \
|
| 65 |
+
--ctx-size 4096
|
| 66 |
+
|
| 67 |
+
# Vision-Language inference
|
| 68 |
+
./build/bin/llama-mtmd-cli \
|
| 69 |
+
--model youtu-parsing.gguf \
|
| 70 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 71 |
+
--image document.jpg \
|
| 72 |
+
--prompt "Extract all text and tables:" \
|
| 73 |
+
--ctx-size 4096
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
### Python Example
|
| 77 |
+
|
| 78 |
+
```python
|
| 79 |
+
from llama_cpp import Llama
|
| 80 |
+
|
| 81 |
+
llm = Llama(
|
| 82 |
+
model_path="youtu-parsing.gguf",
|
| 83 |
+
n_ctx=4096
|
| 84 |
+
)
|
| 85 |
+
|
| 86 |
+
output = llm(
|
| 87 |
+
"Extract text from this document",
|
| 88 |
+
max_tokens=1024,
|
| 89 |
+
temperature=0.1
|
| 90 |
+
)
|
| 91 |
+
```
|
| 92 |
+
|
| 93 |
+
## Capabilities
|
| 94 |
+
|
| 95 |
+
This model excels at:
|
| 96 |
+
|
| 97 |
+
- **Text Recognition (OCR)**: Accurate text detection and recognition
|
| 98 |
+
- **Table Parsing**: Convert tables to HTML format
|
| 99 |
+
- **Formula Recognition**: Convert mathematical expressions to LaTeX
|
| 100 |
+
- **Chart Understanding**: Convert charts to markdown/Mermaid
|
| 101 |
+
- **Document Structure**: Preserve reading order and layout
|
| 102 |
+
|
| 103 |
+
## Limitations
|
| 104 |
+
|
| 105 |
+
- Maximum context length: 20,480 tokens
|
| 106 |
+
- Best performance on high-resolution images (560x560 or higher)
|
| 107 |
+
- English and Chinese optimized
|
| 108 |
+
|
| 109 |
+
## Quantization
|
| 110 |
+
|
| 111 |
+
You can quantize the model further using llama.cpp:
|
| 112 |
+
|
| 113 |
+
```bash
|
| 114 |
+
# Q4_K_M (recommended, ~1.5GB)
|
| 115 |
+
./llama.cpp/build/bin/llama-quantize \
|
| 116 |
+
youtu-parsing.gguf \
|
| 117 |
+
youtu-parsing-Q4_K_M.gguf \
|
| 118 |
+
Q4_K_M
|
| 119 |
+
|
| 120 |
+
# Q8_0 (high quality, ~2.3GB)
|
| 121 |
+
./llama.cpp/build/bin/llama-quantize \
|
| 122 |
+
youtu-parsing.gguf \
|
| 123 |
+
youtu-parsing-Q8_0.gguf \
|
| 124 |
+
Q8_0
|
| 125 |
+
```
|
| 126 |
+
|
| 127 |
+
## Citation
|
| 128 |
+
|
| 129 |
+
```bibtex
|
| 130 |
+
@article{youtu-parsing,
|
| 131 |
+
title={Youtu-Parsing: Perception, Structuring and Recognition via High-Parallelism Decoding},
|
| 132 |
+
author={Tencent Youtu Lab},
|
| 133 |
+
year={2026},
|
| 134 |
+
eprint={2601.20430},
|
| 135 |
+
archivePrefix={arXiv},
|
| 136 |
+
primaryClass={cs.CV}
|
| 137 |
+
}
|
| 138 |
+
```
|
| 139 |
+
|
| 140 |
+
## License
|
| 141 |
+
|
| 142 |
+
This GGUF conversion follows the same license as the original model: Youtu-Parsing License
|
| 143 |
+
|
| 144 |
+
## Acknowledgments
|
| 145 |
+
|
| 146 |
+
- Original model by [Tencent Youtu Lab](https://huggingface.co/tencent)
|
| 147 |
+
- GGUF conversion powered by [llama.cpp](https://github.com/ggml-org/llama.cpp)
|
Youtu-Parsing-GGUF/QUICKSTART.md
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 快速開始指南
|
| 2 |
+
|
| 3 |
+
## 1. 下載模型
|
| 4 |
+
|
| 5 |
+
### 推薦下載 (Q6_K - 品質和速度平衡)
|
| 6 |
+
|
| 7 |
+
```bash
|
| 8 |
+
# 安裝 Hugging Face CLI
|
| 9 |
+
pip install huggingface-hub
|
| 10 |
+
huggingface-cli login
|
| 11 |
+
|
| 12 |
+
# 下載 Q6_K 版本 (1.6 GB)
|
| 13 |
+
huggingface-cli download <your-username>/Youtu-Parsing-GGUF youtu-parsing-Q6_K.gguf --local-dir ./models
|
| 14 |
+
|
| 15 |
+
# 同時下載 Vision 模型 (847 MB)
|
| 16 |
+
huggingface-cli download <your-username>/Youtu-Parsing-GGUF youtu-parsing-mmproj.gguf --local-dir ./models
|
| 17 |
+
```
|
| 18 |
+
|
| 19 |
+
### 其他量化版本
|
| 20 |
+
|
| 21 |
+
| 版本 | 大小 | 適用場景 |
|
| 22 |
+
|------|------|---------|
|
| 23 |
+
| `youtu-parsing-Q4_K_M.gguf` | 1.2 GB | 最快推理,資源受限 |
|
| 24 |
+
| `youtu-parsing-Q6_K.gguf` | 1.6 GB | **推薦**,平衡品質和速度 |
|
| 25 |
+
| `youtu-parsing-Q8_0.gguf` | 2.1 GB | 接近無損品質 |
|
| 26 |
+
| `youtu-parsing.gguf` | 3.9 GB | 原始 F16 品質 |
|
| 27 |
+
|
| 28 |
+
## 2. 安裝 llama.cpp
|
| 29 |
+
|
| 30 |
+
### macOS (Apple Silicon with Metal GPU)
|
| 31 |
+
|
| 32 |
+
```bash
|
| 33 |
+
git clone https://github.com/ggml-org/llama.cpp.git
|
| 34 |
+
cd llama.cpp
|
| 35 |
+
cmake -B build -DGGML_METAL=ON
|
| 36 |
+
cmake --build build -j
|
| 37 |
+
```
|
| 38 |
+
|
| 39 |
+
### Linux (CPU)
|
| 40 |
+
|
| 41 |
+
```bash
|
| 42 |
+
git clone https://github.com/ggml-org/llama.cpp.git
|
| 43 |
+
cd llama.cpp
|
| 44 |
+
cmake -B build
|
| 45 |
+
cmake --build build -j
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
### Linux (NVIDIA GPU with CUDA)
|
| 49 |
+
|
| 50 |
+
```bash
|
| 51 |
+
git clone https://github.com/ggml-org/llama.cpp.git
|
| 52 |
+
cd llama.cpp
|
| 53 |
+
cmake -B build -DGGML_CUDA=ON
|
| 54 |
+
cmake --build build -j
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
## 3. 測試模型
|
| 58 |
+
|
| 59 |
+
```bash
|
| 60 |
+
cd models
|
| 61 |
+
|
| 62 |
+
# 運行測試腳本
|
| 63 |
+
./test_gguf.sh
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
## 4. 開始使用
|
| 67 |
+
|
| 68 |
+
### 文本推理 (CPU)
|
| 69 |
+
|
| 70 |
+
```bash
|
| 71 |
+
./llama.cpp/build/bin/llama-cli \
|
| 72 |
+
--model youtu-parsing-Q6_K.gguf \
|
| 73 |
+
-p "解析以下內容:" \
|
| 74 |
+
-c 4096 \
|
| 75 |
+
-t 8
|
| 76 |
+
```
|
| 77 |
+
|
| 78 |
+
### 文本推理 (GPU 加速)
|
| 79 |
+
|
| 80 |
+
```bash
|
| 81 |
+
./llama.cpp/build/bin/llama-cli \
|
| 82 |
+
--model youtu-parsing-Q6_K.gguf \
|
| 83 |
+
-p "解析以下內容:" \
|
| 84 |
+
-c 4096 \
|
| 85 |
+
-ngl 999 # 啟用所有 GPU 層
|
| 86 |
+
```
|
| 87 |
+
|
| 88 |
+
### 圖像理解 (CPU)
|
| 89 |
+
|
| 90 |
+
```bash
|
| 91 |
+
./llama.cpp/build/bin/llama-mtmd-cli \
|
| 92 |
+
--model youtu-parsing-Q6_K.gguf \
|
| 93 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 94 |
+
--image doc.jpg \
|
| 95 |
+
-p "提取所有文字和表格" \
|
| 96 |
+
-c 4096
|
| 97 |
+
```
|
| 98 |
+
|
| 99 |
+
### 圖像理解 (GPU 加速)
|
| 100 |
+
|
| 101 |
+
```bash
|
| 102 |
+
./llama.cpp/build/bin/llama-mtmd-cli \
|
| 103 |
+
--model youtu-parsing-Q6_K.gguf \
|
| 104 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 105 |
+
--image doc.jpg \
|
| 106 |
+
-p "提取所有文字和表格" \
|
| 107 |
+
-c 4096 \
|
| 108 |
+
--gpu-layers 999
|
| 109 |
+
```
|
| 110 |
+
|
| 111 |
+
### API 服務器
|
| 112 |
+
|
| 113 |
+
```bash
|
| 114 |
+
./llama.cpp/build/bin/llama-server \
|
| 115 |
+
--model youtu-parsing-Q6_K.gguf \
|
| 116 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 117 |
+
--port 8080 \
|
| 118 |
+
--ngl 999
|
| 119 |
+
|
| 120 |
+
# 訪問 http://localhost:8080 使用 Web 界面
|
| 121 |
+
```
|
| 122 |
+
|
| 123 |
+
## 常見問題
|
| 124 |
+
|
| 125 |
+
**Q: 需要多少記憶體?**
|
| 126 |
+
|
| 127 |
+
A:
|
| 128 |
+
- Q4_K_M: ~2 GB RAM
|
| 129 |
+
- Q6_K: ~2.5 GB RAM
|
| 130 |
+
- Q8_0: ~3 GB RAM
|
| 131 |
+
- F16: ~5 GB RAM
|
| 132 |
+
|
| 133 |
+
**Q: 支援 GPU 嗎?**
|
| 134 |
+
|
| 135 |
+
A: 支援!
|
| 136 |
+
- Apple Silicon: Metal
|
| 137 |
+
- NVIDIA: CUDA
|
| 138 |
+
- 其他: Vulkan
|
| 139 |
+
|
| 140 |
+
**Q: 哪個量化版本最好?**
|
| 141 |
+
|
| 142 |
+
A:
|
| 143 |
+
- **Q6_K**: 推薦,品質和速度平衡
|
| 144 |
+
- **Q8_0**: 接近無損,高精度需求
|
| 145 |
+
- **Q4_K_M**: 最快,資源受限時使用
|
| 146 |
+
|
| 147 |
+
**Q: 如何量化自己的模型?**
|
| 148 |
+
|
| 149 |
+
A:
|
| 150 |
+
```bash
|
| 151 |
+
./llama.cpp/build/bin/llama-quantize \
|
| 152 |
+
input.gguf output-Q6_K.gguf Q6_K
|
| 153 |
+
```
|
Youtu-Parsing-GGUF/README.md
ADDED
|
@@ -0,0 +1,305 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Youtu-Parsing GGUF
|
| 2 |
+
|
| 3 |
+
[](https://huggingface.co/tencent/Youtu-Parsing)
|
| 4 |
+
[](https://arxiv.org/abs/2405.04434)
|
| 5 |
+
[]()
|
| 6 |
+
[]()
|
| 7 |
+
|
| 8 |
+
這是 [Tencent Youtu-Parsing](https://huggingface.co/tencent/Youtu-Parsing) 模型的 **GGUF 格式**轉換版本,可在 [llama.cpp](https://github.com/ggml-org/llama.cpp) 和相容的推理引擎上運行。
|
| 9 |
+
|
| 10 |
+
## 📦 模型下載
|
| 11 |
+
|
| 12 |
+
| 量化格式 | 大小 | 品質 | 推薦用途 | 下載 |
|
| 13 |
+
|---------|------|------|---------|------|
|
| 14 |
+
| **Q4_K_M** | ~1.2 GB | ⭐⭐⭐⭐ 良好 | 快速推理、資源受限 | `youtu-parsing-Q4_K_M.gguf` |
|
| 15 |
+
| **Q6_K** | ~1.6 GB | ⭐⭐⭐⭐⭐ 優秀 | 平衡品質和速度 | `youtu-parsing-Q6_K.gguf` |
|
| 16 |
+
| **Q8_0** | ~2.1 GB | ⭐⭐⭐⭐⭐ 接近無損 | 高精度需求 | `youtu-parsing-Q8_0.gguf` |
|
| 17 |
+
| **F16** | ~3.9 GB | ⭐⭐⭐⭐⭐ 原始品質 | 最佳品質 | `youtu-parsing.gguf` |
|
| 18 |
+
| **mmproj** | ~847 MB | - | Vision 必須 | `youtu-parsing-mmproj.gguf` |
|
| 19 |
+
|
| 20 |
+
> 💡 **推薦**: Q6_K 是品質和速度的最佳平衡,Q8_0 接近無損品質。
|
| 21 |
+
|
| 22 |
+
## 📋 模型資訊
|
| 23 |
+
|
| 24 |
+
| 屬性 | 數值 |
|
| 25 |
+
|------|------|
|
| 26 |
+
| **原始模型** | [tencent/Youtu-Parsing](https://huggingface.co/tencent/Youtu-Parsing) |
|
| 27 |
+
| **模型類型** | Vision-Language Model (VLM) |
|
| 28 |
+
| **基礎架構** | DeepSeek2 (MLA) |
|
| 29 |
+
| **參數量** | ~2.1B (Dense) |
|
| 30 |
+
| **上下文長度** | 20,480 tokens |
|
| 31 |
+
| **詞表大小** | 182,646 |
|
| 32 |
+
| **Vision Encoder** | SigLip2 |
|
| 33 |
+
| **Projector** | YoutuVL |
|
| 34 |
+
|
| 35 |
+
### 架構特點
|
| 36 |
+
|
| 37 |
+
- **MLA (Multi-Latent Attention)**: 使用壓縮的 Key-Value 快取,記憶體效率更高
|
| 38 |
+
- **Dense FFN**: 所有 32 層均使用 Dense FFN(非 MoE)
|
| 39 |
+
- **Tied Embeddings**: `lm_head` 與 `embed_tokens` 共享權重
|
| 40 |
+
- **Window Attention**: Vision Encoder 使用 Window Attention + Full Attention 混合
|
| 41 |
+
|
| 42 |
+
## 🚀 快速開始
|
| 43 |
+
|
| 44 |
+
### 1. 安裝 llama.cpp
|
| 45 |
+
|
| 46 |
+
```bash
|
| 47 |
+
# 克隆 llama.cpp
|
| 48 |
+
git clone https://github.com/ggml-org/llama.cpp.git
|
| 49 |
+
cd llama.cpp
|
| 50 |
+
|
| 51 |
+
# 編譯 CPU 版本
|
| 52 |
+
cmake -B build
|
| 53 |
+
cmake --build build -j
|
| 54 |
+
|
| 55 |
+
# 或使用 Metal (Apple Silicon GPU)
|
| 56 |
+
cmake -B build -DGGML_METAL=ON
|
| 57 |
+
cmake --build build -j
|
| 58 |
+
|
| 59 |
+
# 或使用 CUDA (NVIDIA GPU)
|
| 60 |
+
cmake -B build -DGGML_CUDA=ON
|
| 61 |
+
cmake --build build -j
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
### 2. 下載模型
|
| 65 |
+
|
| 66 |
+
```bash
|
| 67 |
+
# 安裝 Hugging Face CLI
|
| 68 |
+
pip install huggingface-hub
|
| 69 |
+
huggingface-cli login
|
| 70 |
+
|
| 71 |
+
# 下載推薦的 Q6_K 版本
|
| 72 |
+
huggingface-cli download <your-username>/Youtu-Parsing-GGUF youtu-parsing-Q6_K.gguf --local-dir ./models
|
| 73 |
+
|
| 74 |
+
# 同時下載 Vision 模型
|
| 75 |
+
huggingface-cli download <your-username>/Youtu-Parsing-GGUF youtu-parsing-mmproj.gguf --local-dir ./models
|
| 76 |
+
```
|
| 77 |
+
|
| 78 |
+
### 3. 純文本推理 (LLM only)
|
| 79 |
+
|
| 80 |
+
```bash
|
| 81 |
+
# CPU 推理
|
| 82 |
+
./llama.cpp/build/bin/llama-cli \
|
| 83 |
+
--model youtu-parsing-Q6_K.gguf \
|
| 84 |
+
--prompt "請解析以下文件內容:" \
|
| 85 |
+
--ctx-size 4096 \
|
| 86 |
+
--temp 0.1
|
| 87 |
+
|
| 88 |
+
# GPU 加速 (Metal/CUDA)
|
| 89 |
+
./llama.cpp/build/bin/llama-cli \
|
| 90 |
+
--model youtu-parsing-Q6_K.gguf \
|
| 91 |
+
--prompt "請解析以下文件內容:" \
|
| 92 |
+
--ctx-size 4096 \
|
| 93 |
+
--temp 0.1 \
|
| 94 |
+
--ngl 999 # 啟用所有 GPU 層
|
| 95 |
+
```
|
| 96 |
+
|
| 97 |
+
### 4. 圖像理解推理 (Vision-Language)
|
| 98 |
+
|
| 99 |
+
```bash
|
| 100 |
+
# CPU 推理
|
| 101 |
+
./llama.cpp/build/bin/llama-mtmd-cli \
|
| 102 |
+
--model youtu-parsing-Q6_K.gguf \
|
| 103 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 104 |
+
--image document.jpg \
|
| 105 |
+
--prompt "請解析這份文件,提取所有文字和表格。" \
|
| 106 |
+
--ctx-size 4096 \
|
| 107 |
+
--temp 0.1
|
| 108 |
+
|
| 109 |
+
# GPU 加速
|
| 110 |
+
./llama.cpp/build/bin/llama-mtmd-cli \
|
| 111 |
+
--model youtu-parsing-Q6_K.gguf \
|
| 112 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 113 |
+
--image document.jpg \
|
| 114 |
+
--prompt "請解析這份文件,提取所有文字和表格。" \
|
| 115 |
+
--ctx-size 4096 \
|
| 116 |
+
--temp 0.1 \
|
| 117 |
+
--gpu-layers 999
|
| 118 |
+
```
|
| 119 |
+
|
| 120 |
+
## ⚡ GPU 加速指南
|
| 121 |
+
|
| 122 |
+
### Apple Silicon (Metal)
|
| 123 |
+
|
| 124 |
+
```bash
|
| 125 |
+
# 編譯 Metal 版本
|
| 126 |
+
cmake -B build -DGGML_METAL=ON
|
| 127 |
+
cmake --build build -j
|
| 128 |
+
|
| 129 |
+
# 運行時自動使用 GPU
|
| 130 |
+
./build/bin/llama-cli --model model.gguf --ngl 999
|
| 131 |
+
|
| 132 |
+
# --ngl 999 表示將所有層 offload 到 GPU
|
| 133 |
+
```
|
| 134 |
+
|
| 135 |
+
### NVIDIA GPU (CUDA)
|
| 136 |
+
|
| 137 |
+
```bash
|
| 138 |
+
# 編譯 CUDA 版本
|
| 139 |
+
cmake -B build -DGGML_CUDA=ON
|
| 140 |
+
cmake --build build -j
|
| 141 |
+
|
| 142 |
+
# 運行
|
| 143 |
+
./build/bin/llama-cli --model model.gguf --ngl 999
|
| 144 |
+
```
|
| 145 |
+
|
| 146 |
+
### Vulkan (跨平台)
|
| 147 |
+
|
| 148 |
+
```bash
|
| 149 |
+
# 編譯 Vulkan 版本
|
| 150 |
+
cmake -B build -DGGML_VULKAN=ON
|
| 151 |
+
cmake --build build -j
|
| 152 |
+
```
|
| 153 |
+
|
| 154 |
+
## 📝 使用範例
|
| 155 |
+
|
| 156 |
+
### OCR 文字識別
|
| 157 |
+
|
| 158 |
+
```bash
|
| 159 |
+
./llama.cpp/build/bin/llama-mtmd-cli \
|
| 160 |
+
--model youtu-parsing-Q6_K.gguf \
|
| 161 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 162 |
+
--image receipt.jpg \
|
| 163 |
+
--prompt "檢測並識別圖片中的文字,將文本坐標格式化輸出。" \
|
| 164 |
+
--ctx-size 2048 \
|
| 165 |
+
--ngl 999
|
| 166 |
+
```
|
| 167 |
+
|
| 168 |
+
### 表格解析
|
| 169 |
+
|
| 170 |
+
```bash
|
| 171 |
+
./llama.cpp/build/bin/llama-mtmd-cli \
|
| 172 |
+
--model youtu-parsing-Q6_K.gguf \
|
| 173 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 174 |
+
--image table.png \
|
| 175 |
+
--prompt "把圖中的表格解析為 HTML 格式。" \
|
| 176 |
+
--ctx-size 4096 \
|
| 177 |
+
--ngl 999
|
| 178 |
+
```
|
| 179 |
+
|
| 180 |
+
### 公式識別
|
| 181 |
+
|
| 182 |
+
```bash
|
| 183 |
+
./llama.cpp/build/bin/llama-mtmd-cli \
|
| 184 |
+
--model youtu-parsing-Q8_0.gguf \
|
| 185 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 186 |
+
--image formula.png \
|
| 187 |
+
--prompt "識別圖片中的公式,用 LaTeX 格式表示。" \
|
| 188 |
+
--ctx-size 2048 \
|
| 189 |
+
--ngl 999
|
| 190 |
+
```
|
| 191 |
+
|
| 192 |
+
### 文檔解析
|
| 193 |
+
|
| 194 |
+
```bash
|
| 195 |
+
./llama.cpp/build/bin/llama-mtmd-cli \
|
| 196 |
+
--model youtu-parsing-Q6_K.gguf \
|
| 197 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 198 |
+
--image document.pdf \
|
| 199 |
+
--prompt "提取文檔圖片中的所有信息,用 markdown 格式表示。表格用 HTML,公式用 LaTeX,按照閱讀順序組織。" \
|
| 200 |
+
--ctx-size 8192 \
|
| 201 |
+
--ngl 999
|
| 202 |
+
```
|
| 203 |
+
|
| 204 |
+
## 🔧 量化說明
|
| 205 |
+
|
| 206 |
+
### 量化類型對比
|
| 207 |
+
|
| 208 |
+
| 格式 | 每權重位元 | 檔案大小 | 品質 | 速度 | 推薦 |
|
| 209 |
+
|------|-----------|---------|------|------|------|
|
| 210 |
+
| **F16** | 16 bit | 3.9 GB | ⭐⭐⭐⭐⭐ | 慢 | 研究用途 |
|
| 211 |
+
| **Q8_0** | 8 bit | 2.1 GB | ⭐⭐⭐⭐⭐ | 快 | 高精度需求 |
|
| 212 |
+
| **Q6_K** | 6 bit | 1.6 GB | ⭐⭐⭐⭐⭐ | 更快 | **推薦** |
|
| 213 |
+
| **Q5_K_M** | 5 bit | 1.4 GB | ⭐⭐⭐⭐ | 更快 | 平衡選擇 |
|
| 214 |
+
| **Q4_K_M** | 4 bit | 1.2 GB | ⭐⭐⭐⭐ | 最快 | 速度優先 |
|
| 215 |
+
|
| 216 |
+
### 如何自行量化
|
| 217 |
+
|
| 218 |
+
如果你有原始的 F16 模型,可以自行量化:
|
| 219 |
+
|
| 220 |
+
```bash
|
| 221 |
+
# Q8_0 (接近無損)
|
| 222 |
+
./llama.cpp/build/bin/llama-quantize \
|
| 223 |
+
youtu-parsing.gguf \
|
| 224 |
+
youtu-parsing-Q8_0.gguf \
|
| 225 |
+
Q8_0
|
| 226 |
+
|
| 227 |
+
# Q6_K (高品質)
|
| 228 |
+
./llama.cpp/build/bin/llama-quantize \
|
| 229 |
+
youtu-parsing.gguf \
|
| 230 |
+
youtu-parsing-Q6_K.gguf \
|
| 231 |
+
Q6_K
|
| 232 |
+
|
| 233 |
+
# Q4_K_M (快速)
|
| 234 |
+
./llama.cpp/build/bin/llama-quantize \
|
| 235 |
+
youtu-parsing.gguf \
|
| 236 |
+
youtu-parsing-Q4_K_M.gguf \
|
| 237 |
+
Q4_K_M
|
| 238 |
+
```
|
| 239 |
+
|
| 240 |
+
## 💻 硬體需求
|
| 241 |
+
|
| 242 |
+
### 記憶體需求
|
| 243 |
+
|
| 244 |
+
| 量化格式 | CPU 推理 | GPU 推理 |
|
| 245 |
+
|---------|---------|---------|
|
| 246 |
+
| Q4_K_M | ~2 GB | ~1.5 GB |
|
| 247 |
+
| Q6_K | ~2.5 GB | ~2 GB |
|
| 248 |
+
| Q8_0 | ~3 GB | ~2.5 GB |
|
| 249 |
+
| F16 | ~5 GB | ~4 GB |
|
| 250 |
+
|
| 251 |
+
### 建議配置
|
| 252 |
+
|
| 253 |
+
- **最低配置**: 4GB RAM,運行 Q4_K_M
|
| 254 |
+
- **推薦配置**: 8GB RAM + Apple Silicon / NVIDIA GPU,運行 Q6_K
|
| 255 |
+
- **最佳配置**: 16GB RAM + 高端 GPU,運行 Q8_0 或 F16
|
| 256 |
+
|
| 257 |
+
## 🐛 故障排除
|
| 258 |
+
|
| 259 |
+
### 問題: GPU 加速無效
|
| 260 |
+
|
| 261 |
+
**解決**: 確認編譯時啟用了正確的後端:
|
| 262 |
+
```bash
|
| 263 |
+
# 檢查支持的後端
|
| 264 |
+
./llama.cpp/build/bin/llama-cli --list-devices
|
| 265 |
+
```
|
| 266 |
+
|
| 267 |
+
### 問題: 記憶體不足 (OOM)
|
| 268 |
+
|
| 269 |
+
**解決**: 使用更小的量化模型或減少上下文長度:
|
| 270 |
+
```bash
|
| 271 |
+
# 使用 Q4_K_M 並減少上下文
|
| 272 |
+
--model youtu-parsing-Q4_K_M.gguf --ctx-size 2048
|
| 273 |
+
```
|
| 274 |
+
|
| 275 |
+
### 問題: Vision 功能無法使用
|
| 276 |
+
|
| 277 |
+
**解決**: 確保同時載入兩個檔案:
|
| 278 |
+
```bash
|
| 279 |
+
--model youtu-parsing.gguf --mmproj youtu-parsing-mmproj.gguf
|
| 280 |
+
```
|
| 281 |
+
|
| 282 |
+
## 📚 相關資源
|
| 283 |
+
|
| 284 |
+
- [原始模型](https://huggingface.co/tencent/Youtu-Parsing)
|
| 285 |
+
- [llama.cpp 文檔](https://github.com/ggml-org/llama.cpp/blob/master/docs)
|
| 286 |
+
- [Youtu-Parsing 技術報告](https://arxiv.org/abs/2601.20430)
|
| 287 |
+
- [DeepSeek-V2 MLA 論文](https://arxiv.org/abs/2405.04434)
|
| 288 |
+
|
| 289 |
+
## ⚖️ 許可證
|
| 290 |
+
|
| 291 |
+
本 GGUF 轉換版本遵循與原始模型相同的 [Youtu-Parsing License](https://huggingface.co/tencent/Youtu-Parsing/blob/main/LICENSE.txt)。
|
| 292 |
+
|
| 293 |
+
原始模型: © 2025 Tencent Youtu Lab
|
| 294 |
+
|
| 295 |
+
## 🙏 致謝
|
| 296 |
+
|
| 297 |
+
- [Tencent Youtu Lab](https://huggingface.co/tencent) 開發了 Youtu-Parsing 模型
|
| 298 |
+
- [llama.cpp](https://github.com/ggml-org/llama.cpp) 團隊提供了優秀的推理框架
|
| 299 |
+
- [Hugging Face](https://huggingface.co) 提供了模型託管平台
|
| 300 |
+
|
| 301 |
+
---
|
| 302 |
+
|
| 303 |
+
**最後更新**: 2025-02-02
|
| 304 |
+
**GGUF 版本**: v3
|
| 305 |
+
**llama.cpp 相容版本**: >= b4300 (commit 1239267+)
|
Youtu-Parsing-GGUF/convert_to_gguf.sh
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
# Youtu-Parsing GGUF 轉換腳本
|
| 3 |
+
# 將 Hugging Face 模型轉換為 GGUF 格式
|
| 4 |
+
|
| 5 |
+
set -e
|
| 6 |
+
|
| 7 |
+
# 顏色輸出
|
| 8 |
+
RED='\033[0;31m'
|
| 9 |
+
GREEN='\033[0;32m'
|
| 10 |
+
YELLOW='\033[1;33m'
|
| 11 |
+
NC='\033[0m' # No Color
|
| 12 |
+
|
| 13 |
+
echo "=========================================="
|
| 14 |
+
echo "Youtu-Parsing GGUF 轉換腳本"
|
| 15 |
+
echo "=========================================="
|
| 16 |
+
echo
|
| 17 |
+
|
| 18 |
+
# 檢查參數
|
| 19 |
+
if [ $# -lt 1 ]; then
|
| 20 |
+
echo "使用方法: $0 <原始模型目錄> [輸出目錄]"
|
| 21 |
+
echo "示例: $0 ./Youtu-Parsing ./output"
|
| 22 |
+
exit 1
|
| 23 |
+
fi
|
| 24 |
+
|
| 25 |
+
MODEL_DIR="$1"
|
| 26 |
+
OUTPUT_DIR="${2:-.}"
|
| 27 |
+
|
| 28 |
+
# 檢查模型目錄
|
| 29 |
+
if [ ! -d "$MODEL_DIR" ]; then
|
| 30 |
+
echo -e "${RED}錯誤: 模型目錄不存在: $MODEL_DIR${NC}"
|
| 31 |
+
exit 1
|
| 32 |
+
fi
|
| 33 |
+
|
| 34 |
+
# 檢查必要文件
|
| 35 |
+
if [ ! -f "$MODEL_DIR/config.json" ]; then
|
| 36 |
+
echo -e "${RED}錯誤: 找不到 config.json${NC}"
|
| 37 |
+
exit 1
|
| 38 |
+
fi
|
| 39 |
+
|
| 40 |
+
echo -e "${GREEN}✓${NC} 模型目錄: $MODEL_DIR"
|
| 41 |
+
echo -e "${GREEN}✓${NC} 輸出目錄: $OUTPUT_DIR"
|
| 42 |
+
echo
|
| 43 |
+
|
| 44 |
+
# 創建輸出目錄
|
| 45 |
+
mkdir -p "$OUTPUT_DIR"
|
| 46 |
+
|
| 47 |
+
# 步驟 1: 修復模型索引
|
| 48 |
+
echo "=========================================="
|
| 49 |
+
echo "步驟 1: 修復模型索引"
|
| 50 |
+
echo "=========================================="
|
| 51 |
+
python3 fix_model_index.py "$MODEL_DIR"
|
| 52 |
+
echo
|
| 53 |
+
|
| 54 |
+
# 步驟 2: 檢查 llama.cpp
|
| 55 |
+
echo "=========================================="
|
| 56 |
+
echo "步驟 2: 檢查 llama.cpp"
|
| 57 |
+
echo "=========================================="
|
| 58 |
+
|
| 59 |
+
if [ ! -d "llama.cpp" ]; then
|
| 60 |
+
echo "克隆 llama.cpp..."
|
| 61 |
+
git clone --depth 1 https://github.com/ggml-org/llama.cpp.git
|
| 62 |
+
fi
|
| 63 |
+
|
| 64 |
+
# 安裝 gguf-py
|
| 65 |
+
cd llama.cpp
|
| 66 |
+
if ! python3 -c "import gguf" 2>/dev/null; then
|
| 67 |
+
echo "安裝 gguf-py..."
|
| 68 |
+
pip install -e ./gguf-py
|
| 69 |
+
fi
|
| 70 |
+
cd ..
|
| 71 |
+
|
| 72 |
+
echo -e "${GREEN}✓${NC} llama.cpp 準備完成"
|
| 73 |
+
echo
|
| 74 |
+
|
| 75 |
+
# 步驟 3: 轉換 LLM 模型
|
| 76 |
+
echo "=========================================="
|
| 77 |
+
echo "步驟 3: 轉換 LLM 模型 (F16)"
|
| 78 |
+
echo "=========================================="
|
| 79 |
+
|
| 80 |
+
if [ -f "$OUTPUT_DIR/youtu-parsing.gguf" ]; then
|
| 81 |
+
echo -e "${YELLOW}警告: LLM 模型已存在,跳過轉換${NC}"
|
| 82 |
+
else
|
| 83 |
+
python3 llama.cpp/convert_hf_to_gguf.py "$MODEL_DIR" \
|
| 84 |
+
--outfile "$OUTPUT_DIR/youtu-parsing.gguf" \
|
| 85 |
+
--outtype f16
|
| 86 |
+
|
| 87 |
+
if [ $? -eq 0 ]; then
|
| 88 |
+
echo -e "${GREEN}✓${NC} LLM 模型轉換成功"
|
| 89 |
+
else
|
| 90 |
+
echo -e "${RED}✗${NC} LLM 模型轉換失敗"
|
| 91 |
+
exit 1
|
| 92 |
+
fi
|
| 93 |
+
fi
|
| 94 |
+
echo
|
| 95 |
+
|
| 96 |
+
# 步驟 4: 轉換 Vision 模型
|
| 97 |
+
echo "=========================================="
|
| 98 |
+
echo "步驟 4: 轉換 Vision 模型 (mmproj)"
|
| 99 |
+
echo "=========================================="
|
| 100 |
+
|
| 101 |
+
if [ -f "$OUTPUT_DIR/youtu-parsing-mmproj.gguf" ]; then
|
| 102 |
+
echo -e "${YELLOW}警告: Vision 模型已存在,跳過轉換${NC}"
|
| 103 |
+
else
|
| 104 |
+
python3 llama.cpp/convert_hf_to_gguf.py "$MODEL_DIR" \
|
| 105 |
+
--outfile "$OUTPUT_DIR/youtu-parsing-mmproj.gguf" \
|
| 106 |
+
--outtype f16 \
|
| 107 |
+
--mmproj
|
| 108 |
+
|
| 109 |
+
if [ $? -eq 0 ]; then
|
| 110 |
+
echo -e "${GREEN}✓${NC} Vision 模型轉換成功"
|
| 111 |
+
else
|
| 112 |
+
echo -e "${RED}✗${NC} Vision 模型轉換失敗"
|
| 113 |
+
exit 1
|
| 114 |
+
fi
|
| 115 |
+
fi
|
| 116 |
+
echo
|
| 117 |
+
|
| 118 |
+
# 步驟 5: 驗證
|
| 119 |
+
echo "=========================================="
|
| 120 |
+
echo "步驟 5: 驗證轉換結果"
|
| 121 |
+
echo "=========================================="
|
| 122 |
+
|
| 123 |
+
if [ -f "$OUTPUT_DIR/youtu-parsing.gguf" ] && [ -f "$OUTPUT_DIR/youtu-parsing-mmproj.gguf" ]; then
|
| 124 |
+
echo -e "${GREEN}✓${NC} 轉換完成!"
|
| 125 |
+
echo
|
| 126 |
+
echo "輸出文件:"
|
| 127 |
+
ls -lh "$OUTPUT_DIR"/*.gguf
|
| 128 |
+
echo
|
| 129 |
+
echo "文件大小:"
|
| 130 |
+
du -h "$OUTPUT_DIR"/*.gguf
|
| 131 |
+
else
|
| 132 |
+
echo -e "${RED}✗${NC} 轉換失敗:輸出文件不完整"
|
| 133 |
+
exit 1
|
| 134 |
+
fi
|
| 135 |
+
|
| 136 |
+
echo
|
| 137 |
+
echo "=========================================="
|
| 138 |
+
echo "🎉 轉換完成!"
|
| 139 |
+
echo "=========================================="
|
| 140 |
+
echo
|
| 141 |
+
echo "輸出文件:"
|
| 142 |
+
echo " - $OUTPUT_DIR/youtu-parsing.gguf (LLM 模型)"
|
| 143 |
+
echo " - $OUTPUT_DIR/youtu-parsing-mmproj.gguf (Vision 模型)"
|
| 144 |
+
echo
|
| 145 |
+
echo "使用方法:"
|
| 146 |
+
echo " 1. 編譯 llama.cpp: cd llama.cpp && cmake -B build && cmake --build build"
|
| 147 |
+
echo " 2. 測試載入: ./llama.cpp/build/bin/llama-mtmd-cli --model $OUTPUT_DIR/youtu-parsing.gguf --mmproj $OUTPUT_DIR/youtu-parsing-mmproj.gguf"
|
| 148 |
+
echo
|
Youtu-Parsing-GGUF/fix_model_index.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
修復 Youtu-Parsing 模型的 index.json 文件
|
| 4 |
+
|
| 5 |
+
由於模型使用了 tie_word_embeddings=true,lm_head.weight 與 embed_tokens.weight 共享,
|
| 6 |
+
但 model.safetensors.index.json 錯誤地包含了 lm_head.weight 條目,導致轉換失敗。
|
| 7 |
+
|
| 8 |
+
使用方法:
|
| 9 |
+
python3 fix_model_index.py <模型目錄>
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import json
|
| 13 |
+
import sys
|
| 14 |
+
from pathlib import Path
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def fix_model_index(model_dir: Path) -> bool:
|
| 18 |
+
"""修復模型的 index.json 文件"""
|
| 19 |
+
index_path = model_dir / "model.safetensors.index.json"
|
| 20 |
+
|
| 21 |
+
if not index_path.exists():
|
| 22 |
+
print(f"錯誤: 找不到 {index_path}")
|
| 23 |
+
return False
|
| 24 |
+
|
| 25 |
+
# 讀取 index.json
|
| 26 |
+
with open(index_path, 'r', encoding='utf-8') as f:
|
| 27 |
+
index = json.load(f)
|
| 28 |
+
|
| 29 |
+
weight_map = index.get('weight_map', {})
|
| 30 |
+
|
| 31 |
+
# 檢查並移除錯誤的 lm_head.weight 條目
|
| 32 |
+
if 'lm_head.weight' in weight_map:
|
| 33 |
+
print(f"發現錯誤的 lm_head.weight 映射到: {weight_map['lm_head.weight']}")
|
| 34 |
+
print("由於模型使用 tie_word_embeddings=true,移除 lm_head.weight 條目")
|
| 35 |
+
del weight_map['lm_head.weight']
|
| 36 |
+
|
| 37 |
+
# 保存修復後的文件
|
| 38 |
+
with open(index_path, 'w', encoding='utf-8') as f:
|
| 39 |
+
json.dump(index, f, indent=2, ensure_ascii=False)
|
| 40 |
+
print(f"已修復: {index_path}")
|
| 41 |
+
return True
|
| 42 |
+
else:
|
| 43 |
+
print("lm_head.weight 不在 index 中,無需修復")
|
| 44 |
+
return False
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def main():
|
| 48 |
+
if len(sys.argv) < 2:
|
| 49 |
+
print("使用方法: python3 fix_model_index.py <模型目錄>")
|
| 50 |
+
print("示例: python3 fix_model_index.py ./Youtu-Parsing")
|
| 51 |
+
sys.exit(1)
|
| 52 |
+
|
| 53 |
+
model_dir = Path(sys.argv[1])
|
| 54 |
+
|
| 55 |
+
if not model_dir.exists():
|
| 56 |
+
print(f"錯誤: 目錄不存在: {model_dir}")
|
| 57 |
+
sys.exit(1)
|
| 58 |
+
|
| 59 |
+
fixed = fix_model_index(model_dir)
|
| 60 |
+
sys.exit(0 if fixed else 0)
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
if __name__ == "__main__":
|
| 64 |
+
main()
|
Youtu-Parsing-GGUF/test_gguf.sh
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
# Youtu-Parsing GGUF 模型測試腳本
|
| 3 |
+
|
| 4 |
+
set -e
|
| 5 |
+
|
| 6 |
+
# 顏色輸出
|
| 7 |
+
RED='\033[0;31m'
|
| 8 |
+
GREEN='\033[0;32m'
|
| 9 |
+
YELLOW='\033[1;33m'
|
| 10 |
+
BLUE='\033[0;34m'
|
| 11 |
+
NC='\033[0m'
|
| 12 |
+
|
| 13 |
+
echo "=========================================="
|
| 14 |
+
echo "Youtu-Parsing GGUF 模型測試"
|
| 15 |
+
echo "=========================================="
|
| 16 |
+
echo
|
| 17 |
+
|
| 18 |
+
# 檢查模型文件
|
| 19 |
+
echo -e "${BLUE}檢查模型文件...${NC}"
|
| 20 |
+
|
| 21 |
+
if [ ! -f "youtu-parsing.gguf" ]; then
|
| 22 |
+
echo -e "${RED}✗ 錯誤: youtu-parsing.gguf 不存在${NC}"
|
| 23 |
+
exit 1
|
| 24 |
+
fi
|
| 25 |
+
|
| 26 |
+
if [ ! -f "youtu-parsing-mmproj.gguf" ]; then
|
| 27 |
+
echo -e "${RED}✗ 錯誤: youtu-parsing-mmproj.gguf 不存在${NC}"
|
| 28 |
+
exit 1
|
| 29 |
+
fi
|
| 30 |
+
|
| 31 |
+
echo -e "${GREEN}✓${NC} youtu-parsing.gguf: $(ls -lh youtu-parsing.gguf | awk '{print $5}')"
|
| 32 |
+
echo -e "${GREEN}✓${NC} youtu-parsing-mmproj.gguf: $(ls -lh youtu-parsing-mmproj.gguf | awk '{print $5}')"
|
| 33 |
+
echo
|
| 34 |
+
|
| 35 |
+
# 檢查 llama.cpp
|
| 36 |
+
if [ ! -d "llama.cpp" ]; then
|
| 37 |
+
echo -e "${YELLOW}警告: llama.cpp 目錄不存在${NC}"
|
| 38 |
+
echo "請先克隆 llama.cpp: git clone https://github.com/ggml-org/llama.cpp.git"
|
| 39 |
+
exit 1
|
| 40 |
+
fi
|
| 41 |
+
|
| 42 |
+
LLAMA_CLI="llama.cpp/build/bin/llama-cli"
|
| 43 |
+
LLAMA_MTMD="llama.cpp/build/bin/llama-mtmd-cli"
|
| 44 |
+
|
| 45 |
+
if [ ! -f "$LLAMA_CLI" ]; then
|
| 46 |
+
echo -e "${YELLOW}警告: llama-cli 未編譯${NC}"
|
| 47 |
+
echo "請先編譯 llama.cpp:"
|
| 48 |
+
echo " cd llama.cpp && cmake -B build && cmake --build build"
|
| 49 |
+
exit 1
|
| 50 |
+
fi
|
| 51 |
+
|
| 52 |
+
echo -e "${GREEN}✓${NC} llama.cpp 已編譯"
|
| 53 |
+
echo
|
| 54 |
+
|
| 55 |
+
# 測試 1: 檢查 GGUF 文件完整性
|
| 56 |
+
echo "=========================================="
|
| 57 |
+
echo "測試 1: 檢查 GGUF 文件完整性"
|
| 58 |
+
echo "=========================================="
|
| 59 |
+
|
| 60 |
+
python3 << EOF
|
| 61 |
+
import sys
|
| 62 |
+
try:
|
| 63 |
+
import gguf
|
| 64 |
+
|
| 65 |
+
# 檢查 LLM
|
| 66 |
+
print("檢查 youtu-parsing.gguf...")
|
| 67 |
+
gguf_model = gguf.GGUFReader('youtu-parsing.gguf')
|
| 68 |
+
print(f" Tensor 數量: {len(gguf_model.tensors)}")
|
| 69 |
+
print(f" 元數據欄位: {len(gguf_model.fields)}")
|
| 70 |
+
|
| 71 |
+
# 檢查 mmproj
|
| 72 |
+
print("檢查 youtu-parsing-mmproj.gguf...")
|
| 73 |
+
gguf_mmproj = gguf.GGUFReader('youtu-parsing-mmproj.gguf')
|
| 74 |
+
print(f" Tensor 數量: {len(gguf_mmproj.tensors)}")
|
| 75 |
+
print(f" 元數據欄位: {len(gguf_mmproj.fields)}")
|
| 76 |
+
|
| 77 |
+
print("\n✅ GGUF 文件完整性檢查通過")
|
| 78 |
+
except Exception as e:
|
| 79 |
+
print(f"\n❌ 檢查失敗: {e}")
|
| 80 |
+
sys.exit(1)
|
| 81 |
+
EOF
|
| 82 |
+
|
| 83 |
+
if [ $? -ne 0 ]; then
|
| 84 |
+
echo -e "${RED}✗ GGUF 文件檢查失敗${NC}"
|
| 85 |
+
exit 1
|
| 86 |
+
fi
|
| 87 |
+
echo
|
| 88 |
+
|
| 89 |
+
# 測試 2: LLM 載入測試
|
| 90 |
+
echo "=========================================="
|
| 91 |
+
echo "測試 2: LLM 載入測試"
|
| 92 |
+
echo "=========================================="
|
| 93 |
+
|
| 94 |
+
timeout 30 $LLAMA_CLI \
|
| 95 |
+
--model youtu-parsing.gguf \
|
| 96 |
+
-c 2048 \
|
| 97 |
+
-p "Hello" \
|
| 98 |
+
-n 0 2>&1 | head -50
|
| 99 |
+
|
| 100 |
+
if [ $? -eq 0 ] || [ $? -eq 124 ]; then
|
| 101 |
+
echo
|
| 102 |
+
echo -e "${GREEN}✓${NC} LLM 載入測試通過"
|
| 103 |
+
else
|
| 104 |
+
echo
|
| 105 |
+
echo -e "${RED}✗${NC} LLM 載入測試失敗"
|
| 106 |
+
exit 1
|
| 107 |
+
fi
|
| 108 |
+
echo
|
| 109 |
+
|
| 110 |
+
# 測試 3: Vision-Language 載入測試
|
| 111 |
+
echo "=========================================="
|
| 112 |
+
echo "測試 3: Vision-Language 載入測試"
|
| 113 |
+
echo "=========================================="
|
| 114 |
+
|
| 115 |
+
timeout 30 $LLAMA_MTMD \
|
| 116 |
+
--model youtu-parsing.gguf \
|
| 117 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 118 |
+
-c 2048 2>&1 | head -50
|
| 119 |
+
|
| 120 |
+
if [ $? -eq 0 ] || [ $? -eq 124 ]; then
|
| 121 |
+
echo
|
| 122 |
+
echo -e "${GREEN}✓${NC} Vision-Language 載入測試通過"
|
| 123 |
+
else
|
| 124 |
+
echo
|
| 125 |
+
echo -e "${RED}✗${NC} Vision-Language 載入測試失敗"
|
| 126 |
+
exit 1
|
| 127 |
+
fi
|
| 128 |
+
echo
|
| 129 |
+
|
| 130 |
+
# 測試 4: 簡單推理測試 (如果有測試圖片)
|
| 131 |
+
echo "=========================================="
|
| 132 |
+
echo "測試 4: 簡單推理測試"
|
| 133 |
+
echo "=========================================="
|
| 134 |
+
|
| 135 |
+
if [ -f "test_image.jpg" ] || [ -f "test_image.png" ]; then
|
| 136 |
+
TEST_IMAGE=$(ls test_image.* 2>/dev/null | head -1)
|
| 137 |
+
echo "使用測試圖片: $TEST_IMAGE"
|
| 138 |
+
|
| 139 |
+
timeout 60 $LLAMA_MTMD \
|
| 140 |
+
--model youtu-parsing.gguf \
|
| 141 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 142 |
+
--image "$TEST_IMAGE" \
|
| 143 |
+
-p "描述這張圖片" \
|
| 144 |
+
-c 2048 \
|
| 145 |
+
-n 100 \
|
| 146 |
+
--temp 0.1 2>&1 | tail -20
|
| 147 |
+
|
| 148 |
+
if [ $? -eq 0 ] || [ $? -eq 124 ]; then
|
| 149 |
+
echo
|
| 150 |
+
echo -e "${GREEN}✓${NC} 推理測試通過"
|
| 151 |
+
else
|
| 152 |
+
echo
|
| 153 |
+
echo -e "${YELLOW}!${NC} 推理測試可能失敗,但模型載入正常"
|
| 154 |
+
fi
|
| 155 |
+
else
|
| 156 |
+
echo "跳過 (未找到 test_image.jpg/png)"
|
| 157 |
+
fi
|
| 158 |
+
echo
|
| 159 |
+
|
| 160 |
+
# 總結
|
| 161 |
+
echo "=========================================="
|
| 162 |
+
echo -e "${GREEN}🎉 所有測試通過!${NC}"
|
| 163 |
+
echo "=========================================="
|
| 164 |
+
echo
|
| 165 |
+
echo "模型已準備就緒,可以使用以下命令進行推理:"
|
| 166 |
+
echo
|
| 167 |
+
echo "1. 純文本推理:"
|
| 168 |
+
echo " $LLAMA_CLI --model youtu-parsing.gguf -p '你的提示詞'"
|
| 169 |
+
echo
|
| 170 |
+
echo "2. 圖像理解:"
|
| 171 |
+
echo " $LLAMA_MTMD --model youtu-parsing.gguf --mmproj youtu-parsing-mmproj.gguf --image image.jpg -p '描述這張圖片'"
|
| 172 |
+
echo
|
| 173 |
+
echo "3. 啟動 API 服務器:"
|
| 174 |
+
echo " llama.cpp/build/bin/llama-server --model youtu-parsing.gguf --mmproj youtu-parsing-mmproj.gguf --port 8080"
|
| 175 |
+
echo
|
Youtu-Parsing-GGUF/youtu-parsing-mmproj.gguf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b5ebdc8390ceb5981e18c18ac5244e0da085497f46ede75b99a45e063eb92e8
|
| 3 |
+
size 886907616
|
convert_to_gguf.sh
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
# Youtu-Parsing GGUF 轉換腳本
|
| 3 |
+
# 將 Hugging Face 模型轉換為 GGUF 格式
|
| 4 |
+
|
| 5 |
+
set -e
|
| 6 |
+
|
| 7 |
+
# 顏色輸出
|
| 8 |
+
RED='\033[0;31m'
|
| 9 |
+
GREEN='\033[0;32m'
|
| 10 |
+
YELLOW='\033[1;33m'
|
| 11 |
+
NC='\033[0m' # No Color
|
| 12 |
+
|
| 13 |
+
echo "=========================================="
|
| 14 |
+
echo "Youtu-Parsing GGUF 轉換腳本"
|
| 15 |
+
echo "=========================================="
|
| 16 |
+
echo
|
| 17 |
+
|
| 18 |
+
# 檢查參數
|
| 19 |
+
if [ $# -lt 1 ]; then
|
| 20 |
+
echo "使用方法: $0 <原始模型目錄> [輸出目錄]"
|
| 21 |
+
echo "示例: $0 ./Youtu-Parsing ./output"
|
| 22 |
+
exit 1
|
| 23 |
+
fi
|
| 24 |
+
|
| 25 |
+
MODEL_DIR="$1"
|
| 26 |
+
OUTPUT_DIR="${2:-.}"
|
| 27 |
+
|
| 28 |
+
# 檢查模型目錄
|
| 29 |
+
if [ ! -d "$MODEL_DIR" ]; then
|
| 30 |
+
echo -e "${RED}錯誤: 模型目錄不存在: $MODEL_DIR${NC}"
|
| 31 |
+
exit 1
|
| 32 |
+
fi
|
| 33 |
+
|
| 34 |
+
# 檢查必要文件
|
| 35 |
+
if [ ! -f "$MODEL_DIR/config.json" ]; then
|
| 36 |
+
echo -e "${RED}錯誤: 找不到 config.json${NC}"
|
| 37 |
+
exit 1
|
| 38 |
+
fi
|
| 39 |
+
|
| 40 |
+
echo -e "${GREEN}✓${NC} 模型目錄: $MODEL_DIR"
|
| 41 |
+
echo -e "${GREEN}✓${NC} 輸出目錄: $OUTPUT_DIR"
|
| 42 |
+
echo
|
| 43 |
+
|
| 44 |
+
# 創建輸出目錄
|
| 45 |
+
mkdir -p "$OUTPUT_DIR"
|
| 46 |
+
|
| 47 |
+
# 步驟 1: 修復模型索引
|
| 48 |
+
echo "=========================================="
|
| 49 |
+
echo "步驟 1: 修復模型索引"
|
| 50 |
+
echo "=========================================="
|
| 51 |
+
python3 fix_model_index.py "$MODEL_DIR"
|
| 52 |
+
echo
|
| 53 |
+
|
| 54 |
+
# 步驟 2: 檢查 llama.cpp
|
| 55 |
+
echo "=========================================="
|
| 56 |
+
echo "步驟 2: 檢查 llama.cpp"
|
| 57 |
+
echo "=========================================="
|
| 58 |
+
|
| 59 |
+
if [ ! -d "llama.cpp" ]; then
|
| 60 |
+
echo "克隆 llama.cpp..."
|
| 61 |
+
git clone --depth 1 https://github.com/ggml-org/llama.cpp.git
|
| 62 |
+
fi
|
| 63 |
+
|
| 64 |
+
# 安裝 gguf-py
|
| 65 |
+
cd llama.cpp
|
| 66 |
+
if ! python3 -c "import gguf" 2>/dev/null; then
|
| 67 |
+
echo "安裝 gguf-py..."
|
| 68 |
+
pip install -e ./gguf-py
|
| 69 |
+
fi
|
| 70 |
+
cd ..
|
| 71 |
+
|
| 72 |
+
echo -e "${GREEN}✓${NC} llama.cpp 準備完成"
|
| 73 |
+
echo
|
| 74 |
+
|
| 75 |
+
# 步驟 3: 轉換 LLM 模型
|
| 76 |
+
echo "=========================================="
|
| 77 |
+
echo "步驟 3: 轉換 LLM 模型 (F16)"
|
| 78 |
+
echo "=========================================="
|
| 79 |
+
|
| 80 |
+
if [ -f "$OUTPUT_DIR/youtu-parsing.gguf" ]; then
|
| 81 |
+
echo -e "${YELLOW}警告: LLM 模型已存在,跳過轉換${NC}"
|
| 82 |
+
else
|
| 83 |
+
python3 llama.cpp/convert_hf_to_gguf.py "$MODEL_DIR" \
|
| 84 |
+
--outfile "$OUTPUT_DIR/youtu-parsing.gguf" \
|
| 85 |
+
--outtype f16
|
| 86 |
+
|
| 87 |
+
if [ $? -eq 0 ]; then
|
| 88 |
+
echo -e "${GREEN}✓${NC} LLM 模型轉換成功"
|
| 89 |
+
else
|
| 90 |
+
echo -e "${RED}✗${NC} LLM 模型轉換失敗"
|
| 91 |
+
exit 1
|
| 92 |
+
fi
|
| 93 |
+
fi
|
| 94 |
+
echo
|
| 95 |
+
|
| 96 |
+
# 步驟 4: 轉換 Vision 模型
|
| 97 |
+
echo "=========================================="
|
| 98 |
+
echo "步驟 4: 轉換 Vision 模型 (mmproj)"
|
| 99 |
+
echo "=========================================="
|
| 100 |
+
|
| 101 |
+
if [ -f "$OUTPUT_DIR/youtu-parsing-mmproj.gguf" ]; then
|
| 102 |
+
echo -e "${YELLOW}警告: Vision 模型已存在,跳過轉換${NC}"
|
| 103 |
+
else
|
| 104 |
+
python3 llama.cpp/convert_hf_to_gguf.py "$MODEL_DIR" \
|
| 105 |
+
--outfile "$OUTPUT_DIR/youtu-parsing-mmproj.gguf" \
|
| 106 |
+
--outtype f16 \
|
| 107 |
+
--mmproj
|
| 108 |
+
|
| 109 |
+
if [ $? -eq 0 ]; then
|
| 110 |
+
echo -e "${GREEN}✓${NC} Vision 模型轉換成功"
|
| 111 |
+
else
|
| 112 |
+
echo -e "${RED}✗${NC} Vision 模型轉換失敗"
|
| 113 |
+
exit 1
|
| 114 |
+
fi
|
| 115 |
+
fi
|
| 116 |
+
echo
|
| 117 |
+
|
| 118 |
+
# 步驟 5: 驗證
|
| 119 |
+
echo "=========================================="
|
| 120 |
+
echo "步驟 5: 驗證轉換結果"
|
| 121 |
+
echo "=========================================="
|
| 122 |
+
|
| 123 |
+
if [ -f "$OUTPUT_DIR/youtu-parsing.gguf" ] && [ -f "$OUTPUT_DIR/youtu-parsing-mmproj.gguf" ]; then
|
| 124 |
+
echo -e "${GREEN}✓${NC} 轉換完成!"
|
| 125 |
+
echo
|
| 126 |
+
echo "輸出文件:"
|
| 127 |
+
ls -lh "$OUTPUT_DIR"/*.gguf
|
| 128 |
+
echo
|
| 129 |
+
echo "文件大小:"
|
| 130 |
+
du -h "$OUTPUT_DIR"/*.gguf
|
| 131 |
+
else
|
| 132 |
+
echo -e "${RED}✗${NC} 轉換失敗:輸出文件不完整"
|
| 133 |
+
exit 1
|
| 134 |
+
fi
|
| 135 |
+
|
| 136 |
+
echo
|
| 137 |
+
echo "=========================================="
|
| 138 |
+
echo "🎉 轉換完成!"
|
| 139 |
+
echo "=========================================="
|
| 140 |
+
echo
|
| 141 |
+
echo "輸出文件:"
|
| 142 |
+
echo " - $OUTPUT_DIR/youtu-parsing.gguf (LLM 模型)"
|
| 143 |
+
echo " - $OUTPUT_DIR/youtu-parsing-mmproj.gguf (Vision 模型)"
|
| 144 |
+
echo
|
| 145 |
+
echo "使用方法:"
|
| 146 |
+
echo " 1. 編譯 llama.cpp: cd llama.cpp && cmake -B build && cmake --build build"
|
| 147 |
+
echo " 2. 測試載入: ./llama.cpp/build/bin/llama-mtmd-cli --model $OUTPUT_DIR/youtu-parsing.gguf --mmproj $OUTPUT_DIR/youtu-parsing-mmproj.gguf"
|
| 148 |
+
echo
|
fix_model_index.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
修復 Youtu-Parsing 模型的 index.json 文件
|
| 4 |
+
|
| 5 |
+
由於模型使用了 tie_word_embeddings=true,lm_head.weight 與 embed_tokens.weight 共享,
|
| 6 |
+
但 model.safetensors.index.json 錯誤地包含了 lm_head.weight 條目,導致轉換失敗。
|
| 7 |
+
|
| 8 |
+
使用方法:
|
| 9 |
+
python3 fix_model_index.py <模型目錄>
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import json
|
| 13 |
+
import sys
|
| 14 |
+
from pathlib import Path
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def fix_model_index(model_dir: Path) -> bool:
|
| 18 |
+
"""修復模型的 index.json 文件"""
|
| 19 |
+
index_path = model_dir / "model.safetensors.index.json"
|
| 20 |
+
|
| 21 |
+
if not index_path.exists():
|
| 22 |
+
print(f"錯誤: 找不到 {index_path}")
|
| 23 |
+
return False
|
| 24 |
+
|
| 25 |
+
# 讀取 index.json
|
| 26 |
+
with open(index_path, 'r', encoding='utf-8') as f:
|
| 27 |
+
index = json.load(f)
|
| 28 |
+
|
| 29 |
+
weight_map = index.get('weight_map', {})
|
| 30 |
+
|
| 31 |
+
# 檢查並移除錯誤的 lm_head.weight 條目
|
| 32 |
+
if 'lm_head.weight' in weight_map:
|
| 33 |
+
print(f"發現錯誤的 lm_head.weight 映射到: {weight_map['lm_head.weight']}")
|
| 34 |
+
print("由於模型使用 tie_word_embeddings=true,移除 lm_head.weight 條目")
|
| 35 |
+
del weight_map['lm_head.weight']
|
| 36 |
+
|
| 37 |
+
# 保存修復後的文件
|
| 38 |
+
with open(index_path, 'w', encoding='utf-8') as f:
|
| 39 |
+
json.dump(index, f, indent=2, ensure_ascii=False)
|
| 40 |
+
print(f"已修復: {index_path}")
|
| 41 |
+
return True
|
| 42 |
+
else:
|
| 43 |
+
print("lm_head.weight 不在 index 中,無需修復")
|
| 44 |
+
return False
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def main():
|
| 48 |
+
if len(sys.argv) < 2:
|
| 49 |
+
print("使用方法: python3 fix_model_index.py <模型目錄>")
|
| 50 |
+
print("示例: python3 fix_model_index.py ./Youtu-Parsing")
|
| 51 |
+
sys.exit(1)
|
| 52 |
+
|
| 53 |
+
model_dir = Path(sys.argv[1])
|
| 54 |
+
|
| 55 |
+
if not model_dir.exists():
|
| 56 |
+
print(f"錯誤: 目錄不存在: {model_dir}")
|
| 57 |
+
sys.exit(1)
|
| 58 |
+
|
| 59 |
+
fixed = fix_model_index(model_dir)
|
| 60 |
+
sys.exit(0 if fixed else 0)
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
if __name__ == "__main__":
|
| 64 |
+
main()
|
test_gguf.sh
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
# Youtu-Parsing GGUF 模型測試腳本
|
| 3 |
+
|
| 4 |
+
set -e
|
| 5 |
+
|
| 6 |
+
# 顏色輸出
|
| 7 |
+
RED='\033[0;31m'
|
| 8 |
+
GREEN='\033[0;32m'
|
| 9 |
+
YELLOW='\033[1;33m'
|
| 10 |
+
BLUE='\033[0;34m'
|
| 11 |
+
NC='\033[0m'
|
| 12 |
+
|
| 13 |
+
echo "=========================================="
|
| 14 |
+
echo "Youtu-Parsing GGUF 模型測試"
|
| 15 |
+
echo "=========================================="
|
| 16 |
+
echo
|
| 17 |
+
|
| 18 |
+
# 檢查模型文件
|
| 19 |
+
echo -e "${BLUE}檢查模型文件...${NC}"
|
| 20 |
+
|
| 21 |
+
if [ ! -f "youtu-parsing.gguf" ]; then
|
| 22 |
+
echo -e "${RED}✗ 錯誤: youtu-parsing.gguf 不存在${NC}"
|
| 23 |
+
exit 1
|
| 24 |
+
fi
|
| 25 |
+
|
| 26 |
+
if [ ! -f "youtu-parsing-mmproj.gguf" ]; then
|
| 27 |
+
echo -e "${RED}✗ 錯誤: youtu-parsing-mmproj.gguf 不存在${NC}"
|
| 28 |
+
exit 1
|
| 29 |
+
fi
|
| 30 |
+
|
| 31 |
+
echo -e "${GREEN}✓${NC} youtu-parsing.gguf: $(ls -lh youtu-parsing.gguf | awk '{print $5}')"
|
| 32 |
+
echo -e "${GREEN}✓${NC} youtu-parsing-mmproj.gguf: $(ls -lh youtu-parsing-mmproj.gguf | awk '{print $5}')"
|
| 33 |
+
echo
|
| 34 |
+
|
| 35 |
+
# 檢查 llama.cpp
|
| 36 |
+
if [ ! -d "llama.cpp" ]; then
|
| 37 |
+
echo -e "${YELLOW}警告: llama.cpp 目錄不存在${NC}"
|
| 38 |
+
echo "請先克隆 llama.cpp: git clone https://github.com/ggml-org/llama.cpp.git"
|
| 39 |
+
exit 1
|
| 40 |
+
fi
|
| 41 |
+
|
| 42 |
+
LLAMA_CLI="llama.cpp/build/bin/llama-cli"
|
| 43 |
+
LLAMA_MTMD="llama.cpp/build/bin/llama-mtmd-cli"
|
| 44 |
+
|
| 45 |
+
if [ ! -f "$LLAMA_CLI" ]; then
|
| 46 |
+
echo -e "${YELLOW}警告: llama-cli 未編譯${NC}"
|
| 47 |
+
echo "請先編譯 llama.cpp:"
|
| 48 |
+
echo " cd llama.cpp && cmake -B build && cmake --build build"
|
| 49 |
+
exit 1
|
| 50 |
+
fi
|
| 51 |
+
|
| 52 |
+
echo -e "${GREEN}✓${NC} llama.cpp 已編譯"
|
| 53 |
+
echo
|
| 54 |
+
|
| 55 |
+
# 測試 1: 檢查 GGUF 文件完整性
|
| 56 |
+
echo "=========================================="
|
| 57 |
+
echo "測試 1: 檢查 GGUF 文件完整性"
|
| 58 |
+
echo "=========================================="
|
| 59 |
+
|
| 60 |
+
python3 << EOF
|
| 61 |
+
import sys
|
| 62 |
+
try:
|
| 63 |
+
import gguf
|
| 64 |
+
|
| 65 |
+
# 檢查 LLM
|
| 66 |
+
print("檢查 youtu-parsing.gguf...")
|
| 67 |
+
gguf_model = gguf.GGUFReader('youtu-parsing.gguf')
|
| 68 |
+
print(f" Tensor 數量: {len(gguf_model.tensors)}")
|
| 69 |
+
print(f" 元數據欄位: {len(gguf_model.fields)}")
|
| 70 |
+
|
| 71 |
+
# 檢查 mmproj
|
| 72 |
+
print("檢查 youtu-parsing-mmproj.gguf...")
|
| 73 |
+
gguf_mmproj = gguf.GGUFReader('youtu-parsing-mmproj.gguf')
|
| 74 |
+
print(f" Tensor 數量: {len(gguf_mmproj.tensors)}")
|
| 75 |
+
print(f" 元數據欄位: {len(gguf_mmproj.fields)}")
|
| 76 |
+
|
| 77 |
+
print("\n✅ GGUF 文件完整性檢查通過")
|
| 78 |
+
except Exception as e:
|
| 79 |
+
print(f"\n❌ 檢查失敗: {e}")
|
| 80 |
+
sys.exit(1)
|
| 81 |
+
EOF
|
| 82 |
+
|
| 83 |
+
if [ $? -ne 0 ]; then
|
| 84 |
+
echo -e "${RED}✗ GGUF 文件檢查失敗${NC}"
|
| 85 |
+
exit 1
|
| 86 |
+
fi
|
| 87 |
+
echo
|
| 88 |
+
|
| 89 |
+
# 測試 2: LLM 載入測試
|
| 90 |
+
echo "=========================================="
|
| 91 |
+
echo "測試 2: LLM 載入測試"
|
| 92 |
+
echo "=========================================="
|
| 93 |
+
|
| 94 |
+
timeout 30 $LLAMA_CLI \
|
| 95 |
+
--model youtu-parsing.gguf \
|
| 96 |
+
-c 2048 \
|
| 97 |
+
-p "Hello" \
|
| 98 |
+
-n 0 2>&1 | head -50
|
| 99 |
+
|
| 100 |
+
if [ $? -eq 0 ] || [ $? -eq 124 ]; then
|
| 101 |
+
echo
|
| 102 |
+
echo -e "${GREEN}✓${NC} LLM 載入測試通過"
|
| 103 |
+
else
|
| 104 |
+
echo
|
| 105 |
+
echo -e "${RED}✗${NC} LLM 載入測試失敗"
|
| 106 |
+
exit 1
|
| 107 |
+
fi
|
| 108 |
+
echo
|
| 109 |
+
|
| 110 |
+
# 測試 3: Vision-Language 載入測試
|
| 111 |
+
echo "=========================================="
|
| 112 |
+
echo "測試 3: Vision-Language 載入測試"
|
| 113 |
+
echo "=========================================="
|
| 114 |
+
|
| 115 |
+
timeout 30 $LLAMA_MTMD \
|
| 116 |
+
--model youtu-parsing.gguf \
|
| 117 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 118 |
+
-c 2048 2>&1 | head -50
|
| 119 |
+
|
| 120 |
+
if [ $? -eq 0 ] || [ $? -eq 124 ]; then
|
| 121 |
+
echo
|
| 122 |
+
echo -e "${GREEN}✓${NC} Vision-Language 載入測試通過"
|
| 123 |
+
else
|
| 124 |
+
echo
|
| 125 |
+
echo -e "${RED}✗${NC} Vision-Language 載入測試失敗"
|
| 126 |
+
exit 1
|
| 127 |
+
fi
|
| 128 |
+
echo
|
| 129 |
+
|
| 130 |
+
# 測試 4: 簡單推理測試 (如果有測試圖片)
|
| 131 |
+
echo "=========================================="
|
| 132 |
+
echo "測試 4: 簡單推理測試"
|
| 133 |
+
echo "=========================================="
|
| 134 |
+
|
| 135 |
+
if [ -f "test_image.jpg" ] || [ -f "test_image.png" ]; then
|
| 136 |
+
TEST_IMAGE=$(ls test_image.* 2>/dev/null | head -1)
|
| 137 |
+
echo "使用測試圖片: $TEST_IMAGE"
|
| 138 |
+
|
| 139 |
+
timeout 60 $LLAMA_MTMD \
|
| 140 |
+
--model youtu-parsing.gguf \
|
| 141 |
+
--mmproj youtu-parsing-mmproj.gguf \
|
| 142 |
+
--image "$TEST_IMAGE" \
|
| 143 |
+
-p "描述這張圖片" \
|
| 144 |
+
-c 2048 \
|
| 145 |
+
-n 100 \
|
| 146 |
+
--temp 0.1 2>&1 | tail -20
|
| 147 |
+
|
| 148 |
+
if [ $? -eq 0 ] || [ $? -eq 124 ]; then
|
| 149 |
+
echo
|
| 150 |
+
echo -e "${GREEN}✓${NC} 推理測試通過"
|
| 151 |
+
else
|
| 152 |
+
echo
|
| 153 |
+
echo -e "${YELLOW}!${NC} 推理測試可能失敗,但模型載入正常"
|
| 154 |
+
fi
|
| 155 |
+
else
|
| 156 |
+
echo "跳過 (未找到 test_image.jpg/png)"
|
| 157 |
+
fi
|
| 158 |
+
echo
|
| 159 |
+
|
| 160 |
+
# 總結
|
| 161 |
+
echo "=========================================="
|
| 162 |
+
echo -e "${GREEN}🎉 所有測試通過!${NC}"
|
| 163 |
+
echo "=========================================="
|
| 164 |
+
echo
|
| 165 |
+
echo "模型已準備就緒,可以使用以下命令進行推理:"
|
| 166 |
+
echo
|
| 167 |
+
echo "1. 純文本推理:"
|
| 168 |
+
echo " $LLAMA_CLI --model youtu-parsing.gguf -p '你的提示詞'"
|
| 169 |
+
echo
|
| 170 |
+
echo "2. 圖像理解:"
|
| 171 |
+
echo " $LLAMA_MTMD --model youtu-parsing.gguf --mmproj youtu-parsing-mmproj.gguf --image image.jpg -p '描述這張圖片'"
|
| 172 |
+
echo
|
| 173 |
+
echo "3. 啟動 API 服務器:"
|
| 174 |
+
echo " llama.cpp/build/bin/llama-server --model youtu-parsing.gguf --mmproj youtu-parsing-mmproj.gguf --port 8080"
|
| 175 |
+
echo
|