|
|
import os |
|
|
import shutil |
|
|
import torch |
|
|
from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig |
|
|
|
|
|
|
|
|
# Directory containing the original (full-precision) DeepSeek-OCR checkout:
# weights, config and the trust_remote_code Python architecture files.
INPUT_PATH = "/home/nashen/deepseek-ocr/DeepSeek-OCR-master/DeepSeek-OCR-vllm/model/"


# Destination folder for the 4-bit quantized, self-contained model package.
OUTPUT_PATH = "./DeepSeek-OCR-4bit-Quantized"
|
|
|
|
|
|
|
|
print("⏳ 正在加载并量化模型 (这可能需要几分钟)...")

# Submodules excluded from 4-bit quantization and kept in full precision.
# NOTE(review): presumably the vision towers / projector are skipped to
# preserve OCR quality, and lm_head / embed_tokens by common bnb convention
# — list taken as-is from the author; confirm against the model definition.
_FULL_PRECISION_MODULES = [
    "sam_model", "model.sam_model",
    "vision_model", "model.vision_model",
    "projector", "model.projector",
    "lm_head", "embed_tokens",
]

# bitsandbytes NF4 config with nested (double) quantization;
# matmuls are carried out in bfloat16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    llm_int8_skip_modules=_FULL_PRECISION_MODULES,
)
|
|
|
|
|
|
|
|
# Load the checkpoint with on-the-fly 4-bit quantization applied;
# device_map="auto" lets accelerate decide the device placement.
_load_options = {
    "trust_remote_code": True,
    "quantization_config": quantization_config,
    "device_map": "auto",
}
model = AutoModel.from_pretrained(INPUT_PATH, **_load_options)

# Tokenizer is loaded untouched — quantization only affects the weights.
tokenizer = AutoTokenizer.from_pretrained(INPUT_PATH, trust_remote_code=True)
|
|
|
|
|
print("✅ 模型加载完成,准备保存...")


# Persist the already-quantized weights in safetensors format — this is
# what shrinks the output folder relative to the full-precision source.
model.save_pretrained(OUTPUT_PATH, safe_serialization=True)

# Save tokenizer files alongside so the folder is loadable on its own.
tokenizer.save_pretrained(OUTPUT_PATH)


print(f"✅ 权重已保存至: {OUTPUT_PATH}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _copy_support_files(src_dir, dst_dir):
    """Copy the model's Python/JSON support files into the output folder.

    Copies every ``*.py`` and ``*.json`` file from *src_dir* to *dst_dir*
    (the trust_remote_code architecture modules, tokenizer files,
    processor config, ...).  A file already present in *dst_dir* whose
    name contains ``"config"`` is left untouched, so the quantized
    ``config.json`` written by ``save_pretrained`` is not clobbered by
    the original full-precision one.

    NOTE(review): the original script also declared a ``files_to_copy``
    whitelist that was never used — the loop below (copy all .py/.json)
    is the behavior that actually ran, so the dead list was removed.

    Returns the list of filenames that were actually copied.
    """
    copied = []
    for filename in os.listdir(src_dir):
        if not (filename.endswith(".py") or filename.endswith(".json")):
            continue
        dst = os.path.join(dst_dir, filename)
        # Preserve the (quantized) config files emitted by save_pretrained.
        if os.path.exists(dst) and "config" in filename:
            continue
        shutil.copy2(os.path.join(src_dir, filename), dst)
        copied.append(filename)
    return copied


print("📦 正在复制 Python 架构文件...")
_copy_support_files(INPUT_PATH, OUTPUT_PATH)


print(f"🎉 打包完成!成品位于: {OUTPUT_PATH}")
# Plain string literal — the original used an f-string with no placeholders.
print(" 该文件夹现在的体积应该是 ~2.7 GB 左右。")
|
|
|