# Source: Common-Models-Collection / add_special_tokens_3ratio.py
# Uploaded by Xin-Rui using the upload-large-folder tool (commit c0f3f18, verified).
from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer
import torch
def merge_special_tokens(existing: list[str], new: list[str]) -> list[str]:
    """Return ``existing`` followed by the entries of ``new`` not already present.

    Order is preserved and duplicates are dropped, so the result can be passed
    to ``tokenizer.add_special_tokens`` without discarding the tokens that were
    already registered as additional special tokens.

    Args:
        existing: Tokens currently registered on the tokenizer.
        new: Tokens to append.

    Returns:
        A new list; neither argument is modified.
    """
    merged = list(existing)
    seen = set(merged)
    for token in new:
        if token not in seen:
            merged.append(token)
            seen.add(token)
    return merged


if __name__ == "__main__":
    BASE_MODEL = "/mnt/lyc/wuxinrui/RoboBrain-2/BAAI-RoboBrain2.0-7B"
    NEW_MODEL = '/mnt/lyc/wuxinrui/RoboBrain-2/BAAI-RoboBrain2.0-7B_budgetthinker_3ratio'

    # Load the original model and tokenizer.
    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.bfloat16,  # keep the saved precision consistent
    )
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

    # Budget markers "1/3" and "2/3", each wrapped in newlines.
    bins_tokens = [f"\n<remaining>{i+1}/3</remaining>\n" for i in range(2)]

    # add_special_tokens() replaces the current `additional_special_tokens`
    # list by default, which would demote the tokens the base checkpoint
    # already registered there. Pass the merged list so they stay special.
    existing_tokens = list(getattr(tokenizer, "additional_special_tokens", None) or [])
    tokenizer.add_special_tokens(
        {'additional_special_tokens': merge_special_tokens(existing_tokens, bins_tokens)}
    )

    # Grow the embedding table only when the tokenizer no longer fits in it.
    # NOTE(review): the checkpoint's table may be padded beyond the tokenizer
    # size (presumably why the unconditional resize was disabled); an
    # unconditional resize to len(tokenizer) would shrink it in that case.
    embedding_rows = model.get_input_embeddings().weight.shape[0]
    if len(tokenizer) > embedding_rows:
        model.resize_token_embeddings(len(tokenizer))

    # Save the complete model together with the updated tokenizer.
    model.save_pretrained(NEW_MODEL, safe_serialization=True)
    tokenizer.save_pretrained(NEW_MODEL)

    # Verify the saved checkpoint: reload it in the same dtype and make sure
    # every token id has an embedding row.
    loaded_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        NEW_MODEL,
        torch_dtype=torch.bfloat16,
    )
    loaded_rows = len(loaded_model.get_input_embeddings().weight)
    print("验证词汇表大小:", loaded_rows)
    if loaded_rows < len(tokenizer):
        raise RuntimeError(
            f"Embedding table has {loaded_rows} rows but the tokenizer "
            f"defines {len(tokenizer)} tokens"
        )