"""Add budget-ratio special tokens to RoboBrain-2 and save the extended model.

Loads the base Qwen2.5-VL checkpoint, registers ratio-marker strings as
additional special tokens, resizes the input embeddings to match the
enlarged vocabulary, and saves the model + tokenizer to a new directory.
"""
from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer
import torch

if __name__ == "__main__":
    BASE_MODEL = "/mnt/lyc/wuxinrui/RoboBrain-2/BAAI-RoboBrain2.0-7B"
    NEW_MODEL = "/mnt/lyc/wuxinrui/RoboBrain-2/BAAI-RoboBrain2.0-7B_budgetthinker_3ratio"

    # Load the original model and tokenizer (bf16 to keep precision consistent
    # with the source checkpoint).
    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.bfloat16,
    )
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

    # Register the budget-ratio marker tokens ("\n1/3\n", "\n2/3\n").
    # NOTE(review): range(2) yields only two markers although the target
    # directory name says "3ratio" — confirm whether "\n3/3\n" is intentionally
    # omitted or should be added here.
    bins_tokens = [f"\n{i+1}/3\n" for i in range(2)]
    num_added = tokenizer.add_special_tokens(
        {"additional_special_tokens": bins_tokens}
    )

    # BUG FIX: after enlarging the tokenizer vocabulary, the model's embedding
    # (and tied output) matrices must be resized to cover the new token ids —
    # otherwise the saved model indexes past the embedding table whenever a new
    # special token appears at inference time. This line was previously
    # commented out, leaving the saved checkpoint inconsistent with its
    # tokenizer.
    if num_added > 0:
        model.resize_token_embeddings(len(tokenizer))

    # Save the complete model and tokenizer together so they stay in sync.
    model.save_pretrained(NEW_MODEL, safe_serialization=True)
    tokenizer.save_pretrained(NEW_MODEL)

    # Reload to verify the persisted embedding table matches the new vocab.
    loaded_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(NEW_MODEL)
    print("验证词汇表大小:", len(loaded_model.get_input_embeddings().weight))