from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer
import torch

# Source checkpoint and destination for the token-augmented copy.
BASE_MODEL = "/mnt/lyc/wuxinrui/RoboBrain-2/BAAI-RoboBrain2.0-7B"
NEW_MODEL = '/mnt/lyc/wuxinrui/RoboBrain-2/BAAI-RoboBrain2.0-7B_budgetthinker_3ratio'

if __name__ == "__main__":
    # Load the original model and tokenizer.
    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.bfloat16,  # keep precision consistent with the checkpoint
    )
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

    # Add budget-marker special tokens: "<remaining>1/3</remaining>" and
    # "<remaining>2/3</remaining>" (range(2) deliberately omits 3/3).
    bins_tokens = [f"\n<remaining>{i+1}/3</remaining>\n" for i in range(2)]
    num_added = tokenizer.add_special_tokens({'additional_special_tokens': bins_tokens})

    if num_added > 0:
        # BUG FIX: the original left this commented out. Without resizing,
        # the saved embedding matrix does not cover the new token ids, so
        # any use of the added tokens would index out of range.
        model.resize_token_embeddings(len(tokenizer))

    # Save the complete model and tokenizer.
    model.save_pretrained(NEW_MODEL, safe_serialization=True)
    tokenizer.save_pretrained(NEW_MODEL)

    # Reload to verify the saved checkpoint; prints the embedding-table size,
    # which should now equal len(tokenizer).
    loaded_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(NEW_MODEL)
    print("验证词汇表大小:", len(loaded_model.get_input_embeddings().weight))