from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint
from transformers import AutoTokenizer, AutoModelForCausalLM

ckpt = 'checkpoint-36'
origin_model_id = 'beomi/Llama-3-Open-Ko-8B'

# Load the fine-tuned weights from the local Trainer checkpoint; load the
# tokenizer from the original base model, since the checkpoint directory
# may not contain tokenizer files.
model = AutoModelForCausalLM.from_pretrained(ckpt)
tokenizer = AutoTokenizer.from_pretrained(origin_model_id)

# Llama 3 ships without a pad token, so reuse EOS for padding. No new token
# is added to the vocabulary, so resize_token_embeddings() is a no-op here,
# kept only as a safeguard in case the tokenizer ever grows.
tokenizer.pad_token = tokenizer.eos_token
model.resize_token_embeddings(len(tokenizer))

# If the checkpoint only contains DeepSpeed ZeRO shards (a global_step*/
# directory) rather than consolidated weights, reconstruct the fp32 state
# dict before pushing:
# state_dict = get_fp32_state_dict_from_zero_checkpoint(ckpt)
# model.load_state_dict(state_dict)

# Upload the tokenizer and model to the same Hub repo.
tokenizer.push_to_hub('ingeol/kosaul_ft_v0.3')
model.push_to_hub('ingeol/kosaul_ft_v0.3')
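
# A minimal sketch of the ZeRO-recovery path the commented-out lines above
# refer to, left commented out in the same style since it only applies when
# 'checkpoint-36' holds ZeRO-partitioned shards; it assumes the full fp32
# state dict fits in CPU memory. load_state_dict_from_zero_checkpoint() is
# the one-call variant DeepSpeed ships alongside
# get_fp32_state_dict_from_zero_checkpoint().
#
# from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint
# model = load_state_dict_from_zero_checkpoint(model, ckpt)
#
# DeepSpeed also writes a standalone zero_to_fp32.py script into the
# checkpoint directory, which can consolidate the shards offline:
#   python checkpoint-36/zero_to_fp32.py checkpoint-36 pytorch_model.bin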