import torch
from transformers import AutoConfig, AutoModelForCausalLM

# Load the consolidated fp32 state dict onto the CPU
state_dict = torch.load("/path/to/consolidated_model_fp32.pth", map_location="cpu")

# Build the config from the saved config.json
config = AutoConfig.from_pretrained(
    "/capstor/store/cscs/swissai/a06/meditron/models/meditron_CHUV_2/config.json"
)

# Pass None as the model path: per the transformers docs, from_pretrained
# accepts None when both config and state_dict are supplied explicitly
model = AutoModelForCausalLM.from_pretrained(
    None,
    config=config,
    state_dict=state_dict,
    torch_dtype=torch.bfloat16,  # or torch.float16, depending on the target precision
)

model.save_pretrained("/your/output/dir", safe_serialization=True)  # writes .safetensors
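
# A sanity-check sketch (not from the original): reload the exported model
# with torch_dtype="auto" to confirm the weights were written in bfloat16,
# and export the tokenizer so the output directory is a self-contained
# checkpoint. This assumes tokenizer files sit next to config.json in the
# meditron_CHUV_2 directory -- adjust the path if they live elsewhere.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

reloaded = AutoModelForCausalLM.from_pretrained("/your/output/dir", torch_dtype="auto")
print(next(reloaded.parameters()).dtype)  # expect torch.bfloat16

tokenizer = AutoTokenizer.from_pretrained(
    "/capstor/store/cscs/swissai/a06/meditron/models/meditron_CHUV_2"  # assumed tokenizer location
)
tokenizer.save_pretrained("/your/output/dir")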