import torch
from transformers import AutoConfig, AutoModelForCausalLM

# Load the consolidated fp32 state dict onto the CPU
state_dict = torch.load("/path/to/consolidated_model_fp32.pth", map_location="cpu")

# Build the config from the saved config.json
config = AutoConfig.from_pretrained(
    "/capstor/store/cscs/swissai/a06/meditron/models/meditron_CHUV_2/config.json"
)

# Pass None as the model path: per the transformers docs, from_pretrained
# accepts None when both config and state_dict are supplied explicitly
model = AutoModelForCausalLM.from_pretrained(
    None,
    config=config,
    state_dict=state_dict,
    torch_dtype=torch.bfloat16,  # or torch.float16, depending on the target precision
)

model.save_pretrained("/your/output/dir", safe_serialization=True)  # writes .safetensors
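
# A sanity-check sketch (not from the original): reload the exported model
# with torch_dtype="auto" to confirm the weights were written in bfloat16,
# and export the tokenizer so the output directory is a self-contained
# checkpoint. This assumes tokenizer files sit next to config.json in the
# meditron_CHUV_2 directory -- adjust the path if they live elsewhere.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

reloaded = AutoModelForCausalLM.from_pretrained("/your/output/dir", torch_dtype="auto")
print(next(reloaded.parameters()).dtype)  # expect torch.bfloat16

tokenizer = AutoTokenizer.from_pretrained(
    "/capstor/store/cscs/swissai/a06/meditron/models/meditron_CHUV_2"  # assumed tokenizer location
)
tokenizer.save_pretrained("/your/output/dir")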