import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face access token, read from the "HF_Key" environment variable.
# May be None if the variable is unset; from_pretrained accepts token=None
# for public models. NOTE(review): the unusual casing "HF_Key" is preserved
# deliberately — renaming it would break existing deployments.
HF_KEY = os.getenv("HF_Key")

# Default checkpoint to load when no model name is passed explicitly.
MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"


def load_model(model_name: str = MODEL_NAME):
    """Load a causal-LM checkpoint and its tokenizer for CPU inference.

    Args:
        model_name: Hugging Face repo id of the checkpoint to load.
            Defaults to ``MODEL_NAME`` so existing callers are unaffected.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    print("Đang load model:", model_name)

    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        token=HF_KEY,
    )

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float32,  # CPU inference, so use float32
        low_cpu_mem_usage=True,     # stream weights to keep peak RAM down
        device_map="cpu",
        token=HF_KEY,
    )

    print("Model loaded thành công")
    return model, tokenizer