# Source: Hugging Face Space file (commit 1e639fb, 592 bytes)
# optimized_loading.py
import torch
from accelerate import infer_auto_device_map
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# 4-bit quantization (reduces weight memory by roughly 75% vs fp16).
# Passing `load_in_4bit=True` straight to `from_pretrained` is deprecated in
# recent transformers releases; the supported path is a BitsAndBytesConfig,
# which also makes the compute dtype explicit instead of implied.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # or load_in_8bit=True for 8-bit
    bnb_4bit_compute_dtype=torch.float16,   # run matmuls in fp16, not fp32
)
model = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    quantization_config=quant_config,
    device_map="auto",       # let accelerate place layers on available devices
    torch_dtype=torch.float16,
)
# CPU offloading (for low RAM): weights that do not fit on the GPU are
# spilled to host RAM and, beyond that, to the "offload" directory on disk.
# NOTE(review): this rebinds `model`, replacing the instance loaded above —
# only one of the two recipes is in effect after this line.
model = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    offload_state_dict=True,    # stream the state dict to disk during load
    offload_folder="offload",   # where offloaded shards are written
    device_map="auto",
)