dendrokronos / app.py
devingulliver's picture
Update app.py
5b05143 verified
raw
history blame
486 Bytes
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# Load a causal-LM checkpoint with 8-bit bitsandbytes quantization and upload
# the resulting model to the Hub under a new repository name.
SOURCE_REPO = "alpindale/recurrentgemma-9b-it"
TARGET_REPO = "recurrentgemma-9b-it-8bit"

# 8-bit loading; the "temporal_block" entry is passed as a module to skip
# during int8 conversion.
quant_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_skip_modules=["temporal_block"],
)

# NOTE(review): the tokenizer is loaded here but is neither used nor pushed
# by the code below -- confirm whether it should be uploaded alongside the model.
tokenizer = AutoTokenizer.from_pretrained(SOURCE_REPO)

# Options forwarded to from_pretrained: automatic device placement, float16
# for the requested torch dtype, and the quantization settings defined above.
_load_options = {
    "device_map": "auto",
    "torch_dtype": torch.float16,
    "quantization_config": quant_config,
}
model = AutoModelForCausalLM.from_pretrained(SOURCE_REPO, **_load_options)

# Upload the loaded (quantized) model to the target repository.
model.push_to_hub(TARGET_REPO)