import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Quantize the weights to 8-bit with bitsandbytes, keeping the recurrent
# temporal_block modules in their original precision.
quant_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_skip_modules=["temporal_block"],
)

tokenizer = AutoTokenizer.from_pretrained("alpindale/recurrentgemma-9b-it")
model = AutoModelForCausalLM.from_pretrained(
    "alpindale/recurrentgemma-9b-it",
    device_map="auto",
    torch_dtype=torch.float16,
    quantization_config=quant_config,
)

# Upload the quantized model to the Hugging Face Hub.
model.push_to_hub("recurrentgemma-9b-it-8bit")
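
# A minimal usage sketch (an assumption, not part of the original snippet): once pushed,
# the 8-bit checkpoint can be reloaded directly, since the quantization config is stored
# alongside the weights. "your-username" below is a placeholder for whatever namespace
# the model was actually pushed to.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("your-username/recurrentgemma-9b-it-8bit")
model = AutoModelForCausalLM.from_pretrained(
    "your-username/recurrentgemma-9b-it-8bit",
    device_map="auto",
)

inputs = tokenizer("Write a haiku about recurrence.", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))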