Spaces: Running on A10G
"""Create an 8-bit (bitsandbytes) quantized copy of recurrentgemma-9b-it
and upload it to the Hugging Face Hub.

Side effects: downloads the source checkpoint, then pushes the quantized
model and its tokenizer to the Hub under the caller's namespace.
Requires a GPU (device_map="auto" with load_in_8bit) and Hub credentials.
"""
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

SOURCE_MODEL_ID = "alpindale/recurrentgemma-9b-it"
TARGET_REPO_ID = "recurrentgemma-9b-it-8bit"

# llm_int8_skip_modules keeps the recurrent "temporal_block" layers out of
# int8 quantization — NOTE(review): presumably int8 degrades the recurrent
# state computation; confirm against the recurrentgemma architecture.
quant_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_skip_modules=["temporal_block"],
)

tokenizer = AutoTokenizer.from_pretrained(SOURCE_MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    SOURCE_MODEL_ID,
    device_map="auto",
    torch_dtype=torch.float16,  # compute dtype for the non-quantized modules
    quantization_config=quant_config,
)

# Fix: the original pushed only the model, leaving the target repo without
# tokenizer files and therefore unusable via from_pretrained alone.
model.push_to_hub(TARGET_REPO_ID)
tokenizer.push_to_hub(TARGET_REPO_ID)