Spaces: Sleeping
import gradio as gr
from huggingface_hub import snapshot_download
from accelerate import Accelerator
from accelerate import init_empty_weights
from accelerate.utils import BnbQuantizationConfig
from accelerate.utils import load_and_quantize_model

# Local minGPT implementation; provides the model skeleton the checkpoint fits.
from mingpt.model import GPT
| model_path="marcsun13/gpt2-xl-linear-sharded" | |
def quantize(model_path=model_path):
    """Download a sharded GPT-2 XL checkpoint, quantize it to 8-bit with
    bitsandbytes, save the quantized weights, then reload them to verify
    the round trip.

    Intended as a Gradio click handler: takes no meaningful input beyond
    the Hub repo id and returns ``None``; progress is reported via print().

    Parameters
    ----------
    model_path : str
        Hugging Face Hub repo id of the sharded checkpoint.
    """
    # snapshot_download returns the LOCAL DIRECTORY PATH (a str) where the
    # checkpoint shards were cached -- it is not a model object.
    weights_location = snapshot_download(repo_id=model_path)
    print(f"checkpoint downloaded to {weights_location}")

    # 8-bit LLM.int8() quantization; threshold 6 matches the accelerate docs.
    bnb_quantization_config = BnbQuantizationConfig(
        load_in_8bit=True, llm_int8_threshold=6
    )

    # Rebuild the minGPT architecture the checkpoint was trained with.
    model_config = GPT.get_default_config()
    model_config.model_type = "gpt2-xl"
    model_config.vocab_size = 50257
    model_config.block_size = 1024

    # Allocate the model on the meta device so no real memory is consumed
    # until the quantized weights are streamed in.
    with init_empty_weights():
        empty_model = GPT(model_config)

    quantized_model = load_and_quantize_model(
        empty_model,
        weights_location=weights_location,
        bnb_quantization_config=bnb_quantization_config,
        device_map="auto",
    )

    # Persist the quantized weights locally, then reload them to confirm
    # the saved artifact is usable.
    accelerator = Accelerator()
    new_weights_location = "./model"
    accelerator.save_model(quantized_model, new_weights_location)

    quantized_model_from_saved = load_and_quantize_model(
        empty_model,
        weights_location=new_weights_location,
        bnb_quantization_config=bnb_quantization_config,
        device_map="auto",
    )
    print("Done")
# Minimal Gradio UI: a single button whose click kicks off the
# quantization demo (no inputs, no outputs).
with gr.Blocks() as app:
    run_button = gr.Button()
    run_button.click(quantize, None, None)
app.launch()