Hugging Face Spaces status: Runtime error.
import torch

import gradio as gr
from peft import PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)
# Int8 quantization with fp16 CPU offload for modules that cannot be
# quantized. The 8-bit flag lives INSIDE BitsAndBytesConfig: passing
# `load_in_8bit=True` to `from_pretrained` alongside an explicit
# `quantization_config` raises a ValueError in recent transformers.
# NOTE(review): bitsandbytes int8 generally expects a CUDA device;
# with device_map="cpu" this may still fail — confirm the Space has a GPU
# or drop quantization entirely for CPU-only hardware.
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_enable_fp16_cpu_offload=True,
)

model_name = "lmsys/vicuna-7b-v1.5"

base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    device_map="cpu",
    quantization_config=quantization_config,
)

# LoRA adapter fine-tuned on top of the Vicuna base model.
new_model = "emya/vicuna-7b-v1.5-steve-jobs-8bit-v1"
model = PeftModel.from_pretrained(base_model, new_model)

# Vicuna is a causal LM, so the correct pipeline task is "text-generation"
# (the original used "translation", which crashed at runtime). A tokenizer
# must be passed explicitly when the model is given as an object.
tokenizer = AutoTokenizer.from_pretrained(model_name)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
def predict(text: str) -> str:
    """Generate a short answer to *text* with the fine-tuned Vicuna model.

    Args:
        text: The user's question or prompt.

    Returns:
        The model's generated answer (empty string if the pipeline output
        carries neither known key).
    """
    prompt = f"{text} (Answer in a few sentences)"
    result = pipe(prompt)[0]
    # A text-generation pipeline returns "generated_text"; the original code
    # indexed "translation_text" unconditionally, which only exists for
    # translation pipelines and raised a KeyError with a causal LM. Fall back
    # between the two keys so this works with either pipeline task.
    return result.get("generated_text", result.get("translation_text", ""))
# Minimal Gradio UI: a single text box in, a single text box out,
# wired to predict().
text_io = "text"
demo = gr.Interface(
    fn=predict,
    inputs=text_io,
    outputs=text_io,
)

# Start the Gradio server (blocking call; this is the Space's entry point).
demo.launch()