Spaces:
Runtime error
Runtime error
File size: 872 Bytes
d0b3af7 8b0674e 6cc14e5 d0b3af7 8b0674e 2f9c1ea 8b0674e 2f9c1ea 8b0674e 2f9c1ea d0b3af7 8ad2f92 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
import gradio as gr
from transformers import (
AutoModelForCausalLM,
BitsAndBytesConfig,
pipeline
)
import torch
# --- Model loading: Vicuna-7B base + LoRA adapter, quantized to 8-bit ---

# Fix: `load_in_8bit` must live inside BitsAndBytesConfig. The original passed
# it both as a from_pretrained kwarg AND via quantization_config, which
# transformers treats as a conflicting/deprecated combination.
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,
    # Offload layers that don't fit as fp16 on CPU.
    llm_int8_enable_fp16_cpu_offload=True,
)

model_name = "lmsys/vicuna-7b-v1.5"

base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,  # stream weights to keep peak RAM down during load
    return_dict=True,
    # torch_dtype=torch.float16,
    # NOTE(review): bitsandbytes int8 kernels normally require a GPU;
    # confirm this Space actually runs with device_map="cpu".
    device_map="cpu",
    quantization_config=quantization_config,
)

# Fix: PeftModel was referenced below without ever being imported,
# which raised NameError at startup.
from peft import PeftModel

# Attach the fine-tuned LoRA adapter on top of the 8-bit base model.
# (The redundant load_in_8bit kwarg was dropped: quantization is already
# applied to base_model above.)
new_model = "emya/vicuna-7b-v1.5-steve-jobs-8bit-v1"
model = PeftModel.from_pretrained(base_model, new_model)
# Fix: Vicuna is a causal (decoder-only) LM, so the correct pipeline task is
# "text-generation". The original used "translation", which routes through
# seq2seq pre/post-processing and produces no "translation_text" key for this
# model. The tokenizer is given explicitly because a PeftModel instance may
# not resolve one automatically from its name_or_path.
pipe = pipeline("text-generation", model=model, tokenizer=model_name)


def predict(text):
    """Generate a short answer to *text* using the fine-tuned Vicuna model.

    Returns the generated continuation only (the prompt is stripped via
    return_full_text=False).
    """
    prompt = f"{text} (Answer in a few sentences)"
    return pipe(prompt, return_full_text=False)[0]["generated_text"]
# Minimal Gradio UI: a single text box in, a single text box out,
# wired straight to predict().
demo = gr.Interface(fn=predict, inputs="text", outputs="text")

demo.launch()
|