PodcastNER-GPTJ / app.py
DavidFM43's picture
Set half revision from base model weights
e8e65ad
raw
history blame
720 Bytes
import gradio as gr
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
peft_model_id = "hackathon-somos-nlp-2023/bertin-gpt-j-6b-ner-es"
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(
config.base_model_name_or_path,
return_dict=True,
load_in_8bit=True,
device_map="auto",
revision="half",
# low_cpu_mem_usage=True
)
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
# Load the Lora model
model = PeftModel.from_pretrained(model, peft_model_id)
def greet(name):
return "Hello " + name + "!!"
iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch()