# WLU-Phi2 / load_from_checkpoint.py
# Script to load and run the model.
from transformers import AutoModelForCausalLM, AutoTokenizer
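
# Directory containing the fine-tuned checkpoint (model weights, adapter, and tokenizer).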
save_dir = "checkpoint-1750"
print("Loading model from checkpoint...")
model = AutoModelForCausalLM.from_pretrained(save_dir, load_in_8bit=True)
print("Attaching adapter...")
model.load_adapter(save_dir, adapter_name="Adapter1")
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(save_dir)
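
# Interactive loop: read a prompt, generate a response, print it.
# Type "exit" to quit.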
while True:
    text = input(">>> ")
    if text == "exit":
        break

    # Tokenize the prompt (truncated to 256 tokens) and move it to the GPU.
    model_inputs = tokenizer([text],
                             return_tensors="pt",
                             max_length=256,
                             truncation=True).to("cuda")

    generated_ids = model.generate(**model_inputs,
                                   max_length=1024,
                                   temperature=0.1,
                                   do_sample=True,
                                   pad_token_id=tokenizer.eos_token_id)
    response = tokenizer.batch_decode(generated_ids,
                                      skip_special_tokens=True)[0]

    # Remove the echoed question: if the output starts by repeating the prompt
    # up to its '?', keep only the text after it.
    if '?' in response:
        to_q = response.index('?')
        if len(text) - 1 <= to_q and response[:to_q] == text[:to_q]:
            response = response[to_q + 1:]

    print(f"\n\t<<< {response} >>>\n")