ko_gemma2_korean_sentiment / inference_quickstart.py
hanjunjung
[upload]
6b8f0f2
raw
history blame contribute delete
819 Bytes
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch
# Quickstart: load the base causal LM, attach the LoRA/PEFT adapter, and run
# one generation on a sample Korean comment.

# Hub id of the base model the adapter is applied on top of.
base_model = "rtzr/ko-gemma-2-9b-it"

# Directory holding the adapter files. "." means the current working
# directory, so run this script from inside the adapter folder, or point this
# at the adapter directory instead (e.g. "./ko-gemma2-9B-sentiment").
adapter_path = "."

# Hand-written chat-turn prompt: one user turn followed by an open model turn
# for the model to complete. The sample comment reads, in English:
# "Comment: This video was really moving. It brought me to tears."
# The text must be stored as real UTF-8 Korean; a mis-decoded (mojibake) copy
# would feed the model meaningless symbols.
prompt = """<start_of_turn>user
댓글: 이 영상 정말 감동이었습니다. 눈물이 났어요.
<end_of_turn>
<start_of_turn>model
"""

# NOTE(review): trust_remote_code=True allows code shipped in the hub repo to
# run locally -- confirm it is actually required for this base model.
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

# Load the base weights in half precision; device_map="auto" lets the library
# decide where to place the model's layers.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)

# Wrap the base model with the fine-tuned adapter weights.
model = PeftModel.from_pretrained(model, adapter_path)
model.eval()  # inference mode: turns off dropout and similar train-only layers

# Tokenize to PyTorch tensors and move them to the device the model reports.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# do_sample=False disables sampling; at most 1024 new tokens are generated.
outputs = model.generate(**inputs, max_new_tokens=1024, do_sample=False)

# Decode the first (only) sequence, dropping special tokens. For a causal LM
# this is expected to contain the prompt text followed by the completion.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))