# LazuriMT / example.py
# CidQu's picture
# v0.1: research preview, chrF 24.66 on 200 TR->LZ test pairs
# e3423e2 verified
"""Minimal example: load LazuriMT and translate Turkish → Laz.

Usage:
    pip install transformers peft bitsandbytes accelerate
    python example.py
"""
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
# Hub identifiers for the base checkpoint and the LazuriMT adapter repo.
BASE = "unsloth/gemma-4-e4b-it-unsloth-bnb-4bit"
ADAPTER = "CidQuLimited/LazuriMT"

# Step 1: load the base causal LM, letting accelerate pick device placement.
# NOTE(review): recent transformers releases steer users towards
# `quantization_config=BitsAndBytesConfig(load_in_4bit=True)` instead of the
# bare `load_in_4bit` keyword — confirm against the installed version.
print(f"Loading base model: {BASE}")
model = AutoModelForCausalLM.from_pretrained(
    BASE,
    device_map="auto",
    load_in_4bit=True,
)

# Step 2: wrap the base model with the adapter weights and switch the wrapped
# model to evaluation mode for inference.
print(f"Loading adapter: {ADAPTER}")
model = PeftModel.from_pretrained(model, ADAPTER)
model.eval()

# Step 3: the tokenizer is loaded from the adapter repo (not from BASE).
tok = AutoTokenizer.from_pretrained(ADAPTER)
def translate(text: str, to: str = "lzz") -> str:
    """Translate a sentence between Turkish and Laz with the loaded model.

    Uses the module-level ``tok`` (tokenizer) and ``model`` objects.

    Args:
        text: Sentence to translate.
        to: Target language code: ``"lzz"`` for Turkish → Laz (the default)
            or ``"tr"`` for Laz → Turkish.

    Returns:
        The newly generated tokens decoded to text, with special tokens
        removed and surrounding whitespace stripped.

    Raises:
        ValueError: If ``to`` is neither ``"lzz"`` nor ``"tr"``.
    """
    if to == "lzz":
        prompt = f"Translate this Turkish sentence into Laz (Lazuri):\n\n{text}"
    elif to == "tr":
        prompt = f"Translate this Laz (Lazuri) sentence into Turkish:\n\n{text}"
    else:
        # Fail loudly: a mistyped code must not silently select the
        # Laz → Turkish prompt and translate in the wrong direction.
        raise ValueError(
            f"unsupported target language {to!r}; expected 'lzz' or 'tr'"
        )

    # Ask for a dict explicitly so the result has the same shape regardless of
    # the library's default, and so the attention mask is available.
    encoded = tok.apply_chat_template(
        [{"role": "user", "content": prompt}],
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    input_ids = encoded["input_ids"]

    # Deterministic decoding: 4 beams without sampling, with repetition
    # controls and a cap of 128 new tokens.
    out = model.generate(
        input_ids=input_ids,
        attention_mask=encoded.get("attention_mask"),
        max_new_tokens=128,
        do_sample=False,
        no_repeat_ngram_size=3,
        repetition_penalty=1.15,
        num_beams=4,
    )

    # The output sequence starts with the prompt tokens; keep only what was
    # generated after them.
    prompt_len = input_ids.shape[1]
    return tok.decode(out[0][prompt_len:], skip_special_tokens=True).strip()
if __name__ == "__main__":
    # Demo run: print each Turkish sample followed by its Laz translation.
    samples = (
        "Merhaba, nasılsın?",
        "Bugün hava çok güzel.",
        "Su içmek istiyorum.",
    )
    for sentence in samples:
        # Echo the source first so it is visible while generation runs.
        print(f"\n TR: {sentence}")
        laz = translate(sentence)
        print(f" LZ: {laz}")