File size: 1,587 Bytes
e3423e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
"""Minimal example: load LazuriMT and translate Turkish → Laz.

    pip install transformers peft bitsandbytes accelerate
    python example.py
"""
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hub repo ids: the base checkpoint and the PEFT adapter applied on top of it.
BASE = "unsloth/gemma-4-e4b-it-unsloth-bnb-4bit"
ADAPTER = "CidQuLimited/LazuriMT"

print(f"Loading base model: {BASE}")
# BASE names a pre-quantized bnb-4bit checkpoint, so its quantization settings
# are expected to come from the checkpoint's own config. The bare
# `load_in_4bit=True` kwarg is deprecated in transformers (in favour of
# `quantization_config`) and redundant for such a checkpoint, so it is not
# passed here.
model = AutoModelForCausalLM.from_pretrained(BASE, device_map="auto")
print(f"Loading adapter: {ADAPTER}")
# Wrap the base model with the adapter weights.
model = PeftModel.from_pretrained(model, ADAPTER)
# The tokenizer is loaded from the adapter repo, not from BASE.
tok = AutoTokenizer.from_pretrained(ADAPTER)
# Switch to inference mode (disables dropout etc.).
model.eval()


def translate(text: str, to: str = "lzz") -> str:
    """Translate a sentence between Turkish and Laz with the loaded model.

    Args:
        text: The sentence to translate.
        to: Target language. ``"lzz"`` translates Turkish → Laz; any other
            value (conventionally ``"tr"``) translates Laz → Turkish.

    Returns:
        The newly generated text, decoded with special tokens removed and
        surrounding whitespace stripped.
    """
    if to == "lzz":
        prompt = f"Translate this Turkish sentence into Laz (Lazuri):\n\n{text}"
    else:
        prompt = f"Translate this Laz (Lazuri) sentence into Turkish:\n\n{text}"
    # Request a dict explicitly: the return type of apply_chat_template is then
    # the same regardless of the library's default for `return_dict`, and the
    # attention mask is available to hand to generate().
    encoded = tok.apply_chat_template(
        [{"role": "user", "content": prompt}],
        tokenize=True, add_generation_prompt=True,
        return_tensors="pt", return_dict=True,
    ).to(model.device)
    prompt_ids = encoded["input_ids"]
    out = model.generate(
        input_ids=prompt_ids, attention_mask=encoded["attention_mask"],
        max_new_tokens=128, do_sample=False,
        no_repeat_ngram_size=3, repetition_penalty=1.15, num_beams=4,
    )
    # The output sequence starts with the prompt tokens; slice them off so only
    # the generated continuation is decoded.
    return tok.decode(out[0][prompt_ids.shape[1]:], skip_special_tokens=True).strip()


if __name__ == "__main__":
    # Demo run: translate a few Turkish sample sentences into Laz and print
    # each source sentence followed by its translation.
    samples = (
        "Merhaba, nasılsın?",
        "Bugün hava çok güzel.",
        "Su içmek istiyorum.",
    )
    for sentence in samples:
        # Print the source first so it is visible while generation runs.
        print(f"\n  TR: {sentence}")
        laz = translate(sentence)
        print(f"  LZ: {laz}")