import torch
import gradio as gr
import spaces
import re # Metin temizleme için eklendi
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# --- Model settings ---
# Base checkpoint plus a fine-tuned LoRA adapter for the Turkish
# foreign-origin-word translation task.
base_model_name = "unsloth/gpt-oss-20b"
adapter_model_name = "userdotcs/gpt-oss-20b-turkish-foreign-origin-translator-adapter"

print("Model yükleniyor...")  # "Loading model..." — user-facing string left as-is
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Load base weights in bfloat16; device_map="auto" lets accelerate place
# layers on whatever devices are available (GPU/CPU offload).
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)

# Attach the PEFT/LoRA adapter on top of the frozen base weights.
model = PeftModel.from_pretrained(base_model, adapter_model_name)
model.eval()  # inference mode: disables dropout etc.

@spaces.GPU(duration=120)
def fix_text(input_text):
    """Translate foreign-origin words in a Turkish text using the adapted model.

    Args:
        input_text: Raw text from the Gradio textbox; may be None or blank.

    Returns:
        The extracted model answer as a stripped string; "" for blank input.
    """
    # Guard clause: empty or whitespace-only input yields an empty answer.
    if not input_text or not input_text.strip():
        return ""

    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant that translates foreign-origin words in Turkish sentences."
        },
        {
            "role": "user",
            "content": f"Translate foreign-origin words in the text:\n{input_text}"
        }
    ]

    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)  # FIX: was hard-coded "cuda"; with device_map="auto" the
                        # model's first device may differ (and "cuda" crashes on CPU-only)

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs,
            max_new_tokens=16_384,
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode only the newly generated tokens (drop the echoed prompt).
    input_length = inputs.shape[1]
    full_response = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)

    # --- Answer extraction ---
    # gpt-oss-style output contains a reasoning channel followed by an
    # "assistantfinal" marker before the actual answer; keep only the part
    # after the last occurrence of the marker.
    separator = "assistantfinal"

    if separator in full_response:
        clean_response = full_response.split(separator)[-1]
    else:
        # No marker present (e.g. only an "analysis" section). The real answer
        # is usually the last line, so fall back to that heuristic.
        lines = full_response.strip().split('\n')
        clean_response = lines[-1] if lines else full_response

    return clean_response.strip()

# UI: a minimal Gradio interface — one input textbox, one output textbox,
# wired to fix_text.
demo = gr.Interface(
    fn=fix_text,
    inputs=gr.Textbox(label="Input", lines=3),
    outputs=gr.Textbox(label="Output", lines=3),
    title="gpt-oss-20b Turkish Foreign Word Translation"
)

# Launch the web app only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()