File size: 2,400 Bytes
af187da
b4d2667
843fd97
69d604f
033a68b
 
af187da
306ae96
033a68b
 
af187da
69d604f
033a68b
306ae96
7e34946
033a68b
69d604f
843fd97
af187da
306ae96
7e34946
033a68b
 
7e34946
033a68b
 
 
af187da
1a77822
 
 
 
 
 
 
 
 
 
033a68b
 
 
 
ac01b35
306ae96
033a68b
b4d2667
033a68b
843fd97
4461914
b4d2667
 
033a68b
7e34946
69d604f
 
2c75b99
 
 
69d604f
2c75b99
 
 
 
 
 
 
 
 
 
69d604f
af187da
7e34946
af187da
033a68b
69d604f
 
 
af187da
 
306ae96
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import torch
import gradio as gr
import spaces
import re # Metin temizleme için eklendi
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# --- Model settings ---
# Base 20B model plus a Turkish spelling/grammar-correction LoRA adapter.
base_model_name = "unsloth/gpt-oss-20b"
adapter_model_name = "userdotcs/gpt-oss-20b-turkish-correction-adapter"

print("Model yükleniyor...")  # Turkish: "Loading model..." (user-facing, kept as-is)
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# bfloat16 + device_map="auto": lets accelerate place layers on the available
# GPU(s) (and offload if needed) instead of pinning to a single device.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)

# Layer the fine-tuned adapter weights on top of the base model, then
# switch to inference mode (disables dropout etc.).
model = PeftModel.from_pretrained(base_model, adapter_model_name)
model.eval()

@spaces.GPU(duration=120)
def fix_text(input_text):
    """Correct Turkish spelling/grammar in *input_text* with the adapted model.

    Args:
        input_text: Raw user text; may be empty or whitespace-only.

    Returns:
        The model's cleaned final answer as a stripped string, or "" for
        blank input.
    """
    # Guard: nothing to correct for empty/whitespace-only input.
    if not input_text or not input_text.strip():
        return ""

    messages = [
        {
            "role": "system",
            "content": "You are an intelligent assistant that corrects Turkish spelling and grammar mistakes."
        },
        {
            "role": "user",
            "content": f"Fix typos in the text:\n{input_text}"
        }
    ]

    # FIX: was .to("cuda") hardcoded. The model is loaded with
    # device_map="auto", so inputs must follow the model's actual device
    # (may be a specific GPU index, or CPU if offloaded).
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs,
            max_new_tokens=16_384,
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode only the newly generated tokens (drop the echoed prompt).
    input_length = inputs.shape[1]
    full_response = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)

    # --- Extraction logic ---
    # The model's raw output contains reasoning followed by an
    # "assistantfinal" marker; the actual answer is everything after the
    # LAST occurrence of that marker.
    separator = "assistantfinal"

    if separator in full_response:
        # Split on the marker and keep the last (rightmost) piece.
        clean_response = full_response.split(separator)[-1]
    else:
        # No marker present: heuristically take the last line, since the
        # analysis usually comes first and the answer last.
        lines = full_response.strip().split('\n')
        clean_response = lines[-1] if lines else full_response

    return clean_response.strip()

# UI: one textbox in, corrected text out.
demo = gr.Interface(
    fn=fix_text,
    inputs=gr.Textbox(label="Input", lines=3),
    outputs=gr.Textbox(label="Output", lines=3),
    title="gpt-oss-20b Turkish correction"
)

# Launch the Gradio app only when run as a script (Spaces also imports this module).
if __name__ == "__main__":
    demo.launch()