File size: 2,400 Bytes
af187da b4d2667 843fd97 69d604f 033a68b af187da 306ae96 033a68b af187da 69d604f 033a68b 306ae96 7e34946 033a68b 69d604f 843fd97 af187da 306ae96 7e34946 033a68b 7e34946 033a68b af187da 1a77822 033a68b ac01b35 306ae96 033a68b b4d2667 033a68b 843fd97 4461914 b4d2667 033a68b 7e34946 69d604f 2c75b99 69d604f 2c75b99 69d604f af187da 7e34946 af187da 033a68b 69d604f af187da 306ae96 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
import torch
import gradio as gr
import spaces
import re  # added for text cleanup
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
# --- Model settings ---
# Base 20B model plus a LoRA adapter fine-tuned for Turkish spelling/grammar correction.
base_model_name = "unsloth/gpt-oss-20b"
adapter_model_name = "userdotcs/gpt-oss-20b-turkish-correction-adapter"
print("Model yükleniyor...")  # "Loading model..." (user-facing log, left in Turkish)
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.bfloat16,  # load weights in bf16 to halve memory vs fp32
    device_map="auto"  # let accelerate decide device placement (GPU/CPU offload)
)
# Attach the correction adapter on top of the frozen base model.
model = PeftModel.from_pretrained(base_model, adapter_model_name)
model.eval()  # inference mode: disables dropout etc.
@spaces.GPU(duration=120)
def fix_text(input_text):
    """Correct Turkish spelling and grammar mistakes in *input_text*.

    Builds a chat prompt, generates with the adapter-augmented model, and
    strips the model's reasoning ("analysis") channel from the output.

    Args:
        input_text: Raw user text; empty/whitespace-only input returns "".

    Returns:
        The corrected text, stripped of surrounding whitespace.
    """
    if not input_text or not input_text.strip():
        return ""
    messages = [
        {
            "role": "system",
            "content": "You are an intelligent assistant that corrects Turkish spelling and grammar mistakes."
        },
        {
            "role": "user",
            "content": f"Fix typos in the text:\n{input_text}"
        }
    ]
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)  # fix: follow device_map="auto" placement instead of hardcoding "cuda"
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs,
            max_new_tokens=16_384,
            pad_token_id=tokenizer.eos_token_id
        )
    # Decode only the newly generated tokens, skipping the prompt.
    input_length = inputs.shape[1]
    full_response = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
    return _extract_answer(full_response)


def _extract_answer(full_response):
    """Extract the final answer from the model's raw decoded output.

    gpt-oss emits an "analysis" channel before the answer; the answer is
    introduced by the literal marker "assistantfinal". If the marker is
    absent, fall back to the last line, where the answer usually lands.
    """
    separator = "assistantfinal"
    if separator in full_response:
        # Split on the marker and keep the last (rightmost) piece.
        return full_response.split(separator)[-1].strip()
    # No marker: take the last non-prompt line as a best-effort answer.
    lines = full_response.strip().split('\n')
    clean_response = lines[-1] if lines else full_response
    return clean_response.strip()
# UI: single textbox in, single textbox out.
demo = gr.Interface(
    fn=fix_text,
    title="gpt-oss-20b Turkish correction",
    inputs=gr.Textbox(label="Input", lines=3),
    outputs=gr.Textbox(label="Output", lines=3),
)

if __name__ == "__main__":
    demo.launch()