userdotcs committed on
Commit
f2a39bb
·
verified ·
1 Parent(s): 978b96d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -0
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import gradio as gr
3
+ import spaces
4
+ import re # Metin temizleme için eklendi
5
+ from transformers import AutoModelForCausalLM, AutoTokenizer
6
+ from peft import PeftModel
7
+
8
# --- Model configuration ---
# Base checkpoint plus a LoRA adapter fine-tuned to translate
# foreign-origin words inside Turkish sentences.
base_model_name = "unsloth/gpt-oss-20b"
adapter_model_name = "userdotcs/gpt-oss-20b-turkish-foreign-origin-translator-adapter"

print("Model yükleniyor...")
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Load the 20B base model in bfloat16 and let accelerate decide device placement.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Attach the translation adapter on top of the base weights and
# switch to inference mode (disables dropout etc.).
model = PeftModel.from_pretrained(base_model, adapter_model_name)
model.eval()
24
@spaces.GPU(duration=120)
def fix_text(input_text):
    """Translate foreign-origin words in a Turkish sentence via the adapter model.

    Args:
        input_text: Raw user text. Empty or whitespace-only input short-circuits
            to an empty string without touching the model.

    Returns:
        The model's final answer as a stripped string, with any chain-of-thought
        / analysis prefix removed.
    """
    if not input_text or not input_text.strip():
        return ""

    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant that translates foreign-origin words in Turkish sentences."
        },
        {
            "role": "user",
            "content": f"Translate foreign-origin words in the text:\n{input_text}"
        }
    ]

    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)  # FIX: was hard-coded "cuda"; follow the model's actual placement

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs,
            max_new_tokens=16_384,
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode only the newly generated tokens, skipping the echoed prompt.
    input_length = inputs.shape[1]
    full_response = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)

    # --- Extraction logic ---
    # gpt-oss emits its reasoning before the answer; the final answer follows
    # the "assistantfinal" channel marker in the decoded text.
    separator = "assistantfinal"

    if separator in full_response:
        # Split on the marker and keep the last (rightmost) piece.
        clean_response = full_response.split(separator)[-1]
    else:
        # No marker present: heuristically take the last non-empty line,
        # since the actual answer usually comes after any analysis text.
        lines = full_response.strip().split('\n')
        clean_response = lines[-1] if lines else full_response

    return clean_response.strip()
71
+
72
# --- Gradio UI wiring ---
# Single text-in / text-out interface around fix_text.
input_box = gr.Textbox(label="Input", lines=3)
output_box = gr.Textbox(label="Output", lines=3)

demo = gr.Interface(
    fn=fix_text,
    inputs=input_box,
    outputs=output_box,
    title="gpt-oss-20b Turkish Foreign Word Translation",
)

if __name__ == "__main__":
    demo.launch()