Piyazon committed on
Commit
b58e253
·
1 Parent(s): 5309fbc

Fix the code to split the input into sentences and translate them sentence by sentence

Browse files
Files changed (2) hide show
  1. .gitignore +2 -1
  2. app.py +10 -4
.gitignore CHANGED
@@ -2,4 +2,5 @@ __pycache__/
2
  DS_Store
3
  .env
4
  .vscode/
5
- *.pyc
 
 
2
  DS_Store
3
  .env
4
  .vscode/
5
+ *.pyc
6
+ .gradio/
app.py CHANGED
@@ -4,6 +4,7 @@ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
  import logging
5
  from detect_language import detect_language
6
  import traceback
 
7
 
8
  # --- 1. SETUP ---
9
  logging.getLogger("transformers.tokenization_utils_base").setLevel(logging.ERROR)
@@ -48,10 +49,13 @@ def predict(text, tgt_choice):
48
  # B. GET TARGET
49
  tgt_lang = LANG_CODES.get(tgt_choice, "uig_Arab")
50
  tokenizer.src_lang = src_lang
 
 
 
51
 
52
  # C. GENERATE
53
  inputs = tokenizer(
54
- text,
55
  return_tensors="pt",
56
  padding=True,
57
  truncation=True,
@@ -69,8 +73,10 @@ def predict(text, tgt_choice):
69
  no_repeat_ngram_size=3
70
  )
71
 
72
- translation = tokenizer.batch_decode(out, skip_special_tokens=True)[0]
73
-
 
 
74
  print(f"From {src_lang} to {tgt_lang}: {translation}")
75
 
76
  # D. HANDLE UI DIRECTION
@@ -161,7 +167,7 @@ js_logic = """
161
  setTimeout(adjustTextareaHeight, 1000);
162
  """
163
 
164
- demo.launch(css=custom_css, js=js_logic, theme=gr.themes.Ocean(), share=True)
165
 
166
 
167
 
 
4
  import logging
5
  from detect_language import detect_language
6
  import traceback
7
+ import re
8
 
9
  # --- 1. SETUP ---
10
  logging.getLogger("transformers.tokenization_utils_base").setLevel(logging.ERROR)
 
49
  # B. GET TARGET
50
  tgt_lang = LANG_CODES.get(tgt_choice, "uig_Arab")
51
  tokenizer.src_lang = src_lang
52
+
53
+ sentences = re.split(r'(?<=[.!?؟])\s+|(?<=[。!?])', text.strip())
54
+ print(f"Splitting paragraph into {len(sentences)} sentences to translate...")
55
 
56
  # C. GENERATE
57
  inputs = tokenizer(
58
+ sentences,
59
  return_tensors="pt",
60
  padding=True,
61
  truncation=True,
 
73
  no_repeat_ngram_size=3
74
  )
75
 
76
+ translated_sentences = tokenizer.batch_decode(out, skip_special_tokens=True)
77
+
78
+ translation = " ".join(translated_sentences)
79
+
80
  print(f"From {src_lang} to {tgt_lang}: {translation}")
81
 
82
  # D. HANDLE UI DIRECTION
 
167
  setTimeout(adjustTextareaHeight, 1000);
168
  """
169
 
170
+ demo.launch(css=custom_css, js=js_logic, theme=gr.themes.Ocean())
171
 
172
 
173