AlimOmar committed on
Commit
8d3636c
·
1 Parent(s): 765e2da

test ali kurban

Browse files
Files changed (1) hide show
  1. app.py +5 -1
app.py CHANGED
@@ -24,6 +24,10 @@ app.add_middleware(
24
  def fix_string(batch):
25
  batch = batch.lower()
26
  batch = unicodedata.normalize('NFKC', batch)
 
 
 
 
27
  ## replace ug chars
28
  # Replace 'ژ' with 'ج'
29
  batch = batch.replace('ژ', 'ج')
@@ -79,7 +83,7 @@ def fix_string(batch):
79
  batch = batch.replace(eng_char, uyghur_char)
80
  # batch = batch.replace('e', ' ئې ')
81
  # Optional: Collapse multiple spaces into one
82
- batch = ' '.join(batch.split())
83
  return batch
84
 
85
 
 
24
  def fix_string(batch):
25
  batch = batch.lower()
26
  batch = unicodedata.normalize('NFKC', batch)
27
+ extra_punctuation = "–؛;،؟?«»‹›−—¬”“•…" # Add your additional custom punctuation from the training set here
28
+ all_punctuation = string.punctuation + extra_punctuation
29
+ for char in all_punctuation:
30
+ batch = batch.replace(char, ' ')
31
  ## replace ug chars
32
  # Replace 'ژ' with 'ج'
33
  batch = batch.replace('ژ', 'ج')
 
83
  batch = batch.replace(eng_char, uyghur_char)
84
  # batch = batch.replace('e', ' ئې ')
85
  # Optional: Collapse multiple spaces into one
86
+ # batch = ' '.join(batch.split())
87
  return batch
88
 
89