Spaces:
Running
Running
AlimOmar
committed on
Commit
·
8d3636c
1
Parent(s):
765e2da
test ali kurban
Browse files
app.py
CHANGED
|
@@ -24,6 +24,10 @@ app.add_middleware(
|
|
| 24 |
def fix_string(batch):
|
| 25 |
batch = batch.lower()
|
| 26 |
batch = unicodedata.normalize('NFKC', batch)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
## replace ug chars
|
| 28 |
# Replace 'ژ' with 'ج'
|
| 29 |
batch = batch.replace('ژ', 'ج')
|
|
@@ -79,7 +83,7 @@ def fix_string(batch):
|
|
| 79 |
batch = batch.replace(eng_char, uyghur_char)
|
| 80 |
# batch = batch.replace('e', ' ئې ')
|
| 81 |
# Optional: Collapse multiple spaces into one
|
| 82 |
-
batch = ' '.join(batch.split())
|
| 83 |
return batch
|
| 84 |
|
| 85 |
|
|
|
|
| 24 |
def fix_string(batch):
|
| 25 |
batch = batch.lower()
|
| 26 |
batch = unicodedata.normalize('NFKC', batch)
|
| 27 |
+
extra_punctuation = "–؛;،؟?«»‹›−—¬”“•…" # Add your additional custom punctuation from the training set here
|
| 28 |
+
all_punctuation = string.punctuation + extra_punctuation
|
| 29 |
+
for char in all_punctuation:
|
| 30 |
+
batch = batch.replace(char, ' ')
|
| 31 |
## replace ug chars
|
| 32 |
# Replace 'ژ' with 'ج'
|
| 33 |
batch = batch.replace('ژ', 'ج')
|
|
|
|
| 83 |
batch = batch.replace(eng_char, uyghur_char)
|
| 84 |
# batch = batch.replace('e', ' ئې ')
|
| 85 |
# Optional: Collapse multiple spaces into one
|
| 86 |
+
# batch = ' '.join(batch.split())
|
| 87 |
return batch
|
| 88 |
|
| 89 |
|