Commit
·
3c5c50b
1
Parent(s):
cb95b8e
minor
Browse files
app.py
CHANGED
|
@@ -206,6 +206,8 @@ def map_punctuation(tok: str) -> str:
|
|
| 206 |
return "."
|
| 207 |
return tok
|
| 208 |
|
|
|
|
|
|
|
| 209 |
def tokenize_with_correct_unicode(text: str):
|
| 210 |
"""
|
| 211 |
Splits text by matching contiguous word tokens (including Devanagari matras)
|
|
|
|
| 206 |
return "."
|
| 207 |
return tok
|
| 208 |
|
| 209 |
+
import regex as re
|
| 210 |
+
|
| 211 |
def tokenize_with_correct_unicode(text: str):
|
| 212 |
"""
|
| 213 |
Splits text by matching contiguous word tokens (including Devanagari matras)
|