Update models/mlm_only_with_diacritics/char_indexer.py
Browse files
models/mlm_only_with_diacritics/char_indexer.py
CHANGED
|
@@ -8,7 +8,6 @@ LETTERS_IPA = 'ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜ
|
|
| 8 |
LATIN_LETTERS = 'abcdefghijklmnopqrstuvwxyz'
|
| 9 |
PHONEME_MASK = "M"
|
| 10 |
PHONEME_SEPARATOR = " "
|
| 11 |
-
# NOTE: '¤' is a valid 'unknown' character because it differs from all the characters above it. In English PL-BERT, 'U' was used as the unknown character, which was not ideal because it is part of the English alphabet.
|
| 12 |
UNKNOWN='U'
|
| 13 |
|
| 14 |
# Export all symbols:
|
|
|
|
| 8 |
LATIN_LETTERS = 'abcdefghijklmnopqrstuvwxyz'
|
| 9 |
PHONEME_MASK = "M"
|
| 10 |
PHONEME_SEPARATOR = " "
|
|
|
|
| 11 |
UNKNOWN='U'
|
| 12 |
|
| 13 |
# Export all symbols:
|