Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,32 +1,85 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import epitran
|
| 3 |
-
|
|
|
|
| 4 |
# TODO: reverse transliterate?
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
if __name__ == "__main__":
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
st.write(f"
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
input_text = st.text_area(label="Whatever you type here will be transliterated!", value="Gari langu linaloangama limejaa na mikunga")
|
| 20 |
|
| 21 |
-
combined_code = "-".join([iso_lang_code, iso_script_code])
|
| 22 |
-
st.write(f"Combined code: {combined_code}")
|
| 23 |
st.info("attempting to instantiate epitran transliterator for your language/script")
|
| 24 |
-
epi = epitran.Epitran(
|
| 25 |
|
| 26 |
st.info(f"transliterating `{input_text}`\n\tusing {epi}...")
|
| 27 |
transliteration = epi.transliterate(input_text)
|
| 28 |
|
| 29 |
st.success(transliteration)
|
|
|
|
| 30 |
|
| 31 |
|
| 32 |
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import epitran
|
| 3 |
+
import langcodes
|
| 4 |
+
from langcodes import LanguageTagError
|
| 5 |
# TODO: reverse transliterate?
|
| 6 |
|
| 7 |
+
|
| 8 |
+
def get_lang_description_from_mapping_name(string_to_check):
    """Return a human-readable description for an epitran mapping name.

    The name is handed to ``langcodes.get``; on success the ``key: value``
    pairs from ``describe()`` are joined with ", ".

    Mapping names may carry trailing pieces that are not valid language-tag
    subtags (the errors observed were for names ending in e.g. ``-red``,
    ``-east`` or ``-p``). When ``langcodes`` rejects the name for that
    reason, the last "-"-separated piece is dropped and the lookup is
    retried on the shorter name.

    Args:
        string_to_check: Mapping name, e.g. ``"generic-Latn"``.

    Returns:
        The description string, or ``None`` when the name is shorter than
        two characters or no description could be derived.
    """
    # Special-cased: this name gets a fixed description without consulting langcodes.
    if string_to_check == "generic-Latn":
        return "Generic Latin Script"

    # Also terminates the retry recursion once every subtag has been stripped.
    if len(string_to_check) < 2:
        return None

    try:
        lang = langcodes.get(string_to_check)
        if not lang:
            return None
        return ", ".join(
            f"{key}: {value}" for key, value in lang.describe().items()
        )
    except LanguageTagError as e:
        message = str(e)
        # Errors seen for mapping names with extra trailing pieces:
        #   This extlang subtag, 'red', is out of place. Expected territory, variant, extension, or end of string.
        #   This script subtag, 'east', is out of place. Expected territory, variant, extension, or end of string.
        #   The subtag 'p' must be followed by something
        if "out of place" in message or "must be followed by something" in message:
            # Drop the last subtag and try again with the shorter name.
            shortened = "-".join(string_to_check.split("-")[:-1])
            return get_lang_description_from_mapping_name(shortened)

        # Any other tag error: report it and give up on a description.
        print("*****")
        print(e)
        return None
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def get_valid_epitran_mappings_list():
    """Return the names of the mapping files found in the epitran package.

    Lists every file with an extension in ``<epitran package dir>/data/map``
    and returns the file stems (file name without its final suffix).
    Presumably each stem is a ``<language>-<script>`` mapping code accepted
    by ``epitran.Epitran`` -- confirm against the installed epitran version.

    Returns:
        A sorted list of file stems.
    """
    # Imported locally because the module's top-level imports do not provide Path.
    from pathlib import Path

    map_path = Path(epitran.__path__[0]) / "data" / "map"
    # glob() order depends on the filesystem; sort for a stable result.
    return sorted(map_file.stem for map_file in map_path.glob("*.*"))
|
| 49 |
+
|
| 50 |
+
|
| 51 |
if __name__ == "__main__":
    # Let the user pick one of the mapping names returned by
    # get_valid_epitran_mappings_list() instead of typing ISO codes by hand.
    valid_epitran_mappings = get_valid_epitran_mappings_list()
    selected_mapping = st.selectbox(
        "Which language/script pair would you like to use?",
        valid_epitran_mappings,
    )
    description = get_lang_description_from_mapping_name(selected_mapping)
    st.write(f"You selected {selected_mapping}")
    # The description can be None (no usable language tag in the mapping
    # name); only show it when there is something to show.
    if description:
        st.write(description)

    input_text = st.text_area(
        label="Whatever you type here will be transliterated!",
        value="Gari langu linaloangama limejaa na mikunga",
    )

    st.info("attempting to instantiate epitran transliterator for your language/script")
    epi = epitran.Epitran(selected_mapping)

    st.info(f"transliterating `{input_text}`\n\tusing {epi}...")
    transliteration = epi.transliterate(input_text)

    st.success(transliteration)
|
| 82 |
+
|
| 83 |
|
| 84 |
|
| 85 |
|