import streamlit as st
# import streamlit.components.v1 as components
# components.iframe("https://cherokee.nicedata.eu.org/", height=500)
import streamlit.components.v1 as components
import base64
# from faker import Faker
import random
from datetime import datetime
import pandas as pd
import requests
import time
# Page-wide configuration; the app uses the wide layout.
st.set_page_config(layout="wide")
# LOGO_URL_LARGE="./lora1.png"

# Sidebar content: app title, a short description, a contact card and
# reference links. Elements are attached with `st.sidebar.<element>`.
# NOTE(review): the file's indentation was lost, so the extent of the
# sidebar section is reconstructed - confirm that all five elements below
# are meant to live in the sidebar.
st.sidebar.title('🌵 Cherokee Syllabary and Phonetic Converter')
st.sidebar.write('This chatbot is created using the open-source Llama 3 LLM model from Meta.')
st.sidebar.markdown('📖 Learn how to build this app in this [blog](https://nicedata.eu.org/Cherokee)!')
st.sidebar.info(
    """
- Email: [sh.wang4067@gmail.com](mailto:sh.wang4067@gmail.com)
- Tel: +86 181-1615-2720
- Homepage: [nicedata.eu.org](https://nicedata.eu.org)
- Github: [wdzhwsh4076](https://github.com/wdzhwsh4076)
- Address: Boda Campus, Xinjiang University, Urumqi City, China
"""
)
st.sidebar.markdown(
    """
### Link
[1. cherokee dictionary](https://www.cherokeedictionary.net/)
[2. cherokee 500 word](https://www.cherokeedictionary.net/first500)
"""
)

# Main page heading.
st.title("🌵 Cherokee Syllabary and Phonetic Converter")
# Disabled introduction text, kept for reference:
# st.markdown(
# """
# I am excited to present the latest language model, which has been fine-tuned using the state-of-the-art LoRA (Low-Rank Adaptation) technique on the robust foundation of the LLaMA3-8B model.
# This is an open-source project and you are very welcome to contribute your comments, questions, resources, and apps as [issues](https://github.com/giswqs/streamlit-geospatial/issues) or
# [pull requests](https://github.com/giswqs/streamlit-geospatial/pulls) to the [GitHub repository](https://github.com/giswqs/streamlit-geospatial).
# """
# )
st.info("Click on the left sidebar menu to navigate to the different apps.")
def get_file_url(path):
    """Return the contents of the file at ``path`` as Base64 text.

    The file is read in binary mode, Base64-encoded, and the encoded bytes
    are decoded as UTF-8 so the result is a plain ``str``. Despite the
    function's name, the return value is only the Base64 payload; no URL
    or ``data:`` prefix is added.

    Args:
        path: Location of the file to read, in any form ``open`` accepts.

    Returns:
        The Base64 encoding of the file's bytes, as a string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the handle even when reading fails, which
    # the previous manual open()/close() pair did not guarantee.
    with open(path, "rb") as file_:
        return base64.b64encode(file_.read()).decode("utf-8")
## -------------------------------------------------------------------- ##
def syllabary_to_phonetic(syllabary_sentence: str) -> str:
    """Transliterate Cherokee syllabary text into Latin phonetic spelling.

    Every character that appears in the lookup table below is replaced by
    its phonetic value; any other character (spaces, punctuation, Latin
    letters, ...) is copied to the result unchanged.

    Args:
        syllabary_sentence: Text that may contain Cherokee syllabary
            characters.

    Returns:
        The text with each known syllabary character replaced by its
        phonetic spelling.
    """
    syllabary_to_phonetic_map = {
        'Ꭰ': 'a', 'Ꭱ': 'e', 'Ꭲ': 'i', 'Ꭳ': 'o', 'Ꭴ': 'u', 'Ꭵ': 'v',
        'Ꭶ': 'ga', 'Ꭷ': 'ka', 'Ꭸ': 'ge', 'Ꭹ': 'gi', 'Ꭺ': 'go', 'Ꭻ': 'gu', 'Ꭼ': 'gv',
        'Ꭽ': 'ha', 'Ꭾ': 'he', 'Ꭿ': 'hi', 'Ꮀ': 'ho', 'Ꮁ': 'hu', 'Ꮂ': 'hv',
        'Ꮃ': 'la', 'Ꮄ': 'le', 'Ꮅ': 'li', 'Ꮆ': 'lo', 'Ꮇ': 'lu', 'Ꮈ': 'lv',
        'Ꮉ': 'ma', 'Ꮊ': 'me', 'Ꮋ': 'mi', 'Ꮌ': 'mo', 'Ꮍ': 'mu', 'Ᏽ': 'mv',
        'Ꮎ': 'na', 'Ꮏ': 'hna', 'Ꮐ': 'nah', 'Ꮑ': 'ne', 'Ꮒ': 'ni', 'Ꮓ': 'no', 'Ꮔ': 'nu', 'Ꮕ': 'nv',
        'Ꮖ': 'qua', 'Ꮗ': 'que', 'Ꮘ': 'qui', 'Ꮙ': 'quo', 'Ꮚ': 'quu', 'Ꮛ': 'quv',
        'Ꮝ': 's', 'Ꮜ': 'sa', 'Ꮞ': 'se', 'Ꮟ': 'si', 'Ꮠ': 'so', 'Ꮡ': 'su', 'Ꮢ': 'sv',
        'Ꮣ': 'da', 'Ꮤ': 'ta', 'Ꮥ': 'de', 'Ꮦ': 'te', 'Ꮧ': 'di', 'Ꮨ': 'ti', 'Ꮩ': 'do', 'Ꮪ': 'du', 'Ꮫ': 'dv',
        'Ꮬ': 'dla', 'Ꮭ': 'tla', 'Ꮮ': 'tle', 'Ꮯ': 'tli', 'Ꮰ': 'tlo', 'Ꮱ': 'tlu', 'Ꮲ': 'tlv',
        'Ꮳ': 'tsa', 'Ꮴ': 'tse', 'Ꮵ': 'tsi', 'Ꮶ': 'tso', 'Ꮷ': 'tsu', 'Ꮸ': 'tsv',
        'Ꮹ': 'wa', 'Ꮺ': 'we', 'Ꮻ': 'wi', 'Ꮼ': 'wo', 'Ꮽ': 'wu', 'Ꮾ': 'wv',
        'Ꮿ': 'ya', 'Ᏸ': 'ye', 'Ᏹ': 'yi', 'Ᏺ': 'yo', 'Ᏻ': 'yu', 'Ᏼ': 'yv',
    }
    # Every key is a single character, so the table can drive
    # str.translate directly; characters without an entry are left as-is.
    translation_table = str.maketrans(syllabary_to_phonetic_map)
    return syllabary_sentence.translate(translation_table)
def phonetic_to_syllabary(phonetic_sentence: str) -> str:
    """Convert Latin phonetic spelling into Cherokee syllabary characters.

    The text is scanned left to right. At each position the longest
    spelling found in the lookup table (three, two or one letters) is
    replaced by its syllabary character; characters that start no known
    spelling are copied to the result unchanged. Matching is
    case-sensitive: only the lower-case spellings in the table are
    recognised.

    One spelling needs disambiguation: ``'nah'`` begins with ``'na'``,
    which is itself a table entry. When the trailing ``h`` can open a
    syllable of its own (``'nahi'`` -> ``na`` + ``hi``) the shorter
    spelling wins, so such input converts exactly as before. Only when the
    ``h`` would otherwise be left over as a stray Latin letter is the
    three-letter spelling used. Previously the two-letter lookup always
    ran first, which made the ``'nah'`` entry unreachable.

    Args:
        phonetic_sentence: Text written with the phonetic spellings used
            by the table below.

    Returns:
        The text with every recognised spelling replaced by its syllabary
        character.
    """
    phonetic_to_syllabary_map = {
        'a': 'Ꭰ', 'e': 'Ꭱ', 'i': 'Ꭲ', 'o': 'Ꭳ', 'u': 'Ꭴ', 'v': 'Ꭵ',
        'ga': 'Ꭶ', 'ka': 'Ꭷ', 'ge': 'Ꭸ', 'gi': 'Ꭹ', 'go': 'Ꭺ', 'gu': 'Ꭻ', 'gv': 'Ꭼ',
        'ha': 'Ꭽ', 'he': 'Ꭾ', 'hi': 'Ꭿ', 'ho': 'Ꮀ', 'hu': 'Ꮁ', 'hv': 'Ꮂ',
        'la': 'Ꮃ', 'le': 'Ꮄ', 'li': 'Ꮅ', 'lo': 'Ꮆ', 'lu': 'Ꮇ', 'lv': 'Ꮈ',
        'ma': 'Ꮉ', 'me': 'Ꮊ', 'mi': 'Ꮋ', 'mo': 'Ꮌ', 'mu': 'Ꮍ', 'mv': 'Ᏽ',
        'na': 'Ꮎ', 'hna': 'Ꮏ', 'nah': 'Ꮐ', 'ne': 'Ꮑ', 'ni': 'Ꮒ', 'no': 'Ꮓ', 'nu': 'Ꮔ', 'nv': 'Ꮕ',
        'qua': 'Ꮖ', 'que': 'Ꮗ', 'qui': 'Ꮘ', 'quo': 'Ꮙ', 'quu': 'Ꮚ', 'quv': 'Ꮛ',
        's': 'Ꮝ', 'sa': 'Ꮜ', 'se': 'Ꮞ', 'si': 'Ꮟ', 'so': 'Ꮠ', 'su': 'Ꮡ', 'sv': 'Ꮢ',
        'da': 'Ꮣ', 'ta': 'Ꮤ', 'de': 'Ꮥ', 'te': 'Ꮦ', 'di': 'Ꮧ', 'ti': 'Ꮨ', 'do': 'Ꮩ', 'du': 'Ꮪ', 'dv': 'Ꮫ',
        'dla': 'Ꮬ', 'tla': 'Ꮭ', 'tle': 'Ꮮ', 'tli': 'Ꮯ', 'tlo': 'Ꮰ', 'tlu': 'Ꮱ', 'tlv': 'Ꮲ',
        'tsa': 'Ꮳ', 'tse': 'Ꮴ', 'tsi': 'Ꮵ', 'tso': 'Ꮶ', 'tsu': 'Ꮷ', 'tsv': 'Ꮸ',
        'wa': 'Ꮹ', 'we': 'Ꮺ', 'wi': 'Ꮻ', 'wo': 'Ꮼ', 'wu': 'Ꮽ', 'wv': 'Ꮾ',
        'ya': 'Ꮿ', 'ye': 'Ᏸ', 'yi': 'Ᏹ', 'yo': 'Ᏺ', 'yu': 'Ᏻ', 'yv': 'Ᏼ',
    }

    def spelling_starts_at(pos: int) -> bool:
        # True when some table entry begins at ``pos``. Slices that run
        # past the end are simply shorter, so no bounds checks are needed.
        return any(
            phonetic_sentence[pos:pos + size] in phonetic_to_syllabary_map
            for size in (3, 2, 1)
        )

    pieces = []
    pos = 0
    end = len(phonetic_sentence)
    while pos < end:
        single = phonetic_sentence[pos]
        pair = phonetic_sentence[pos:pos + 2]
        triple = phonetic_sentence[pos:pos + 3]

        # A three-letter spelling yields to its two-letter prefix only when
        # its last letter can start the next syllable (the 'nah' case).
        prefer_pair = (
            pair in phonetic_to_syllabary_map and spelling_starts_at(pos + 2)
        )
        if triple in phonetic_to_syllabary_map and not prefer_pair:
            matched = triple
        elif pair in phonetic_to_syllabary_map:
            matched = pair
        elif single in phonetic_to_syllabary_map:
            matched = single
        else:
            # Not part of any spelling: keep the character as written.
            pieces.append(single)
            pos += 1
            continue

        pieces.append(phonetic_to_syllabary_map[matched])
        pos += len(matched)

    return ''.join(pieces)
# Round-trip self-check: convert a sample sentence to phonetic spelling and
# back, then print both results and whether the round trip reproduced the
# original text. This runs at module level, i.e. each time the script runs.
syllabary_sentence = "ᎨᏍᏗ ᏯᏍᎦᎢᎮ ᏥᏄᏍᏕ ᎠᎬᏱ ᏣᎴᏂᏍᎨ ᎠᏂᎩᏍᎬ, ᎾᎥᏂ ᏭᎷᏤᎢ, ᏏᏲ, ᎤᏍᏗ ᎠᏣᏗ ᎬᏉᏎᎰ ᏃᎴ ᎨᏍᏗ ᎯᎸᎯᏳ ᏥᎪᎥ ᏂᎯ ᎢᏳᏍᏗ ᎠᏣᏗ."
phonetic_sentence = syllabary_to_phonetic(syllabary_sentence)
reconstructed_syllabary = phonetic_to_syllabary(phonetic_sentence)
print(f"Phonetic: {phonetic_sentence}")
print(f"Reconstructed Syllabary: {reconstructed_syllabary}")
print(f"Original and reconstructed match: {syllabary_sentence == reconstructed_syllabary}")
# Demo section: two independent converter rows, each with an input box and
# a button on the left and a result box on the right.
# NOTE(review): the file's indentation was lost; the nesting under the
# column blocks below is reconstructed - confirm the intended layout.
st.markdown('<a name="customizable-border"></a>', unsafe_allow_html=True)
st.header("🔲 Demo", divider="rainbow")
# st.markdown("Enter Cherokee Syllabary Text:")
# st.markdown("""
# #### Cherokee-English Word Dataset (10.2k)
# This dataset focuses on vocabulary, ensuring that our model has a comprehensive grasp of Cherokee words and their English counterparts.
# """)

# The result boxes are bound to Session State through their `key`, and the
# buttons write the converted text to those same keys. Seed the keys once
# and do NOT also pass `value=` to the result widgets: Streamlit warns when
# a widget receives both a default value and a value set through the
# Session State API.
if "phonetic_output" not in st.session_state:
    st.session_state["phonetic_output"] = ""
if "syllabary_output" not in st.session_state:
    st.session_state["syllabary_output"] = ""

# First row: syllabary to phonetic
st.subheader("Syllabary to Phonetic")
col1, col2 = st.columns(2)
with col1:
    syllabary_input = st.text_area(
        "Enter Cherokee Syllabary Text:",
        "ᎨᏍᏗ ᏯᏍᎦᎢᎮ ᏥᏄᏍᏕ ᎠᎬᏱ ᏣᎴᏂᏍᎨ ᎠᏂᎩᏍᎬ",
        height=100,
        key="syllabary_input",
    )
    if st.button("Convert to Phonetic"):
        # Written before the result widget below is created in this run,
        # so the widget picks the new text up immediately.
        phonetic_output = syllabary_to_phonetic(syllabary_input)
        st.session_state.phonetic_output = phonetic_output
with col2:
    st.text_area("Phonetic Output:", height=100, key="phonetic_output")

# Second row: phonetic to syllabary
st.subheader("Phonetic to Syllabary")
col3, col4 = st.columns(2)
with col3:
    phonetic_input = st.text_area(
        "Enter Phonetic Text:",
        "gesdi yasgaihe jinusde agvyi jalenisge anigigv",
        height=100,
        key="phonetic_input",
    )
    if st.button("Convert to Syllabary"):
        syllabary_output = phonetic_to_syllabary(phonetic_input)
        st.session_state.syllabary_output = syllabary_output
with col4:
    st.text_area("Syllabary Output:", height=100, key="syllabary_output")
# "Method" section: page anchor, section header, and a short background
# paragraph about the Cherokee syllabary rendered as Markdown.
method_anchor_html = '<a name="new-app-loading-animation"></a>'
method_background_md = """
#### Cherokee syllabary
The Cherokee syllabary is a syllabary invented by Sequoyah in the late 1810s and early 1820s to write the Cherokee language. His creation of the syllabary is particularly noteworthy as he was illiterate until its creation.[3] He first experimented with logograms, but his system later developed into the syllabary. In his system, each symbol represents a syllable rather than a single phoneme; the 85 (originally 86)[1] characters provide a suitable method for writing Cherokee. The letters resemble characters from other scripts, such as Latin, Greek, Cyrillic, and Glagolitic, however, these are not used to represent the same sounds.
"""
st.markdown(method_anchor_html, unsafe_allow_html=True)
st.header("⏳ Method", divider="rainbow")
st.markdown(method_background_md)
def get_file_url(path):
    """Return the contents of the file at ``path`` as Base64 text.

    The file is read in binary mode, Base64-encoded, and the encoded bytes
    are decoded as UTF-8 so the result is a plain ``str``. Despite the
    function's name, the return value is only the Base64 payload; no URL
    or ``data:`` prefix is added.

    Args:
        path: Location of the file to read, in any form ``open`` accepts.

    Returns:
        The Base64 encoding of the file's bytes, as a string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the handle even when reading fails, which
    # a manual open()/close() pair does not guarantee.
    with open(path, "rb") as file_:
        return base64.b64encode(file_.read()).decode("utf-8")
# Two side-by-side columns, each carrying only a figure caption that
# points at the Wikipedia article, followed by a horizontal divider.
# NOTE(review): the file's indentation was lost; the divider is assumed to
# sit outside the columns - confirm.
gif1, gif2 = st.columns(2)
# st.subheader("detail")
gif1.caption("Fig: https://en.wikipedia.org/wiki/Cherokee_syllabary ")
# st.subheader("detail")
gif2.caption("""Fig: https://en.wikipedia.org/wiki/Cherokee_syllabary """)
st.divider()
## -------------------------------------------------------------------- ##