File size: 9,675 Bytes
597e51e
 
 
 
 
 
 
 
 
 
 
 
 
f2f3b49
560fdf7
597e51e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc98885
597e51e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7d59a1c
597e51e
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
import streamlit as st
# import streamlit.components.v1 as components
# components.iframe("https://cherokee.nicedata.eu.org/", height=500)
import streamlit.components.v1 as components
import base64
# from faker import Faker
import random
from datetime import datetime
import pandas as pd
import requests
import time
# Page-level configuration: use the wide layout for the whole app.
st.set_page_config(layout="wide")

# Disabled: path to a large logo image (not used by any visible code).
# LOGO_URL_LARGE="./lora1.png"


# Sidebar: app title, short description, contact details and reference links.
with st.sidebar:
    st.title('🌵 Cherokee Syllabary and Phonetic Converter')
    # NOTE(review): this text describes a Llama 3 chatbot, but the converter
    # functions in this file are plain lookup tables -- confirm the wording.
    st.write('This chatbot is created using the open-source Llama 3 LLM model from Meta.')

    st.markdown('📖 Learn how to build this app in this [blog](https://nicedata.eu.org/Cherokee)!')

    # Author contact details rendered as a markdown bullet list.
    st.info(
        """
    - Email: [sh.wang4067@gmail.com](mailto:sh.wang4067@gmail.com)
    - Tel: +86 181-1615-2720
    - Homepage: [nicedata.eu.org](https://nicedata.eu.org)
    - Github: [wdzhwsh4076](https://github.com/wdzhwsh4076)
    - Address: Boda Campus, Xinjiang University, Urumqi City, China
        """
    )
    # External reference links for Cherokee vocabulary.
    st.markdown(
        """
    ### Link

    [1. cherokee dictionary](https://www.cherokeedictionary.net/)

    [2. cherokee 500 word](https://www.cherokeedictionary.net/first500)
            """
    )

# Main-page title (same text as the sidebar title).
st.title("🌵 Cherokee Syllabary and Phonetic Converter")
# Disabled introductory blurb, kept for reference:
# st.markdown(
#     """
#     I am excited to present the latest language model, which has been fine-tuned using the state-of-the-art LoRA (Low-Rank Adaptation) technique on the robust foundation of the LLaMA3-8B model.
#     This is an open-source project and you are very welcome to contribute your comments, questions, resources, and apps as [issues](https://github.com/giswqs/streamlit-geospatial/issues) or
#     [pull requests](https://github.com/giswqs/streamlit-geospatial/pulls) to the [GitHub repository](https://github.com/giswqs/streamlit-geospatial).
#
#     """
# )
st.info("Click on the left sidebar menu to navigate to the different apps.")



def get_file_url(path):
    """Return the contents of the file at *path* as base64 text.

    Despite the name, the result is only the base64 payload; no ``data:``
    URL prefix is added.

    Args:
        path: Filesystem path of the file to read; it is opened in binary mode.

    Returns:
        The file's bytes, base64-encoded and decoded to a ``str`` as UTF-8.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the handle even if read() raises, which the
    # manual open()/close() pair did not guarantee.
    with open(path, "rb") as file_:
        return base64.b64encode(file_.read()).decode("utf-8")



## -------------------------------------------------------------------- ##
def syllabary_to_phonetic(syllabary_sentence: str) -> str:
    """Transliterate Cherokee syllabary text into its phonetic spelling.

    Every character that has an entry in the lookup table is replaced by its
    Latin transliteration. Any other character (spaces, punctuation, Latin
    letters, ...) is copied to the output unchanged.

    Args:
        syllabary_sentence: Text to convert, processed one character at a time.

    Returns:
        The converted text.
    """
    glyph_to_latin = {
        'Ꭰ': 'a', 'Ꭱ': 'e', 'Ꭲ': 'i', 'Ꭳ': 'o', 'Ꭴ': 'u', 'Ꭵ': 'v',
        'Ꭶ': 'ga', 'Ꭷ': 'ka', 'Ꭸ': 'ge', 'Ꭹ': 'gi', 'Ꭺ': 'go', 'Ꭻ': 'gu', 'Ꭼ': 'gv',
        'Ꭽ': 'ha', 'Ꭾ': 'he', 'Ꭿ': 'hi', 'Ꮀ': 'ho', 'Ꮁ': 'hu', 'Ꮂ': 'hv',
        'Ꮃ': 'la', 'Ꮄ': 'le', 'Ꮅ': 'li', 'Ꮆ': 'lo', 'Ꮇ': 'lu', 'Ꮈ': 'lv',
        'Ꮉ': 'ma', 'Ꮊ': 'me', 'Ꮋ': 'mi', 'Ꮌ': 'mo', 'Ꮍ': 'mu', 'Ᏽ': 'mv',
        'Ꮎ': 'na', 'Ꮏ': 'hna', 'Ꮐ': 'nah', 'Ꮑ': 'ne', 'Ꮒ': 'ni', 'Ꮓ': 'no', 'Ꮔ': 'nu', 'Ꮕ': 'nv',
        'Ꮖ': 'qua', 'Ꮗ': 'que', 'Ꮘ': 'qui', 'Ꮙ': 'quo', 'Ꮚ': 'quu', 'Ꮛ': 'quv',
        'Ꮝ': 's', 'Ꮜ': 'sa', 'Ꮞ': 'se', 'Ꮟ': 'si', 'Ꮠ': 'so', 'Ꮡ': 'su', 'Ꮢ': 'sv',
        'Ꮣ': 'da', 'Ꮤ': 'ta', 'Ꮥ': 'de', 'Ꮦ': 'te', 'Ꮧ': 'di', 'Ꮨ': 'ti', 'Ꮩ': 'do', 'Ꮪ': 'du', 'Ꮫ': 'dv',
        'Ꮬ': 'dla', 'Ꮭ': 'tla', 'Ꮮ': 'tle', 'Ꮯ': 'tli', 'Ꮰ': 'tlo', 'Ꮱ': 'tlu', 'Ꮲ': 'tlv',
        'Ꮳ': 'tsa', 'Ꮴ': 'tse', 'Ꮵ': 'tsi', 'Ꮶ': 'tso', 'Ꮷ': 'tsu', 'Ꮸ': 'tsv',
        'Ꮹ': 'wa', 'Ꮺ': 'we', 'Ꮻ': 'wi', 'Ꮼ': 'wo', 'Ꮽ': 'wu', 'Ꮾ': 'wv',
        'Ꮿ': 'ya', 'Ᏸ': 'ye', 'Ᏹ': 'yi', 'Ᏺ': 'yo', 'Ᏻ': 'yu', 'Ᏼ': 'yv',
    }

    # Falling back to the glyph itself passes unmapped characters through.
    return "".join(
        glyph_to_latin.get(glyph, glyph) for glyph in syllabary_sentence
    )

def phonetic_to_syllabary(phonetic_sentence: str) -> str:
    """Convert phonetic (Latin) Cherokee text into syllabary characters.

    The text is scanned left to right. At each position a two-character
    match is tried first, then a three-character match, then a
    single-character match. If none is found, the character is copied
    through unchanged. Matching is case-sensitive; all table keys are
    lowercase.

    NOTE: because two-character matches win over three-character ones,
    'nah' is consumed as 'na' followed by a literal 'h'; the 'nah' entry
    is never selected.

    Args:
        phonetic_sentence: Text to convert.

    Returns:
        The converted text.
    """
    latin_to_glyph = {
        'a': 'Ꭰ', 'e': 'Ꭱ', 'i': 'Ꭲ', 'o': 'Ꭳ', 'u': 'Ꭴ', 'v': 'Ꭵ',
        'ga': 'Ꭶ', 'ka': 'Ꭷ', 'ge': 'Ꭸ', 'gi': 'Ꭹ', 'go': 'Ꭺ', 'gu': 'Ꭻ', 'gv': 'Ꭼ',
        'ha': 'Ꭽ', 'he': 'Ꭾ', 'hi': 'Ꭿ', 'ho': 'Ꮀ', 'hu': 'Ꮁ', 'hv': 'Ꮂ',
        'la': 'Ꮃ', 'le': 'Ꮄ', 'li': 'Ꮅ', 'lo': 'Ꮆ', 'lu': 'Ꮇ', 'lv': 'Ꮈ',
        'ma': 'Ꮉ', 'me': 'Ꮊ', 'mi': 'Ꮋ', 'mo': 'Ꮌ', 'mu': 'Ꮍ', 'mv': 'Ᏽ',
        'na': 'Ꮎ', 'hna': 'Ꮏ', 'nah': 'Ꮐ', 'ne': 'Ꮑ', 'ni': 'Ꮒ', 'no': 'Ꮓ', 'nu': 'Ꮔ', 'nv': 'Ꮕ',
        'qua': 'Ꮖ', 'que': 'Ꮗ', 'qui': 'Ꮘ', 'quo': 'Ꮙ', 'quu': 'Ꮚ', 'quv': 'Ꮛ',
        's': 'Ꮝ', 'sa': 'Ꮜ', 'se': 'Ꮞ', 'si': 'Ꮟ', 'so': 'Ꮠ', 'su': 'Ꮡ', 'sv': 'Ꮢ',
        'da': 'Ꮣ', 'ta': 'Ꮤ', 'de': 'Ꮥ', 'te': 'Ꮦ', 'di': 'Ꮧ', 'ti': 'Ꮨ', 'do': 'Ꮩ', 'du': 'Ꮪ', 'dv': 'Ꮫ',
        'dla': 'Ꮬ', 'tla': 'Ꮭ', 'tle': 'Ꮮ', 'tli': 'Ꮯ', 'tlo': 'Ꮰ', 'tlu': 'Ꮱ', 'tlv': 'Ꮲ',
        'tsa': 'Ꮳ', 'tse': 'Ꮴ', 'tsi': 'Ꮵ', 'tso': 'Ꮶ', 'tsu': 'Ꮷ', 'tsv': 'Ꮸ',
        'wa': 'Ꮹ', 'we': 'Ꮺ', 'wi': 'Ꮻ', 'wo': 'Ꮼ', 'wu': 'Ꮽ', 'wv': 'Ꮾ',
        'ya': 'Ꮿ', 'ye': 'Ᏸ', 'yi': 'Ᏹ', 'yo': 'Ᏺ', 'yu': 'Ᏻ', 'yv': 'Ᏼ',
    }

    pieces = []
    pos = 0
    total = len(phonetic_sentence)
    while pos < total:
        # Candidate widths in the order they are tried: 2, then 3, then 1.
        for width in (2, 3, 1):
            chunk = phonetic_sentence[pos:pos + width]
            # A slice cut short by the end of the text must not be matched
            # against shorter keys, so require the full width.
            if len(chunk) == width and chunk in latin_to_glyph:
                pieces.append(latin_to_glyph[chunk])
                pos += width
                break
        else:
            # No table entry starts here: keep the character as-is.
            pieces.append(phonetic_sentence[pos])
            pos += 1

    return "".join(pieces)

# Example usage / round-trip self-check.
# These statements are at module top level, so they execute (and print to
# stdout) every time this script is run.
syllabary_sentence = "ᎨᏍᏗ ᏯᏍᎦᎢᎮ ᏥᏄᏍᏕ ᎠᎬᏱ ᏣᎴᏂᏍᎨ ᎠᏂᎩᏍᎬ, ᎾᎥᏂ ᏭᎷᏤᎢ, ᏏᏲ, ᎤᏍᏗ ᎠᏣᏗ ᎬᏉᏎᎰ ᏃᎴ ᎨᏍᏗ ᎯᎸᎯᏳ ᏥᎪᎥ ᏂᎯ ᎢᏳᏍᏗ ᎠᏣᏗ."
# Syllabary -> phonetic.
phonetic_sentence = syllabary_to_phonetic(syllabary_sentence)
print("Phonetic:", phonetic_sentence)

# Phonetic -> syllabary, then report whether the round trip reproduced the
# original sentence exactly.
reconstructed_syllabary = phonetic_to_syllabary(phonetic_sentence)
print("Reconstructed Syllabary:", reconstructed_syllabary)
print("Original and reconstructed match:", syllabary_sentence == reconstructed_syllabary)

# Demo section: two converter rows, each with an input column (text area +
# button) and an output column (text area showing the last result).
st.markdown('<a name="customizable-border"></a>', unsafe_allow_html=True)
st.header("🔲 Demo", divider="rainbow")
# Disabled heading and dataset blurb, kept for reference:
# st.markdown("Enter Cherokee Syllabary Text:")

# st.markdown("""
# #### Cherokee-English Word Dataset (10.2k)

# This dataset focuses on vocabulary, ensuring that our model has a comprehensive grasp of Cherokee words and their English counterparts.
# """)

# First row: syllabary to phonetic (input on the left, output on the right).
st.subheader("Syllabary to Phonetic")
col1, col2 = st.columns(2)

with col1:
    syllabary_input = st.text_area("Enter Cherokee Syllabary Text:", 
                                    "ᎨᏍᏗ ᏯᏍᎦᎢᎮ ᏥᏄᏍᏕ ᎠᎬᏱ ᏣᎴᏂᏍᎨ ᎠᏂᎩᏍᎬ",
                                    height=100, key="syllabary_input")
    
    # On click, convert the current input and store the result in session
    # state under the same name the output text area uses as its key.
    if st.button("Convert to Phonetic"):
        phonetic_output = syllabary_to_phonetic(syllabary_input)
        st.session_state.phonetic_output = phonetic_output

with col2:
    # Shows the stored conversion result, or an empty string if none is stored.
    # NOTE(review): this widget passes value= and also uses a key that is
    # assigned through st.session_state above -- confirm Streamlit accepts
    # this combination without a warning.
    st.text_area("Phonetic Output:", 
                    value=st.session_state.get('phonetic_output', ''),
                    height=100, key="phonetic_output")

# Second row: phonetic to syllabary (same layout as the first row).
st.subheader("Phonetic to Syllabary")
col3, col4 = st.columns(2)

with col3:
    # NOTE(review): the default text contains 'j', which has no entry in
    # phonetic_to_syllabary's table, so those letters pass through unchanged.
    phonetic_input = st.text_area("Enter Phonetic Text:", 
                                    "gesdi yasgaihe jinusde agvyi jalenisge anigigv",
                                    height=100, key="phonetic_input")
    
    # On click, convert the current input and store the result in session
    # state under the same name the output text area uses as its key.
    if st.button("Convert to Syllabary"):
        syllabary_output = phonetic_to_syllabary(phonetic_input)
        st.session_state.syllabary_output = syllabary_output

with col4:
    # Shows the stored conversion result, or an empty string if none is stored.
    st.text_area("Syllabary Output:", 
                    value=st.session_state.get('syllabary_output', ''),
                    height=100, key="syllabary_output")


# "Method" section: an HTML anchor, a section header, and a background
# paragraph about the Cherokee syllabary rendered as markdown.
st.markdown('<a name="new-app-loading-animation"></a>', unsafe_allow_html=True)
st.header("⏳ Method", divider="rainbow")
# NOTE(review): the paragraph below still contains bracketed citation markers
# ("[3]", "[1]") with no matching reference list in this file.
st.markdown("""
    #### Cherokee syllabary
    
    The Cherokee syllabary is a syllabary invented by Sequoyah in the late 1810s and early 1820s to write the Cherokee language. His creation of the syllabary is particularly noteworthy as he was illiterate until its creation.[3] He first experimented with logograms, but his system later developed into the syllabary. In his system, each symbol represents a syllable rather than a single phoneme; the 85 (originally 86)[1] characters provide a suitable method for writing Cherokee. The letters resemble characters from other scripts, such as Latin, Greek, Cyrillic, and Glagolitic, however, these are not used to represent the same sounds.
""")


def get_file_url(path):
    """Return the contents of the file at *path* as base64 text.

    This rebinds a ``get_file_url`` defined earlier in the file with the same
    behavior. Despite the name, the result is only the base64 payload; no
    ``data:`` URL prefix is added.

    Args:
        path: Filesystem path of the file to read; it is opened in binary mode.

    Returns:
        The file's bytes, base64-encoded and decoded to a ``str`` as UTF-8.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the handle even if read() raises, which the
    # manual open()/close() pair did not guarantee.
    with open(path, "rb") as file_:
        return base64.b64encode(file_.read()).decode("utf-8")



# Two side-by-side columns that each show only a figure caption.
# NOTE(review): no image is rendered in either column in the visible code, so
# the captions currently stand alone -- confirm whether figures are missing.
gif1, gif2 = st.columns(2)
with gif1:
    # st.subheader("detail")

    st.caption("Fig: https://en.wikipedia.org/wiki/Cherokee_syllabary ")

with gif2:
    # st.subheader("detail")
    st.caption("""Fig: https://en.wikipedia.org/wiki/Cherokee_syllabary """)

# Horizontal rule closing the section.
st.divider()

## -------------------------------------------------------------------- ##