SinDarSoup commited on
Commit
db080f6
·
1 Parent(s): 3005b7a

init commit

Browse files
.gitignore ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ #.idea/
169
+
170
+ # Ruff stuff:
171
+ .ruff_cache/
172
+
173
+ # PyPI configuration file
174
+ .pypirc
app.py CHANGED
@@ -1,7 +1,28 @@
1
  import gradio as gr
2
 
 
 
 
3
  with gr.Blocks() as demo:
4
  gr.Markdown("# Lang Thrower")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
 
7
  if __name__ == "__main__":
 
1
  import gradio as gr
2
 
3
async def chat_fn(message, history, state_history, state_audios):
    """Placeholder chat handler: echo the user message back unchanged.

    Args:
        message: The submitted multimodal message.
        history: Chat history supplied by gr.ChatInterface (unused).
        state_history: Session state carrying the OpenAI message list.
        state_audios: Session state carrying generated audio records.

    Returns:
        A (reply, state_history, state_audios) tuple; states pass through.
    """
    reply = message
    return reply, state_history, state_audios
6
# UI wiring for the "Lang Thrower" demo.
with gr.Blocks() as demo:
    gr.Markdown("# Lang Thrower")

    # Per-session state threaded through chat_fn on every turn.
    state_history = gr.State([])  # The state for openai usage
    state_audios = gr.State([])   # records shaped like {"text":"...", "path":"..."}

    textbox = gr.MultimodalTextbox(
        file_types=["image"],
        file_count="multiple",
        placeholder="Please give text and image.",
    )

    chat_interface = gr.ChatInterface(
        fn=chat_fn,
        textbox=textbox,
        additional_inputs=[state_history, state_audios],
        additional_outputs=[state_history, state_audios],
    )

    # NOTE(review): ChatInterface above already renders the textbox it was
    # given; this explicit render() looks like it duplicates the component —
    # confirm intent before removing.
    textbox.render()
26
 
27
 
28
  if __name__ == "__main__":
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
# NOTE(review): app.py also imports `gradio` / `gradio_client`; on Hugging Face
# Spaces these come from the SDK runtime — for local runs, install gradio too.
# python-magic additionally needs the libmagic system library (packages.txt).
openai
python-magic
edge-tts
utils/__init__.py ADDED
File without changes
utils/caller/llm_client.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ from openai import Client
3
+ from gradio_client.utils import is_http_url_like
4
+ import magic
5
+ from pydantic import BaseModel
6
+ from ..learner.learner import DefaultTool
7
+
8
def get_client(api_key: str | None = None, **kwargs):
    """Construct an OpenAI ``Client``.

    Args:
        api_key: API key to use; ``None`` lets the SDK fall back to its
            environment-based resolution.
        **kwargs: Forwarded verbatim to the ``Client`` constructor.
    """
    return Client(api_key=api_key, **kwargs)
13
+
14
def encode_image(image_path: str) -> str:
    """Return the file at *image_path* as a base64 ``data:`` URI.

    The MIME type is sniffed from the file CONTENT via libmagic
    (python-magic), not guessed from the filename extension.
    """
    with open(image_path, "rb") as image_file:
        base64_image = base64.b64encode(image_file.read()).decode("utf-8")
    mime_type = magic.Magic(mime=True).from_file(image_path)
    return f"data:{mime_type};base64,{base64_image}"
20
+
21
def image_to_content(
    image_path: str,
    detail: str = "auto",
):
    """Build an OpenAI chat ``image_url`` content part for *image_path*.

    Remote URLs are passed through untouched; local files are inlined as
    base64 data URIs via encode_image().

    Args:
        image_path: HTTP(S) URL or local file path of the image.
        detail: OpenAI vision detail level ("auto", "low", or "high"
            per the API — not validated here).
    """
    if is_http_url_like(image_path):
        url = image_path
    else:
        url = encode_image(image_path)

    return {
        "type": "image_url",
        "image_url": {
            "url": url,
            "detail": detail,
        }
    }
38
+
39
def audio_to_content(
    data: str,
    format: str,
):
    """Build an OpenAI chat ``input_audio`` content part.

    Args:
        data: Base64-encoded audio payload.
        format: Audio format name (e.g. "wav", "mp3"). The name shadows
            the builtin but is part of the keyword interface, so it is kept.
    """
    audio_part = {
        "data": data,
        "format": format,
    }
    return {"type": "input_audio", "input_audio": audio_part}
50
+
51
async def chat_completions(
    messages: list,
    model: str,
    *,
    client: Client | None = None,
    tool_models: list[BaseModel] | None = None,
    **kwargs,
):
    """Run a chat-completion request exposing pydantic models as tools.

    Args:
        messages: OpenAI-format chat messages.
        model: Model identifier to request.
        client: Optional pre-built client; one is created via get_client()
            when omitted.
        tool_models: Pydantic model classes to advertise as function tools.
            Defaults to ``[DefaultTool]``.
        **kwargs: Forwarded to ``client.chat.completions.create``. A
            caller-supplied ``tools`` entry takes precedence over
            ``tool_models``.

    Returns:
        The raw chat-completion response object from the SDK.
    """
    # Avoid a shared mutable default argument (previously `=[DefaultTool]`).
    if tool_models is None:
        tool_models = [DefaultTool]

    tools = kwargs.pop("tools", None)
    if tools is None:
        tools = [
            {
                "type": "function",
                "function": {
                    # BUG FIX: the API requires `function.name`; the spec
                    # previously omitted it. Derive name/description from
                    # the model class so each tool is identifiable.
                    "name": tool_model.__name__,
                    "description": (tool_model.__doc__ or "").strip(),
                    "parameters": tool_model.model_json_schema(),
                    "strict": True,
                },
            }
            for tool_model in tool_models
        ]

    client = client or get_client()
    # NOTE(review): `client.chat.completions.create` is synchronous and will
    # block the event loop inside this async def — consider openai.AsyncClient
    # (confirm against callers before changing).
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        tools=tools,
        **kwargs,
    )

    return response
utils/learner/__init__.py ADDED
File without changes
utils/learner/dataclass.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing_extensions import Self
2
+ from typing import Any, Literal, Optional
3
+ from pydantic import BaseModel, Field, model_validator, ValidationError
4
+ from .language import (
5
+ LANGUAGE_CODES,
6
+ T_LANGUAGE_CODES,
7
+ LANGUAGES,
8
+ T_LANGUAGES,
9
+ CODE_TO_LANGUAGE,
10
+ LANGUAGE_TO_CODE,
11
+ )
12
+
13
+
14
+
15
class _Record(BaseModel):
    "Base Data Model For Language Learner"
    lang: T_LANGUAGE_CODES | T_LANGUAGES | str = Field(..., description="The language name.")
    data: str = Field(..., description="The data for the record, like `apple` is vocabulary, `How are you.` is a phrase. `I like your product! How much is this` is a sentence.")
    type: None = Field(None, description="The field needs to be defined in the sub data model.")
    meta: dict | Any = Field(None, description="The field to be implement or overwrite, please do not fill this yet.")
    IPA: Optional[str] = Field(None, description="International Phonetic Alphabet")

    @model_validator(mode='after')
    def _validator_lang(self) -> Self:
        """Normalize ``lang`` to a lowercase full language name.

        Known codes (e.g. "en") are mapped to names (e.g. "english").
        Values that are neither a known code nor a known name are kept
        as-is, with a warning recorded under ``meta["warning.lang"]``.
        """
        lang = self.lang.lower()
        if lang in CODE_TO_LANGUAGE:
            lang = CODE_TO_LANGUAGE[lang]
        elif lang not in LANGUAGES:
            # BUG FIX: previously every non-code value — including valid
            # full names such as "english", which the annotation accepts —
            # was flagged. Only truly unknown languages warn now.
            if self.meta is None:
                self.meta = {}
            self.meta["warning.lang"] = f"The language is not in the language list {LANGUAGES}."

        self.lang = lang
        return self
35
+
36
class Vocabulary(_Record):
    """
    This is for word level record.
    Please fill the `data` field with vocabulary level input respect to the language.
    e.g.
    Chinese: "貓", "車", "醫生", "學校", "咖啡", "書"
    English: "Cat", "Car", "Doctor", "School", "Coffee", "Book"
    Japanese: "猫(ねこ)", "車(くるま)", "医者(いしゃ)", "学校(がっこう)", "コーヒー", "本(ほん)"
    Korean: "고양이", "차", "의사", "학교", "커피", "책"
    Italian: "Gatto", "Auto", "Dottore", "Scuola", "Caffè", "Libro"
    """
    type: Literal['WORD'] = "WORD"


class Phrase(_Record):
    """
    This is for phrase level record.
    Please fill the `data` field with phrase level input respect to the language.
    e.g.
    Chinese: "你好", "謝謝你", "我愛你", "怎麼了?", "好久不見", "多少錢?"
    English: "Hello", "Thank you", "I love you", "What's wrong?", "Long time no see", "How much is it?"
    Japanese: "こんにちは", "ありがとう", "愛してる", "どうしたの?", "久しぶり(ひさしぶり)", "いくらですか?"
    Korean: "안녕하세요", "감사합니다", "사랑해요", "왜 그래요?", "오랜만이에요", "얼마예요?"
    Italian: "Ciao", "Grazie", "Ti amo", "Che succede?", "È da tanto tempo!", "Quanto costa?"
    """
    # BUG FIX: was Literal['WORD'] = "WORD" (copy-pasted from Vocabulary);
    # a phrase record must carry its own discriminator, matching the
    # WORD/SENTENCE pattern of its siblings.
    type: Literal['PHRASE'] = "PHRASE"


class Sentence(_Record):
    """
    This is for sentence level record.
    Please fill the `data` field with sentence level input respect to the language.
    e.g.
    Chinese: "這是一隻可愛的貓。", "我想喝一杯咖啡。", "你住在哪裡?", "今天的天氣很好。", "你能幫助我嗎?", "我正在學習日語和韓語。"
    English: "This is a cute cat.", "I want to drink a cup of coffee.", "Where do you live?", "The weather is nice today.", "Can you help me?", "I am learning Japanese and Korean."
    Japanese: "これはかわいい猫です。", "コーヒーを一杯飲みたいです。", "どこに住んでいますか?", "今日は天気がいいです。", "手伝ってくれますか?", "日本語と韓国語を勉強しています。"
    Korean: "이건 귀여운 고양이예요.", "커피 한 잔 마시고 싶어요.", "어디에 살아요?", "오늘 날씨가 좋아요.", "저를 도와줄 수 있어요?", "일본어와 한국어를 공부하고 있어요."
    Italian: "Questo è un gatto carino.", "Voglio bere una tazza di caffè.", "Dove vivi?", "Oggi il tempo è bello.", "Puoi aiutarmi?", "Sto imparando il giapponese e il coreano."
    """
    type: Literal['SENTENCE'] = "SENTENCE"
75
+
76
+
77
class ReadableReference(BaseModel):
    """
    This is a reference for the foreign language.
    It helps the user understand the foreign language more easily.
    Please write it in the user's native language.
    """
    # Fixed "foregin" typos and grammar: this docstring is emitted into the
    # tool JSON schema the model sees, so the text quality matters.
    name: str
    short_explain: str
    description: str = Field(
        ...,
        description="Try to describe the foreign language more comprehensively."
    )


class __R(BaseModel):
    # Private base pairing a native-language reference with a foreign record.
    reference: ReadableReference


class R_Vocabulary(__R):
    """A vocabulary record together with its native-language reference."""
    foreign: Vocabulary


class R_Phrase(__R):
    """A phrase record together with its native-language reference."""
    foreign: Phrase


class R_Sentence(__R):
    """A sentence record together with its native-language reference."""
    foreign: Sentence
utils/learner/language.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from typing import Literal

# We follow the openai language code
DICT = {
    "en": "english",
    "zh": "chinese",
    "de": "german",
    "es": "spanish",
    "ru": "russian",
    "ko": "korean",
    "fr": "french",
    "ja": "japanese",
    "pt": "portuguese",
    "tr": "turkish",
    "pl": "polish",
    "ca": "catalan",
    "nl": "dutch",
    "ar": "arabic",
    "sv": "swedish",
    "it": "italian",
    "id": "indonesian",
    "hi": "hindi",
    "fi": "finnish",
    "vi": "vietnamese",
    "he": "hebrew",
    "uk": "ukrainian",
    "el": "greek",
    "ms": "malay",
    "cs": "czech",
    "ro": "romanian",
    "da": "danish",
    "hu": "hungarian",
    "ta": "tamil",
    "no": "norwegian",
    "th": "thai",
    "ur": "urdu",
    "hr": "croatian",
    "bg": "bulgarian",
    "lt": "lithuanian",
    "la": "latin",
    "mi": "maori",
    "ml": "malayalam",
    "cy": "welsh",
    "sk": "slovak",
    "te": "telugu",
    "fa": "persian",
    "lv": "latvian",
    "bn": "bengali",
    "sr": "serbian",
    "az": "azerbaijani",
    "sl": "slovenian",
    "kn": "kannada",
    "et": "estonian",
    "mk": "macedonian",
    "br": "breton",
    "eu": "basque",
    "is": "icelandic",
    "hy": "armenian",
    "ne": "nepali",
    "mn": "mongolian",
    "bs": "bosnian",
    "kk": "kazakh",
    "sq": "albanian",
    "sw": "swahili",
    "gl": "galician",
    "mr": "marathi",
    "pa": "punjabi",
    "si": "sinhala",
    "km": "khmer",
    "sn": "shona",
    "yo": "yoruba",
    "so": "somali",
    "af": "afrikaans",
    "oc": "occitan",
    "ka": "georgian",
    "be": "belarusian",
    "tg": "tajik",
    "sd": "sindhi",
    "gu": "gujarati",
    "am": "amharic",
    "yi": "yiddish",
    "lo": "lao",
    "uz": "uzbek",
    "fo": "faroese",
    "ht": "haitian creole",
    "ps": "pashto",
    "tk": "turkmen",
    "nn": "nynorsk",
    "mt": "maltese",
    "sa": "sanskrit",
    "lb": "luxembourgish",
    "my": "myanmar",
    "bo": "tibetan",
    "tl": "tagalog",
    "mg": "malagasy",
    "as": "assamese",
    "tt": "tatar",
    "haw": "hawaiian",
    "ln": "lingala",
    "ha": "hausa",
    "ba": "bashkir",
    "jw": "javanese",
    "su": "sundanese",
    "yue": "cantonese",
}

# Derived from DICT instead of being hand-maintained literal copies — the
# previous duplicated lists could silently drift out of sync with DICT.
LANGUAGE_CODES = list(DICT)
LANGUAGES = list(DICT.values())

# The Literal aliases must stay spelled out: static type checkers require
# literal arguments, so these cannot be derived at runtime.
T_LANGUAGE_CODES = Literal['en', 'zh', 'de', 'es', 'ru', 'ko', 'fr', 'ja', 'pt', 'tr', 'pl', 'ca', 'nl', 'ar', 'sv', 'it', 'id', 'hi', 'fi', 'vi', 'he', 'uk', 'el', 'ms', 'cs', 'ro', 'da', 'hu', 'ta', 'no', 'th', 'ur', 'hr', 'bg', 'lt', 'la', 'mi', 'ml', 'cy', 'sk', 'te', 'fa', 'lv', 'bn', 'sr', 'az', 'sl', 'kn', 'et', 'mk', 'br', 'eu', 'is', 'hy', 'ne', 'mn', 'bs', 'kk', 'sq', 'sw', 'gl', 'mr', 'pa', 'si', 'km', 'sn', 'yo', 'so', 'af', 'oc', 'ka', 'be', 'tg', 'sd', 'gu', 'am', 'yi', 'lo', 'uz', 'fo', 'ht', 'ps', 'tk', 'nn', 'mt', 'sa', 'lb', 'my', 'bo', 'tl', 'mg', 'as', 'tt', 'haw', 'ln', 'ha', 'ba', 'jw', 'su', 'yue']
T_LANGUAGES = Literal['english', 'chinese', 'german', 'spanish', 'russian', 'korean', 'french', 'japanese', 'portuguese', 'turkish', 'polish', 'catalan', 'dutch', 'arabic', 'swedish', 'italian', 'indonesian', 'hindi', 'finnish', 'vietnamese', 'hebrew', 'ukrainian', 'greek', 'malay', 'czech', 'romanian', 'danish', 'hungarian', 'tamil', 'norwegian', 'thai', 'urdu', 'croatian', 'bulgarian', 'lithuanian', 'latin', 'maori', 'malayalam', 'welsh', 'slovak', 'telugu', 'persian', 'latvian', 'bengali', 'serbian', 'azerbaijani', 'slovenian', 'kannada', 'estonian', 'macedonian', 'breton', 'basque', 'icelandic', 'armenian', 'nepali', 'mongolian', 'bosnian', 'kazakh', 'albanian', 'swahili', 'galician', 'marathi', 'punjabi', 'sinhala', 'khmer', 'shona', 'yoruba', 'somali', 'afrikaans', 'occitan', 'georgian', 'belarusian', 'tajik', 'sindhi', 'gujarati', 'amharic', 'yiddish', 'lao', 'uzbek', 'faroese', 'haitian creole', 'pashto', 'turkmen', 'nynorsk', 'maltese', 'sanskrit', 'luxembourgish', 'myanmar', 'tibetan', 'tagalog', 'malagasy', 'assamese', 'tatar', 'hawaiian', 'lingala', 'hausa', 'bashkir', 'javanese', 'sundanese', 'cantonese']

# code -> full name (a defensive copy of DICT) and the inverse mapping.
CODE_TO_LANGUAGE = dict(DICT)
LANGUAGE_TO_CODE = {language: code for code, language in DICT.items()}

__all__ = [
    "LANGUAGE_CODES",
    "T_LANGUAGE_CODES",
    "LANGUAGES",
    "T_LANGUAGES",
    "CODE_TO_LANGUAGE",
    "LANGUAGE_TO_CODE",
]
utils/learner/learner.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .dataclass import *
2
+
3
class DefaultTool(BaseModel):
    """
    This is the data model for the `ThrowLingo`.
    It is to help the user to learn the Foreign Language.

    The suggested max lengths are:
    `vocabulary` is less than 15
    `phrase` is less than 8
    `sentence` is less than 5
    """
    # NOTE: this docstring is emitted into the tool JSON schema the model
    # sees, so the spelling fixes ("Foregin Languagae", "less then") matter.
    # Each list pairs foreign-language records with native-language references.
    vocabulary: list[R_Vocabulary]
    phrase: list[R_Phrase]
    sentence: list[R_Sentence]
+
17
+
18
def get_default_system_prompt():
    """Return the default system prompt for the language-learning assistant.

    The grammar of the original prompt text has been cleaned up; structure,
    headings, and author/contact details are unchanged.
    """
    return """
# Assistant Author:
* 湯沂達 / Tang Yi Dar
  - changethewhat@gmail.com
  - https://github.com/mistake0316
  - https://www.linkedin.com/in/yi-dar-tang-89866717a/
  - https://medium.com/@changethewhat


# System Prompt
You are an assistant for doing the language learning.
The reason for the author to create this assistant is that he is a guy who loves to visit different places in different countries, but struggles with his poor language skills and bad memorization ability.

Some struggle scenarios are that:
* He is good at math, but does not know how to describe that in Japanese.
* He is injured, but does not know how to talk to the doctor in a different language; he needs to prepare some words to describe his status.
* He wants to learn gymnastics but does not know the names of the objects and motions in either his native language or the foreign language.

To fill the gap, he decided to create an instant language learner, which is able to generate the target language text and audio together.

Most of the time, the input will be photos and texts.
"""
41
+
42
# Public API of this module.
__all__ = ["DefaultTool", "get_default_system_prompt"]