Spaces:
Sleeping
Sleeping
Commit
·
db080f6
1
Parent(s):
3005b7a
init commit
Browse files- .gitignore +174 -0
- app.py +21 -0
- requirements.txt +3 -0
- utils/__init__.py +0 -0
- utils/caller/llm_client.py +81 -0
- utils/learner/__init__.py +0 -0
- utils/learner/dataclass.py +100 -0
- utils/learner/language.py +130 -0
- utils/learner/learner.py +45 -0
.gitignore
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# C extensions
|
| 7 |
+
*.so
|
| 8 |
+
|
| 9 |
+
# Distribution / packaging
|
| 10 |
+
.Python
|
| 11 |
+
build/
|
| 12 |
+
develop-eggs/
|
| 13 |
+
dist/
|
| 14 |
+
downloads/
|
| 15 |
+
eggs/
|
| 16 |
+
.eggs/
|
| 17 |
+
lib/
|
| 18 |
+
lib64/
|
| 19 |
+
parts/
|
| 20 |
+
sdist/
|
| 21 |
+
var/
|
| 22 |
+
wheels/
|
| 23 |
+
share/python-wheels/
|
| 24 |
+
*.egg-info/
|
| 25 |
+
.installed.cfg
|
| 26 |
+
*.egg
|
| 27 |
+
MANIFEST
|
| 28 |
+
|
| 29 |
+
# PyInstaller
|
| 30 |
+
# Usually these files are written by a python script from a template
|
| 31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 32 |
+
*.manifest
|
| 33 |
+
*.spec
|
| 34 |
+
|
| 35 |
+
# Installer logs
|
| 36 |
+
pip-log.txt
|
| 37 |
+
pip-delete-this-directory.txt
|
| 38 |
+
|
| 39 |
+
# Unit test / coverage reports
|
| 40 |
+
htmlcov/
|
| 41 |
+
.tox/
|
| 42 |
+
.nox/
|
| 43 |
+
.coverage
|
| 44 |
+
.coverage.*
|
| 45 |
+
.cache
|
| 46 |
+
nosetests.xml
|
| 47 |
+
coverage.xml
|
| 48 |
+
*.cover
|
| 49 |
+
*.py,cover
|
| 50 |
+
.hypothesis/
|
| 51 |
+
.pytest_cache/
|
| 52 |
+
cover/
|
| 53 |
+
|
| 54 |
+
# Translations
|
| 55 |
+
*.mo
|
| 56 |
+
*.pot
|
| 57 |
+
|
| 58 |
+
# Django stuff:
|
| 59 |
+
*.log
|
| 60 |
+
local_settings.py
|
| 61 |
+
db.sqlite3
|
| 62 |
+
db.sqlite3-journal
|
| 63 |
+
|
| 64 |
+
# Flask stuff:
|
| 65 |
+
instance/
|
| 66 |
+
.webassets-cache
|
| 67 |
+
|
| 68 |
+
# Scrapy stuff:
|
| 69 |
+
.scrapy
|
| 70 |
+
|
| 71 |
+
# Sphinx documentation
|
| 72 |
+
docs/_build/
|
| 73 |
+
|
| 74 |
+
# PyBuilder
|
| 75 |
+
.pybuilder/
|
| 76 |
+
target/
|
| 77 |
+
|
| 78 |
+
# Jupyter Notebook
|
| 79 |
+
.ipynb_checkpoints
|
| 80 |
+
|
| 81 |
+
# IPython
|
| 82 |
+
profile_default/
|
| 83 |
+
ipython_config.py
|
| 84 |
+
|
| 85 |
+
# pyenv
|
| 86 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 88 |
+
# .python-version
|
| 89 |
+
|
| 90 |
+
# pipenv
|
| 91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 94 |
+
# install all needed dependencies.
|
| 95 |
+
#Pipfile.lock
|
| 96 |
+
|
| 97 |
+
# UV
|
| 98 |
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
| 99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 100 |
+
# commonly ignored for libraries.
|
| 101 |
+
#uv.lock
|
| 102 |
+
|
| 103 |
+
# poetry
|
| 104 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 105 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 106 |
+
# commonly ignored for libraries.
|
| 107 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 108 |
+
#poetry.lock
|
| 109 |
+
|
| 110 |
+
# pdm
|
| 111 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 112 |
+
#pdm.lock
|
| 113 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
| 114 |
+
# in version control.
|
| 115 |
+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
| 116 |
+
.pdm.toml
|
| 117 |
+
.pdm-python
|
| 118 |
+
.pdm-build/
|
| 119 |
+
|
| 120 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 121 |
+
__pypackages__/
|
| 122 |
+
|
| 123 |
+
# Celery stuff
|
| 124 |
+
celerybeat-schedule
|
| 125 |
+
celerybeat.pid
|
| 126 |
+
|
| 127 |
+
# SageMath parsed files
|
| 128 |
+
*.sage.py
|
| 129 |
+
|
| 130 |
+
# Environments
|
| 131 |
+
.env
|
| 132 |
+
.venv
|
| 133 |
+
env/
|
| 134 |
+
venv/
|
| 135 |
+
ENV/
|
| 136 |
+
env.bak/
|
| 137 |
+
venv.bak/
|
| 138 |
+
|
| 139 |
+
# Spyder project settings
|
| 140 |
+
.spyderproject
|
| 141 |
+
.spyproject
|
| 142 |
+
|
| 143 |
+
# Rope project settings
|
| 144 |
+
.ropeproject
|
| 145 |
+
|
| 146 |
+
# mkdocs documentation
|
| 147 |
+
/site
|
| 148 |
+
|
| 149 |
+
# mypy
|
| 150 |
+
.mypy_cache/
|
| 151 |
+
.dmypy.json
|
| 152 |
+
dmypy.json
|
| 153 |
+
|
| 154 |
+
# Pyre type checker
|
| 155 |
+
.pyre/
|
| 156 |
+
|
| 157 |
+
# pytype static type analyzer
|
| 158 |
+
.pytype/
|
| 159 |
+
|
| 160 |
+
# Cython debug symbols
|
| 161 |
+
cython_debug/
|
| 162 |
+
|
| 163 |
+
# PyCharm
|
| 164 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 165 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 166 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 167 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 168 |
+
#.idea/
|
| 169 |
+
|
| 170 |
+
# Ruff stuff:
|
| 171 |
+
.ruff_cache/
|
| 172 |
+
|
| 173 |
+
# PyPI configuration file
|
| 174 |
+
.pypirc
|
app.py
CHANGED
|
@@ -1,7 +1,28 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
|
|
|
|
|
|
|
|
|
|
| 3 |
with gr.Blocks() as demo:
|
| 4 |
gr.Markdown("# Lang Thrower")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
|
| 7 |
if __name__ == "__main__":
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
|
| 3 |
+
async def chat_fn(message, history, state_history, state_audios):
    """Chat handler stub: echo the message and pass both states through.

    Args:
        message: The user's multimodal message for this turn.
        history: Chat history managed by gr.ChatInterface (unused for now).
        state_history: Session-level message state (OpenAI format).
        state_audios: Session-level list of generated audio records.

    Returns:
        A (reply, state_history, state_audios) tuple, all unchanged.
    """
    reply = message
    return reply, state_history, state_audios
|
| 5 |
+
|
| 6 |
with gr.Blocks() as demo:
    gr.Markdown("# Lang Thrower")
    # Session state threaded through chat_fn on every turn.
    state_history = gr.State([]) # The state for openai usage
    # Generated audio records for the session.
    state_audios = gr.State([
        # {"text":"...", "path":"..."}
    ])

    # Multimodal input: free text plus one or more image attachments.
    textbox = gr.MultimodalTextbox(
        file_types=["image"],
        file_count="multiple",
        placeholder="Please give text and image.",
    )

    chat_interface = gr.ChatInterface(
        fn=chat_fn,
        textbox=textbox,
        # The two State components are passed to chat_fn as extra inputs and
        # received back as extra outputs, so the handler can update them.
        additional_inputs=[state_history, state_audios],
        additional_outputs=[state_history, state_audios],
    )
    # NOTE(review): ChatInterface already places `textbox` in its own layout;
    # calling render() again here may duplicate the component — confirm this
    # placement is intentional.
    textbox.render()
|
| 26 |
|
| 27 |
|
| 28 |
if __name__ == "__main__":
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openai
|
| 2 |
+
python-magic
|
| 3 |
+
edge-tts
|
utils/__init__.py
ADDED
|
File without changes
|
utils/caller/llm_client.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import base64
|
| 2 |
+
from openai import Client
|
| 3 |
+
from gradio_client.utils import is_http_url_like
|
| 4 |
+
import magic
|
| 5 |
+
from pydantic import BaseModel
|
| 6 |
+
from ..learner.learner import DefaultTool
|
| 7 |
+
|
| 8 |
+
def get_client(api_key: str | None = None, **kwargs):
    """Build an OpenAI ``Client``.

    Args:
        api_key: Explicit API key; ``None`` lets the SDK fall back to its
            own environment-based lookup.
        **kwargs: Forwarded verbatim to the ``Client`` constructor.

    Returns:
        A configured ``openai.Client`` instance.
    """
    return Client(api_key=api_key, **kwargs)
|
| 13 |
+
|
| 14 |
+
def encode_image(image_path: str):
    """Encode a local image file as a ``data:`` URI.

    The MIME type is sniffed from the file content via libmagic, so the
    result does not depend on the file extension.
    """
    with open(image_path, "rb") as fh:
        payload = fh.read()
    encoded = base64.b64encode(payload).decode("utf-8")
    mime_type = magic.Magic(mime=True).from_file(image_path)
    return f"data:{mime_type};base64,{encoded}"
|
| 20 |
+
|
| 21 |
+
def image_to_content(
    image_path: str,
    detail: str = "auto",
):
    """Wrap an image path or URL into an OpenAI ``image_url`` content part.

    Remote (http-like) URLs are passed through unchanged; local paths are
    inlined as base64 data URIs via ``encode_image``.

    Args:
        image_path: Local file path or remote image URL.
        detail: OpenAI image detail level ("auto", "low", or "high").
    """
    if is_http_url_like(image_path):
        url = image_path
    else:
        url = encode_image(image_path)

    return {
        "type": "image_url",
        "image_url": {
            "url": url,
            "detail": detail,
        },
    }
|
| 38 |
+
|
| 39 |
+
def audio_to_content(
    data: str,
    format: str,
):
    """Wrap base64 audio data into an OpenAI ``input_audio`` content part.

    Args:
        data: Base64-encoded audio payload.
        format: Audio container format, e.g. ``"wav"`` or ``"mp3"``.
    """
    payload = {"data": data, "format": format}
    return {"type": "input_audio", "input_audio": payload}
|
| 50 |
+
|
| 51 |
+
async def chat_completions(
    messages: list,
    model: str,
    *,
    client: Client | None = None,
    tool_models: list[type[BaseModel]] | None = None,
    **kwargs,
):
    """Call the OpenAI chat-completions endpoint with pydantic-derived tools.

    Args:
        messages: Conversation in the OpenAI message format.
        model: Model name to call.
        client: Optional pre-built ``Client``; a default one is created when
            omitted.
        tool_models: Pydantic model classes exposed as function tools.
            Defaults to ``[DefaultTool]``.
        **kwargs: Extra options forwarded to ``chat.completions.create``.
            An explicit ``tools`` entry takes precedence over ``tool_models``.

    Returns:
        The raw chat-completion response object.
    """
    # Fix: the original used a mutable default argument ([DefaultTool]);
    # None is the sentinel for "use the default toolset".
    if tool_models is None:
        tool_models = [DefaultTool]

    tools = kwargs.pop("tools", None)
    if tools is None:
        tools = [
            {
                "type": "function",
                "function": {
                    # Fix: the OpenAI API requires a function "name"; derive
                    # it (and a description) from the pydantic model itself.
                    "name": tool_model.__name__,
                    "description": (tool_model.__doc__ or "").strip(),
                    "parameters": tool_model.model_json_schema(),
                    "strict": True,
                },
            }
            for tool_model in tool_models
        ]

    client = client or get_client()
    # NOTE(review): ``Client`` is the synchronous OpenAI client, so this
    # call blocks the event loop despite the async signature — consider
    # ``AsyncClient`` and awaiting the call; confirm intent.
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        tools=tools,
        **kwargs,
    )

    return response
|
utils/learner/__init__.py
ADDED
|
File without changes
|
utils/learner/dataclass.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing_extensions import Self
|
| 2 |
+
from typing import Any, Literal, Optional
|
| 3 |
+
from pydantic import BaseModel, Field, model_validator, ValidationError
|
| 4 |
+
from .language import (
|
| 5 |
+
LANGUAGE_CODES,
|
| 6 |
+
T_LANGUAGE_CODES,
|
| 7 |
+
LANGUAGES,
|
| 8 |
+
T_LANGUAGES,
|
| 9 |
+
CODE_TO_LANGUAGE,
|
| 10 |
+
LANGUAGE_TO_CODE,
|
| 11 |
+
)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class _Record(BaseModel):
    """Base data model for a single language-learner record."""
    lang: T_LANGUAGE_CODES | T_LANGUAGES | str = Field(..., description="The language name.")
    data: str = Field(..., description="The data for the record, like `apple` is vocabulary, `How are you.` is a phrase. `I like your product! How much is this` is a sentence.")
    type: None = Field(None, description="The field needs to be defined in the sub data model.")
    meta: dict | Any = Field(None, description="The field to be implement or overwrite, please do not fill this yet.")
    IPA: Optional[str] = Field(None, description="International Phonetic Alphabet")

    @model_validator(mode='after')
    def _validator_lang(self) -> Self:
        """Normalize ``lang`` to a lowercase full language name.

        ISO-style codes (e.g. ``"en"``) are expanded to their language name;
        values that are neither a known code nor a known language are kept
        as-is but flagged in ``meta["warning.lang"]``.
        """
        lang = self.lang.lower()
        if lang in LANGUAGE_CODES:
            lang = CODE_TO_LANGUAGE[lang]
        elif lang not in LANGUAGES:
            # Fix: the original warned for every non-code value, so a valid
            # full language name such as "english" was incorrectly flagged.
            if self.meta is None:
                self.meta = {}
            self.meta["warning.lang"] = f"The language is not in the language list {LANGUAGES}."

        self.lang = lang
        return self
|
| 35 |
+
|
| 36 |
+
class Vocabulary(_Record):
    """
    This is for word level record.
    Please fill the `data` field with vocabulary level input respect to the language.
    e.g.
    Chinese: "貓", "車", "醫生", "學校", "咖啡", "書"
    English: "Cat", "Car", "Doctor", "School", "Coffee", "Book"
    Japanese: "猫(ねこ)", "車(くるま)", "医者(いしゃ)", "学校(がっこう)", "コーヒー", "本(ほん)"
    Korean: "고양이", "차", "의사", "학교", "커피", "책"
    Italian: "Gatto", "Auto", "Dottore", "Scuola", "Caffè", "Libro"
    """
    # Discriminator tag identifying a word-level record.
    type: Literal['WORD'] = "WORD"
|
| 48 |
+
|
| 49 |
+
class Phrase(_Record):
    """
    This is for phrase level record.
    Please fill the `data` field with phrase level input respect to the language.
    e.g.
    Chinese: "你好", "謝謝你", "我愛你", "怎麼了?", "好久不見", "多少錢?"
    English: "Hello", "Thank you", "I love you", "What's wrong?", "Long time no see", "How much is it?"
    Japanese: "こんにちは", "ありがとう", "愛してる", "どうしたの?", "久しぶり(ひさしぶり)", "いくらですか?"
    Korean: "안녕하세요", "감사합니다", "사랑해요", "왜 그래요?", "오랜만이에요", "얼마예요?"
    Italian: "Ciao", "Grazie", "Ti amo", "Che succede?", "È da tanto tempo!", "Quanto costa?"
    """
    # Fix: the original declared Literal['WORD'] = "WORD" (copy-paste from
    # Vocabulary), which made Phrase records indistinguishable from
    # Vocabulary records by their `type` tag; Sentence already follows the
    # per-class pattern with 'SENTENCE'.
    type: Literal['PHRASE'] = "PHRASE"
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
class Sentence(_Record):
    """
    This is for sentence level record.
    Please fill the `data` field with sentence level input respect to the language.
    e.g.
    Chinese: "這是一隻可愛的貓。", "我想喝一杯咖啡。", "你住在哪裡?", "今天的天氣很好。", "你能幫助我嗎?", "我正在學習日語和韓語。"
    English: "This is a cute cat.", "I want to drink a cup of coffee.", "Where do you live?", "The weather is nice today.", "Can you help me?", "I am learning Japanese and Korean."
    Japanese: "これはかわいい猫です。", "コーヒーを一杯飲みたいです。", "どこに住んでいますか?", "今日は天気がいいです。", "手伝ってくれますか?", "日本語と韓国語を勉強しています。"
    Korean: "이건 귀여운 고양이예요.", "커피 한 잔 마시고 싶어요.", "어디에 살아요?", "오늘 날씨가 좋아요.", "저를 도와줄 수 있어요?", "일본어와 한국어를 공부하고 있어요."
    Italian: "Questo è un gatto carino.", "Voglio bere una tazza di caffè.", "Dove vivi?", "Oggi il tempo è bello.", "Puoi aiutarmi?", "Sto imparando il giapponese e il coreano."
    """
    # Discriminator tag identifying a sentence-level record.
    type: Literal['SENTENCE'] = "SENTENCE"
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
class ReadableReference(BaseModel):
    """
    This is a reference for the foreign language.
    Try to let the user understand the foreign language more easily.
    Please use the user's native language to do this.
    """
    # Display name of the referenced item, in the user's native language.
    name: str
    # One-line gloss of the foreign item.
    short_explain: str
    description: str = Field(
        ...,
        description="Try to describe the foreign language more comprehensively."
    )
|
| 89 |
+
|
| 90 |
+
class __R(BaseModel):
    # Shared base pairing a native-language reference with a foreign record.
    reference: ReadableReference

class R_Vocabulary(__R):
    # Word-level foreign record plus its native-language reference.
    foreign: Vocabulary

class R_Phrase(__R):
    # Phrase-level foreign record plus its native-language reference.
    foreign: Phrase

class R_Sentence(__R):
    # Sentence-level foreign record plus its native-language reference.
    foreign: Sentence
|
utils/learner/language.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Literal

# We follow the openai language codes: ISO-639-1-style code -> lowercase name.
DICT = {
    "en": "english",
    "zh": "chinese",
    "de": "german",
    "es": "spanish",
    "ru": "russian",
    "ko": "korean",
    "fr": "french",
    "ja": "japanese",
    "pt": "portuguese",
    "tr": "turkish",
    "pl": "polish",
    "ca": "catalan",
    "nl": "dutch",
    "ar": "arabic",
    "sv": "swedish",
    "it": "italian",
    "id": "indonesian",
    "hi": "hindi",
    "fi": "finnish",
    "vi": "vietnamese",
    "he": "hebrew",
    "uk": "ukrainian",
    "el": "greek",
    "ms": "malay",
    "cs": "czech",
    "ro": "romanian",
    "da": "danish",
    "hu": "hungarian",
    "ta": "tamil",
    "no": "norwegian",
    "th": "thai",
    "ur": "urdu",
    "hr": "croatian",
    "bg": "bulgarian",
    "lt": "lithuanian",
    "la": "latin",
    "mi": "maori",
    "ml": "malayalam",
    "cy": "welsh",
    "sk": "slovak",
    "te": "telugu",
    "fa": "persian",
    "lv": "latvian",
    "bn": "bengali",
    "sr": "serbian",
    "az": "azerbaijani",
    "sl": "slovenian",
    "kn": "kannada",
    "et": "estonian",
    "mk": "macedonian",
    "br": "breton",
    "eu": "basque",
    "is": "icelandic",
    "hy": "armenian",
    "ne": "nepali",
    "mn": "mongolian",
    "bs": "bosnian",
    "kk": "kazakh",
    "sq": "albanian",
    "sw": "swahili",
    "gl": "galician",
    "mr": "marathi",
    "pa": "punjabi",
    "si": "sinhala",
    "km": "khmer",
    "sn": "shona",
    "yo": "yoruba",
    "so": "somali",
    "af": "afrikaans",
    "oc": "occitan",
    "ka": "georgian",
    "be": "belarusian",
    "tg": "tajik",
    "sd": "sindhi",
    "gu": "gujarati",
    "am": "amharic",
    "yi": "yiddish",
    "lo": "lao",
    "uz": "uzbek",
    "fo": "faroese",
    "ht": "haitian creole",
    "ps": "pashto",
    "tk": "turkmen",
    "nn": "nynorsk",
    "mt": "maltese",
    "sa": "sanskrit",
    "lb": "luxembourgish",
    "my": "myanmar",
    "bo": "tibetan",
    "tl": "tagalog",
    "mg": "malagasy",
    "as": "assamese",
    "tt": "tatar",
    "haw": "hawaiian",
    "ln": "lingala",
    "ha": "hausa",
    "ba": "bashkir",
    "jw": "javanese",
    "su": "sundanese",
    "yue": "cantonese",
}

# Fix: these lists were hand-maintained duplicates of DICT and could drift.
# Deriving them keeps a single source of truth; dict insertion order matches
# the order of the original literals, so the values are unchanged.
LANGUAGE_CODES = list(DICT)
LANGUAGES = list(DICT.values())

# The Literal aliases must stay spelled out for static type checkers.
T_LANGUAGE_CODES = Literal['en', 'zh', 'de', 'es', 'ru', 'ko', 'fr', 'ja', 'pt', 'tr', 'pl', 'ca', 'nl', 'ar', 'sv', 'it', 'id', 'hi', 'fi', 'vi', 'he', 'uk', 'el', 'ms', 'cs', 'ro', 'da', 'hu', 'ta', 'no', 'th', 'ur', 'hr', 'bg', 'lt', 'la', 'mi', 'ml', 'cy', 'sk', 'te', 'fa', 'lv', 'bn', 'sr', 'az', 'sl', 'kn', 'et', 'mk', 'br', 'eu', 'is', 'hy', 'ne', 'mn', 'bs', 'kk', 'sq', 'sw', 'gl', 'mr', 'pa', 'si', 'km', 'sn', 'yo', 'so', 'af', 'oc', 'ka', 'be', 'tg', 'sd', 'gu', 'am', 'yi', 'lo', 'uz', 'fo', 'ht', 'ps', 'tk', 'nn', 'mt', 'sa', 'lb', 'my', 'bo', 'tl', 'mg', 'as', 'tt', 'haw', 'ln', 'ha', 'ba', 'jw', 'su', 'yue']
T_LANGUAGES = Literal['english', 'chinese', 'german', 'spanish', 'russian', 'korean', 'french', 'japanese', 'portuguese', 'turkish', 'polish', 'catalan', 'dutch', 'arabic', 'swedish', 'italian', 'indonesian', 'hindi', 'finnish', 'vietnamese', 'hebrew', 'ukrainian', 'greek', 'malay', 'czech', 'romanian', 'danish', 'hungarian', 'tamil', 'norwegian', 'thai', 'urdu', 'croatian', 'bulgarian', 'lithuanian', 'latin', 'maori', 'malayalam', 'welsh', 'slovak', 'telugu', 'persian', 'latvian', 'bengali', 'serbian', 'azerbaijani', 'slovenian', 'kannada', 'estonian', 'macedonian', 'breton', 'basque', 'icelandic', 'armenian', 'nepali', 'mongolian', 'bosnian', 'kazakh', 'albanian', 'swahili', 'galician', 'marathi', 'punjabi', 'sinhala', 'khmer', 'shona', 'yoruba', 'somali', 'afrikaans', 'occitan', 'georgian', 'belarusian', 'tajik', 'sindhi', 'gujarati', 'amharic', 'yiddish', 'lao', 'uzbek', 'faroese', 'haitian creole', 'pashto', 'turkmen', 'nynorsk', 'maltese', 'sanskrit', 'luxembourgish', 'myanmar', 'tibetan', 'tagalog', 'malagasy', 'assamese', 'tatar', 'hawaiian', 'lingala', 'hausa', 'bashkir', 'javanese', 'sundanese', 'cantonese']

# Bidirectional lookup tables, also derived from DICT.
CODE_TO_LANGUAGE = dict(DICT)
LANGUAGE_TO_CODE = {language: code for code, language in DICT.items()}

__all__ = [
    "LANGUAGE_CODES",
    "T_LANGUAGE_CODES",
    "LANGUAGES",
    "T_LANGUAGES",
    "CODE_TO_LANGUAGE",
    "LANGUAGE_TO_CODE",
]
|
utils/learner/learner.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .dataclass import *
|
| 2 |
+
|
| 3 |
+
class DefaultTool(BaseModel):
    """
    This is the data model for the `ThrowLingo`.
    It is to help the user to learn the foreign language.

    The suggested max lengths are:
    `vocabulary` is less than 15
    `phrase` is less than 8
    `sentence` is less than 5
    """
    # Word-level items, each paired with a native-language reference.
    vocabulary: list[R_Vocabulary]
    # Phrase-level items, each paired with a native-language reference.
    phrase: list[R_Phrase]
    # Sentence-level items, each paired with a native-language reference.
    sentence: list[R_Sentence]
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def get_default_system_prompt():
    """Return the default system prompt: author credits plus the assistant's
    role description for the language-learning use case."""
    # The returned text is sent to the model verbatim; do not edit casually.
    return """
# Assistant Author:
* 湯沂達 / Tang Yi Dar
  - changethewhat@gmail.com
  - https://github.com/mistake0316
  - https://www.linkedin.com/in/yi-dar-tang-89866717a/
  - https://medium.com/@changethewhat


# System Prompt
You are an assistant for doing the language learning.
The reason for the author to create this assistant is that he is a guy love to visit different place in different country, but struggle with his poor language skill and bad memorization ability.

Some struggle scenarios are that:
* He is good at math, but do not know how to describe that in Japanese.
* He is injured, but do not know what how to talk to the doctor in different language, he needs to prepare some words to describe his status.
* He want to learn gymnastic but do not know what kind of object and the name of motion in both his native language and the foreign language.

To fill the gap, he decide to create a instant language learner, which is able to generate the target language text and audio together.

Most of the time, the input will be photos and texts.
"""
|
| 41 |
+
|
| 42 |
+
__all__ = [
|
| 43 |
+
"DefaultTool",
|
| 44 |
+
"get_default_system_prompt"
|
| 45 |
+
]
|