| | """ |
| | Translate this project to other languages (experimental, please open an issue if there is any bug) |
| | |
| | |
| | Usage: |
| | 1. modify config.py, set your LLM_MODEL and API_KEY(s) to provide access to OPENAI (or any other LLM model provider) |
| | |
| | 2. modify LANG (below ↓) |
| | LANG = "English" |
| | |
| | 3. modify TransPrompt (below ↓) |
| | TransPrompt = f"Replace each json value `#` with translated results in English, e.g., \"原始文本\":\"TranslatedText\". Keep Json format. Do not answer #." |
| | |
| | 4. Run `python multi_language.py`. |
| | Note: You need to run it multiple times to increase translation coverage because GPT makes mistakes sometimes. |
| | (You can also run `CACHE_ONLY=True python multi_language.py` to use cached translation mapping) |
| | |
| | 5. Find the translated program in `multi-language\English\*` |
| | |
| | P.S. |
| | |
| | - The translation mapping will be stored in `docs/translation_xxxx.json`, you can revised mistaken translation there. |
| | |
| | - If you would like to share your `docs/translation_xxxx.json`, (so that everyone can use the cached & revised translation mapping), please open a Pull Request |
| | |
| | - If there is any translation error in `docs/translation_xxxx.json`, please open a Pull Request |
| | |
| | - Welcome any Pull Request, regardless of language |
| | """ |

import os
import json
import functools
import re
import pickle
import time
import ast
from toolbox import get_conf

# Read from the environment as a string, so any non-empty value (even
# `CACHE_ONLY=False`) enables cache-only mode; leave it unset to disable.
CACHE_ONLY = os.environ.get('CACHE_ONLY', False)

CACHE_FOLDER = get_conf('PATH_LOGGING')

# Paths and directories that must never be scanned or translated
blacklist = ['multi-language', CACHE_FOLDER, '.git', 'private_upload', 'multi_language.py', 'build', '.github', '.vscode', '__pycache__', 'venv']

LANG = "English"
TransPrompt = f"Replace each json value `#` with translated results in English, e.g., \"原始文本\":\"TranslatedText\". Keep Json format. Do not answer #."

os.makedirs(CACHE_FOLDER, exist_ok=True)


def lru_file_cache(maxsize=128, ttl=None, filename=None):
    """
    Decorator that caches a function's return value after it is called with given arguments.
    It uses a Least Recently Used (LRU) strategy to limit the size of the cache.
    maxsize: Maximum size of the cache. Defaults to 128.
    ttl: Time-to-live of a cache entry. If an entry hasn't been accessed for `ttl` seconds, it is evicted.
    filename: Name of the file to persist the cache in. If not supplied, the cache is kept in memory only.
    """
    cache_path = os.path.join(CACHE_FOLDER, f"{filename}.cache") if filename is not None else None

    def decorator_function(func):
        cache = {}
        _cache_info = {
            "hits": 0,
            "misses": 0,
            "maxsize": maxsize,
            "currsize": 0,
            "ttl": ttl,
            "filename": cache_path,
        }

        @functools.wraps(func)
        def wrapper_function(*args, **kwargs):
            # Include the kwargs *items* in the key; keying on frozenset(kwargs)
            # alone would make calls that differ only in keyword values collide.
            key = str((args, frozenset(kwargs.items())))
            if key in cache:
                if _cache_info["ttl"] is None or (cache[key][1] + _cache_info["ttl"]) >= time.time():
                    _cache_info["hits"] += 1
                    print(f'Warning: reading from cache, last accessed {(time.time()-cache[key][1])//60} minutes ago'); time.sleep(2)
                    cache[key][1] = time.time()
                    return cache[key][0]
                else:
                    del cache[key]

            result = func(*args, **kwargs)
            cache[key] = [result, time.time()]
            _cache_info["misses"] += 1
            _cache_info["currsize"] += 1

            # Evict the least recently accessed entry when the cache overflows
            if _cache_info["currsize"] > _cache_info["maxsize"]:
                oldest_key = None
                for k in cache:
                    if oldest_key is None:
                        oldest_key = k
                    elif cache[k][1] < cache[oldest_key][1]:
                        oldest_key = k
                del cache[oldest_key]
                _cache_info["currsize"] -= 1

            if cache_path is not None:
                with open(cache_path, "wb") as f:
                    pickle.dump(cache, f)

            return result

        def cache_info():
            return _cache_info

        wrapper_function.cache_info = cache_info

        # Warm the cache from disk if a previous run persisted it
        if cache_path is not None and os.path.exists(cache_path):
            with open(cache_path, "rb") as f:
                cache = pickle.load(f)
            _cache_info["currsize"] = len(cache)

        return wrapper_function

    return decorator_function
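
# A minimal usage sketch (`slow_square` is hypothetical, purely illustrative;
# nothing in this project defines it):
#
#     @lru_file_cache(maxsize=10, ttl=3600, filename="demo")
#     def slow_square(x):
#         time.sleep(1)
#         return x * x
#
#     slow_square(3)  # miss: computed, then persisted to PATH_LOGGING/demo.cache
#     slow_square(3)  # hit: served from the cache (after the warning above)
#     print(slow_square.cache_info())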


def contains_chinese(string):
    """
    Returns True if the given string contains Chinese characters, False otherwise.
    """
    chinese_regex = re.compile(u'[\u4e00-\u9fff]+')
    return chinese_regex.search(string) is not None


def split_list(lst, n_each_req):
    """
    Split a list into smaller lists, each with a maximum number of elements.
    :param lst: the list to split
    :param n_each_req: the maximum number of elements in each sub-list
    :return: a list of sub-lists
    """
    result = []
    for i in range(0, len(lst), n_each_req):
        result.append(lst[i:i + n_each_req])
    return result
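
# For example (arbitrary values, purely illustrative):
#
#     split_list([1, 2, 3, 4, 5], n_each_req=2)  # -> [[1, 2], [3, 4], [5]]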


def map_to_json(mapping, language):
    """Merge `mapping` into the cached translation file for `language`."""
    dict_ = read_map_from_json(language)
    dict_.update(mapping)
    with open(f'docs/translate_{language.lower()}.json', 'w', encoding='utf8') as f:
        json.dump(dict_, f, indent=4, ensure_ascii=False)


def read_map_from_json(language):
    """Load the cached translation mapping, dropping failed (None) entries and non-Chinese keys."""
    if os.path.exists(f'docs/translate_{language.lower()}.json'):
        with open(f'docs/translate_{language.lower()}.json', 'r', encoding='utf8') as f:
            res = json.load(f)
            res = {k: v for k, v in res.items() if v is not None and contains_chinese(k)}
            return res
    return {}


def advanced_split(split_strings, splitter, include_splitter=False):
    """
    Split each string on `splitter`, dropping fragments that contain no Chinese;
    strings that do not contain `splitter` pass through unchanged.
    """
    result = []
    for string_ in split_strings:
        if splitter in string_:
            parts = string_.split(splitter)
            for i, s in enumerate(parts):
                if include_splitter:
                    if i != len(parts) - 1:
                        parts[i] += splitter
                parts[i] = parts[i].strip()
            for i in reversed(range(len(parts))):
                if not contains_chinese(parts[i]):
                    parts.pop(i)
            result.extend(parts)
        else:
            result.append(string_)
    return result
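
# For example (an illustrative string, not from the real codebase):
#
#     advanced_split(["加载失败, retry later"], splitter=",")
#     # -> ['加载失败']   ("retry later" is dropped: it contains no Chinese)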


cached_translation = read_map_from_json(language=LANG)


def trans(word_to_translate, language, special=False):
    if len(word_to_translate) == 0: return {}
    from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
    from toolbox import ChatBotWithCookies, load_chat_cookies

    cookies = load_chat_cookies()
    llm_kwargs = {
        'api_key': cookies['api_key'],
        'llm_model': cookies['llm_model'],
        'top_p': 1.0,
        'max_length': None,
        'temperature': 0.4,
    }
    import random
    N_EACH_REQ = random.randint(16, 32)
    word_to_translate_split = split_list(word_to_translate, N_EACH_REQ)
    inputs_array = [str(s) for s in word_to_translate_split]
    inputs_show_user_array = inputs_array
    history_array = [[] for _ in inputs_array]
    if special:
        sys_prompt_array = ["Translate the following names to English with the CamelCase naming convention. Keep the original format." for _ in inputs_array]
    else:
        sys_prompt_array = [f"Translate the following sentences to {LANG}. E.g., you should answer in the format ['translation of sentence 1', 'translation of sentence 2']. Do NOT answer with Chinese!" for _ in inputs_array]
    chatbot = ChatBotWithCookies(llm_kwargs)
    gpt_say_generator = request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array,
        inputs_show_user_array,
        llm_kwargs,
        chatbot,
        history_array,
        sys_prompt_array,
    )
    while True:
        try:
            gpt_say = next(gpt_say_generator)
            print(gpt_say[1][0][1])
        except StopIteration as e:
            # the generator's return value (the full dialogue history) arrives via StopIteration
            result = e.value
            break
    # `result` alternates between the prompts we sent (even indices) and GPT's replies (odd indices)
    translated_result = {}
    for i, r in enumerate(result):
        if i % 2 == 1:
            try:
                res_before_trans = eval(result[i-1])
                res_after_trans = eval(result[i])
                if len(res_before_trans) != len(res_after_trans):
                    raise RuntimeError
                for a, b in zip(res_before_trans, res_after_trans):
                    translated_result[a] = b
            except:
                print('GPT answered in an unexpected format; some words may not be translated, but you can run the script again to increase translation coverage.')
                res_before_trans = eval(result[i-1])
                for a in res_before_trans:
                    translated_result[a] = None
    return translated_result
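
# An illustrative round trip for the list format above (the reply is simulated;
# real GPT output varies):
#
#     prompt (even index): "['正在加载', '已完成']"
#     reply  (odd index):  "['Loading', 'Finished']"
#     => translated_result == {'正在加载': 'Loading', '已完成': 'Finished'}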


def trans_json(word_to_translate, language, special=False):
    if len(word_to_translate) == 0: return {}
    from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
    from toolbox import ChatBotWithCookies, load_chat_cookies

    cookies = load_chat_cookies()
    llm_kwargs = {
        'api_key': cookies['api_key'],
        'llm_model': cookies['llm_model'],
        'top_p': 1.0,
        'max_length': None,
        'temperature': 0.4,
    }
    import random
    N_EACH_REQ = random.randint(16, 32)
    random.shuffle(word_to_translate)   # note: shuffles the caller's list in place
    word_to_translate_split = split_list(word_to_translate, N_EACH_REQ)
    # Send each batch as {"原文": "#"} and ask GPT to replace every `#` placeholder
    inputs_array = [{k: "#" for k in s} for s in word_to_translate_split]
    inputs_array = [json.dumps(i, ensure_ascii=False) for i in inputs_array]

    inputs_show_user_array = inputs_array
    history_array = [[] for _ in inputs_array]
    sys_prompt_array = [TransPrompt for _ in inputs_array]
    chatbot = ChatBotWithCookies(llm_kwargs)
    gpt_say_generator = request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array,
        inputs_show_user_array,
        llm_kwargs,
        chatbot,
        history_array,
        sys_prompt_array,
    )
    while True:
        try:
            gpt_say = next(gpt_say_generator)
            print(gpt_say[1][0][1])
        except StopIteration as e:
            result = e.value
            break
    # odd indices of `result` hold GPT's replies, which should be valid JSON
    translated_result = {}
    for i, r in enumerate(result):
        if i % 2 == 1:
            try:
                translated_result.update(json.loads(result[i]))
            except:
                print(result[i])
                print(result)
    return translated_result
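
# An illustrative exchange for the `#`-placeholder format (the reply is
# simulated; real GPT output varies):
#
#     request: {"正在加载": "#", "已完成": "#"}
#     reply:   {"正在加载": "Loading", "已完成": "Finished"}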


def step_1_core_key_translate():
    LANG_STD = 'std'

    def extract_chinese_characters(file_path):
        """Collect Chinese identifiers (names and import targets) from a Python file."""
        syntax = []
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
        root = ast.parse(content)
        for node in ast.walk(root):
            if isinstance(node, ast.Name):
                if contains_chinese(node.id): syntax.append(node.id)
            if isinstance(node, ast.Import):
                for n in node.names:
                    if contains_chinese(n.name): syntax.append(n.name)
            elif isinstance(node, ast.ImportFrom):
                for n in node.names:
                    if contains_chinese(n.name): syntax.append(n.name)
                # `node.module` is None for relative imports such as `from . import x`
                if node.module is not None:
                    for k in node.module.split('.'):
                        if contains_chinese(k): syntax.append(k)
        return syntax
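
    # For example, given a (hypothetical) import line such as
    #
    #     from crazy_functions.解析项目源代码 import 解析一个Python项目
    #
    # this returns ['解析一个Python项目', '解析项目源代码'], which step 1 later maps
    # to CamelCase English identifiers.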

    def extract_chinese_characters_from_directory(directory_path):
        chinese_characters = []
        for root, dirs, files in os.walk(directory_path):
            if any([b in root for b in blacklist]):
                continue
            print(files)
            for file in files:
                if file.endswith('.py'):
                    file_path = os.path.join(root, file)
                    chinese_characters.extend(extract_chinese_characters(file_path))
        return chinese_characters

    directory_path = './'
    chinese_core_names = extract_chinese_characters_from_directory(directory_path)
    # deduplicate while preserving order
    chinese_core_keys_norepeat = list(dict.fromkeys(chinese_core_names))
    need_translate = []
    cached_translation = read_map_from_json(language=LANG_STD)
    cached_translation_keys = list(cached_translation.keys())
    for d in chinese_core_keys_norepeat:
        if d not in cached_translation_keys:
            need_translate.append(d)

    if CACHE_ONLY:
        need_translate_mapping = {}
    else:
        need_translate_mapping = trans(need_translate, language=LANG_STD, special=True)
    map_to_json(need_translate_mapping, language=LANG_STD)
    cached_translation = read_map_from_json(language=LANG_STD)
    cached_translation = dict(sorted(cached_translation.items(), key=lambda x: -len(x[0])))

    # Sort longest keys first so longer identifiers are replaced before their
    # substrings; identifiers that still lack a cached translation are skipped
    # (rerun the script to improve coverage).
    chinese_core_keys_norepeat_mapping = {}
    for k in chinese_core_keys_norepeat:
        if k in cached_translation:
            chinese_core_keys_norepeat_mapping.update({k: cached_translation[k]})
    chinese_core_keys_norepeat_mapping = dict(sorted(chinese_core_keys_norepeat_mapping.items(), key=lambda x: -len(x[0])))

    def copy_source_code():
        """Copy the project into ./multi-language/<LANG>/, skipping blacklisted paths."""
        import shutil
        shutil.rmtree(f'./multi-language/{LANG}/', ignore_errors=True)
        os.makedirs('./multi-language', exist_ok=True)
        backup_dir = f'./multi-language/{LANG}/'
        shutil.copytree('./', backup_dir, ignore=lambda x, y: blacklist)
    copy_source_code()

    # Rewrite every python file in the copy, replacing each Chinese core identifier
    # with its English counterpart
    directory_path = f'./multi-language/{LANG}/'
    for root, dirs, files in os.walk(directory_path):
        for file in files:
            if file.endswith('.py'):
                file_path = os.path.join(root, file)

                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()

                for k, v in chinese_core_keys_norepeat_mapping.items():
                    content = content.replace(k, v)

                with open(file_path, 'w', encoding='utf-8') as f:
                    f.write(content)


def step_2_core_key_translate():

    def load_string(strings, string_input):
        """Split a Chinese string into minimal fragments and append them to `strings`."""
        string_ = string_input.strip().strip(',').strip().strip('.').strip()
        if string_.startswith('[Local Message]'):
            string_ = string_.replace('[Local Message]', '')
            string_ = string_.strip().strip(',').strip().strip('.').strip()
        split_strings = [string_]
        # Split aggressively on punctuation (ASCII and full-width alike) so that only
        # minimal Chinese fragments are sent for translation
        split_strings = advanced_split(split_strings, splitter=",", include_splitter=False)
        split_strings = advanced_split(split_strings, splitter="。", include_splitter=False)
        split_strings = advanced_split(split_strings, splitter=")", include_splitter=False)
        split_strings = advanced_split(split_strings, splitter="(", include_splitter=False)
        split_strings = advanced_split(split_strings, splitter="(", include_splitter=False)
        split_strings = advanced_split(split_strings, splitter=")", include_splitter=False)
        split_strings = advanced_split(split_strings, splitter="<", include_splitter=False)
        split_strings = advanced_split(split_strings, splitter=">", include_splitter=False)
        split_strings = advanced_split(split_strings, splitter="[", include_splitter=False)
        split_strings = advanced_split(split_strings, splitter="]", include_splitter=False)
        split_strings = advanced_split(split_strings, splitter="【", include_splitter=False)
        split_strings = advanced_split(split_strings, splitter="】", include_splitter=False)
        split_strings = advanced_split(split_strings, splitter="?", include_splitter=False)
        split_strings = advanced_split(split_strings, splitter=":", include_splitter=False)
        split_strings = advanced_split(split_strings, splitter=":", include_splitter=False)
        split_strings = advanced_split(split_strings, splitter=",", include_splitter=False)
        split_strings = advanced_split(split_strings, splitter="#", include_splitter=False)
        split_strings = advanced_split(split_strings, splitter="\n", include_splitter=False)
        split_strings = advanced_split(split_strings, splitter=";", include_splitter=False)
        split_strings = advanced_split(split_strings, splitter="`", include_splitter=False)
        split_strings = advanced_split(split_strings, splitter=" ", include_splitter=False)
        split_strings = advanced_split(split_strings, splitter="- ", include_splitter=False)
        split_strings = advanced_split(split_strings, splitter="---", include_splitter=False)

        # Skip fragments whose replacement could corrupt code: URLs and anything
        # containing quote characters
        for s in split_strings:
            if '.com' in s: continue
            if "'" in s: continue
            if '"' in s: continue
            strings.append([s, 0])

    def get_strings(node):
        """Recursively collect Chinese string literals from an AST node."""
        strings = []
        for child in ast.iter_child_nodes(node):
            # ast.Constant replaces the deprecated ast.Str on Python 3.8+
            if isinstance(child, ast.Constant) and isinstance(child.value, str):
                if contains_chinese(child.value):
                    load_string(strings=strings, string_input=child.value)
            elif isinstance(child, ast.AST):
                strings.extend(get_strings(child))
        return strings
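
    # For example, parsing a (hypothetical) line such as
    #
    #     chatbot.append(("解析项目", "正在加载..."))
    #
    # yields entries like ['解析项目', 0] and ['正在加载', 0]: load_string strips
    # the punctuation and drops any fragment without Chinese characters.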

    string_literals = []
    directory_path = f'./multi-language/{LANG}/'
    for root, dirs, files in os.walk(directory_path):
        for file in files:
            if file.endswith('.py'):
                file_path = os.path.join(root, file)
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()

                # collect Chinese text from `#` comments
                comments_arr = []
                for code_sp in content.splitlines():
                    comments = re.findall(r'#.*$', code_sp)
                    for comment in comments:
                        load_string(strings=comments_arr, string_input=comment)
                string_literals.extend(comments_arr)

                # collect Chinese string literals from the AST
                tree = ast.parse(content)
                res = get_strings(tree)
                string_literals.extend(res)

    for s in string_literals:
        print(s)
    chinese_literal_names = [string for string, _ in string_literals]
    # deduplicate while preserving order
    chinese_literal_names_norepeat = list(dict.fromkeys(chinese_literal_names))
    need_translate = []
    cached_translation = read_map_from_json(language=LANG)
    cached_translation_keys = list(cached_translation.keys())
    for d in chinese_literal_names_norepeat:
        if d not in cached_translation_keys:
            need_translate.append(d)

    if CACHE_ONLY:
        up = {}
    else:
        up = trans_json(need_translate, language=LANG, special=False)
    map_to_json(up, language=LANG)
    cached_translation = read_map_from_json(language=LANG)
    LANG_STD = 'std'
    cached_translation.update(read_map_from_json(language=LANG_STD))
    # longest keys first, so longer phrases are replaced before their substrings
    cached_translation = dict(sorted(cached_translation.items(), key=lambda x: -len(x[0])))

    # Rewrite every python file in the copy, replacing Chinese strings and comments
    # with their translations
    directory_path = f'./multi-language/{LANG}/'
    for root, dirs, files in os.walk(directory_path):
        for file in files:
            if file.endswith('.py'):
                file_path = os.path.join(root, file)

                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()

                for k, v in cached_translation.items():
                    if v is None: continue
                    # quotes inside a replacement would break string literals, so use backticks
                    if '"' in v:
                        v = v.replace('"', "`")
                    if "'" in v:
                        v = v.replace("'", "`")
                    content = content.replace(k, v)

                with open(file_path, 'w', encoding='utf-8') as f:
                    f.write(content)

                # If the file name itself is Chinese, rename the file to its translation.
                # (str.strip('.py') would strip characters, not the suffix, so slice instead.)
                stem = file[:-3]
                if stem in cached_translation:
                    file_new = cached_translation[stem] + '.py'
                    file_path_new = os.path.join(root, file_new)
                    with open(file_path_new, 'w', encoding='utf-8') as f:
                        f.write(content)
                    os.remove(file_path)


if __name__ == "__main__":
    step_1_core_key_translate()
    step_2_core_key_translate()
    print('Finished. Check out the generated results at ./multi-language/')