"""Helpers that swap emoji and protected entities for placeholder tokens.

The functions here are applied around an external text-transformation step:
``format_input`` / ``text_to_placeholder_tokens`` before it, and
``placeholder_tokens_to_text`` / ``format_output`` after it.
"""

import re

# Expected to provide REPL_DICT and TRIGGER_EMOJI_DICT (neither is defined in
# this file). The star import is kept because other modules may rely on the
# names it re-exports.
from tokens import *


def format_input(text_ja: str) -> str:
    """Replace every emoji listed in ``TRIGGER_EMOJI_DICT`` with its token.

    ``TRIGGER_EMOJI_DICT`` maps token -> emoji. Both strings are treated as
    literal text, so an emoji that contains a regex metacharacter (for
    example the keycap ``*`` or ``#`` emoji) is handled safely.

    Args:
        text_ja: Input text (presumably the Japanese source text).

    Returns:
        The text with each listed emoji replaced by its token.
    """
    for token, emoji in TRIGGER_EMOJI_DICT.items():
        text_ja = text_ja.replace(emoji, token)
    return text_ja


def _mask_matches(text: str, pattern, token: str) -> tuple[str, list[str]]:
    """Replace each match of *pattern* in *text* with *token*, by position.

    Returns the masked text and the list of replaced substrings in the order
    they occurred, so that exactly one token is inserted per recorded
    substring.
    """
    compiled = re.compile(pattern)
    # Mirror re.findall: a pattern with exactly one capturing group yields
    # that group's text; otherwise the whole match is used.
    group = 1 if compiled.groups == 1 else 0

    pieces: list[str] = []
    found: list[str] = []
    cursor = 0
    for match in compiled.finditer(text):
        start, end = match.span(group)
        # Skip empty or non-participating groups (nothing to protect) and
        # spans that overlap text that has already been consumed.
        if start == end or start < cursor:
            continue
        pieces.append(text[cursor:start])
        pieces.append(token)
        found.append(text[start:end])
        cursor = end
    pieces.append(text[cursor:])
    return "".join(pieces), found


def text_to_placeholder_tokens(text_ja: str) -> tuple[str, dict]:
    """Mask every entity described by ``REPL_DICT`` with its placeholder token.

    For each entity type, ``REPL_DICT[entity_type]["pattern"]`` is searched in
    the text and each match is replaced, at the position where it matched, by
    ``REPL_DICT[entity_type]["token"]``. Entity types are processed in
    ``REPL_DICT`` order, each one operating on the output of the previous one.

    Replacing by position (rather than by matched text) guarantees that the
    number of inserted tokens equals the number of recorded entities, even
    when one match is a substring of another or the same text also appears
    somewhere the pattern did not match.

    Args:
        text_ja: Input text (presumably the Japanese source text).

    Returns:
        A ``(masked_text, repls)`` tuple, where ``repls`` maps each entity
        type to the list of substrings that were replaced, in order of
        occurrence.
    """
    repls: dict[str, list[str]] = {}
    for entity_type, spec in REPL_DICT.items():
        text_ja, repls[entity_type] = _mask_matches(
            text_ja, spec["pattern"], spec["token"]
        )
    return text_ja, repls


def _restore_tokens(text: str, token: str, entities: list[str]) -> str:
    """Replace the first ``len(entities)`` occurrences of *token*, in order."""
    if not entities:
        return text
    remaining = iter(entities)
    # A callable replacement inserts each entity verbatim; a plain string
    # would be parsed as a template, so backslashes in the original text
    # would raise re.error or be read as group references.
    return re.sub(
        re.escape(token),
        lambda _match: next(remaining),
        text,
        count=len(entities),
    )


def placeholder_tokens_to_text(text_en: str, repls: dict[str, list[str]]) -> str:
    """Put the original entities back in place of their placeholder tokens.

    For each entity type in ``REPL_DICT``, the n-th occurrence of that type's
    token is replaced by the n-th entity recorded in ``repls``. Tokens beyond
    the number of recorded entities are left untouched.

    Args:
        text_en: Text containing placeholder tokens (presumably the English
            output produced from the masked text).
        repls: Mapping of entity type to the ordered list of original
            substrings, as returned by ``text_to_placeholder_tokens``. Entity
            types missing from the mapping are treated as having no entities.

    Returns:
        The text with placeholder tokens replaced by the recorded entities.
    """
    for entity_type, spec in REPL_DICT.items():
        text_en = _restore_tokens(
            text_en, spec["token"], repls.get(entity_type, [])
        )
    return text_en


def format_output(text_en: str, sub_emoji: bool = True) -> str:
    """Tidy bracket spacing and optionally turn tokens back into emoji.

    Args:
        text_en: Text to post-process (presumably the English output).
        sub_emoji: When true, every token in ``TRIGGER_EMOJI_DICT`` is
            replaced by its emoji.

    Returns:
        The post-processed text.
    """
    # Remove the single whitespace just inside a 【 … 】 pair and one optional
    # whitespace character after the closing bracket.
    text_en = re.sub(r'【\s(.+?)\s】\s?', r'【\1】', text_en)
    if sub_emoji:
        for token, emoji in TRIGGER_EMOJI_DICT.items():
            # Literal replacement: neither string is a regex or a template.
            text_en = text_en.replace(token, emoji)
    return text_en