Spaces:
Sleeping
Sleeping
| import yaml | |
| import argparse | |
| from openai import OpenAI | |
| # TODO 1: Hyperparameters need tuning (requires experimental testing). | |
| # TODO 2: Add handling for results that do not match the ()->() format. | |
| # TODO 3: Add code that extracts the mispronounced parts of the result at the jamo level. | |
| # TODO 4: Support batch processing: accept multiple user_input/correct_input pairs from a file or list and process them together. | |
| # TODO 5: An evaluation metric for the generated results is needed. | |
| import yaml | |
| import argparse | |
| from hangul_romanize import Transliter | |
| from hangul_romanize.rule import academic | |
| # TODO 1: Hyperparameters need tuning (requires experimental testing). | |
| # TODO 2: Add handling for results that do not match the () -> () format. | |
| # TODO 3: Add code that extracts the mispronounced parts of the result at the jamo level. | |
| # TODO 4: Support batch processing: accept multiple user_input/correct_input pairs from a file or list and process them together. | |
| # TODO 5: An evaluation metric for the generated results is needed. | |
class LLaMA3:
    """Romanize a (user, correct) pair of Korean strings.

    Despite the name, no language model is called by this class: each input
    is romanized with ``hangul_romanize`` and the two results are combined
    into a ``(user)->(correct)`` string. The prompt template and model id are
    read from the configuration and stored, but not used by any method here.
    """

    def __init__(self, config: "dict | None"):
        """Set up the transliterator and record configuration values.

        Args:
            config: Parsed configuration mapping. The optional keys
                ``prompt_template`` and ``model.id`` are read. ``None`` (for
                example the result of loading an empty YAML file) is treated
                as an empty mapping.
        """
        config = config or {}

        # Initialise the hangul-romanize transliterator (academic rule set).
        self.transliter = Transliter(academic)

        # Prompt template: unused, kept so the existing structure is preserved.
        self.prompt_template = config.get("prompt_template", "")

        # Model id: unused, kept so the existing structure is preserved.
        # ``or {}`` also covers a ``model:`` key that is present but null,
        # which a ``.get("model", {})`` default would not.
        model_section = config.get("model") or {}
        self.model = model_section.get("id", "")

    def add_hyphens(self, korean_text: str) -> str:
        """Romanize ``korean_text`` one character at a time, joined by hyphens.

        Args:
            korean_text: Text to romanize. Each character is passed to the
                transliterator separately.

        Returns:
            The per-character romanizations joined with ``-``; an empty
            string for empty input.
        """
        return "-".join(
            self.transliter.translit(syllable) for syllable in korean_text
        )

    @staticmethod
    def _unwrap(text: str) -> str:
        """Drop surrounding whitespace, then any surrounding parentheses."""
        # Trim whitespace first: otherwise a trailing newline or space keeps
        # the parentheses from being stripped and they end up romanized.
        return text.strip().strip("()")

    def generate(self, user_input: str, correct_input: str) -> str:
        """Return ``(user_romanized)->(correct_romanized)`` for the two inputs.

        Args:
            user_input: The word as pronounced by the user, optionally
                wrapped in parentheses.
            correct_input: The reference word, optionally wrapped in
                parentheses.

        Returns:
            A string of the form ``(<user>)->(<correct>)`` where each side is
            the hyphen-separated romanization of the corresponding input.
        """
        user_romanized = self.add_hyphens(self._unwrap(user_input))
        correct_romanized = self.add_hyphens(self._unwrap(correct_input))
        return f"({user_romanized})->({correct_romanized})"
| # def parse_args() -> argparse.Namespace: | |
| # parser = argparse.ArgumentParser(description="LLaMA3 pronunciation correction pipeline.") | |
| # parser.add_argument("--config_path", type=str, default="data/config/llama3.yaml", help="๋ชจ๋ธ ์ค์ ๋ฐ ํ๋กฌํํธ ์ ๋ณด๋ฅผ ๋ด์ YAML ํ์ผ ๊ฒฝ๋ก") | |
| # parser.add_argument("--user_input", type=str, default="๋ฐ๋ผ", help="์๋ชป ๋ฐ์๋ ๋จ์ด") | |
| # parser.add_argument("--correct_input", type=str, default="๋ฐ๋ผ", help="์ ํํ ๋ฐ์ ๋จ์ด") | |
| # return parser.parse_args() | |
| # def main(): | |
| # args = parse_args() | |
| # # ์ค์ ํ์ผ ๋ก๋ | |
| # with open(args.config_path, "r") as f: | |
| # config = yaml.safe_load(f) | |
| # # ๋ชจ๋ธ ๋ก๋ ๋ฐ ๊ฒฐ๊ณผ ๋ฐํ | |
| # llama3 = LLaMA3(config) | |
| # output = llama3.generate(args.user_input, args.correct_input) | |
| # print(output) | |
| # if __name__ == "__main__": | |
| # main() | |