Spaces:
Runtime error
Runtime error
import os
import re
def is_jang(line):
    """Return *line* itself when it contains a chapter heading, else ``None``.

    A chapter heading is ``제`` followed by one or two digits and ``장``
    (e.g. ``제3장``), found anywhere in the line.

    Args:
        line: One line of text; it is returned unmodified on a match, so any
            trailing newline is preserved.

    Returns:
        The input line if the heading pattern occurs in it, otherwise ``None``.
    """
    # NOTE(review): only 1-2 digit chapter numbers match; confirm that no
    # document has a chapter numbered 100 or above.
    if re.search(r"제\d{1,2}장", line):
        return line
    return None
def is_jo(line):
    """Return the article heading found in *line*, or ``None`` if there is none.

    An article heading is ``제`` followed by one or two digits and ``조``
    (e.g. ``제7조``), found anywhere in the line.

    Args:
        line: One line of text.

    Returns:
        The matched heading text only (e.g. ``"제7조"``), not the whole line,
        or ``None`` when the pattern does not occur.
    """
    # NOTE(review): only 1-2 digit article numbers match, so an article
    # numbered 100 or above is not detected; confirm this is intended.
    match = re.search(r"제\d{1,2}조", line)
    if match is None:
        return None
    return match.group()
def process_text_document(filepath):
    """Split a text document into article chunks and save the result.

    The file is read line by line. Whenever a line contains an article
    heading (``is_jo``), a delimiter line followed by the most recently seen
    chapter heading line (``is_jang``) is inserted in front of it. Every line
    is then appended with ``<...>`` and ``[...]`` spans removed.

    The output goes to ``docs/processed/<name>_processed.txt`` relative to the
    current working directory, where ``<name>`` is the input file name up to
    its first dot. The output directory is created if it does not exist.

    Args:
        filepath: Path of the text file to process.
    """
    delim = '*****\n'
    out_dir = os.path.join('docs', 'processed')
    filename = os.path.basename(filepath).split('.')[0]

    jang_info = ''
    # Collect pieces and join once instead of growing a string with +=.
    pieces = ['***']

    # Explicit encoding so results do not depend on the platform default.
    # NOTE(review): assumes the documents are UTF-8 encoded - confirm.
    with open(filepath, "r", encoding="utf-8") as f:
        for line in f:
            # Remember the latest chapter heading so it can be attached in
            # front of each following article.
            jang_line = is_jang(line)
            if jang_line:
                jang_info = jang_line

            # A new article starts here: insert the delimiter and the current
            # chapter information before the article text.
            if is_jo(line):
                pieces.append(delim + jang_info)

            # Drop angle-bracket and square-bracket spans, which are likely
            # to be noise.
            pieces.append(re.sub(r"<.*?>|\[.*?\]", "", line))

    # Without this, the write below fails when docs/processed is missing.
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, filename + '_processed.txt')
    with open(out_path, 'w', encoding="utf-8") as f:
        f.write(''.join(pieces))
if __name__ == '__main__':
    # The input directory is resolved against the current working directory,
    # so the script has to be started from the directory that contains docs/.
    docs_dir = os.path.join(os.getcwd(), 'docs')

    # Sorted so files are processed in a stable order; os.listdir gives no
    # ordering guarantee.
    for name in sorted(os.listdir(docs_dir)):
        if name.endswith('.txt'):
            print(name)
            process_text_document(os.path.join(docs_dir, name))