Spaces:
Runtime error
Runtime error
File size: 1,400 Bytes
import os
import re
def is_jang(line):
    """Return ``line`` if it contains a chapter marker, otherwise ``None``.

    A chapter marker is the text "제", one or two digits, then "장"
    (e.g. "제3장"). The whole input line is returned, not just the matched
    marker, so the caller can reuse the full heading text.

    Args:
        line: A single line of text.

    Returns:
        The unmodified ``line`` when the marker is found anywhere in it,
        else ``None``.
    """
    # The pattern must be real Hangul; a mis-decoded literal never matches
    # Korean input.
    if re.search(r"제\d{1,2}장", line):
        return line
    return None
def is_jo(line):
    """Return the article marker found in ``line``, or ``None``.

    An article marker is the text "제", one or two digits, then "조"
    (e.g. "제12조"). Only the matched marker is returned, not the whole line.

    Args:
        line: A single line of text.

    Returns:
        The first matched marker substring, or ``None`` when the line has
        no such marker.
    """
    # The pattern must be real Hangul; a mis-decoded literal never matches
    # Korean input.
    match = re.search(r"제\d{1,2}조", line)
    return match.group() if match else None
def process_text_document(filepath):
    """Insert article delimiters into a text document and save the result.

    The input is read line by line. The most recent line recognised by
    ``is_jang`` (a chapter heading) is remembered. Whenever a line is
    recognised by ``is_jo`` (a new article), a delimiter line followed by
    the remembered chapter heading is inserted before it. Every line is
    appended with any ``<...>`` or ``[...]`` spans removed.

    The output is written to ``docs/processed/<name>_processed.txt``,
    relative to the current working directory. ``<name>`` is the input file
    name without its final extension. The output directory is created if
    it does not exist.

    Args:
        filepath: Path of the text file to process (read as UTF-8).
    """
    delim = '*****\n'
    dirname = 'docs'
    # splitext drops only the last extension, so "law.v2.txt" keeps "law.v2"
    # and does not collide with "law.v1.txt".
    filename = os.path.splitext(os.path.basename(filepath))[0]

    jang_info = ''
    # Collect pieces and join once instead of repeated string concatenation.
    pieces = ['***']
    # Explicit UTF-8 so Korean text is read the same way on every platform.
    with open(filepath, "r", encoding="utf-8") as f:
        for line in f:
            # Remember the latest chapter heading so it can be prepended to
            # each following article.
            jang_info = is_jang(line) or jang_info
            # A new article starts here: insert the delimiter and the
            # remembered chapter heading before the article text.
            if is_jo(line):
                pieces.append(delim + jang_info)
            # Strip bracketed spans, which would otherwise act as noise.
            pieces.append(re.sub(r"<.*?>|\[.*?\]", "", line))

    out_dir = os.path.join(dirname, 'processed')
    # Without this the first run fails when docs/processed does not exist.
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, filename + '_processed.txt')
    with open(out_path, 'w', encoding="utf-8") as f:
        f.write("".join(pieces))
if __name__ == '__main__':
    # Paths are resolved against the current working directory, so the
    # script has to be started from the folder that contains ``docs``.
    # process_text_document also writes its output relative to that
    # directory.
    docs_dir = os.path.join(os.getcwd(), 'docs')
    for f in os.listdir(docs_dir):
        if f.endswith('.txt'):
            print(f)
            process_text_document(os.path.join(docs_dir, f))
|