File size: 893 Bytes
5759868 7a668f2 09bc630 7a668f2 3d115be 7a668f2 3d115be 5759868 7a668f2 3d115be | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 | import os
def process_file(file_path):
    """Read a text file and return its contents with an extension-less path.

    The file is decoded with the ``utf-8-sig`` codec, so a leading UTF-8
    byte-order mark, if present, is not part of the returned text.

    Args:
        file_path: Path of the file to read (string or path-like).

    Returns:
        A ``(text, stem)`` tuple: the file contents, and ``file_path`` as a
        string cut at the first dot of its final path component.

    Raises:
        AssertionError: If the file is empty.
    """
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        text = f.read()
    # Raised explicitly so the check survives ``python -O`` (which strips
    # ``assert`` statements); the exception type stays the same for callers.
    if not text:
        raise AssertionError(f'empty file: {file_path}')
    path = str(file_path)
    # Search for the dot only inside the final component, so a dot in a
    # directory name (``./texts/a.txt``, ``data.v2/a.txt``) does not
    # truncate the whole path.
    name_start = len(path) - len(os.path.basename(path))
    dot = path.find('.', name_start)
    stem = path if dot == -1 else path[:dot]
    return text, stem
def process_folder_recursive(folder_path):
    """Collect ``process_file`` results for every file under a folder.

    Walks ``folder_path`` depth-first, calling ``process_file`` on each
    regular file and recursing into each sub-directory.

    Args:
        folder_path: Directory to scan.

    Returns:
        A flat list with one ``process_file`` result per file found, in
        sorted-name order within each directory.
    """
    all_messages = []
    # os.listdir returns entries in arbitrary order; sorting makes the
    # result reproducible across runs and platforms.
    for entry in sorted(os.listdir(folder_path)):
        entry_path = os.path.join(folder_path, entry)
        if os.path.isfile(entry_path):
            all_messages.append(process_file(entry_path))
        elif os.path.isdir(entry_path):
            all_messages.extend(process_folder_recursive(entry_path))
        # Anything else (e.g. a broken symlink) is neither readable as a
        # file nor listable as a directory, so it is skipped.
    return all_messages
def load_and_process_data(folder_path: str = 'texts') -> tuple[list[str], list[str]]:
    """Load every document under ``folder_path`` as two parallel lists.

    Args:
        folder_path: Root directory to scan recursively. Defaults to
            ``'texts'``.

    Returns:
        A ``(texts, names)`` tuple of equal-length lists. ``texts`` holds
        the first element of each ``process_folder_recursive`` result;
        ``names`` holds the second element with its last three characters
        removed.
    """
    all_messages = process_folder_recursive(folder_path)
    texts = [message[0] for message in all_messages]
    # NOTE(review): the fixed 3-character trim presumably drops a known
    # suffix from the file names — confirm against the data layout.
    names = [message[1][:-3] for message in all_messages]
    return texts, names
|