File size: 893 Bytes
5759868
 
7a668f2
 
 
 
09bc630
7a668f2
3d115be
7a668f2
 
 
 
 
 
 
 
 
3d115be
 
5759868
 
7a668f2
3d115be
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import os

def process_file(file_path):
    """Read a UTF-8 text file and return its content with its extension-less path.

    The file is decoded as ``utf-8-sig``, so a leading byte-order mark, if
    present, is not part of the returned text.

    Args:
        file_path: Path of the file to read (anything ``open`` accepts).

    Returns:
        A ``(text, stem_path)`` tuple. ``stem_path`` is ``str(file_path)``
        with the file name cut at its first dot; the directory part is kept
        unchanged.

    Raises:
        AssertionError: If the file is empty.
    """
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        text = f.read()
    # Raised explicitly (not via ``assert``) so the check survives ``python -O``;
    # the exception type is kept for callers that already handle it.
    if not text:
        raise AssertionError(f"file is empty: {file_path}")

    path_str = str(file_path)
    # Only the file name is cut at its first dot. Splitting the whole path
    # would truncate it at a dot inside a directory name ("./texts/a.txt",
    # "texts/v1.2/a.txt").
    dir_len = len(path_str) - len(os.path.basename(path_str))
    stem = path_str[dir_len:].split('.')[0]
    return text, path_str[:dir_len] + stem


def process_folder_recursive(folder_path):
    """Run ``process_file`` on every regular file under *folder_path*, recursively.

    Args:
        folder_path: Directory to walk.

    Returns:
        A flat list with one ``process_file`` result per regular file found
        in *folder_path* and its sub-directories. Entries are visited in
        sorted name order within each directory.
    """
    all_messages = []
    # ``os.listdir`` returns names in arbitrary order; sorting keeps the
    # result order stable across runs and platforms.
    for name in sorted(os.listdir(folder_path)):
        entry_path = os.path.join(folder_path, name)
        if os.path.isfile(entry_path):
            all_messages.append(process_file(entry_path))
        elif os.path.isdir(entry_path):
            all_messages.extend(process_folder_recursive(entry_path))
        # Anything else (e.g. a broken symlink) is neither readable as a file
        # nor listable as a directory, so it is skipped instead of crashing
        # inside ``os.listdir``.
    return all_messages


def load_and_process_data(folder_path: str = 'texts') -> tuple[list[str], list[str]]:
    """Load every text file under *folder_path* and split the results into two lists.

    Args:
        folder_path: Root directory to scan recursively. Defaults to
            ``'texts'``.

    Returns:
        A ``(texts, names)`` tuple of two parallel lists: the first holds the
        file contents, the second holds each file's extension-less path with
        its last three characters removed.
    """
    all_messages = process_folder_recursive(folder_path)
    texts = [text for text, _ in all_messages]
    # NOTE(review): the slice drops the last three characters of each stem,
    # presumably a fixed-length suffix in the file names; confirm against the
    # actual naming scheme of the data.
    names = [stem[:-3] for _, stem in all_messages]
    return texts, names