Spaces:

MrSimple01
/

RAG_AIEXP_1

Sleeping

App Files Files Community

MrSimple07 committed on Sep 30, 2025

Commit

a4f228e

1 Parent(s): 0a99ba6

a new version

Browse files

Files changed (4) hide show

documents_prep.py +19 -70
index_retriever.py +54 -27
table_prep.py +91 -286
utils.py +109 -220

documents_prep.py CHANGED Viewed

@@ -46,79 +46,29 @@ def process_documents_with_chunking(documents):
     table_count = 0
     image_count = 0
     text_chunks_count = 0
-    large_tables_count = 0
-    large_images_count = 0
-    custom_processed_count = 0
     for doc in documents:
         doc_type = doc.metadata.get('type', 'text')
         if doc_type == 'table':
             table_count += 1
-            doc_id = doc.metadata.get('document_id', 'unknown')
-            table_num = doc.metadata.get('table_number', 'unknown')
-            from table_prep import should_use_custom_processing
-            use_custom, doc_pattern, method_config = should_use_custom_processing(doc_id, table_num)
-            if use_custom:
-                custom_processed_count += 1
-                log_message(f"Table {table_num} in document {doc_id} was processed with custom method '{method_config.get('method')}', skipping standard chunking")
-                # Add the document as-is since it was already processed by custom method
-                all_chunked_docs.append(doc)
-                chunk_info.append({
-                    'document_id': doc_id,
-                    'section_id': doc.metadata.get('section_id', 'unknown'),
-                    'chunk_id': 0,
-                    'chunk_size': len(doc.text),
-                    'chunk_preview': doc.text[:200] + "..." if len(doc.text) > 200 else doc.text,
-                    'type': 'table',
-                    'table_number': table_num,
-                    'processing_method': method_config.get('method')
-                })
-                continue
-            # Standard processing for non-custom tables
-            doc_size = len(doc.text)
-            if doc_size > CHUNK_SIZE:
-                large_tables_count += 1
-                log_message(f"Large table found: {table_num} in document {doc_id}, size: {doc_size} characters")
-                # Chunk large tables
-                chunked_docs = chunk_document(doc)
-                all_chunked_docs.extend(chunked_docs)
-                for i, chunk_doc in enumerate(chunked_docs):
-                    chunk_info.append({
-                        'document_id': chunk_doc.metadata.get('document_id', 'unknown'),
-                        'section_id': chunk_doc.metadata.get('section_id', 'unknown'),
-                        'chunk_id': i,
-                        'chunk_size': len(chunk_doc.text),
-                        'chunk_preview': chunk_doc.text[:200] + "..." if len(chunk_doc.text) > 200 else chunk_doc.text,
-                        'type': 'table',
-                        'table_number': chunk_doc.metadata.get('table_number', 'unknown'),
-                        'processing_method': 'standard_chunked'
-                    })
-            else:
-                all_chunked_docs.append(doc)
-                chunk_info.append({
-                    'document_id': doc.metadata.get('document_id', 'unknown'),
-                    'section_id': doc.metadata.get('section_id', 'unknown'),
-                    'chunk_id': 0,
-                    'chunk_size': doc_size,
-                    'chunk_preview': doc.text[:200] + "..." if len(doc.text) > 200 else doc.text,
-                    'type': 'table',
-                    'table_number': doc.metadata.get('table_number', 'unknown'),
-                    'processing_method': 'standard'
-                })
         elif doc_type == 'image':
             image_count += 1
             doc_size = len(doc.text)
             if doc_size > CHUNK_SIZE:
-                large_images_count += 1
-                log_message(f"Large image description found: {doc.metadata.get('image_number', 'unknown')} in document {doc.metadata.get('document_id', 'unknown')}, size: {doc_size} characters")
-                # Chunk large images
                 chunked_docs = chunk_document(doc)
                 all_chunked_docs.extend(chunked_docs)
@@ -144,7 +94,7 @@ def process_documents_with_chunking(documents):
                     'image_number': doc.metadata.get('image_number', 'unknown')
                 })
-        else:  # text documents
             doc_size = len(doc.text)
             if doc_size > CHUNK_SIZE:
                 chunked_docs = chunk_document(doc)
@@ -171,14 +121,13 @@ def process_documents_with_chunking(documents):
                     'type': 'text'
                 })
-    log_message(f"=== PROCESSING STATISTICS ===")
-    log_message(f"Total tables processed: {table_count}")
-    log_message(f"Custom processed tables: {custom_processed_count}")
-    log_message(f"Large tables (>{CHUNK_SIZE} chars): {large_tables_count}")
-    log_message(f"Total images processed: {image_count}")
-    log_message(f"Large images (>{CHUNK_SIZE} chars): {large_images_count}")
-    log_message(f"Total text chunks created: {text_chunks_count}")
-    log_message(f"Total documents after processing: {len(all_chunked_docs)}")
     return all_chunked_docs, chunk_info

     table_count = 0
     image_count = 0
     text_chunks_count = 0
     for doc in documents:
         doc_type = doc.metadata.get('type', 'text')
         if doc_type == 'table':
+            # Add tables as-is, no chunking
             table_count += 1
+            all_chunked_docs.append(doc)
+            chunk_info.append({
+                'document_id': doc.metadata.get('document_id', 'unknown'),
+                'section_id': doc.metadata.get('section_id', 'unknown'),
+                'chunk_id': 0,
+                'chunk_size': len(doc.text),
+                'chunk_preview': doc.text[:200] + "..." if len(doc.text) > 200 else doc.text,
+                'type': 'table',
+                'table_number': doc.metadata.get('table_number', 'unknown')
+            })
         elif doc_type == 'image':
             image_count += 1
             doc_size = len(doc.text)
             if doc_size > CHUNK_SIZE:
                 chunked_docs = chunk_document(doc)
                 all_chunked_docs.extend(chunked_docs)
                     'image_number': doc.metadata.get('image_number', 'unknown')
                 })
+        else:
             doc_size = len(doc.text)
             if doc_size > CHUNK_SIZE:
                 chunked_docs = chunk_document(doc)
                     'type': 'text'
                 })
+    log_message(f"\n{'='*60}")
+    log_message(f"ИТОГО ОБРАБОТАНО ДОКУМЕНТОВ:")
+    log_message(f"  • Таблицы: {table_count} (добавлены целиком)")
+    log_message(f"  • Изображения: {image_count}")
+    log_message(f"  • Текстовые чанки: {text_chunks_count}")
+    log_message(f"  • Всего документов: {len(all_chunked_docs)}")
+    log_message(f"{'='*60}\n")
     return all_chunked_docs, chunk_info

index_retriever.py CHANGED Viewed

@@ -16,24 +16,24 @@ def create_query_engine(vector_index):
     try:
         bm25_retriever = BM25Retriever.from_defaults(
             docstore=vector_index.docstore,
-            similarity_top_k=15
         )
         vector_retriever = VectorIndexRetriever(
             index=vector_index,
-            similarity_top_k=30,
-            similarity_cutoff=0.8
         )
         hybrid_retriever = QueryFusionRetriever(
             [vector_retriever, bm25_retriever],
-            similarity_top_k=30,
             num_queries=1
         )
         custom_prompt_template = PromptTemplate(PROMPT_SIMPLE_POISK)
         response_synthesizer = get_response_synthesizer(
-            response_mode=ResponseMode.TREE_SUMMARIZE,
             text_qa_template=custom_prompt_template
         )
@@ -49,39 +49,66 @@ def create_query_engine(vector_index):
         log_message(f"Ошибка создания query engine: {str(e)}")
         raise
-def rerank_nodes(query, nodes, reranker, top_k=10):
     if not nodes or not reranker:
         return nodes[:top_k]
     try:
         log_message(f"Переранжирую {len(nodes)} узлов")
-        # Separate tables and images from text nodes
-        table_nodes = [node for node in nodes if node.metadata.get('type') == 'table']
-        image_nodes = [node for node in nodes if node.metadata.get('type') == 'image']
-        text_nodes = [node for node in nodes if node.metadata.get('type', 'text') == 'text']
-        priority_nodes = table_nodes + image_nodes
-        # Rerank only text nodes
-        if text_nodes:
-            pairs = []
-            for node in text_nodes:
-                pairs.append([query, node.text])
-            scores = reranker.predict(pairs)
-            scored_nodes = list(zip(text_nodes, scores))
             scored_nodes.sort(key=lambda x: x[1], reverse=True)
-            reranked_text_nodes = [node for node, score in scored_nodes]
-        else:
-            reranked_text_nodes = []
-        # Combine: priority nodes first, then reranked text nodes
-        final_nodes = priority_nodes + reranked_text_nodes
-        result = final_nodes[:top_k]
-        log_message(f"Возвращаю {len(priority_nodes)} приоритетных узлов и {len(result) - len(priority_nodes)} текстовых узлов")
-        return result
     except Exception as e:
         log_message(f"Ошибка переранжировки: {str(e)}")

     try:
         bm25_retriever = BM25Retriever.from_defaults(
             docstore=vector_index.docstore,
+            similarity_top_k=20
         )
         vector_retriever = VectorIndexRetriever(
             index=vector_index,
+            similarity_top_k=30,
+            similarity_cutoff=0.65
         )
         hybrid_retriever = QueryFusionRetriever(
             [vector_retriever, bm25_retriever],
+            similarity_top_k=40,
             num_queries=1
         )
         custom_prompt_template = PromptTemplate(PROMPT_SIMPLE_POISK)
         response_synthesizer = get_response_synthesizer(
+            response_mode=ResponseMode.TREE_SUMMARIZE,
             text_qa_template=custom_prompt_template
         )
         log_message(f"Ошибка создания query engine: {str(e)}")
         raise
+def rerank_nodes(query, nodes, reranker, top_k=20, min_score_threshold=0.5, diversity_penalty=0.3):
     if not nodes or not reranker:
         return nodes[:top_k]
     try:
         log_message(f"Переранжирую {len(nodes)} узлов")
+        pairs = [[query, node.text] for node in nodes]
+        scores = reranker.predict(pairs)
+        scored_nodes = list(zip(nodes, scores))
+        scored_nodes.sort(key=lambda x: x[1], reverse=True)
+        if min_score_threshold is not None:
+            scored_nodes = [(node, score) for node, score in scored_nodes
+                          if score >= min_score_threshold]
+            log_message(f"После фильтрации по порогу {min_score_threshold}: {len(scored_nodes)} узлов")
+        if not scored_nodes:
+            log_message("Нет узлов после фильтрации, снижаю порог")
+            scored_nodes = list(zip(nodes, scores))
             scored_nodes.sort(key=lambda x: x[1], reverse=True)
+            min_score_threshold = scored_nodes[0][1] * 0.6
+            scored_nodes = [(node, score) for node, score in scored_nodes
+                          if score >= min_score_threshold]
+        selected_nodes = []
+        selected_docs = set()
+        selected_sections = set()
+        for node, score in scored_nodes:
+            if len(selected_nodes) >= top_k:
+                break
+            metadata = node.metadata if hasattr(node, 'metadata') else {}
+            doc_id = metadata.get('document_id', 'unknown')
+            section_key = f"{doc_id}_{metadata.get('section_path', metadata.get('section_id', ''))}"
+            # Apply diversity penalty
+            penalty = 0
+            if doc_id in selected_docs:
+                penalty += diversity_penalty * 0.5
+            if section_key in selected_sections:
+                penalty += diversity_penalty
+            adjusted_score = score * (1 - penalty)
+            # Add if still competitive
+            if not selected_nodes or adjusted_score >= selected_nodes[0][1] * 0.6:
+                selected_nodes.append((node, score))
+                selected_docs.add(doc_id)
+                selected_sections.add(section_key)
+        log_message(f"Выбрано {len(selected_nodes)} узлов с разнообразием")
+        log_message(f"Уникальных документов: {len(selected_docs)}, секций: {len(selected_sections)}")
+        if selected_nodes:
+            log_message(f"Score range: {selected_nodes[0][1]:.3f} to {selected_nodes[-1][1]:.3f}")
+        return [node for node, score in selected_nodes]
     except Exception as e:
         log_message(f"Ошибка переранжировки: {str(e)}")

table_prep.py CHANGED Viewed

@@ -1,292 +1,86 @@
-import os
 from collections import defaultdict
 import json
-import zipfile
-import pandas as pd
 from huggingface_hub import hf_hub_download, list_repo_files
 from llama_index.core import Document
 from my_logging import log_message
-CUSTOM_TABLE_CONFIGS = {
-    "ГОСТ Р 50.05.01-2018": {
-        "tables": {
-            "№3": {"method": "group_by_column", "group_column": "Класс герметичности и чувствительности"},
-            "№Б.1": {"method": "group_by_column", "group_column": "Класс чувствительности системы контроля"}
-        }
-    },
-    "ГОСТ Р 50.06.01-2017": {
-        "tables": {
-            "№ Б.2": {"method": "split_by_rows"}
-        }
-    },
-    "НП-104-18": {
-        "tables": {
-            "*": {"method": "group_entire_table"}  # All tables
-        }
-    },
-    "НП-068-05": {
-        "tables": {
-            "Таблица 1": {"method": "group_by_column", "group_column": "Рабочее давление среды, МПа"},
-            "Таблица 2": {"method": "group_by_column", "group_column": "Рабочее давление среды, МПа"},
-            "Таблица Приложения 1": {"method": "group_by_column", "group_column": "Тип"}
-        }
-    },
-    "ГОСТ Р 59023.1-2020": {
-        "tables": {
-            "№ 1": {"method": "split_by_rows"},
-            "№ 2": {"method": "split_by_rows"},
-            "№ 3": {"method": "split_by_rows"}
-        }
-    },
-    "НП-089-15": {
-        "tables": {
-            "-": {"method": "split_by_rows"}
-        }
-    },
-    "НП-105-18": {
-        "tables": {
-            "№ 4.8": {"method": "group_entire_table"}
-        }
-    },
-    "ГОСТ Р 50.05.23-2020": {
-        "tables": {
-            "№8": {"method": "group_entire_table"}
-        }
-    },
-    "ГОСТ Р 50.03.01-2017": {
-        "tables": {
-            "А.8": {"method": "group_entire_table"}
-        }
-    }
-}
-def create_meta_info(document_name, section, table_number, table_title, extra_info=""):
-    base_info = f'Документ "{document_name}", Раздел: {section}, Таблица: {table_number}'
-    if table_title and table_title.strip():
-        base_info += f', Название: {table_title}'
-    if extra_info:
-        base_info += f', {extra_info}'
-    return base_info
-def create_chunk_text(meta_info, headers, rows, add_row_numbers=False):
-    chunk_lines = [meta_info.rstrip()]  # Remove trailing newline from meta_info
-    # Add headers only once
-    header_line = " | ".join(headers)
-    chunk_lines.append(f"Заголовки: {header_line}")
-    # Add rows without redundant formatting
-    for i, row in enumerate(rows, start=1):
-        row_parts = []
-        for h in headers:
-            value = row.get(h, '')
-            if value:  # Only add non-empty values
-                row_parts.append(f"{h}: {value}")
-        if add_row_numbers:
-            chunk_lines.append(f"Строка {i}: {' | '.join(row_parts)}")
-        else:
-            chunk_lines.append(' | '.join(row_parts))
-    return "\n".join(chunk_lines)
-def group_by_column_method(table_data, document_name, group_column):
-    """Group rows by specified column value"""
-    documents = []
-    headers = table_data.get("headers", [])
-    rows = table_data.get("data", [])
-    section = table_data.get("section", "")
-    table_number = table_data.get("table_number", "")
-    table_title = table_data.get("table_title", "")
-    grouped = defaultdict(list)
-    for row in rows:
-        key = row.get(group_column, "UNKNOWN")
-        grouped[key].append(row)
-    for group_value, group_rows in grouped.items():
-        meta_info = create_meta_info(document_name, section, table_number, table_title,
-                                   f'Группа по "{group_column}": {group_value}')
-        chunk_text = create_chunk_text(meta_info, headers, group_rows, add_row_numbers=True)
-        doc = Document(
-            text=chunk_text,
-            metadata={
-                "type": "table",
-                "table_number": table_number,
-                "table_title": table_title,
-                "document_id": document_name,
-                "section": section,
-                "section_id": section,
-                "group_column": group_column,
-                "group_value": group_value,
-                "total_rows": len(group_rows),
-                "processing_method": "group_by_column"
-            }
-        )
-        documents.append(doc)
-        log_message(f"Created grouped chunk for {group_column}={group_value}, rows: {len(group_rows)}, length: {len(chunk_text)}")
-    return documents
-def split_by_rows_method(table_data, document_name):
-    """Split table into individual row chunks"""
-    documents = []
-    headers = table_data.get("headers", [])
-    rows = table_data.get("data", [])
-    section = table_data.get("section", "")
-    table_number = table_data.get("table_number", "")
-    table_title = table_data.get("table_title", "")
-    for i, row in enumerate(rows, start=1):
-        meta_info = create_meta_info(document_name, section, table_number, table_title, f'Строка: {i}')
-        chunk_text = create_chunk_text(meta_info, headers, [row])
-        doc = Document(
-            text=chunk_text,
-            metadata={
-                "type": "table",
-                "table_number": table_number,
-                "table_title": table_title,
-                "document_id": document_name,
-                "section": section,
-                "section_id": section,
-                "row_number": i,
-                "total_rows": len(rows),
-                "processing_method": "split_by_rows"
-            }
-        )
-        documents.append(doc)
-    log_message(f"Split table {table_number} into {len(rows)} row chunks")
-    return documents
-def group_entire_table_method(table_data, document_name):
-    """Group entire table as one chunk"""
-    headers = table_data.get("headers", [])
-    rows = table_data.get("data", [])
-    section = table_data.get("section", "")
-    table_number = table_data.get("table_number", "")
-    table_title = table_data.get("table_title", "")
-    meta_info = create_meta_info(document_name, section, table_number, table_title)
-    chunk_text = create_chunk_text(meta_info, headers, rows)
-    doc = Document(
-        text=chunk_text,
         metadata={
             "type": "table",
-            "table_number": table_number,
             "table_title": table_title,
-            "document_id": document_name,
             "section": section,
             "section_id": section,
-            "total_rows": len(rows),
-            "processing_method": "group_entire_table"
         }
-    )
-    log_message(f"Grouped entire table {table_number}, rows: {len(rows)}, length: {len(chunk_text)}")
-    return [doc]
-def should_use_custom_processing(document_id, table_number):
-    """Check if table should use custom processing"""
-    for doc_pattern, config in CUSTOM_TABLE_CONFIGS.items():
-        if document_id.startswith(doc_pattern):
-            tables_config = config.get("tables", {})
-            if table_number in tables_config or "*" in tables_config:
-                return True, doc_pattern, tables_config.get(table_number, tables_config.get("*"))
-    return False, None, None
-def process_table_with_custom_method(table_data, document_name, method_config):
-    """Process table using custom method"""
-    method = method_config.get("method")
-    if method == "group_by_column":
-        group_column = method_config.get("group_column")
-        return group_by_column_method(table_data, document_name, group_column)
-    elif method == "split_by_rows":
-        return split_by_rows_method(table_data, document_name)
-    elif method == "group_entire_table":
-        return group_entire_table_method(table_data, document_name)
-    else:
-        log_message(f"Unknown custom method: {method}, falling back to default processing")
-        return None
-def table_to_document(table_data, document_id=None):
-    if isinstance(table_data, dict):
-        doc_id = document_id or table_data.get('document_id', table_data.get('document', 'Неизвестно'))
-        table_num = table_data.get('table_number', 'Неизвестно')
-        use_custom, doc_pattern, method_config = should_use_custom_processing(doc_id, table_num)
-        if use_custom:
-            log_message(f"Using custom processing for table {table_num} in document {doc_id}")
-            custom_docs = process_table_with_custom_method(table_data, doc_id, method_config)
-            if custom_docs:
-                return custom_docs
-        # DEFAULT PROCESSING (only if NOT using custom)
-        table_title = table_data.get('table_title', 'Неизвестно')
-        section = table_data.get('section', 'Неизвестно')
-        header_content = f"Таблица: {table_num}\nНазвание: {table_title}\nДокумент: {doc_id}\nРаздел: {section}\n"
-        if 'data' in table_data and isinstance(table_data['data'], list):
-            table_content = header_content + "\nДанные таблицы:\n"
-            for row_idx, row in enumerate(table_data['data']):
-                if isinstance(row, dict):
-                    row_text = " | ".join([f"{k}: {v}" for k, v in row.items()])
-                    table_content += f"Строка {row_idx + 1}: {row_text}\n"
-            doc = Document(
-                text=table_content,
-                metadata={
-                    "type": "table",
-                    "table_number": table_num,
-                    "table_title": table_title,
-                    "document_id": doc_id,
-                    "section": section,
-                    "section_id": section,
-                    "total_rows": len(table_data['data']),
-                    "processing_method": "default"
-                }
-            )
-            return [doc]
-        else:
-            doc = Document(
-                text=header_content,
-                metadata={
-                    "type": "table",
-                    "table_number": table_num,
-                    "table_title": table_title,
-                    "document_id": doc_id,
-                    "section": section,
-                    "section_id": section,
-                    "processing_method": "default"
-                }
-            )
-            return [doc]
-    return []
 def load_table_data(repo_id, hf_token, table_data_dir):
-    """Modified function with custom table processing integration"""
-    log_message("Начинаю загрузку табличных данных")
-    table_files = []
     try:
         files = list_repo_files(repo_id=repo_id, repo_type="dataset", token=hf_token)
-        for file in files:
-            if file.startswith(table_data_dir) and file.endswith('.json'):
-                table_files.append(file)
         log_message(f"Найдено {len(table_files)} JSON файлов с таблицами")
         table_documents = []
         for file_path in table_files:
             try:
-                log_message(f"Обрабатываю файл: {file_path}")
                 local_path = hf_hub_download(
                     repo_id=repo_id,
                     filename=file_path,
@@ -295,6 +89,8 @@ def load_table_data(repo_id, hf_token, table_data_dir):
                     token=hf_token
                 )
                 with open(local_path, 'r', encoding='utf-8') as f:
                     table_data = json.load(f)
@@ -302,46 +98,55 @@ def load_table_data(repo_id, hf_token, table_data_dir):
                         document_id = table_data.get('document', 'unknown')
                         if 'sheets' in table_data:
-                            for sheet in table_data['sheets']:
                                 sheet['document'] = document_id
-                                # Check if this table uses custom processing
-                                table_num = sheet.get('table_number', 'Неизвестно')
-                                use_custom, _, _ = should_use_custom_processing(document_id, table_num)
-                                if use_custom:
-                                    log_message(f"Skipping default processing for custom table {table_num} in {document_id}")
                                 docs_list = table_to_document(sheet, document_id)
                                 table_documents.extend(docs_list)
                         else:
-                            # Check if this table uses custom processing
-                            table_num = table_data.get('table_number', 'Неизвестно')
-                            use_custom, _, _ = should_use_custom_processing(document_id, table_num)
-                            if use_custom:
-                                log_message(f"Skipping default processing for custom table {table_num} in {document_id}")
                             docs_list = table_to_document(table_data, document_id)
                             table_documents.extend(docs_list)
-                    elif isinstance(table_data, list):
-                        for table_json in table_data:
-                            document_id = table_json.get('document', 'unknown')
-                            table_num = table_json.get('table_number', 'Неизвестно')
-                            use_custom, _, _ = should_use_custom_processing(document_id, table_num)
-                            if use_custom:
-                                log_message(f"Skipping default processing for custom table {table_num} in {document_id}")
-                            docs_list = table_to_document(table_json)
-                            table_documents.extend(docs_list)
             except Exception as e:
-                log_message(f"Ошибка обработки файла {file_path}: {str(e)}")
                 continue
-        log_message(f"Создано {len(table_documents)} документов из таблиц")
         return table_documents
     except Exception as e:
-        log_message(f"Ошибка загрузки табличных данных: {str(e)}")
-        return []

 from collections import defaultdict
 import json
 from huggingface_hub import hf_hub_download, list_repo_files
 from llama_index.core import Document
 from my_logging import log_message
+def create_table_content(table_data):
+    """Create formatted content from table data"""
+    doc_id = table_data.get('document_id', table_data.get('document', 'Неизвестно'))
+    table_num = table_data.get('table_number', 'Неизвестно')
+    table_title = table_data.get('table_title', 'Неизвестно')
+    section = table_data.get('section', 'Неизвестно')
+    content = f"Таблица: {table_num}\n"
+    content += f"Название: {table_title}\n"
+    content += f"Документ: {doc_id}\n"
+    content += f"Раздел: {section}\n"
+    headers = table_data.get('headers', [])
+    if headers:
+        content += f"\nЗаголовки: {' | '.join(headers)}\n"
+    if 'data' in table_data and isinstance(table_data['data'], list):
+        content += "\nДанные таблицы:\n"
+        for row_idx, row in enumerate(table_data['data'], start=1):
+            if isinstance(row, dict):
+                row_text = " | ".join([f"{k}: {v}" for k, v in row.items() if v])
+                content += f"Строка {row_idx}: {row_text}\n"
+    return content
+def table_to_document(table_data, document_id=None):
+    """Convert table data to a single Document"""
+    if not isinstance(table_data, dict):
+        return []
+    doc_id = document_id or table_data.get('document_id', table_data.get('document', 'Неизвестно'))
+    table_num = table_data.get('table_number', 'Неизвестно')
+    table_title = table_data.get('table_title', 'Неизвестно')
+    section = table_data.get('section', 'Неизвестно')
+    content = create_table_content(table_data)
+    content_size = len(content)
+    # Log table addition
+    row_count = len(table_data.get('data', [])) if 'data' in table_data else 0
+    log_message(f"✓ ДОБАВЛЕНА: Таблица {table_num} из документа '{doc_id}' | "
+                f"Размер: {content_size} символов | Строк: {row_count}")
+    return [Document(
+        text=content,
         metadata={
             "type": "table",
+            "table_number": table_num,
             "table_title": table_title,
+            "document_id": doc_id,
             "section": section,
             "section_id": section,
+            "total_rows": row_count,
+            "content_size": content_size
         }
+    )]
 def load_table_data(repo_id, hf_token, table_data_dir):
+    log_message("=" * 60)
+    log_message("НАЧАЛО ЗАГРУЗКИ ТАБЛИЧНЫХ ДАННЫХ")
+    log_message("=" * 60)
     try:
         files = list_repo_files(repo_id=repo_id, repo_type="dataset", token=hf_token)
+        table_files = [f for f in files if f.startswith(table_data_dir) and f.endswith('.json')]
         log_message(f"Найдено {len(table_files)} JSON файлов с таблицами")
         table_documents = []
+        stats = {
+            'total_tables': 0,
+            'total_size': 0,
+            'by_document': defaultdict(lambda: {'count': 0, 'size': 0})
+        }
         for file_path in table_files:
             try:
                 local_path = hf_hub_download(
                     repo_id=repo_id,
                     filename=file_path,
                     token=hf_token
                 )
+                log_message(f"\nОбработка файла: {file_path}")
                 with open(local_path, 'r', encoding='utf-8') as f:
                     table_data = json.load(f)
                         document_id = table_data.get('document', 'unknown')
                         if 'sheets' in table_data:
+                            sorted_sheets = sorted(
+                                table_data['sheets'],
+                                key=lambda sheet: sheet.get('table_number', '')  # or use 'table_number'
+                            )
+                            for sheet in sorted_sheets:
                                 sheet['document'] = document_id
                                 docs_list = table_to_document(sheet, document_id)
                                 table_documents.extend(docs_list)
+                                for doc in docs_list:
+                                    stats['total_tables'] += 1
+                                    size = doc.metadata.get('content_size', 0)
+                                    stats['total_size'] += size
+                                    stats['by_document'][document_id]['count'] += 1
+                                    stats['by_document'][document_id]['size'] += size
                         else:
                             docs_list = table_to_document(table_data, document_id)
                             table_documents.extend(docs_list)
+                            for doc in docs_list:
+                                stats['total_tables'] += 1
+                                size = doc.metadata.get('content_size', 0)
+                                stats['total_size'] += size
+                                stats['by_document'][document_id]['count'] += 1
+                                stats['by_document'][document_id]['size'] += size
             except Exception as e:
+                log_message(f"❌ ОШИБКА файла {file_path}: {str(e)}")
                 continue
+        # Log summary statistics
+        log_message("\n" + "=" * 60)
+        log_message("СТАТИСТИКА ПО ТАБЛИЦАМ")
+        log_message("=" * 60)
+        log_message(f"Всего таблиц добавлено: {stats['total_tables']}")
+        log_message(f"Общий размер: {stats['total_size']:,} символов")
+        log_message(f"Средний размер таблицы: {stats['total_size'] // stats['total_tables'] if stats['total_tables'] > 0 else 0:,} символов")
+        log_message("\nПо документам:")
+        for doc_id, doc_stats in sorted(stats['by_document'].items()):
+            log_message(f"  • {doc_id}: {doc_stats['count']} таблиц, "
+                       f"{doc_stats['size']:,} символов")
+        log_message("=" * 60)
         return table_documents
     except Exception as e:
+        log_message(f"❌ КРИТИЧЕСКАЯ ОШИБКА загрузки табличных данных: {str(e)}")
+        return []

utils.py CHANGED Viewed

@@ -52,6 +52,7 @@ def format_context_for_llm(nodes):
         section_info = ""
         if metadata.get('section_path'):
             section_path = metadata['section_path']
             section_text = metadata.get('section_text', '')
@@ -60,13 +61,17 @@ def format_context_for_llm(nodes):
             level = metadata.get('level', '')
             if level in ['subsection', 'sub_subsection', 'sub_sub_subsection'] and parent_section and parent_title:
-                # For subsections, show: пункт X.X в разделе X (Title)
-                section_info = f"пункт {section_path} в разделе {parent_section} ({parent_title})"
             elif section_text:
-                # For main sections, show: пункт X (Title)
-                section_info = f"пункт {section_path} ({section_text})"
             else:
-                section_info = f"пункт {section_path}"
         elif metadata.get('section_id'):
             section_id = metadata['section_id']
             section_text = metadata.get('section_text', '')
@@ -75,203 +80,54 @@ def format_context_for_llm(nodes):
             parent_title = metadata.get('parent_title', '')
             if level in ['subsection', 'sub_subsection', 'sub_sub_subsection'] and parent_section and parent_title:
-                # For subsections without section_path, show: пункт X.X в разделе X (Title)
-                section_info = f"пункт {section_id} в разделе {parent_section} ({parent_title})"
             elif section_text:
-                section_info = f"пункт {section_id} ({section_text})"
             else:
-                section_info = f"пункт {section_id}"
         if metadata.get('type') == 'table' and metadata.get('table_number'):
             table_num = metadata['table_number']
             if not str(table_num).startswith('№'):
                 table_num = f"№{table_num}"
-            section_info = f"таблица {table_num}"
-        if metadata.get('type') == 'image' and metadata.get('image_number'):
-            image_num = metadata['image_number']
-            if not str(image_num).startswith('№'):
-                image_num = f"№{image_num}"
-            section_info = f"рисунок {image_num}"
-        context_text = node.text if hasattr(node, 'text') else str(node)
-        if section_info:
-            formatted_context = f"[ИСТОЧНИК: {section_info} документа {doc_id}]\n{context_text}\n"
-        else:
-            formatted_context = f"[ИСТОЧНИК: документ {doc_id}]\n{context_text}\n"
-        context_parts.append(formatted_context)
-    return "\n".join(context_parts)
-def answer_question(question, query_engine, reranker, current_model, chunks_df=None):
-    if query_engine is None:
-        return "<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>Система не инициализирована</div>", ""
-    try:
-        log_message(f"Получен вопрос: {question}")
-        start_time = time.time()
-        # Извлечение узлов
-        retrieved_nodes = query_engine.retriever.retrieve(question)
-        log_message(f"Извлечено {len(retrieved_nodes)} узлов")
-        # ДЕТАЛЬНОЕ ЛОГИРОВАНИЕ ИСТОЧНИКОВ
-        log_message("=== ДЕТАЛЬНАЯ ИНФОРМАЦИЯ О НАЙДЕННЫХ УЗЛАХ ===")
-        for i, node in enumerate(retrieved_nodes):
-            log_message(f"Узел {i+1}:")
-            log_message(f"  Документ: {node.metadata.get('document_id', 'unknown')}")
-            log_message(f"  Тип: {node.metadata.get('type', 'unknown')}")
-            log_message(f"  Раздел: {node.metadata.get('section_id', 'unknown')}")
-            log_message(f"  Текст (первые 400 символов): {node.text[:400]}...")
-            log_message(f"  Метаданные: {node.metadata}")
-        # Переранжировка
-        reranked_nodes = rerank_nodes(question, retrieved_nodes, reranker, top_k=10)
-        log_message("=== УЗЛЫ ПОСЛЕ ПЕРЕРАНЖИРОВКИ ===")
-        for i, node in enumerate(reranked_nodes):
-            log_message(f"Переранжированный узел {i+1}:")
-            log_message(f"  Документ: {node.metadata.get('document_id', 'unknown')}")
-            log_message(f"  Тип: {node.metadata.get('type', 'unknown')}")
-            log_message(f"  Раздел: {node.metadata.get('section_id', 'unknown')}")
-            log_message(f"  Полный текст: {node.text}")
-        formatted_context = format_context_for_llm(reranked_nodes)
-        log_message(f"ПОЛНЫЙ КОНТЕКСТ ДЛЯ LLM:\n{formatted_context}")
-        enhanced_question = f"""
-Контекст из базы данных:
-{formatted_context}
-Вопрос пользователя: {question}"""
-        response = query_engine.query(enhanced_question)
-        log_message(f"ОТВЕТ LLM: {response.response}")
-        end_time = time.time()
-        processing_time = end_time - start_time
-        log_message(f"Обработка завершена за {processing_time:.2f} секунд")
-        sources_html = generate_sources_html(reranked_nodes, chunks_df)
-        answer_with_time = f"""<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; margin-bottom: 10px;'>
-        <h3 style='color: #63b3ed; margin-top: 0;'>Ответ (Модель: {current_model}):</h3>
-        <div style='line-height: 1.6; font-size: 16px;'>{response.response}</div>
-        <div style='margin-top: 15px; padding-top: 10px; border-top: 1px solid #4a5568; font-size: 14px; color: #a0aec0;'>
-        Время обработки: {processing_time:.2f} секунд
-        </div>
-        </div>"""
-        chunk_info = []
-        for node in reranked_nodes:
-            metadata = node.metadata if hasattr(node, 'metadata') else {}
-            chunk_info.append({
-                'document_id': metadata.get('document_id', 'unknown'),
-                'section_id': metadata.get('section_id', metadata.get('section', 'unknown')),
-                'section_path': metadata.get('section_path', ''),
-                'section_text': metadata.get('section_text', ''),
-                'level': metadata.get('level', ''),
-                'parent_section': metadata.get('parent_section', ''),
-                'parent_title': metadata.get('parent_title', ''),
-                'type': metadata.get('type', 'text'),
-                'table_number': metadata.get('table_number', ''),
-                'image_number': metadata.get('image_number', ''),
-                'chunk_size': len(node.text),
-                'chunk_text': node.text
-            })
-        from app import create_chunks_display_html
-        chunks_html = create_chunks_display_html(chunk_info)
-        return answer_with_time, sources_html, chunks_html
-    except Exception as e:
-        log_message(f"Ошибка обработки вопроса: {str(e)}")
-        error_msg = f"<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>Ошибка обработки вопроса: {str(e)}</div>"
-        return error_msg, ""
-def get_llm_model(model_name):
-    try:
-        model_config = AVAILABLE_MODELS.get(model_name)
-        if not model_config:
-            log_message(f"Модель {model_name} не найдена, использую модель по умолчанию")
-            model_config = AVAILABLE_MODELS[DEFAULT_MODEL]
-        if not model_config.get("api_key"):
-            raise Exception(f"API ключ не найден для модели {model_name}")
-        if model_config["provider"] == "google":
-            return GoogleGenAI(
-                model=model_config["model_name"],
-                api_key=model_config["api_key"]
-            )
-        elif model_config["provider"] == "openai":
-            return OpenAI(
-                model=model_config["model_name"],
-                api_key=model_config["api_key"]
-            )
-        else:
-            raise Exception(f"Неподдерживаемый провайдер: {model_config['provider']}")
-    except Exception as e:
-        log_message(f"Ошибка создания модели {model_name}: {str(e)}")
-        return GoogleGenAI(model="gemini-2.0-flash", api_key=GOOGLE_API_KEY)
-def get_embedding_model(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"):
-    return HuggingFaceEmbedding(model_name=model_name)
-def get_reranker_model(model_name='cross-encoder/ms-marco-MiniLM-L-12-v2'):
-    return CrossEncoder(model_name)
-def format_context_for_llm(nodes):
-    context_parts = []
-    for node in nodes:
-        metadata = node.metadata if hasattr(node, 'metadata') else {}
-        doc_id = metadata.get('document_id', 'Неизвестный документ')
-        section_info = ""
-        if metadata.get('section_path'):
-            section_path = metadata['section_path']
-            section_text = metadata.get('section_text', '')
-            parent_section = metadata.get('parent_section', '')
-            parent_title = metadata.get('parent_title', '')
-            if metadata.get('level') in ['subsection', 'sub_subsection', 'sub_sub_subsection'] and parent_section and parent_title:
-                section_info = f"пункт {section_path} ({section_text}) в разделе {parent_section} ({parent_title})"
-            elif section_text:
-                section_info = f"пункт {section_path} ({section_text})"
-            else:
-                section_info = f"пункт {section_path}"
-        elif metadata.get('section_id'):
-            section_id = metadata['section_id']
-            section_text = metadata.get('section_text', '')
-            if section_text:
-                section_info = f"пункт {section_id} ({section_text})"
             else:
-                section_info = f"пункт {section_id}"
-        if metadata.get('type') == 'table' and metadata.get('table_number'):
-            table_num = metadata['table_number']
-            if not str(table_num).startswith('№'):
-                table_num = f"№{table_num}"
-            section_info = f"таблица {table_num}"
         if metadata.get('type') == 'image' and metadata.get('image_number'):
             image_num = metadata['image_number']
             if not str(image_num).startswith('№'):
                 image_num = f"№{image_num}"
-            section_info = f"рисунок {image_num}"
         context_text = node.text if hasattr(node, 'text') else str(node)
         if section_info:
-            formatted_context = f"[ИСТОЧНИК: {section_info} документа {doc_id}]\n{context_text}\n"
         else:
             formatted_context = f"[ИСТОЧНИК: документ {doc_id}]\n{context_text}\n"
@@ -279,6 +135,7 @@ def format_context_for_llm(nodes):
     return "\n".join(context_parts)
 def generate_sources_html(nodes, chunks_df=None):
     html = "<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; max-height: 400px; overflow-y: auto;'>"
     html += "<h3 style='color: #63b3ed; margin-top: 0;'>Источники:</h3>"
@@ -369,56 +226,80 @@ def generate_sources_html(nodes, chunks_df=None):
     html += "</div>"
     return html
 def answer_question(question, query_engine, reranker, current_model, chunks_df=None):
     if query_engine is None:
-        return "<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>Система не инициализирована</div>", ""
     try:
-        log_message(f"Получен вопрос: {question}")
         start_time = time.time()
-        # Извлечение узлов
-        retrieved_nodes = query_engine.retriever.retrieve(question)
-        log_message(f"Извлечено {len(retrieved_nodes)} узлов")
-        # ДЕТАЛЬНОЕ ЛОГИРОВАНИЕ ИСТОЧНИКОВ
-        log_message("=== ДЕТАЛЬНАЯ ИНФОРМАЦИЯ О НАЙДЕННЫХ УЗЛАХ ===")
-        for i, node in enumerate(retrieved_nodes):
-            log_message(f"Узел {i+1}:")
-            log_message(f"  Документ: {node.metadata.get('document_id', 'unknown')}")
-            log_message(f"  Тип: {node.metadata.get('type', 'unknown')}")
-            log_message(f"  Раздел: {node.metadata.get('section_id', 'unknown')}")
-            log_message(f"  Текст (первые 400 символов): {node.text[:400]}...")
-            log_message(f"  Метаданные: {node.metadata}")
-        # Переранжировка
-        reranked_nodes = rerank_nodes(question, retrieved_nodes, reranker, top_k=10)
-        log_message("=== УЗЛЫ ПОСЛЕ ПЕРЕРАНЖИРОВКИ ===")
-        for i, node in enumerate(reranked_nodes):
-            log_message(f"Переранжированный узел {i+1}:")
-            log_message(f"  Документ: {node.metadata.get('document_id', 'unknown')}")
-            log_message(f"  Тип: {node.metadata.get('type', 'unknown')}")
-            log_message(f"  Раздел: {node.metadata.get('section_id', 'unknown')}")
-            log_message(f"  Полный текст: {node.text}")
         formatted_context = format_context_for_llm(reranked_nodes)
-        log_message(f"ПОЛНЫЙ КОНТЕКСТ ДЛЯ LLM:\n{formatted_context}")
-        enhanced_question = f"""
-Контекст из базы данных:
 {formatted_context}
-Вопрос пользователя: {question}"""
         response = query_engine.query(enhanced_question)
-        log_message(f"ОТВЕТ LLM: {response.response}")
         end_time = time.time()
         processing_time = end_time - start_time
-        log_message(f"Обработка завершена за {processing_time:.2f} секунд")
         sources_html = generate_sources_html(reranked_nodes, chunks_df)
@@ -432,10 +313,18 @@ def answer_question(question, query_engine, reranker, current_model, chunks_df=N
         chunk_info = []
         for node in reranked_nodes:
-            section_id = node.metadata.get('section_id', node.metadata.get('section', 'unknown'))
             chunk_info.append({
-                'document_id': node.metadata.get('document_id', 'unknown'),
-                'section_id': section_id,
                 'chunk_size': len(node.text),
                 'chunk_text': node.text
             })
@@ -445,6 +334,6 @@ def answer_question(question, query_engine, reranker, current_model, chunks_df=N
         return answer_with_time, sources_html, chunks_html
     except Exception as e:
-        log_message(f"Ошибка обработки вопроса: {str(e)}")
-        error_msg = f"<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>Ошибка обработки вопроса: {str(e)}</div>"
-        return error_msg, ""

         section_info = ""
+        # Handle section information with proper hierarchy
         if metadata.get('section_path'):
             section_path = metadata['section_path']
             section_text = metadata.get('section_text', '')
             level = metadata.get('level', '')
             if level in ['subsection', 'sub_subsection', 'sub_sub_subsection'] and parent_section and parent_title:
+                # For subsections: раздел X (Title), пункт X.X
+                if section_text:
+                    section_info = f"раздел {parent_section} ({parent_title}), пункт {section_path} ({section_text})"
+                else:
+                    section_info = f"раздел {parent_section} ({parent_title}), пункт {section_path}"
             elif section_text:
+                # For main sections: раздел X (Title)
+                section_info = f"раздел {section_path} ({section_text})"
             else:
+                section_info = f"раздел {section_path}"
         elif metadata.get('section_id'):
             section_id = metadata['section_id']
             section_text = metadata.get('section_text', '')
             parent_title = metadata.get('parent_title', '')
             if level in ['subsection', 'sub_subsection', 'sub_sub_subsection'] and parent_section and parent_title:
+                if section_text:
+                    section_info = f"раздел {parent_section} ({parent_title}), пункт {section_id} ({section_text})"
+                else:
+                    section_info = f"раздел {parent_section} ({parent_title}), пункт {section_id}"
             elif section_text:
+                section_info = f"раздел {section_id} ({section_text})"
             else:
+                section_info = f"раздел {section_id}"
+        # Override with table/image info if applicable
         if metadata.get('type') == 'table' and metadata.get('table_number'):
             table_num = metadata['table_number']
             if not str(table_num).startswith('№'):
                 table_num = f"№{table_num}"
+            table_title = metadata.get('table_title', '')
+            # Include section context for tables
+            base_section = ""
+            if metadata.get('section_path'):
+                base_section = f", раздел {metadata['section_path']}"
+            elif metadata.get('section_id'):
+                base_section = f", раздел {metadata['section_id']}"
+            if table_title:
+                section_info = f"Таблица {table_num} ({table_title}){base_section}"
             else:
+                section_info = f"Таблица {table_num}{base_section}"
         if metadata.get('type') == 'image' and metadata.get('image_number'):
             image_num = metadata['image_number']
             if not str(image_num).startswith('№'):
                 image_num = f"№{image_num}"
+            image_title = metadata.get('image_title', '')
+            # Include section context for images
+            base_section = ""
+            if metadata.get('section_path'):
+                base_section = f", раздел {metadata['section_path']}"
+            elif metadata.get('section_id'):
+                base_section = f", раздел {metadata['section_id']}"
+            if image_title:
+                section_info = f"Рисунок {image_num} ({image_title}){base_section}"
+            else:
+                section_info = f"Рисунок {image_num}{base_section}"
         context_text = node.text if hasattr(node, 'text') else str(node)
         if section_info:
+            formatted_context = f"[ИСТОЧНИК: {section_info}, документ {doc_id}]\n{context_text}\n"
         else:
             formatted_context = f"[ИСТОЧНИК: документ {doc_id}]\n{context_text}\n"
     return "\n".join(context_parts)
 def generate_sources_html(nodes, chunks_df=None):
     html = "<div style='background-color: #2d3748; color: white; padding: 20px; border-radius: 10px; max-height: 400px; overflow-y: auto;'>"
     html += "<h3 style='color: #63b3ed; margin-top: 0;'>Источники:</h3>"
     html += "</div>"
     return html
+def expand_query(question, llm_model):
+    """
+    Generate alternative phrasings of a question to broaden retrieval.
+
+    Args:
+        question: The user's original question.
+        llm_model: Object exposing ``complete(prompt)`` whose result has a
+            ``text`` attribute holding the model's reply.
+
+    Returns:
+        A list that starts with the original question, followed by at most
+        two distinct reformulations parsed from the reply. If anything
+        fails, the list contains only the original question.
+    """
+    import re  # local import: only this function needs it
+    expansion_prompt = f"""Дан вопрос: "{question}"
+Сгенерируй 2 альтернативные формулировки этого вопроса для поиска в базе данных.
+Используй синонимы и разные формулировки, сохраняя смысл.
+Формат ответа (только вопросы, по одному на строку):
+1. [первая формулировка]
+2. [вторая формулировка]"""
+    try:
+        response = llm_model.complete(expansion_prompt)
+        expanded = []
+        for line in response.text.splitlines():
+            # The prompt requests "1. ..." / "2. ..." lines, so strip the
+            # leading enumeration marker instead of discarding those lines.
+            # The marker must be followed by whitespace so that text such as
+            # "5.2 ..." is not mistaken for a list number.
+            candidate = re.sub(r'^\s*\d{1,2}[.)]\s+', '', line).strip()
+            # Skip blanks/short noise, echoes of the original, and repeats.
+            if len(candidate) <= 10 or candidate == question or candidate in expanded:
+                continue
+            expanded.append(candidate)
+            if len(expanded) == 2:
+                break
+        log_message(f"Query expansion: {len(expanded)} вариантов")
+        return [question] + expanded
+    except Exception as e:
+        # Best-effort: query expansion must never block answering.
+        log_message(f"Ошибка расширения запроса: {str(e)}")
+        return [question]
 def answer_question(question, query_engine, reranker, current_model, chunks_df=None):
     if query_engine is None:
+        return "<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>Система не инициализирована</div>", "", ""
     try:
         start_time = time.time()
+        llm = get_llm_model(current_model)
+        query_variations = expand_query(question, llm)
+        all_nodes = []
+        seen_node_ids = set()
+        for query_var in query_variations:
+            retrieved = query_engine.retriever.retrieve(query_var)
+            for node in retrieved:
+                node_id = f"{node.node_id if hasattr(node, 'node_id') else hash(node.text)}"
+                if node_id not in seen_node_ids:
+                    all_nodes.append(node)
+                    seen_node_ids.add(node_id)
+        log_message(f"Получено {len(all_nodes)} уникальных узлов из {len(query_variations)} запросов")
+        reranked_nodes = rerank_nodes(
+            question,
+            all_nodes,
+            reranker,
+            top_k=20,
+            min_score_threshold=0.5,
+            diversity_penalty=0.3
+        )
         formatted_context = format_context_for_llm(reranked_nodes)
+        enhanced_question = f"""Контекст из базы данных:
 {formatted_context}
+Вопрос пользователя: {question}
+Инструкция: Ответь на вопрос, используя ТОЛЬКО информацию из контекста выше.
+Если информации недостаточно, четко укажи это. Цитируй конкретные источники."""
         response = query_engine.query(enhanced_question)
         end_time = time.time()
         processing_time = end_time - start_time
+        log_message(f"Обработка завершена за {processing_time:.2f}с")
         sources_html = generate_sources_html(reranked_nodes, chunks_df)
         chunk_info = []
         for node in reranked_nodes:
+            metadata = node.metadata if hasattr(node, 'metadata') else {}
             chunk_info.append({
+                'document_id': metadata.get('document_id', 'unknown'),
+                'section_id': metadata.get('section_id', metadata.get('section', 'unknown')),
+                'section_path': metadata.get('section_path', ''),
+                'section_text': metadata.get('section_text', ''),
+                'level': metadata.get('level', ''),
+                'parent_section': metadata.get('parent_section', ''),
+                'parent_title': metadata.get('parent_title', ''),
+                'type': metadata.get('type', 'text'),
+                'table_number': metadata.get('table_number', ''),
+                'image_number': metadata.get('image_number', ''),
                 'chunk_size': len(node.text),
                 'chunk_text': node.text
             })
         return answer_with_time, sources_html, chunks_html
     except Exception as e:
+        log_message(f"Ошибка: {str(e)}")
+        error_msg = f"<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>Ошибка: {str(e)}</div>"
+        return error_msg, "", ""