Spaces:

MrSimple01
/

RAG_AIEXP_01

Sleeping

App Files Files Community

MrSimple07 committed on Sep 29, 2025

Commit

c81fd8c

1 Parent(s): 5c2023e

Removed duplicate logs throughout all files

Browse files

Files changed (3) hide show

documents_prep.py +5 -25
table_prep.py +86 -176
utils.py +16 -34

documents_prep.py CHANGED Viewed

@@ -46,8 +46,6 @@ def process_documents_with_chunking(documents):
     table_count = 0
     image_count = 0
     text_chunks_count = 0
-    large_tables_count = 0
-    large_images_count = 0
     custom_processed_count = 0
     for doc in documents:
@@ -57,13 +55,11 @@ def process_documents_with_chunking(documents):
             table_count += 1
             doc_id = doc.metadata.get('document_id', 'unknown')
             table_num = doc.metadata.get('table_number', 'unknown')
-            from table_prep import should_use_custom_processing
-            use_custom, doc_pattern, method_config = should_use_custom_processing(doc_id, table_num)
-            if use_custom:
                 custom_processed_count += 1
-                log_message(f"Table {table_num} in document {doc_id} was processed with custom method '{method_config.get('method')}', skipping standard chunking")
-                # Add the document as-is since it was already processed by custom method
                 all_chunked_docs.append(doc)
                 chunk_info.append({
                     'document_id': doc_id,
@@ -77,13 +73,8 @@ def process_documents_with_chunking(documents):
                 })
                 continue
-            # Standard processing for non-custom tables
             doc_size = len(doc.text)
             if doc_size > CHUNK_SIZE:
-                large_tables_count += 1
-                log_message(f"Large table found: {table_num} in document {doc_id}, size: {doc_size} characters")
-                # Chunk large tables
                 chunked_docs = chunk_document(doc)
                 all_chunked_docs.extend(chunked_docs)
@@ -115,10 +106,6 @@ def process_documents_with_chunking(documents):
             image_count += 1
             doc_size = len(doc.text)
             if doc_size > CHUNK_SIZE:
-                large_images_count += 1
-                log_message(f"Large image description found: {doc.metadata.get('image_number', 'unknown')} in document {doc.metadata.get('document_id', 'unknown')}, size: {doc_size} characters")
-                # Chunk large images
                 chunked_docs = chunk_document(doc)
                 all_chunked_docs.extend(chunked_docs)
@@ -144,7 +131,7 @@ def process_documents_with_chunking(documents):
                     'image_number': doc.metadata.get('image_number', 'unknown')
                 })
-        else:  # text documents
             doc_size = len(doc.text)
             if doc_size > CHUNK_SIZE:
                 chunked_docs = chunk_document(doc)
@@ -171,14 +158,7 @@ def process_documents_with_chunking(documents):
                     'type': 'text'
                 })
-    log_message(f"=== PROCESSING STATISTICS ===")
-    log_message(f"Total tables processed: {table_count}")
-    log_message(f"Custom processed tables: {custom_processed_count}")
-    log_message(f"Large tables (>{CHUNK_SIZE} chars): {large_tables_count}")
-    log_message(f"Total images processed: {image_count}")
-    log_message(f"Large images (>{CHUNK_SIZE} chars): {large_images_count}")
-    log_message(f"Total text chunks created: {text_chunks_count}")
-    log_message(f"Total documents after processing: {len(all_chunked_docs)}")
     return all_chunked_docs, chunk_info

     table_count = 0
     image_count = 0
     text_chunks_count = 0
     custom_processed_count = 0
     for doc in documents:
             table_count += 1
             doc_id = doc.metadata.get('document_id', 'unknown')
             table_num = doc.metadata.get('table_number', 'unknown')
+            from table_prep import get_custom_config
+            method_config = get_custom_config(doc_id, table_num)
+            if method_config:
                 custom_processed_count += 1
                 all_chunked_docs.append(doc)
                 chunk_info.append({
                     'document_id': doc_id,
                 })
                 continue
             doc_size = len(doc.text)
             if doc_size > CHUNK_SIZE:
                 chunked_docs = chunk_document(doc)
                 all_chunked_docs.extend(chunked_docs)
             image_count += 1
             doc_size = len(doc.text)
             if doc_size > CHUNK_SIZE:
                 chunked_docs = chunk_document(doc)
                 all_chunked_docs.extend(chunked_docs)
                     'image_number': doc.metadata.get('image_number', 'unknown')
                 })
+        else:
             doc_size = len(doc.text)
             if doc_size > CHUNK_SIZE:
                 chunked_docs = chunk_document(doc)
                     'type': 'text'
                 })
+    log_message(f"Таблицы: {table_count} (кастомных: {custom_processed_count}), Изображения: {image_count}, Текстовые чанки: {text_chunks_count}, Итого: {len(all_chunked_docs)}")
     return all_chunked_docs, chunk_info

table_prep.py CHANGED Viewed

@@ -1,63 +1,30 @@
-import os
 from collections import defaultdict
 import json
-import zipfile
-import pandas as pd
 from huggingface_hub import hf_hub_download, list_repo_files
 from llama_index.core import Document
 from my_logging import log_message
 CUSTOM_TABLE_CONFIGS = {
     "ГОСТ Р 50.05.01-2018": {
-        "tables": {
-            "№3": {"method": "group_by_column", "group_column": "Класс герметичности и чувствительности"},
-            "№Б.1": {"method": "group_by_column", "group_column": "Класс чувствительности системы контроля"}
-        }
-    },
-    "ГОСТ Р 50.06.01-2017": {
-        "tables": {
-            "№ Б.2": {"method": "split_by_rows"}
-        }
-    },
-    "НП-104-18": {
-        "tables": {
-            "*": {"method": "group_entire_table"}  # All tables
-        }
     },
     "НП-068-05": {
-        "tables": {
-            "Таблица 1": {"method": "group_by_column", "group_column": "Рабочее давление среды, МПа"},
-            "Таблица 2": {"method": "group_by_column", "group_column": "Рабочее давление среды, МПа"},
-            "Таблица Приложения 1": {"method": "group_by_column", "group_column": "Тип"}
-        }
     },
     "ГОСТ Р 59023.1-2020": {
-        "tables": {
-            "№ 1": {"method": "split_by_rows"},
-            "№ 2": {"method": "split_by_rows"},
-            "№ 3": {"method": "split_by_rows"}
-        }
-    },
-    "НП-089-15": {
-        "tables": {
-            "-": {"method": "split_by_rows"}
-        }
     },
-    "НП-105-18": {
-        "tables": {
-            "№ 4.8": {"method": "group_entire_table"}
-        }
-    },
-    "ГОСТ Р 50.05.23-2020": {
-        "tables": {
-            "№8": {"method": "group_entire_table"}
-        }
-    },
-    "ГОСТ Р 50.03.01-2017": {
-        "tables": {
-            "А.8": {"method": "group_entire_table"}
-        }
-    }
 }
 def create_meta_info(document_name, section, table_number, table_title, extra_info=""):
@@ -69,28 +36,25 @@ def create_meta_info(document_name, section, table_number, table_title, extra_in
     return base_info
 def create_chunk_text(meta_info, headers, rows, add_row_numbers=False):
-    chunk_lines = [meta_info.rstrip()]  # Remove trailing newline from meta_info
-    # Add headers only once
-    header_line = " | ".join(headers)
-    chunk_lines.append(f"Заголовки: {header_line}")
-    # Add rows without redundant formatting
     for i, row in enumerate(rows, start=1):
-        row_parts = []
-        for h in headers:
-            value = row.get(h, '')
-            if value:  # Only add non-empty values
-                row_parts.append(f"{h}: {value}")
         if add_row_numbers:
             chunk_lines.append(f"Строка {i}: {' | '.join(row_parts)}")
         else:
             chunk_lines.append(' | '.join(row_parts))
     return "\n".join(chunk_lines)
 def group_by_column_method(table_data, document_name, group_column):
-    """Group rows by specified column value"""
     documents = []
     headers = table_data.get("headers", [])
     rows = table_data.get("data", [])
@@ -100,16 +64,14 @@ def group_by_column_method(table_data, document_name, group_column):
     grouped = defaultdict(list)
     for row in rows:
-        key = row.get(group_column, "UNKNOWN")
-        grouped[key].append(row)
     for group_value, group_rows in grouped.items():
         meta_info = create_meta_info(document_name, section, table_number, table_title,
                                    f'Группа по "{group_column}": {group_value}')
         chunk_text = create_chunk_text(meta_info, headers, group_rows, add_row_numbers=True)
-        doc = Document(
             text=chunk_text,
             metadata={
                 "type": "table",
@@ -123,14 +85,11 @@ def group_by_column_method(table_data, document_name, group_column):
                 "total_rows": len(group_rows),
                 "processing_method": "group_by_column"
             }
-        )
-        documents.append(doc)
-        log_message(f"Created grouped chunk for {group_column}={group_value}, rows: {len(group_rows)}, length: {len(chunk_text)}")
     return documents
 def split_by_rows_method(table_data, document_name):
-    """Split table into individual row chunks"""
     documents = []
     headers = table_data.get("headers", [])
     rows = table_data.get("data", [])
@@ -140,10 +99,9 @@ def split_by_rows_method(table_data, document_name):
     for i, row in enumerate(rows, start=1):
         meta_info = create_meta_info(document_name, section, table_number, table_title, f'Строка: {i}')
         chunk_text = create_chunk_text(meta_info, headers, [row])
-        doc = Document(
             text=chunk_text,
             metadata={
                 "type": "table",
@@ -156,14 +114,11 @@ def split_by_rows_method(table_data, document_name):
                 "total_rows": len(rows),
                 "processing_method": "split_by_rows"
             }
-        )
-        documents.append(doc)
-    log_message(f"Split table {table_number} into {len(rows)} row chunks")
     return documents
 def group_entire_table_method(table_data, document_name):
-    """Group entire table as one chunk"""
     headers = table_data.get("headers", [])
     rows = table_data.get("data", [])
     section = table_data.get("section", "")
@@ -173,7 +128,7 @@ def group_entire_table_method(table_data, document_name):
     meta_info = create_meta_info(document_name, section, table_number, table_title)
     chunk_text = create_chunk_text(meta_info, headers, rows)
-    doc = Document(
         text=chunk_text,
         metadata={
             "type": "table",
@@ -185,108 +140,84 @@ def group_entire_table_method(table_data, document_name):
             "total_rows": len(rows),
             "processing_method": "group_entire_table"
         }
-    )
-    log_message(f"Grouped entire table {table_number}, rows: {len(rows)}, length: {len(chunk_text)}")
-    return [doc]
-def should_use_custom_processing(document_id, table_number):
-    """Check if table should use custom processing"""
-    for doc_pattern, config in CUSTOM_TABLE_CONFIGS.items():
-        if document_id.startswith(doc_pattern):
-            tables_config = config.get("tables", {})
-            if table_number in tables_config or "*" in tables_config:
-                return True, doc_pattern, tables_config.get(table_number, tables_config.get("*"))
-    return False, None, None
-def process_table_with_custom_method(table_data, document_name, method_config):
-    """Process table using custom method"""
     method = method_config.get("method")
     if method == "group_by_column":
-        group_column = method_config.get("group_column")
-        return group_by_column_method(table_data, document_name, group_column)
     elif method == "split_by_rows":
         return split_by_rows_method(table_data, document_name)
     elif method == "group_entire_table":
         return group_entire_table_method(table_data, document_name)
-    else:
-        log_message(f"Unknown custom method: {method}, falling back to default processing")
-        return None
 def table_to_document(table_data, document_id=None):
-    if isinstance(table_data, dict):
-        doc_id = document_id or table_data.get('document_id', table_data.get('document', 'Неизвестно'))
-        table_num = table_data.get('table_number', 'Неизвестно')
-        use_custom, doc_pattern, method_config = should_use_custom_processing(doc_id, table_num)
-        if use_custom:
-            log_message(f"Using custom processing for table {table_num} in document {doc_id}")
-            custom_docs = process_table_with_custom_method(table_data, doc_id, method_config)
-            if custom_docs:
-                return custom_docs
-        # DEFAULT PROCESSING (only if NOT using custom)
-        table_title = table_data.get('table_title', 'Неизвестно')
-        section = table_data.get('section', 'Неизвестно')
-        header_content = f"Таблица: {table_num}\nНазвание: {table_title}\nДокумент: {doc_id}\nРаздел: {section}\n"
-        if 'data' in table_data and isinstance(table_data['data'], list):
-            table_content = header_content + "\nДанные таблицы:\n"
-            for row_idx, row in enumerate(table_data['data']):
-                if isinstance(row, dict):
-                    row_text = " | ".join([f"{k}: {v}" for k, v in row.items()])
-                    table_content += f"Строка {row_idx + 1}: {row_text}\n"
-            doc = Document(
-                text=table_content,
-                metadata={
-                    "type": "table",
-                    "table_number": table_num,
-                    "table_title": table_title,
-                    "document_id": doc_id,
-                    "section": section,
-                    "section_id": section,
-                    "total_rows": len(table_data['data']),
-                    "processing_method": "default"
-                }
-            )
-            return [doc]
-        else:
-            doc = Document(
-                text=header_content,
-                metadata={
-                    "type": "table",
-                    "table_number": table_num,
-                    "table_title": table_title,
-                    "document_id": doc_id,
-                    "section": section,
-                    "section_id": section,
-                    "processing_method": "default"
-                }
-            )
-            return [doc]
-    return []
 def load_table_data(repo_id, hf_token, table_data_dir):
-    """Modified function with custom table processing integration"""
-    log_message("Начинаю загрузку табличных данных")
-    table_files = []
     try:
         files = list_repo_files(repo_id=repo_id, repo_type="dataset", token=hf_token)
-        for file in files:
-            if file.startswith(table_data_dir) and file.endswith('.json'):
-                table_files.append(file)
         log_message(f"Найдено {len(table_files)} JSON файлов с таблицами")
         table_documents = []
         for file_path in table_files:
             try:
-                log_message(f"Обрабатываю файл: {file_path}")
                 local_path = hf_hub_download(
                     repo_id=repo_id,
                     filename=file_path,
@@ -304,39 +235,18 @@ def load_table_data(repo_id, hf_token, table_data_dir):
                         if 'sheets' in table_data:
                             for sheet in table_data['sheets']:
                                 sheet['document'] = document_id
-                                # Check if this table uses custom processing
-                                table_num = sheet.get('table_number', 'Неизвестно')
-                                use_custom, _, _ = should_use_custom_processing(document_id, table_num)
-                                if use_custom:
-                                    log_message(f"Skipping default processing for custom table {table_num} in {document_id}")
                                 docs_list = table_to_document(sheet, document_id)
                                 table_documents.extend(docs_list)
                         else:
-                            # Check if this table uses custom processing
-                            table_num = table_data.get('table_number', 'Неизвестно')
-                            use_custom, _, _ = should_use_custom_processing(document_id, table_num)
-                            if use_custom:
-                                log_message(f"Skipping default processing for custom table {table_num} in {document_id}")
                             docs_list = table_to_document(table_data, document_id)
                             table_documents.extend(docs_list)
                     elif isinstance(table_data, list):
                         for table_json in table_data:
-                            document_id = table_json.get('document', 'unknown')
-                            table_num = table_json.get('table_number', 'Неизвестно')
-                            use_custom, _, _ = should_use_custom_processing(document_id, table_num)
-                            if use_custom:
-                                log_message(f"Skipping default processing for custom table {table_num} in {document_id}")
                             docs_list = table_to_document(table_json)
                             table_documents.extend(docs_list)
             except Exception as e:
-                log_message(f"Ошибка обработки файла {file_path}: {str(e)}")
                 continue
         log_message(f"Создано {len(table_documents)} документов из таблиц")

 from collections import defaultdict
 import json
 from huggingface_hub import hf_hub_download, list_repo_files
 from llama_index.core import Document
 from my_logging import log_message
 CUSTOM_TABLE_CONFIGS = {
     "ГОСТ Р 50.05.01-2018": {
+        "№3": {"method": "group_by_column", "group_column": "Класс герметичности и чувствительности"},
+        "№Б.1": {"method": "group_by_column", "group_column": "Класс чувствительности системы контроля"}
     },
+    "ГОСТ Р 50.06.01-2017": {"№ Б.2": {"method": "split_by_rows"}},
+    "НП-104-18": {"*": {"method": "group_entire_table"}},
     "НП-068-05": {
+        "Таблица 1": {"method": "group_by_column", "group_column": "Рабочее давление среды, МПа"},
+        "Таблица 2": {"method": "group_by_column", "group_column": "Рабочее давление среды, МПа"},
+        "Таблица Приложения 1": {"method": "group_by_column", "group_column": "Тип"}
     },
     "ГОСТ Р 59023.1-2020": {
+        "№ 1": {"method": "split_by_rows"},
+        "№ 2": {"method": "split_by_rows"},
+        "№ 3": {"method": "split_by_rows"}
     },
+    "НП-089-15": {"-": {"method": "split_by_rows"}},
+    "НП-105-18": {"№ 4.8": {"method": "group_entire_table"}},
+    "ГОСТ Р 50.05.23-2020": {"№8": {"method": "group_entire_table"}},
+    "ГОСТ Р 50.03.01-2017": {"А.8": {"method": "group_entire_table"}}
 }
 def create_meta_info(document_name, section, table_number, table_title, extra_info=""):
     return base_info
 def create_chunk_text(meta_info, headers, rows, add_row_numbers=False):
     """Render table rows as plain text beneath a metadata preamble.

     The output is newline-joined and consists of: the preamble with its
     trailing whitespace stripped, one "Заголовки: ..." line listing the
     headers, and then one line per row made of "header: value" pairs
     joined with " | ".

     Args:
         meta_info: Preamble text placed on top of the chunk.
         headers: Column names; they define both the header line and the
             order in which cells are emitted for every row.
         rows: Iterable of dicts mapping a header to its cell value.
         add_row_numbers: When true, each row line is prefixed with
             "Строка N: " where N is the 1-based position in ``rows``.

     Returns:
         The assembled chunk text as a single string.
     """
     # str() keeps the join from raising when a header is not a string.
     chunk_lines = [meta_info.rstrip(), "Заголовки: " + " | ".join(str(h) for h in headers)]

     for i, row in enumerate(rows, start=1):
         row_parts = []
         for h in headers:
             value = row.get(h, '')
             # Numeric cells (0, 0.0, False) are real data and must survive;
             # only empty/None-like values are treated as "no content".
             if isinstance(value, (int, float)) or value:
                 row_parts.append(f"{h}: {value}")

         row_line = ' | '.join(row_parts)
         if add_row_numbers:
             row_line = f"Строка {i}: {row_line}"
         chunk_lines.append(row_line)

     return "\n".join(chunk_lines)
def get_custom_config(document_id, table_number):
    """Look up the custom processing config for a table, if one exists.

    CUSTOM_TABLE_CONFIGS maps a document-name prefix to a dict of
    ``table number -> method config``; the key "*" in that inner dict
    acts as a wildcard for every table of the document.

    Args:
        document_id: Document name; matched by prefix against the
            configured patterns.
        table_number: Table identifier exactly as it appears in the
            table data (exact-match lookup).

    Returns:
        The method config dict for the table, or None when no configured
        pattern provides one (or when ``document_id`` is not a string).
    """
    # Non-string ids cannot be prefix-matched; treat them as "not configured"
    # rather than raising AttributeError on .startswith.
    if not isinstance(document_id, str):
        return None

    for doc_pattern, tables_config in CUSTOM_TABLE_CONFIGS.items():
        if not document_id.startswith(doc_pattern):
            continue
        method_config = tables_config.get(table_number, tables_config.get("*"))
        # Keep scanning when this pattern has nothing for the table: another
        # matching pattern may still configure it.
        if method_config:
            return method_config
    return None
 def group_by_column_method(table_data, document_name, group_column):
     documents = []
     headers = table_data.get("headers", [])
     rows = table_data.get("data", [])
     grouped = defaultdict(list)
     for row in rows:
+        grouped[row.get(group_column, "UNKNOWN")].append(row)
     for group_value, group_rows in grouped.items():
         meta_info = create_meta_info(document_name, section, table_number, table_title,
                                    f'Группа по "{group_column}": {group_value}')
         chunk_text = create_chunk_text(meta_info, headers, group_rows, add_row_numbers=True)
+        documents.append(Document(
             text=chunk_text,
             metadata={
                 "type": "table",
                 "total_rows": len(group_rows),
                 "processing_method": "group_by_column"
             }
+        ))
     return documents
 def split_by_rows_method(table_data, document_name):
     documents = []
     headers = table_data.get("headers", [])
     rows = table_data.get("data", [])
     for i, row in enumerate(rows, start=1):
         meta_info = create_meta_info(document_name, section, table_number, table_title, f'Строка: {i}')
         chunk_text = create_chunk_text(meta_info, headers, [row])
+        documents.append(Document(
             text=chunk_text,
             metadata={
                 "type": "table",
                 "total_rows": len(rows),
                 "processing_method": "split_by_rows"
             }
+        ))
     return documents
 def group_entire_table_method(table_data, document_name):
     headers = table_data.get("headers", [])
     rows = table_data.get("data", [])
     section = table_data.get("section", "")
     meta_info = create_meta_info(document_name, section, table_number, table_title)
     chunk_text = create_chunk_text(meta_info, headers, rows)
+    return [Document(
         text=chunk_text,
         metadata={
             "type": "table",
             "total_rows": len(rows),
             "processing_method": "group_entire_table"
         }
+    )]
def process_table(table_data, document_name, method_config):
    """Run the custom chunking strategy named in ``method_config``.

    Args:
        table_data: Table dict handed through to the selected strategy.
        document_name: Document name handed through to the strategy.
        method_config: Dict whose "method" entry selects the strategy and
            whose "group_column" entry is used by "group_by_column".

    Returns:
        Whatever the selected strategy returns, or None when the method
        name is not one of the three known strategies.
    """
    strategy = method_config.get("method")

    if strategy == "group_by_column":
        column = method_config.get("group_column")
        return group_by_column_method(table_data, document_name, column)
    if strategy == "split_by_rows":
        return split_by_rows_method(table_data, document_name)
    if strategy == "group_entire_table":
        return group_entire_table_method(table_data, document_name)

    # Unknown or missing method name: signal "not handled" to the caller.
    return None
 def table_to_document(table_data, document_id=None):
     """Convert one table dict into a list of Document objects.

     A table with a custom config (see get_custom_config) is handed to
     process_table; if that yields documents they are returned as-is.
     Otherwise the table is rendered by the default path: a header block
     with table number, title, document and section, followed by one
     "Строка N: ..." line per dict row when ``table_data['data']`` is a list.

     Args:
         table_data: Table description; anything that is not a dict
             produces an empty result.
         document_id: Optional document name. When falsy, the table's own
             'document_id' or 'document' entry is used instead.

     Returns:
         A list of Document objects (empty for non-dict input).
     """
     if not isinstance(table_data, dict):
         return []

     doc_id = document_id or table_data.get('document_id', table_data.get('document', 'Неизвестно'))
     table_num = table_data.get('table_number', 'Неизвестно')
     table_title = table_data.get('table_title', 'Неизвестно')
     section = table_data.get('section', 'Неизвестно')

     method_config = get_custom_config(doc_id, table_num)
     if method_config:
         # .get keeps a config without a "method" key from raising KeyError
         # here; process_table then returns None and the default path runs.
         log_message(f"✓ Таблица {table_num} '{table_title}' в документе {doc_id}: метод {method_config.get('method')}")
         custom_docs = process_table(table_data, doc_id, method_config)
         if custom_docs:
             return custom_docs

     header_content = f"Таблица: {table_num}\nНазвание: {table_title}\nДокумент: {doc_id}\nРаздел: {section}\n"

     # Key insertion order is kept identical to the original literals
     # (total_rows, when present, precedes processing_method).
     metadata = {
         "type": "table",
         "table_number": table_num,
         "table_title": table_title,
         "document_id": doc_id,
         "section": section,
         "section_id": section,
     }

     rows = table_data.get('data')
     if isinstance(rows, list):
         # Row numbers follow the position in the source list, so skipped
         # non-dict rows leave gaps in the numbering.
         row_lines = [
             f"Строка {row_number}: " + " | ".join(f"{k}: {v}" for k, v in row.items()) + "\n"
             for row_number, row in enumerate(rows, start=1)
             if isinstance(row, dict)
         ]
         text = header_content + "\nДанные таблицы:\n" + "".join(row_lines)
         metadata["total_rows"] = len(rows)
     else:
         text = header_content

     metadata["processing_method"] = "default"
     return [Document(text=text, metadata=metadata)]
 def load_table_data(repo_id, hf_token, table_data_dir):
+    log_message("Загрузка табличных данных")
     try:
         files = list_repo_files(repo_id=repo_id, repo_type="dataset", token=hf_token)
+        table_files = [f for f in files if f.startswith(table_data_dir) and f.endswith('.json')]
         log_message(f"Найдено {len(table_files)} JSON файлов с таблицами")
         table_documents = []
         for file_path in table_files:
             try:
                 local_path = hf_hub_download(
                     repo_id=repo_id,
                     filename=file_path,
                         if 'sheets' in table_data:
                             for sheet in table_data['sheets']:
                                 sheet['document'] = document_id
                                 docs_list = table_to_document(sheet, document_id)
                                 table_documents.extend(docs_list)
                         else:
                             docs_list = table_to_document(table_data, document_id)
                             table_documents.extend(docs_list)
                     elif isinstance(table_data, list):
                         for table_json in table_data:
                             docs_list = table_to_document(table_json)
                             table_documents.extend(docs_list)
             except Exception as e:
+                log_message(f"Ошибка файла {file_path}: {str(e)}")
                 continue
         log_message(f"Создано {len(table_documents)} документов из таблиц")

utils.py CHANGED Viewed

@@ -371,39 +371,15 @@ def generate_sources_html(nodes, chunks_df=None):
 def answer_question(question, query_engine, reranker, current_model, chunks_df=None):
     if query_engine is None:
-        return "<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>Система не инициализирована</div>", ""
     try:
-        log_message(f"Получен вопрос: {question}")
         start_time = time.time()
-        # Извлечение узлов
         retrieved_nodes = query_engine.retriever.retrieve(question)
-        log_message(f"Извлечено {len(retrieved_nodes)} узлов")
-        # ДЕТАЛЬНОЕ ЛОГИРОВАНИЕ ИСТОЧНИКОВ
-        log_message("=== ДЕТАЛЬНАЯ ИНФОРМАЦИЯ О НАЙДЕННЫХ УЗЛАХ ===")
-        for i, node in enumerate(retrieved_nodes):
-            log_message(f"Узел {i+1}:")
-            log_message(f"  Документ: {node.metadata.get('document_id', 'unknown')}")
-            log_message(f"  Тип: {node.metadata.get('type', 'unknown')}")
-            log_message(f"  Раздел: {node.metadata.get('section_id', 'unknown')}")
-            log_message(f"  Текст (первые 400 символов): {node.text[:400]}...")
-            log_message(f"  Метаданные: {node.metadata}")
-        # Переранжировка
         reranked_nodes = rerank_nodes(question, retrieved_nodes, reranker, top_k=10)
-        log_message("=== УЗЛЫ ПОСЛЕ ПЕРЕРАНЖИРОВКИ ===")
-        for i, node in enumerate(reranked_nodes):
-            log_message(f"Переранжированный узел {i+1}:")
-            log_message(f"  Документ: {node.metadata.get('document_id', 'unknown')}")
-            log_message(f"  Тип: {node.metadata.get('type', 'unknown')}")
-            log_message(f"  Раздел: {node.metadata.get('section_id', 'unknown')}")
-            log_message(f"  Полный текст: {node.text}")
         formatted_context = format_context_for_llm(reranked_nodes)
-        log_message(f"ПОЛНЫЙ КОНТЕКСТ ДЛЯ LLM:\n{formatted_context}")
         enhanced_question = f"""
 Контекст из базы данных:
@@ -413,12 +389,10 @@ def answer_question(question, query_engine, reranker, current_model, chunks_df=N
         response = query_engine.query(enhanced_question)
-        log_message(f"ОТВЕТ LLM: {response.response}")
         end_time = time.time()
         processing_time = end_time - start_time
-        log_message(f"Обработка завершена за {processing_time:.2f} секунд")
         sources_html = generate_sources_html(reranked_nodes, chunks_df)
@@ -432,10 +406,18 @@ def answer_question(question, query_engine, reranker, current_model, chunks_df=N
         chunk_info = []
         for node in reranked_nodes:
-            section_id = node.metadata.get('section_id', node.metadata.get('section', 'unknown'))
             chunk_info.append({
-                'document_id': node.metadata.get('document_id', 'unknown'),
-                'section_id': section_id,
                 'chunk_size': len(node.text),
                 'chunk_text': node.text
             })
@@ -445,6 +427,6 @@ def answer_question(question, query_engine, reranker, current_model, chunks_df=N
         return answer_with_time, sources_html, chunks_html
     except Exception as e:
-        log_message(f"Ошибка обработки вопроса: {str(e)}")
-        error_msg = f"<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>Ошибка обработки вопроса: {str(e)}</div>"
-        return error_msg, ""

 def answer_question(question, query_engine, reranker, current_model, chunks_df=None):
     if query_engine is None:
+        return "<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>Система не инициализирована</div>", "", ""
     try:
         start_time = time.time()
         retrieved_nodes = query_engine.retriever.retrieve(question)
         reranked_nodes = rerank_nodes(question, retrieved_nodes, reranker, top_k=10)
         formatted_context = format_context_for_llm(reranked_nodes)
         enhanced_question = f"""
 Контекст из базы данных:
         response = query_engine.query(enhanced_question)
         end_time = time.time()
         processing_time = end_time - start_time
+        log_message(f"Обработка завершена за {processing_time:.2f}с")
         sources_html = generate_sources_html(reranked_nodes, chunks_df)
         chunk_info = []
         for node in reranked_nodes:
+            metadata = node.metadata if hasattr(node, 'metadata') else {}
             chunk_info.append({
+                'document_id': metadata.get('document_id', 'unknown'),
+                'section_id': metadata.get('section_id', metadata.get('section', 'unknown')),
+                'section_path': metadata.get('section_path', ''),
+                'section_text': metadata.get('section_text', ''),
+                'level': metadata.get('level', ''),
+                'parent_section': metadata.get('parent_section', ''),
+                'parent_title': metadata.get('parent_title', ''),
+                'type': metadata.get('type', 'text'),
+                'table_number': metadata.get('table_number', ''),
+                'image_number': metadata.get('image_number', ''),
                 'chunk_size': len(node.text),
                 'chunk_text': node.text
             })
         return answer_with_time, sources_html, chunks_html
     except Exception as e:
+        log_message(f"Ошибка: {str(e)}")
+        error_msg = f"<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>Ошибка: {str(e)}</div>"
+        return error_msg, "", ""