Spaces:

MrSimple01
/

RAG_AIEXP_01

Sleeping

App Files Community

MrSimple07 committed on Sep 18, 2025

Commit

59c7b5b

1 Parent(s): 80b9f4e

removed table + image chunking display

Browse files

Files changed (1) hide show

app.py +0 -19

app.py CHANGED Viewed

@@ -40,7 +40,6 @@ def create_chunks_display_html(chunk_info):
     return html
 def get_section_display(chunk):
-    """Get section display for the 'Раздел' field - without 'пункт' prefix"""
     section_path = chunk.get('section_path', '')
     section_id = chunk.get('section_id', 'unknown')
     doc_type = chunk.get('type', 'text')
@@ -57,7 +56,6 @@ def get_section_display(chunk):
             image_num = f"№{image_num}"
         return f"рисунок {image_num}"
-    # For text documents, return just the section_path or section_id without "пункт"
     if section_path:
         return section_path
     elif section_id and section_id != 'unknown':
@@ -66,7 +64,6 @@ def get_section_display(chunk):
     return section_id
 def get_formatted_content(chunk):
-    """Format the content with proper section context"""
     document_id = chunk.get('document_id', 'unknown')
     section_path = chunk.get('section_path', '')
     section_id = chunk.get('section_id', 'unknown')
@@ -77,18 +74,6 @@ def get_formatted_content(chunk):
     chunk_text = chunk.get('chunk_text', '')
     doc_type = chunk.get('type', 'text')
-    if doc_type == 'table':
-        table_num = chunk.get('table_number', 'unknown')
-        if not str(table_num).startswith('№'):
-            table_num = f"№{table_num}"
-        return f"В таблице {table_num} документа {document_id}: {chunk_text}"
-    if doc_type == 'image':
-        image_num = chunk.get('image_number', 'unknown')
-        if not str(image_num).startswith('№'):
-            image_num = f"№{image_num}"
-        return f"В рисунке {image_num} документа {document_id}: {chunk_text}"
     # For text documents
     if level in ['subsection', 'sub_subsection', 'sub_sub_subsection'] and parent_section:
         current_section = section_path if section_path else section_id
@@ -96,14 +81,10 @@ def get_formatted_content(chunk):
         return f"В разделе {parent_info} в документе {document_id}, пункт {current_section}: {chunk_text}"
     else:
         current_section = section_path if section_path else section_id
-        # Clean chunk_text to avoid duplication
         clean_text = chunk_text
         if section_text and chunk_text.startswith(section_text):
-            # If chunk_text starts with full section_text, use section_text as title
             section_title = section_text
         elif chunk_text.startswith(f"{current_section} "):
-            # If chunk_text starts with section number, extract the title part
             clean_text = chunk_text[len(f"{current_section} "):].strip()
             section_title = section_text if section_text else f"{current_section} {clean_text.split('.')[0] if '.' in clean_text else clean_text[:50]}"
         else:

     return html
 def get_section_display(chunk):
     section_path = chunk.get('section_path', '')
     section_id = chunk.get('section_id', 'unknown')
     doc_type = chunk.get('type', 'text')
             image_num = f"№{image_num}"
         return f"рисунок {image_num}"
     if section_path:
         return section_path
     elif section_id and section_id != 'unknown':
     return section_id
 def get_formatted_content(chunk):
     document_id = chunk.get('document_id', 'unknown')
     section_path = chunk.get('section_path', '')
     section_id = chunk.get('section_id', 'unknown')
     chunk_text = chunk.get('chunk_text', '')
     doc_type = chunk.get('type', 'text')
     # For text documents
     if level in ['subsection', 'sub_subsection', 'sub_sub_subsection'] and parent_section:
         current_section = section_path if section_path else section_id
         return f"В разделе {parent_info} в документе {document_id}, пункт {current_section}: {chunk_text}"
     else:
         current_section = section_path if section_path else section_id
         clean_text = chunk_text
         if section_text and chunk_text.startswith(section_text):
             section_title = section_text
         elif chunk_text.startswith(f"{current_section} "):
             clean_text = chunk_text[len(f"{current_section} "):].strip()
             section_title = section_text if section_text else f"{current_section} {clean_text.split('.')[0] if '.' in clean_text else clean_text[:50]}"
         else: