MrSimple07 committed on
Commit
59c7b5b
·
1 Parent(s): 80b9f4e

Removed table and image chunk display formatting (plus related docstrings and inline comments)

Browse files
Files changed (1) hide show
  1. app.py +0 -19
app.py CHANGED
@@ -40,7 +40,6 @@ def create_chunks_display_html(chunk_info):
40
  return html
41
 
42
  def get_section_display(chunk):
43
- """Get section display for the 'Раздел' field - without 'пункт' prefix"""
44
  section_path = chunk.get('section_path', '')
45
  section_id = chunk.get('section_id', 'unknown')
46
  doc_type = chunk.get('type', 'text')
@@ -57,7 +56,6 @@ def get_section_display(chunk):
57
  image_num = f"№{image_num}"
58
  return f"рисунок {image_num}"
59
 
60
- # For text documents, return just the section_path or section_id without "пункт"
61
  if section_path:
62
  return section_path
63
  elif section_id and section_id != 'unknown':
@@ -66,7 +64,6 @@ def get_section_display(chunk):
66
  return section_id
67
 
68
  def get_formatted_content(chunk):
69
- """Format the content with proper section context"""
70
  document_id = chunk.get('document_id', 'unknown')
71
  section_path = chunk.get('section_path', '')
72
  section_id = chunk.get('section_id', 'unknown')
@@ -77,18 +74,6 @@ def get_formatted_content(chunk):
77
  chunk_text = chunk.get('chunk_text', '')
78
  doc_type = chunk.get('type', 'text')
79
 
80
- if doc_type == 'table':
81
- table_num = chunk.get('table_number', 'unknown')
82
- if not str(table_num).startswith('№'):
83
- table_num = f"№{table_num}"
84
- return f"В таблице {table_num} документа {document_id}: {chunk_text}"
85
-
86
- if doc_type == 'image':
87
- image_num = chunk.get('image_number', 'unknown')
88
- if not str(image_num).startswith('№'):
89
- image_num = f"№{image_num}"
90
- return f"В рисунке {image_num} документа {document_id}: {chunk_text}"
91
-
92
  # For text documents
93
  if level in ['subsection', 'sub_subsection', 'sub_sub_subsection'] and parent_section:
94
  current_section = section_path if section_path else section_id
@@ -96,14 +81,10 @@ def get_formatted_content(chunk):
96
  return f"В разделе {parent_info} в документе {document_id}, пункт {current_section}: {chunk_text}"
97
  else:
98
  current_section = section_path if section_path else section_id
99
-
100
- # Clean chunk_text to avoid duplication
101
  clean_text = chunk_text
102
  if section_text and chunk_text.startswith(section_text):
103
- # If chunk_text starts with full section_text, use section_text as title
104
  section_title = section_text
105
  elif chunk_text.startswith(f"{current_section} "):
106
- # If chunk_text starts with section number, extract the title part
107
  clean_text = chunk_text[len(f"{current_section} "):].strip()
108
  section_title = section_text if section_text else f"{current_section} {clean_text.split('.')[0] if '.' in clean_text else clean_text[:50]}"
109
  else:
 
40
  return html
41
 
42
  def get_section_display(chunk):
 
43
  section_path = chunk.get('section_path', '')
44
  section_id = chunk.get('section_id', 'unknown')
45
  doc_type = chunk.get('type', 'text')
 
56
  image_num = f"№{image_num}"
57
  return f"рисунок {image_num}"
58
 
 
59
  if section_path:
60
  return section_path
61
  elif section_id and section_id != 'unknown':
 
64
  return section_id
65
 
66
  def get_formatted_content(chunk):
 
67
  document_id = chunk.get('document_id', 'unknown')
68
  section_path = chunk.get('section_path', '')
69
  section_id = chunk.get('section_id', 'unknown')
 
74
  chunk_text = chunk.get('chunk_text', '')
75
  doc_type = chunk.get('type', 'text')
76
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  # For text documents
78
  if level in ['subsection', 'sub_subsection', 'sub_sub_subsection'] and parent_section:
79
  current_section = section_path if section_path else section_id
 
81
  return f"В разделе {parent_info} в документе {document_id}, пункт {current_section}: {chunk_text}"
82
  else:
83
  current_section = section_path if section_path else section_id
 
 
84
  clean_text = chunk_text
85
  if section_text and chunk_text.startswith(section_text):
 
86
  section_title = section_text
87
  elif chunk_text.startswith(f"{current_section} "):
 
88
  clean_text = chunk_text[len(f"{current_section} "):].strip()
89
  section_title = section_text if section_text else f"{current_section} {clean_text.split('.')[0] if '.' in clean_text else clean_text[:50]}"
90
  else: