MrSimple07 committed on
Commit
80b9f4e
·
1 Parent(s): 0f9c9b1

400 symbols to show in logging 2

Browse files
Files changed (1) hide show
  1. app.py +18 -5
app.py CHANGED
@@ -70,7 +70,9 @@ def get_formatted_content(chunk):
70
  document_id = chunk.get('document_id', 'unknown')
71
  section_path = chunk.get('section_path', '')
72
  section_id = chunk.get('section_id', 'unknown')
 
73
  parent_section = chunk.get('parent_section', '')
 
74
  level = chunk.get('level', '')
75
  chunk_text = chunk.get('chunk_text', '')
76
  doc_type = chunk.get('type', 'text')
@@ -89,14 +91,25 @@ def get_formatted_content(chunk):
89
 
90
  # For text documents
91
  if level in ['subsection', 'sub_subsection', 'sub_sub_subsection'] and parent_section:
92
- # For subsections: В разделе X в документе Y, пункт X.X content
93
  current_section = section_path if section_path else section_id
94
- return f"В разделе {parent_section} в документе {document_id}, пункт {current_section} {chunk_text}"
 
95
  else:
96
- # For main sections: В разделе X в документе Y пункт X content
97
  current_section = section_path if section_path else section_id
98
- return f"В разделе {current_section} в документе {document_id} пункт {current_section} {chunk_text}"
99
-
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
  def initialize_system(repo_id, hf_token, download_dir, chunks_filename=None,
102
  json_files_dir=None, table_data_dir=None, image_data_dir=None,
 
70
  document_id = chunk.get('document_id', 'unknown')
71
  section_path = chunk.get('section_path', '')
72
  section_id = chunk.get('section_id', 'unknown')
73
+ section_text = chunk.get('section_text', '')
74
  parent_section = chunk.get('parent_section', '')
75
+ parent_title = chunk.get('parent_title', '')
76
  level = chunk.get('level', '')
77
  chunk_text = chunk.get('chunk_text', '')
78
  doc_type = chunk.get('type', 'text')
 
91
 
92
  # For text documents
93
  if level in ['subsection', 'sub_subsection', 'sub_sub_subsection'] and parent_section:
 
94
  current_section = section_path if section_path else section_id
95
+ parent_info = f"{parent_section} ({parent_title})" if parent_title else parent_section
96
+ return f"В разделе {parent_info} в документе {document_id}, пункт {current_section}: {chunk_text}"
97
  else:
 
98
  current_section = section_path if section_path else section_id
99
+
100
+ # Clean chunk_text to avoid duplication
101
+ clean_text = chunk_text
102
+ if section_text and chunk_text.startswith(section_text):
103
+ # If chunk_text starts with full section_text, use section_text as title
104
+ section_title = section_text
105
+ elif chunk_text.startswith(f"{current_section} "):
106
+ # If chunk_text starts with section number, extract the title part
107
+ clean_text = chunk_text[len(f"{current_section} "):].strip()
108
+ section_title = section_text if section_text else f"{current_section} {clean_text.split('.')[0] if '.' in clean_text else clean_text[:50]}"
109
+ else:
110
+ section_title = section_text if section_text else current_section
111
+
112
+ return f"В разделе {current_section} в документе {document_id}, пункт {section_title}: {clean_text}"
113
 
114
  def initialize_system(repo_id, hf_token, download_dir, chunks_filename=None,
115
  json_files_dir=None, table_data_dir=None, image_data_dir=None,