MrSimple07 committed on
Commit
7062aff
·
1 Parent(s): 46dedf9

Reduce similarity_top_k from 200 to 50; max chunk size = 10 000, max chunk rows = 40

Browse files
Files changed (2) hide show
  1. index_retriever.py +35 -1
  2. utils.py +0 -22
index_retriever.py CHANGED
@@ -51,7 +51,7 @@ def create_query_engine(vector_index):
51
 
52
  vector_retriever = VectorIndexRetriever(
53
  index=vector_index,
54
- similarity_top_k=200,
55
  similarity_cutoff=0.35
56
  )
57
 
@@ -73,7 +73,41 @@ def create_query_engine(vector_index):
73
  )
74
 
75
  log_message("Query engine успешно создан")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  return query_engine
 
 
 
77
 
78
  except Exception as e:
79
  log_message(f"Ошибка создания query engine: {str(e)}")
 
51
 
52
  vector_retriever = VectorIndexRetriever(
53
  index=vector_index,
54
+ similarity_top_k=50,
55
  similarity_cutoff=0.35
56
  )
57
 
 
73
  )
74
 
75
  log_message("Query engine успешно создан")
76
+
77
+
78
+ all_nodes = list(vector_index.docstore.docs.values())
79
+ c25_tables = []
80
+
81
+ for node_id, node in vector_index.docstore.docs.items():
82
+ metadata = node.metadata
83
+ text = node.get_content()
84
+
85
+ # Check if this is a С-25 table
86
+ if ('С-25' in text or 'C-25' in text or
87
+ 'С-25' in str(metadata.get('table_title', '')) or
88
+ 'С-25' in str(metadata.get('table_number', ''))):
89
+
90
+ c25_tables.append({
91
+ 'node_id': node_id,
92
+ 'doc_id': metadata.get('document_id'),
93
+ 'table_num': metadata.get('table_number'),
94
+ 'table_title': metadata.get('table_title', ''),
95
+ 'text_preview': text[:200]
96
+ })
97
+
98
+ log_message(f"\n{'='*70}")
99
+ log_message(f"DEBUG: Found {len(c25_tables)} С-25 tables in index:")
100
+ for t in c25_tables:
101
+ log_message(f" • {t['doc_id']} - Table {t['table_num']}")
102
+ log_message(f" Title: {t['table_title']}")
103
+ log_message(f" Preview: {t['text_preview']}")
104
+ log_message(f"{'='*70}\n")
105
+
106
+
107
  return query_engine
108
+
109
+
110
+
111
 
112
  except Exception as e:
113
  log_message(f"Ошибка создания query engine: {str(e)}")
utils.py CHANGED
@@ -172,28 +172,6 @@ def deduplicate_nodes(nodes):
172
 
173
  return unique_nodes
174
 
175
- def debug_search_tables(vector_index, search_term="С-25"):
176
- """Debug function to find all tables containing a specific term"""
177
- all_nodes = list(vector_index.docstore.docs.values())
178
-
179
- matching = []
180
- for node in all_nodes:
181
- if node.metadata.get('type') == 'table':
182
- text = node.get_content()
183
- if search_term in text or search_term in node.metadata.get('table_title', ''):
184
- matching.append({
185
- 'doc_id': node.metadata.get('document_id'),
186
- 'table_num': node.metadata.get('table_number'),
187
- 'title': node.metadata.get('table_title', '')[:100]
188
- })
189
-
190
- log_message(f"\n{'='*60}")
191
- log_message(f"DEBUG: Found {len(matching)} tables containing '{search_term}'")
192
- for m in matching:
193
- log_message(f" • {m['doc_id']} - Table {m['table_num']}: {m['title']}")
194
- log_message(f"{'='*60}\n")
195
-
196
- return matching
197
 
198
  def answer_question(question, query_engine, reranker, current_model, chunks_df=None):
199
  if query_engine is None:
 
172
 
173
  return unique_nodes
174
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
 
176
  def answer_question(question, query_engine, reranker, current_model, chunks_df=None):
177
  if query_engine is None: