MrSimple07 committed on
Commit
edc2f6f
·
1 Parent(s): 8befcd1

bm = 50, semantic = 50, hybrid = 50

Browse files
Files changed (2) hide show
  1. index_retriever.py +2 -2
  2. utils.py +4 -40
index_retriever.py CHANGED
@@ -16,7 +16,7 @@ def create_query_engine(vector_index):
16
  try:
17
  bm25_retriever = BM25Retriever.from_defaults(
18
  docstore=vector_index.docstore,
19
- similarity_top_k=40
20
  )
21
 
22
  vector_retriever = VectorIndexRetriever(
@@ -27,7 +27,7 @@ def create_query_engine(vector_index):
27
 
28
  hybrid_retriever = QueryFusionRetriever(
29
  [vector_retriever, bm25_retriever],
30
- similarity_top_k=40,
31
  num_queries=1
32
  )
33
 
 
16
  try:
17
  bm25_retriever = BM25Retriever.from_defaults(
18
  docstore=vector_index.docstore,
19
+ similarity_top_k=50
20
  )
21
 
22
  vector_retriever = VectorIndexRetriever(
 
27
 
28
  hybrid_retriever = QueryFusionRetriever(
29
  [vector_retriever, bm25_retriever],
30
+ similarity_top_k=50,
31
  num_queries=1
32
  )
33
 
utils.py CHANGED
@@ -225,32 +225,6 @@ def generate_sources_html(nodes, chunks_df=None):
225
 
226
  html += "</div>"
227
  return html
228
-
229
- def expand_query(question, llm_model):
230
- """
231
- Generate multiple query variations for better retrieval
232
- """
233
- expansion_prompt = f"""Дан вопрос: "{question}"
234
-
235
- Сгенерируй 2 альтернативные формулировки этого вопроса для поиска в базе данных.
236
- Используй синонимы и разные формулировки, сохраняя смысл.
237
-
238
- Формат ответа (только вопросы, по одному на строку):
239
- 1. [первая формулировка]
240
- 2. [вторая формулировка]"""
241
-
242
- try:
243
- response = llm_model.complete(expansion_prompt)
244
- expanded = [q.strip() for q in response.text.split('\n') if q.strip() and not q.strip().startswith('1.') and not q.strip().startswith('2.')]
245
- # Clean up
246
- expanded = [q.lstrip('12. ').strip() for q in expanded if len(q) > 10][:2]
247
- log_message(f"Query expansion: {len(expanded)} вариантов")
248
- return [question] + expanded
249
- except Exception as e:
250
- log_message(f"Ошибка расширения запроса: {str(e)}")
251
- return [question]
252
-
253
-
254
  def answer_question(question, query_engine, reranker, current_model, chunks_df=None):
255
  if query_engine is None:
256
  return "<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>Система не инициализирована</div>", "", ""
@@ -260,24 +234,14 @@ def answer_question(question, query_engine, reranker, current_model, chunks_df=N
260
 
261
  llm = get_llm_model(current_model)
262
 
263
- query_variations = expand_query(question, llm)
264
-
265
- all_nodes = []
266
- seen_node_ids = set()
267
-
268
- for query_var in query_variations:
269
- retrieved = query_engine.retriever.retrieve(query_var)
270
- for node in retrieved:
271
- node_id = f"{node.node_id if hasattr(node, 'node_id') else hash(node.text)}"
272
- if node_id not in seen_node_ids:
273
- all_nodes.append(node)
274
- seen_node_ids.add(node_id)
275
 
276
- log_message(f"Получено {len(all_nodes)} уникальных узлов из {len(query_variations)} запросов")
277
 
278
  reranked_nodes = rerank_nodes(
279
  question,
280
- all_nodes,
281
  reranker,
282
  top_k=20,
283
  min_score_threshold=0.5,
 
225
 
226
  html += "</div>"
227
  return html
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  def answer_question(question, query_engine, reranker, current_model, chunks_df=None):
229
  if query_engine is None:
230
  return "<div style='background-color: #e53e3e; color: white; padding: 20px; border-radius: 10px;'>Система не инициализирована</div>", "", ""
 
234
 
235
  llm = get_llm_model(current_model)
236
 
237
+ # Direct retrieval without query expansion
238
+ retrieved_nodes = query_engine.retriever.retrieve(question)
 
 
 
 
 
 
 
 
 
 
239
 
240
+ log_message(f"Получено {len(retrieved_nodes)} узлов")
241
 
242
  reranked_nodes = rerank_nodes(
243
  question,
244
+ retrieved_nodes,
245
  reranker,
246
  top_k=20,
247
  min_score_threshold=0.5,