MrSimple07 committed on
Commit
1f9fdb8
·
1 Parent(s): c63d524

added 35 semantic similarity

Browse files
Files changed (2) hide show
  1. index_retriever.py +2 -6
  2. utils.py +3 -7
index_retriever.py CHANGED
@@ -22,7 +22,7 @@ def create_query_engine(vector_index):
22
  vector_retriever = VectorIndexRetriever(
23
  index=vector_index,
24
  similarity_top_k=30, # Increased
25
- similarity_cutoff=0.7 # Slightly lower for recall
26
  )
27
 
28
  hybrid_retriever = QueryFusionRetriever(
@@ -50,9 +50,6 @@ def create_query_engine(vector_index):
50
  raise
51
 
52
  def rerank_nodes(query, nodes, reranker, top_k=20, min_score_threshold=0.5, diversity_penalty=0.3):
53
- """
54
- Rerank nodes with diversity and adaptive scoring
55
- """
56
  if not nodes or not reranker:
57
  return nodes[:top_k]
58
 
@@ -74,11 +71,10 @@ def rerank_nodes(query, nodes, reranker, top_k=20, min_score_threshold=0.5, dive
74
  log_message("Нет узлов после фильтрации, снижаю порог")
75
  scored_nodes = list(zip(nodes, scores))
76
  scored_nodes.sort(key=lambda x: x[1], reverse=True)
77
- min_score_threshold = scored_nodes[0][1] * 0.6 # 60% of top score
78
  scored_nodes = [(node, score) for node, score in scored_nodes
79
  if score >= min_score_threshold]
80
 
81
- # MMR-like diversity selection
82
  selected_nodes = []
83
  selected_docs = set()
84
  selected_sections = set()
 
22
  vector_retriever = VectorIndexRetriever(
23
  index=vector_index,
24
  similarity_top_k=30, # Increased
25
+ similarity_cutoff=0.5 # Slightly lower for recall
26
  )
27
 
28
  hybrid_retriever = QueryFusionRetriever(
 
50
  raise
51
 
52
  def rerank_nodes(query, nodes, reranker, top_k=20, min_score_threshold=0.5, diversity_penalty=0.3):
 
 
 
53
  if not nodes or not reranker:
54
  return nodes[:top_k]
55
 
 
71
  log_message("Нет узлов после фильтрации, снижаю порог")
72
  scored_nodes = list(zip(nodes, scores))
73
  scored_nodes.sort(key=lambda x: x[1], reverse=True)
74
+ min_score_threshold = scored_nodes[0][1] * 0.6
75
  scored_nodes = [(node, score) for node, score in scored_nodes
76
  if score >= min_score_threshold]
77
 
 
78
  selected_nodes = []
79
  selected_docs = set()
80
  selected_sections = set()
utils.py CHANGED
@@ -258,13 +258,10 @@ def answer_question(question, query_engine, reranker, current_model, chunks_df=N
258
  try:
259
  start_time = time.time()
260
 
261
- # Get LLM for query expansion
262
  llm = get_llm_model(current_model)
263
 
264
- # Expand query
265
  query_variations = expand_query(question, llm)
266
 
267
- # Retrieve with multiple queries and deduplicate
268
  all_nodes = []
269
  seen_node_ids = set()
270
 
@@ -278,13 +275,12 @@ def answer_question(question, query_engine, reranker, current_model, chunks_df=N
278
 
279
  log_message(f"Получено {len(all_nodes)} уникальных узлов из {len(query_variations)} запросов")
280
 
281
- # Rerank with stricter threshold and diversity
282
  reranked_nodes = rerank_nodes(
283
- question, # Use original question for reranking
284
  all_nodes,
285
  reranker,
286
  top_k=20,
287
- min_score_threshold=0.5, # Much stricter threshold
288
  diversity_penalty=0.3
289
  )
290
 
@@ -311,7 +307,7 @@ def answer_question(question, query_engine, reranker, current_model, chunks_df=N
311
  <h3 style='color: #63b3ed; margin-top: 0;'>Ответ (Модель: {current_model}):</h3>
312
  <div style='line-height: 1.6; font-size: 16px;'>{response.response}</div>
313
  <div style='margin-top: 15px; padding-top: 10px; border-top: 1px solid #4a5568; font-size: 14px; color: #a0aec0;'>
314
- Время обработки: {processing_time:.2f} секунд | Источников: {len(reranked_nodes)} | Запросов: {len(query_variations)}
315
  </div>
316
  </div>"""
317
 
 
258
  try:
259
  start_time = time.time()
260
 
 
261
  llm = get_llm_model(current_model)
262
 
 
263
  query_variations = expand_query(question, llm)
264
 
 
265
  all_nodes = []
266
  seen_node_ids = set()
267
 
 
275
 
276
  log_message(f"Получено {len(all_nodes)} уникальных узлов из {len(query_variations)} запросов")
277
 
 
278
  reranked_nodes = rerank_nodes(
279
+ question,
280
  all_nodes,
281
  reranker,
282
  top_k=20,
283
+ min_score_threshold=0.5,
284
  diversity_penalty=0.3
285
  )
286
 
 
307
  <h3 style='color: #63b3ed; margin-top: 0;'>Ответ (Модель: {current_model}):</h3>
308
  <div style='line-height: 1.6; font-size: 16px;'>{response.response}</div>
309
  <div style='margin-top: 15px; padding-top: 10px; border-top: 1px solid #4a5568; font-size: 14px; color: #a0aec0;'>
310
+ Время обработки: {processing_time:.2f} секунд
311
  </div>
312
  </div>"""
313