MrSimple07 committed on
Commit
0a99ba6
·
1 Parent(s): 865746a

Give table and image nodes priority over reranked text nodes; raise similarity_cutoff from 0.7 to 0.8

Browse files
Files changed (1) hide show
  1. index_retriever.py +25 -13
index_retriever.py CHANGED
@@ -22,7 +22,7 @@ def create_query_engine(vector_index):
22
  vector_retriever = VectorIndexRetriever(
23
  index=vector_index,
24
  similarity_top_k=30,
25
- similarity_cutoff=0.7
26
  )
27
 
28
  hybrid_retriever = QueryFusionRetriever(
@@ -56,21 +56,33 @@ def rerank_nodes(query, nodes, reranker, top_k=10):
56
  try:
57
  log_message(f"Переранжирую {len(nodes)} узлов")
58
 
59
- pairs = []
60
- for node in nodes:
61
- pairs.append([query, node.text])
 
62
 
63
- scores = reranker.predict(pairs)
64
 
65
- scored_nodes = list(zip(nodes, scores))
66
- scored_nodes.sort(key=lambda x: x[1], reverse=True)
 
 
 
 
 
 
 
 
 
 
67
 
68
- reranked_nodes = [node for node, score in scored_nodes[:top_k]]
69
- log_message(f"Возвращаю топ-{len(reranked_nodes)} переранжированных узлов")
 
 
 
 
70
 
71
- return reranked_nodes
72
  except Exception as e:
73
  log_message(f"Ошибка переранжировки: {str(e)}")
74
- return nodes[:top_k]
75
-
76
-
 
22
  vector_retriever = VectorIndexRetriever(
23
  index=vector_index,
24
  similarity_top_k=30,
25
+ similarity_cutoff=0.8
26
  )
27
 
28
  hybrid_retriever = QueryFusionRetriever(
 
56
  try:
57
  log_message(f"Переранжирую {len(nodes)} узлов")
58
 
59
+ # Separate tables and images from text nodes
60
+ table_nodes = [node for node in nodes if node.metadata.get('type') == 'table']
61
+ image_nodes = [node for node in nodes if node.metadata.get('type') == 'image']
62
+ text_nodes = [node for node in nodes if node.metadata.get('type', 'text') == 'text']
63
 
64
+ priority_nodes = table_nodes + image_nodes
65
 
66
+ # Rerank only text nodes
67
+ if text_nodes:
68
+ pairs = []
69
+ for node in text_nodes:
70
+ pairs.append([query, node.text])
71
+
72
+ scores = reranker.predict(pairs)
73
+ scored_nodes = list(zip(text_nodes, scores))
74
+ scored_nodes.sort(key=lambda x: x[1], reverse=True)
75
+ reranked_text_nodes = [node for node, score in scored_nodes]
76
+ else:
77
+ reranked_text_nodes = []
78
 
79
+ # Combine: priority nodes first, then reranked text nodes
80
+ final_nodes = priority_nodes + reranked_text_nodes
81
+ result = final_nodes[:top_k]
82
+
83
+ log_message(f"Возвращаю {len(priority_nodes)} приоритетных узлов и {len(result) - len(priority_nodes)} текстовых узлов")
84
+ return result
85
 
 
86
  except Exception as e:
87
  log_message(f"Ошибка переранжировки: {str(e)}")
88
+ return nodes[:top_k]