MrSimple07 committed on
Commit
07d69bf
·
1 Parent(s): 2b2b3c3

BM25 full corpus + COMPACT + NodePostprocessor

Browse files
Files changed (1) hide show
  1. index_retriever.py +22 -12
index_retriever.py CHANGED
@@ -7,44 +7,54 @@ from llama_index.retrievers.bm25 import BM25Retriever
7
  from llama_index.core.retrievers import QueryFusionRetriever
8
  from my_logging import log_message
9
  from config import CUSTOM_PROMPT, PROMPT_SIMPLE_POISK
 
 
10
 
def create_vector_index(documents):
    """Log the start of indexing and build a vector index from ``documents``.

    Args:
        documents: Documents handed unchanged to
            ``VectorStoreIndex.from_documents``.

    Returns:
        The index object produced by ``VectorStoreIndex.from_documents``.
    """
    log_message("Строю векторный индекс")
    built_index = VectorStoreIndex.from_documents(documents)
    return built_index
14
 
def create_query_engine(vector_index):
    """Build a hybrid (dense vector + BM25) query engine over ``vector_index``.

    Both retrievers are fused by a ``QueryFusionRetriever`` and the fused
    nodes are summarised with the ``PROMPT_SIMPLE_POISK`` prompt.

    Args:
        vector_index: Index that backs the dense retriever and whose docstore
            supplies the BM25 corpus.

    Returns:
        A ``RetrieverQueryEngine`` wired to the hybrid retriever.

    Raises:
        Exception: Any error raised while building the components is logged
            through ``log_message`` and re-raised unchanged.
    """
    try:
        bm25_retriever = BM25Retriever.from_defaults(
            docstore=vector_index.docstore,
            similarity_top_k=15,
        )

        # NOTE(review): VectorIndexRetriever does not appear to act on
        # ``similarity_cutoff`` (unknown kwargs are swallowed) - confirm
        # against the installed llama-index version before relying on it.
        vector_retriever = VectorIndexRetriever(
            index=vector_index,
            similarity_top_k=25,
            similarity_cutoff=0.5,
        )

        # num_queries=1 disables LLM query rewriting: only the user's
        # original query is sent to both retrievers.
        hybrid_retriever = QueryFusionRetriever(
            [vector_retriever, bm25_retriever],
            similarity_top_k=40,
            num_queries=1,
        )

        custom_prompt_template = PromptTemplate(PROMPT_SIMPLE_POISK)
        # TREE_SUMMARIZE reads ``summary_template``; ``text_qa_template`` alone
        # would leave the custom prompt unused, so it is passed as both.
        response_synthesizer = get_response_synthesizer(
            response_mode=ResponseMode.TREE_SUMMARIZE,
            text_qa_template=custom_prompt_template,
            summary_template=custom_prompt_template,
        )

        query_engine = RetrieverQueryEngine(
            retriever=hybrid_retriever,
            response_synthesizer=response_synthesizer,
        )

        log_message("Query engine успешно создан")
        return query_engine

    except Exception as e:
        log_message(f"Ошибка создания query engine: {e}")
        raise
 
from llama_index.core.postprocessor.types import BaseNodePostprocessor
from llama_index.core.retrievers import QueryFusionRetriever

from config import CUSTOM_PROMPT, PROMPT_SIMPLE_POISK
from my_logging import log_message
12
 
def create_vector_index(documents):
    """Announce indexing via ``log_message`` and build the vector index.

    Args:
        documents: Documents passed as-is to
            ``VectorStoreIndex.from_documents``.

    Returns:
        The result of ``VectorStoreIndex.from_documents(documents)``.
    """
    log_message("Строю векторный индекс")
    new_index = VectorStoreIndex.from_documents(documents)
    return new_index
16
 
def create_query_engine(vector_index, reranker=None):
    """Build a hybrid (dense vector + BM25) query engine over ``vector_index``.

    The dense and BM25 retrievers are fused by a ``QueryFusionRetriever``;
    the fused nodes are optionally reranked and then answered with the
    ``PROMPT_SIMPLE_POISK`` prompt in COMPACT response mode.

    Args:
        vector_index: Index that backs the dense retriever and whose docstore
            supplies the BM25 corpus.
        reranker: Optional ``BaseNodePostprocessor`` (for example a reranker)
            applied to the fused nodes before response synthesis.

    Returns:
        A ``RetrieverQueryEngine`` wired to the hybrid retriever.

    Raises:
        ValueError: If the index docstore contains no nodes.
        TypeError: If ``reranker`` is given but is not a
            ``BaseNodePostprocessor``.
        Exception: Any other error raised while building the components.
            Every error is logged through ``log_message`` and re-raised.
    """
    try:
        # BM25Retriever.from_defaults accepts exactly one of
        # index / nodes / docstore, so the corpus is materialised once from
        # the docstore and passed as ``nodes`` only.
        corpus_nodes = list(vector_index.docstore.docs.values())
        if not corpus_nodes:
            raise ValueError(
                "Cannot build BM25 retriever: the index docstore is empty"
            )

        bm25_retriever = BM25Retriever.from_defaults(
            nodes=corpus_nodes,
            # Asking BM25 for more hits than the corpus holds can fail.
            similarity_top_k=min(15, len(corpus_nodes)),
        )

        # NOTE(review): VectorIndexRetriever does not appear to act on
        # ``similarity_cutoff`` (unknown kwargs are swallowed) - confirm
        # against the installed llama-index version before relying on it.
        vector_retriever = VectorIndexRetriever(
            index=vector_index,
            similarity_top_k=30,
            similarity_cutoff=0.5,
        )

        # num_queries=1 disables LLM query rewriting: only the user's
        # original query is sent to both retrievers.
        hybrid_retriever = QueryFusionRetriever(
            [vector_retriever, bm25_retriever],
            similarity_top_k=40,
            num_queries=1,
        )

        custom_prompt_template = PromptTemplate(PROMPT_SIMPLE_POISK)
        response_synthesizer = get_response_synthesizer(
            response_mode=ResponseMode.COMPACT,
            text_qa_template=custom_prompt_template,
        )

        # Rerankers are already node postprocessors, so the object is handed
        # to the engine directly instead of being wrapped.
        node_postprocessors = None
        if reranker is not None:
            if not isinstance(reranker, BaseNodePostprocessor):
                raise TypeError(
                    "reranker must be a BaseNodePostprocessor, got "
                    f"{type(reranker).__name__}"
                )
            node_postprocessors = [reranker]

        query_engine = RetrieverQueryEngine(
            retriever=hybrid_retriever,
            response_synthesizer=response_synthesizer,
            node_postprocessors=node_postprocessors,
        )

        log_message("Query engine успешно создан")
        return query_engine

    except Exception as e:
        log_message(f"Ошибка создания query engine: {e}")
        raise