MrSimple07 committed on
Commit
29f19c8
·
1 Parent(s): f9516ca

bm25 full corpus + COMPACT + NodePostprocessor

Browse files
Files changed (1) hide show
  1. index_retriever.py +11 -20
index_retriever.py CHANGED
@@ -7,54 +7,45 @@ from llama_index.retrievers.bm25 import BM25Retriever
7
  from llama_index.core.retrievers import QueryFusionRetriever
8
  from my_logging import log_message
9
  from config import CUSTOM_PROMPT, PROMPT_SIMPLE_POISK
10
- from llama_index.core.postprocessor import BaseNodePostprocessor
11
-
12
 
13
  def create_vector_index(documents):
14
  log_message("Строю векторный индекс")
15
  return VectorStoreIndex.from_documents(documents)
16
 
17
def create_query_engine(vector_index, reranker=None):
    """Build a hybrid (vector + BM25) query engine with optional reranking.

    Args:
        vector_index: Index that backs the vector retriever and whose
            docstore supplies the BM25 corpus.
        reranker: Optional node postprocessor applied to the retrieved nodes
            before synthesis. When ``None`` no postprocessing is configured.

    Returns:
        A ``RetrieverQueryEngine`` that retrieves through a
        ``QueryFusionRetriever`` and synthesizes answers in COMPACT mode
        using the ``PROMPT_SIMPLE_POISK`` template.

    Raises:
        Exception: Any error raised while building the engine is logged via
            ``log_message`` and re-raised unchanged.
    """
    try:
        # Pass the docstore alone: BM25Retriever.from_defaults takes exactly
        # one corpus source (index, nodes or docstore) and reads every node
        # out of the docstore itself, so BM25 still covers the whole corpus.
        # The extra `nodes=vector_index.get_nodes()` argument conflicted with
        # `docstore=` and relied on a method the index does not document.
        bm25_retriever = BM25Retriever.from_defaults(
            docstore=vector_index.docstore,
            similarity_top_k=15,
        )

        # NOTE(review): `similarity_cutoff` is not a documented
        # VectorIndexRetriever parameter and is presumably swallowed by
        # **kwargs without effect - confirm; a SimilarityPostprocessor is the
        # documented way to apply a score cutoff.
        vector_retriever = VectorIndexRetriever(
            index=vector_index,
            similarity_top_k=30,
            similarity_cutoff=0.5,
        )

        hybrid_retriever = QueryFusionRetriever(
            [vector_retriever, bm25_retriever],
            similarity_top_k=40,
            num_queries=1,
        )

        custom_prompt_template = PromptTemplate(PROMPT_SIMPLE_POISK)
        response_synthesizer = get_response_synthesizer(
            response_mode=ResponseMode.COMPACT,
            text_qa_template=custom_prompt_template,
        )

        # Hand the reranker over directly instead of wrapping it:
        # BaseNodePostprocessor is an abstract base and cannot be
        # instantiated around another object.
        # Assumes `reranker` is itself a node postprocessor instance - TODO
        # confirm against callers.
        node_postprocessors = [reranker] if reranker is not None else None

        query_engine = RetrieverQueryEngine(
            retriever=hybrid_retriever,
            response_synthesizer=response_synthesizer,
            node_postprocessors=node_postprocessors,
        )

        log_message("Query engine успешно создан")
        return query_engine

    except Exception as e:
        # Log for diagnostics, then let the caller see the original error.
        log_message(f"Ошибка создания query engine: {str(e)}")
        raise
 
7
  from llama_index.core.retrievers import QueryFusionRetriever
8
  from my_logging import log_message
9
  from config import CUSTOM_PROMPT, PROMPT_SIMPLE_POISK
 
 
10
 
11
  def create_vector_index(documents):
12
  log_message("Строю векторный индекс")
13
  return VectorStoreIndex.from_documents(documents)
14
 
15
def create_query_engine(vector_index):
    """Build a hybrid (vector + BM25) retrieval query engine over an index.

    Args:
        vector_index: Index that backs the vector retriever and whose
            docstore supplies the BM25 corpus.

    Returns:
        A ``RetrieverQueryEngine`` that retrieves through a
        ``QueryFusionRetriever`` and synthesizes answers in COMPACT mode
        using the ``PROMPT_SIMPLE_POISK`` template.

    Raises:
        Exception: Any error raised while building the engine is logged via
            ``log_message`` and re-raised unchanged.
    """
    try:
        # Pass the docstore alone: BM25Retriever.from_defaults takes exactly
        # one corpus source (index, nodes or docstore) and reads every node
        # out of the docstore itself, so BM25 still covers the whole corpus.
        # The extra `nodes=vector_index.get_nodes()` argument conflicted with
        # `docstore=` and relied on a method the index does not document.
        bm25_retriever = BM25Retriever.from_defaults(
            docstore=vector_index.docstore,
            similarity_top_k=20,
        )

        # NOTE(review): `similarity_cutoff` is not a documented
        # VectorIndexRetriever parameter and is presumably swallowed by
        # **kwargs without effect - confirm; a SimilarityPostprocessor is the
        # documented way to apply a score cutoff.
        vector_retriever = VectorIndexRetriever(
            index=vector_index,
            similarity_top_k=30,
            similarity_cutoff=0.7,
        )

        hybrid_retriever = QueryFusionRetriever(
            [vector_retriever, bm25_retriever],
            similarity_top_k=40,
            num_queries=1,
        )

        custom_prompt_template = PromptTemplate(PROMPT_SIMPLE_POISK)
        response_synthesizer = get_response_synthesizer(
            response_mode=ResponseMode.COMPACT,
            text_qa_template=custom_prompt_template,
        )

        query_engine = RetrieverQueryEngine(
            retriever=hybrid_retriever,
            response_synthesizer=response_synthesizer,
        )

        log_message("Query engine успешно создан")
        return query_engine

    except Exception as e:
        # Log for diagnostics, then let the caller see the original error.
        log_message(f"Ошибка создания query engine: {str(e)}")
        raise