MrSimple07 committed on
Commit
88291da
·
1 Parent(s): 1ca91bc

top-k query = 50 + similarity cutoff = 0.55 + table chunk size = 2500

Browse files
Files changed (2) hide show
  1. index_retriever.py +2 -2
  2. table_prep.py +1 -1
index_retriever.py CHANGED
@@ -52,12 +52,12 @@ def create_query_engine(vector_index):
52
  vector_retriever = VectorIndexRetriever(
53
  index=vector_index,
54
  similarity_top_k=50,
55
- similarity_cutoff=0.65
56
  )
57
 
58
  hybrid_retriever = QueryFusionRetriever(
59
  [vector_retriever, bm25_retriever],
60
- similarity_top_k=40,
61
  num_queries=1
62
  )
63
 
 
52
  vector_retriever = VectorIndexRetriever(
53
  index=vector_index,
54
  similarity_top_k=50,
55
+ similarity_cutoff=0.55
56
  )
57
 
58
  hybrid_retriever = QueryFusionRetriever(
59
  [vector_retriever, bm25_retriever],
60
+ similarity_top_k=50,
61
  num_queries=1
62
  )
63
 
table_prep.py CHANGED
@@ -32,7 +32,7 @@ def create_table_content(table_data):
32
  from llama_index.core.text_splitter import SentenceSplitter
33
  from config import CHUNK_SIZE, CHUNK_OVERLAP
34
 
35
- def chunk_table_document(doc, max_chunk_size=2000):
36
  lines = doc.text.strip().split('\n')
37
 
38
  # Separate header and data rows
 
32
  from llama_index.core.text_splitter import SentenceSplitter
33
  from config import CHUNK_SIZE, CHUNK_OVERLAP
34
 
35
+ def chunk_table_document(doc, max_chunk_size=2500):
36
  lines = doc.text.strip().split('\n')
37
 
38
  # Separate header and data rows