Cachoups committed on
Commit
bce90b4
·
verified ·
1 Parent(s): 32fa615

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -52,8 +52,8 @@ tokenizer_name = EMBEDDING_MODEL_NAME
52
  # Token splitting for more context split
53
  text_splitter = TokenTextSplitter.from_huggingface_tokenizer(
54
  tokenizer=AutoTokenizer.from_pretrained(tokenizer_name),
55
- chunk_size=300,
56
- chunk_overlap=30
57
  )
58
 
59
  chunks = text_splitter.split_documents(documents) # chunks used for LLM generation
@@ -68,7 +68,7 @@ for i, doc in enumerate(chunks):
68
  doc.metadata["index"] = i # Add an index for tracking
69
 
70
  """Retriever"""
71
- bm25_retriever = BM25Retriever.from_documents(chunks_bm25, k = 2) # 2 most similar contexts
72
 
73
  def retriever(query):
74
  tmp = bm25_retriever.invoke(preprocess_for_bm25(query))
 
52
  # Token splitting for more context split
53
  text_splitter = TokenTextSplitter.from_huggingface_tokenizer(
54
  tokenizer=AutoTokenizer.from_pretrained(tokenizer_name),
55
+ chunk_size=150,
56
+ chunk_overlap=15
57
  )
58
 
59
  chunks = text_splitter.split_documents(documents) # chunks used for LLM generation
 
68
  doc.metadata["index"] = i # Add an index for tracking
69
 
70
  """Retriever"""
71
+ bm25_retriever = BM25Retriever.from_documents(chunks_bm25, k = 4) # 4 most similar contexts
72
 
73
  def retriever(query):
74
  tmp = bm25_retriever.invoke(preprocess_for_bm25(query))