MrSimple07 committed on
Commit
d99512d
·
1 Parent(s): a83db61

Much lower reranking threshold (-0.5 instead of 0.1) + detailed score logging

Browse files
Files changed (1) hide show
  1. documents_prep.py +1 -6
documents_prep.py CHANGED
@@ -18,17 +18,12 @@ def chunk_text_documents(documents):
18
 
19
  chunked = []
20
  for doc in documents:
21
- # Add document ID to text for better BM25 matching
22
- doc_id = doc.metadata.get('document_id', '')
23
- if doc_id and doc_id not in doc.text[:200]:
24
- doc.text = f"[Документ: {doc_id}]\n\n{doc.text}"
25
-
26
  chunks = text_splitter.get_nodes_from_documents([doc])
27
  for i, chunk in enumerate(chunks):
28
  chunk.metadata.update({
29
  'chunk_id': i,
30
  'total_chunks': len(chunks),
31
- 'chunk_size': len(chunk.text)
32
  })
33
  chunked.append(chunk)
34
 
 
18
 
19
  chunked = []
20
  for doc in documents:
 
 
 
 
 
21
  chunks = text_splitter.get_nodes_from_documents([doc])
22
  for i, chunk in enumerate(chunks):
23
  chunk.metadata.update({
24
  'chunk_id': i,
25
  'total_chunks': len(chunks),
26
+ 'chunk_size': len(chunk.text) # Add chunk size
27
  })
28
  chunked.append(chunk)
29