Spaces:

MrSimple01
/

RAG_AIEXP_01

Sleeping

MrSimple07 committed on Oct 5, 2025

Commit

d99512d

1 Parent(s): a83db61

Much lower reranking threshold (-0.5 instead of 0.1) + detailed score logging

Files changed (1) hide show

documents_prep.py CHANGED Viewed

@@ -18,17 +18,12 @@ def chunk_text_documents(documents):
     chunked = []
     for doc in documents:
-        # Add document ID to text for better BM25 matching
-        doc_id = doc.metadata.get('document_id', '')
-        if doc_id and doc_id not in doc.text[:200]:
-            doc.text = f"[Документ: {doc_id}]\n\n{doc.text}"
         chunks = text_splitter.get_nodes_from_documents([doc])
         for i, chunk in enumerate(chunks):
             chunk.metadata.update({
                 'chunk_id': i,
                 'total_chunks': len(chunks),
-                'chunk_size': len(chunk.text)
             })
             chunked.append(chunk)

     chunked = []
     for doc in documents:
         chunks = text_splitter.get_nodes_from_documents([doc])
         for i, chunk in enumerate(chunks):
             chunk.metadata.update({
                 'chunk_id': i,
                 'total_chunks': len(chunks),
+                'chunk_size': len(chunk.text)  # Add chunk size
             })
             chunked.append(chunk)