MANIT_Chat / server /utils /BM25_to_Dict.py
WizardCoder2007's picture
first commit
2e9afea
Raw
History Blame Contribute Delete
850 Bytes
def convert_bm25_to_dict(bm25_results):
    """Map BM25 retrieval hits onto plain result dictionaries.

    Each element of ``bm25_results`` must expose a ``page_content``
    attribute and a dict-like ``metadata`` attribute (per the original
    author's notes these are LangChain ``Document`` objects, and the output
    shape mirrors the project's Chroma results -- neither is verifiable
    from this function alone).

    Args:
        bm25_results: Iterable of document-like objects, in ranked order.

    Returns:
        A list with one dict per input document, in input order, with keys:

        * ``'id'`` -- ``metadata.get('chunk_id')``; ``None`` when absent.
        * ``'content'`` -- the document's ``page_content``.
        * ``'metadata'`` -- the document's own ``metadata`` object
          (the same reference, not a copy).
        * ``'similarity_score'`` -- always ``0.0``.
        * ``'distance'`` -- always ``1.0``.
        * ``'rank'`` -- 1-based position of the document in the input.
    """
    return [
        {
            # Identifier is taken from the chunk's metadata, if present.
            'id': hit.metadata.get('chunk_id'),
            'content': hit.page_content,
            'metadata': hit.metadata,
            # Fixed placeholder: no normalized BM25 score is available here.
            'similarity_score': 0.0,
            # Fixed placeholder: these hits are not vector matches.
            'distance': 1.0,
            'rank': position,
        }
        for position, hit in enumerate(bm25_results, start=1)
    ]