# Spaces: Running
def convert_bm25_to_dict(bm25_results):
    """Convert BM25 retriever documents into Chroma-style result dictionaries.

    Args:
        bm25_results: Iterable of document objects exposing ``page_content``
            and ``metadata`` attributes (LangChain ``Document`` objects
            returned by a BM25 retriever). ``None`` is treated as "no
            results".

    Returns:
        A list with one dictionary per input document, in input order. Each
        dictionary has the keys ``id``, ``content``, ``metadata``,
        ``similarity_score``, ``distance`` and ``rank``.
    """
    if bm25_results is None:
        return []

    dict_results = []
    # Rank is 1-based and simply mirrors the order the results arrived in.
    for rank, doc in enumerate(bm25_results, start=1):
        # Guard against a document whose metadata was never populated, so the
        # chunk_id lookup below cannot fail on None.
        metadata = doc.metadata if doc.metadata is not None else {}
        dict_results.append({
            # Presumably the persistent chunk_id assigned at indexing time;
            # None when the key is absent from the metadata.
            'id': metadata.get('chunk_id'),
            'content': doc.page_content,
            # The metadata mapping is passed through by reference, not copied.
            'metadata': metadata,
            # BM25 doesn't provide a normalized score, so placeholders are used.
            'similarity_score': 0.0,
            # Maximum distance since it's not a vector match.
            'distance': 1.0,
            'rank': rank,
        })
    return dict_results