Spaces:
Sleeping
Sleeping
| from utils import * | |
| from langchain_community.retrievers import BM25Retriever | |
| from langchain.retrievers import EnsembleRetriever | |
| from config import logger | |
| # Try to load existing vector store, create if not found | |
| try: | |
| logger.info("Loading vector store...") | |
| vector_store = load_company_vector_store() | |
| if vector_store: | |
| logger.info("Vector store loaded successfully") | |
| else: | |
| # If vector_store is None, this means it didn't exist | |
| logger.info("Vector store not found, creating new...") | |
| company_documents = create_company_documents() | |
| company_chunks = split_documents(company_documents) | |
| vector_store = create_company_vector_store(company_chunks) | |
| logger.info("Vector store created successfully") | |
| except Exception as e: | |
| # This block will handle other potential errors during the loading/creation process | |
| logger.error(f"Error loading or creating vector store: {str(e)}") | |
| # It might be good to exit or handle this more gracefully. | |
| # For now, let's just re-raise the exception to see what's wrong. | |
| raise | |
| # Try to load existing company chunks, create if not found | |
| try: | |
| logger.info("Loading company chunks...") | |
| company_chunks = load_chunks() | |
| if company_chunks: | |
| logger.info("Company chunks loaded successfully") | |
| else: | |
| # If company_chunks is None, this means it didn't exist | |
| logger.info("Company chunks not found, creating new...") | |
| company_documents = create_company_documents() | |
| company_chunks = split_documents(company_documents) | |
| save_chunks(company_chunks) | |
| logger.info("Company chunks created successfully") | |
| except Exception as e: | |
| # This block will handle other potential errors during the loading/creation process | |
| logger.error(f"Error loading or creating company chunks: {str(e)}") | |
| # It might be good to exit or handle this more gracefully. | |
| # For now, let's just re-raise the exception to see what's wrong. | |
| raise | |
| # Create vector retriever | |
| logger.info("π Creating vector retriever...") | |
| vector_retriever = vector_store.as_retriever(search_kwargs={"k": 5}) | |
| # Create BM25 retriever | |
| logger.info("π Creating BM25 retriever...") | |
| bm25_retriever = BM25Retriever.from_documents(company_chunks) | |
| bm25_retriever.k = 3 | |
| # Create hybrid retriever | |
| logger.info("π Creating hybrid retriever...") | |
| hybrid_retriever = EnsembleRetriever( | |
| retrievers=[bm25_retriever, vector_retriever], | |
| weights=[0.2, 0.8] | |
| ) | |
| logger.info("β Retrievers created and hybrid retriever is ready.") | |