Spaces:
Sleeping
Sleeping
File size: 2,564 Bytes
c5e1945 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
from utils import *
from langchain_community.retrievers import BM25Retriever
from langchain.retrievers import EnsembleRetriever
from config import logger
# Obtain the company vector store: reuse the stored one when the loader
# returns something truthy, otherwise build a fresh one from the documents.
try:
    logger.info("Loading vector store...")
    vector_store = load_company_vector_store()
    if not vector_store:
        # A falsy result (e.g. None) is treated as "no store exists yet".
        logger.info("Vector store not found, creating new...")
        company_documents = create_company_documents()
        company_chunks = split_documents(company_documents)
        vector_store = create_company_vector_store(company_chunks)
        logger.info("Vector store created successfully")
    else:
        logger.info("Vector store loaded successfully")
except Exception as exc:
    # Any failure while loading or building is logged and then propagated
    # unchanged, so start-up stops with the original error visible.
    logger.error(f"Error loading or creating vector store: {exc!s}")
    raise
# Obtain the company chunks: reuse what load_chunks() returns when it is
# truthy, otherwise rebuild them from the documents and hand them to
# save_chunks().
try:
    logger.info("Loading company chunks...")
    company_chunks = load_chunks()
    if not company_chunks:
        # A falsy result (e.g. None) is treated as "no chunks stored yet".
        logger.info("Company chunks not found, creating new...")
        company_documents = create_company_documents()
        company_chunks = split_documents(company_documents)
        save_chunks(company_chunks)
        logger.info("Company chunks created successfully")
    else:
        logger.info("Company chunks loaded successfully")
except Exception as exc:
    # Any failure while loading or building is logged and then propagated
    # unchanged, so start-up stops with the original error visible.
    logger.error(f"Error loading or creating company chunks: {exc!s}")
    raise
# Build the three retrievers used for search: a vector retriever over the
# vector store, a BM25 retriever over the company chunks, and an ensemble
# that combines both.
#
# NOTE(review): the leading "π" in the three "Creating ..." messages looks
# like a mis-decoded emoji; the original glyph cannot be recovered from
# here, so the text is kept as-is -- confirm against the source file.

# Vector retriever: asks the vector store for k=5 results per query.
logger.info("π Creating vector retriever...")
vector_retriever = vector_store.as_retriever(search_kwargs={"k": 5})

# BM25 retriever built from the same chunks; k is set to 3 after
# construction.
logger.info("π Creating BM25 retriever...")
bm25_retriever = BM25Retriever.from_documents(company_chunks)
bm25_retriever.k = 3

# Hybrid retriever: the weights are listed in the same order as the
# retrievers (0.2 for BM25, 0.8 for the vector retriever).
logger.info("π Creating hybrid retriever...")
hybrid_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, vector_retriever],
    weights=[0.2, 0.8],
)

# The closing message was split across two lines by a mis-decoded emoji,
# which left the string literal unterminated; it is restored on one line.
logger.info("✅ Retrievers created and hybrid retriever is ready.")
|