File size: 2,564 Bytes
c5e1945
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from utils import *
from langchain_community.retrievers import BM25Retriever
from langchain.retrievers import EnsembleRetriever
from config import logger

# Try to load existing vector store, create if not found
try:
    logger.info("Loading vector store...")
    vector_store = load_company_vector_store()
    if vector_store:
        logger.info("Vector store loaded successfully")
    else:
        # If vector_store is None, this means it didn't exist
        logger.info("Vector store not found, creating new...")
        company_documents = create_company_documents()
        company_chunks = split_documents(company_documents)
        vector_store = create_company_vector_store(company_chunks)
        logger.info("Vector store created successfully")
except Exception as e:
    # This block will handle other potential errors during the loading/creation process
    logger.error(f"Error loading or creating vector store: {str(e)}")
    # It might be good to exit or handle this more gracefully.
    # For now, let's just re-raise the exception to see what's wrong.
    raise


# Try to load existing company chunks, create if not found
try:
    logger.info("Loading company chunks...")
    company_chunks = load_chunks()
    if company_chunks:
        logger.info("Company chunks loaded successfully")
    else:
        # If company_chunks is None, this means it didn't exist
        logger.info("Company chunks not found, creating new...")
        company_documents = create_company_documents()
        company_chunks = split_documents(company_documents)
        save_chunks(company_chunks)
        logger.info("Company chunks created successfully")
        
except Exception as e:
    # This block will handle other potential errors during the loading/creation process
    logger.error(f"Error loading or creating company chunks: {str(e)}")
    # It might be good to exit or handle this more gracefully.
    # For now, let's just re-raise the exception to see what's wrong.
    raise


# Create vector retriever
logger.info("πŸ” Creating vector retriever...")
vector_retriever = vector_store.as_retriever(search_kwargs={"k": 5})

# Create BM25 retriever
logger.info("πŸ“ Creating BM25 retriever...")
bm25_retriever = BM25Retriever.from_documents(company_chunks)
bm25_retriever.k = 3

# Create hybrid retriever
logger.info("πŸ”„ Creating hybrid retriever...")
hybrid_retriever = EnsembleRetriever(
       retrievers=[bm25_retriever, vector_retriever],
       weights=[0.2, 0.8]
   )


logger.info("βœ… Retrievers created and hybrid retriever is ready.")