File size: 1,158 Bytes
ea71a81
 
 
 
 
 
af51700
 
ea71a81
 
 
 
 
 
 
 
 
 
 
 
af51700
ea71a81
 
 
 
 
 
 
 
 
 
 
 
4df7450
 
ea71a81
4df7450
ea71a81
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from langchain_qdrant import QdrantVectorStore, FastEmbedSparse
from langchain_openai import OpenAIEmbeddings
from src import config

def get_hybrid_retriever(*, k: int = 8, fetch_k: int = 20, lambda_mult: float = 0.7):
    """Build a retriever over the project's Qdrant collection.

    The vector store is instantiated directly (rather than through a
    ``from_*`` factory) around the already-created ``config.qdrant_client``,
    which the original author notes is needed for compatibility with the
    local file-based client. The store is configured with both a dense
    (OpenAI) and a sparse (FastEmbed) embedding and
    ``retrieval_mode="hybrid"``, and the returned retriever uses
    ``search_type="mmr"``.

    NOTE(review): in ``langchain_qdrant`` the MMR search path appears to
    embed the query with the dense embedding only, so the sparse/hybrid
    configuration may not influence results while ``search_type="mmr"`` is
    used -- confirm against the installed library version.

    Args:
        k: Number of documents the retriever returns per query.
        fetch_k: Number of candidates fetched before MMR re-ranking;
            normally should be >= ``k``.
        lambda_mult: MMR trade-off in ``[0, 1]``; per the LangChain docs,
            1 favours relevance (minimum diversity) and 0 favours diversity.

    Returns:
        The retriever produced by ``QdrantVectorStore.as_retriever``.
    """
    # Dense embeddings come from OpenAI; model name and key live in config.
    dense_embeddings = OpenAIEmbeddings(
        model=config.EMBEDDING_MODEL,
        openai_api_key=config.OPENAI_API_KEY,
    )

    # Sparse embeddings are computed with FastEmbed.
    sparse_embeddings = FastEmbedSparse(model_name=config.SPARSE_MODEL)

    # Reuse the client held by config instead of opening a new one.
    vector_store = QdrantVectorStore(
        client=config.qdrant_client,
        collection_name=config.COLLECTION_NAME,
        embedding=dense_embeddings,
        sparse_embedding=sparse_embeddings,
        retrieval_mode="hybrid",
    )

    return vector_store.as_retriever(
        search_type="mmr",
        search_kwargs={
            "k": k,
            "fetch_k": fetch_k,
            "lambda_mult": lambda_mult,
        },
    )