geneseek / src /retrieve.py
prabhal's picture
fix Railway port config and switch to Qdrant Cloud
af51700
from langchain_qdrant import QdrantVectorStore, FastEmbedSparse
from langchain_openai import OpenAIEmbeddings
from src import config
def get_hybrid_retriever(*, k: int = 8, fetch_k: int = 20, lambda_mult: float = 0.7):
    """Build a hybrid (dense + sparse) Qdrant retriever that uses MMR search.

    The vector store is constructed directly around the shared
    ``config.qdrant_client`` rather than creating a new client here, so
    every retriever talks to the same client instance the rest of the
    application uses.

    Args:
        k: Number of documents the retriever returns per query.
        fetch_k: Number of candidates fetched before MMR re-ranking; must
            be at least ``k`` or MMR cannot select ``k`` documents.
        lambda_mult: MMR relevance/diversity trade-off, forwarded as-is in
            ``search_kwargs``; expected to be within ``[0, 1]``.

    Returns:
        The retriever produced by ``QdrantVectorStore.as_retriever``.

    Raises:
        ValueError: If ``k`` is not positive, ``fetch_k`` is smaller than
            ``k``, or ``lambda_mult`` is outside ``[0, 1]``.
    """
    # Validate first: building the embedding objects below is comparatively
    # expensive, so reject bad search settings before doing any of that work.
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")
    if fetch_k < k:
        raise ValueError(f"fetch_k ({fetch_k!r}) must be >= k ({k!r})")
    if not 0.0 <= lambda_mult <= 1.0:
        raise ValueError(f"lambda_mult must be within [0, 1], got {lambda_mult!r}")

    # Dense vectors come from the OpenAI embedding model named in config.
    dense_embeddings = OpenAIEmbeddings(
        model=config.EMBEDDING_MODEL,
        openai_api_key=config.OPENAI_API_KEY,
    )
    # Sparse vectors come from the FastEmbed model named in config.
    sparse_embeddings = FastEmbedSparse(model_name=config.SPARSE_MODEL)

    # Reuse the client configured in src.config instead of creating one here.
    vector_store = QdrantVectorStore(
        client=config.qdrant_client,
        collection_name=config.COLLECTION_NAME,
        embedding=dense_embeddings,
        sparse_embedding=sparse_embeddings,
        retrieval_mode="hybrid",
    )

    return vector_store.as_retriever(
        search_type="mmr",
        search_kwargs={
            "k": k,
            "fetch_k": fetch_k,
            "lambda_mult": lambda_mult,
        },
    )