Spaces:
Paused
Paused
File size: 2,038 Bytes
869eb7d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
from langchain.retrievers import ContextualCompressionRetriever
from langchain_cohere.rerank import CohereRerank
from langchain_core.vectorstores import VectorStoreRetriever
def retrieve(embedding, q, retrieve_document_count):
    """Return the documents most similar to ``q`` from the embedding's vector store.

    Args:
        embedding: Object exposing ``get_vector_store()``; the returned store
            must provide ``as_retriever(search_type=..., search_kwargs=...)``.
        q: Query text handed to the retriever.
        retrieve_document_count: Value used as ``k`` for the similarity search.

    Returns:
        Whatever the retriever's ``invoke`` call returns for ``q``.
    """
    retriever: VectorStoreRetriever = embedding.get_vector_store().as_retriever(
        search_type="similarity",
        search_kwargs={"k": retrieve_document_count},
    )
    # ``k`` is already fixed through ``search_kwargs`` above. The previous code
    # additionally passed a literal ``kwargs={"k": ...}`` keyword, which is not
    # unpacked and therefore never acted as a result limit, so it is dropped.
    # ``invoke`` replaces the deprecated ``get_relevant_documents``.
    return retriever.invoke(q)
def retrieve_with_rerank(embedding, q, retrieve_document_count):
    """Retrieve documents for ``q`` through the reranking retriever.

    Args:
        embedding: Passed unchanged to ``reranking_retriever``.
        q: Query text handed to the retriever.
        retrieve_document_count: Passed unchanged to ``reranking_retriever``.

    Returns:
        Whatever the retriever built by ``reranking_retriever`` returns from
        ``invoke`` for ``q``.
    """
    compression_retriever = reranking_retriever(embedding, retrieve_document_count)
    # The previous call also passed a literal ``kwargs={"k": ...}`` keyword. It
    # is not unpacked, so it never limited the number of results; all sizing is
    # left to the retriever's own configuration.
    return compression_retriever.invoke(q)
def reranking_retriever(embedding, retrieve_document_count):
    """Build a retriever that similarity-searches and then reranks with Cohere.

    Args:
        embedding: Object exposing ``get_vector_store()``; the returned store
            must provide ``as_retriever(search_type=..., search_kwargs=...)``.
        retrieve_document_count: Number of documents to keep after reranking.
            The base similarity search fetches ten times this many candidates.

    Returns:
        A ``ContextualCompressionRetriever`` wrapping the similarity retriever
        with a ``CohereRerank`` compressor.
    """
    # Over-fetch so the reranker has a larger candidate pool to choose from.
    retriever: VectorStoreRetriever = embedding.get_vector_store().as_retriever(
        search_type="similarity",
        search_kwargs={"k": retrieve_document_count * 10},
    )
    # Without an explicit ``top_n`` CohereRerank falls back to its library
    # default (3), which ignores the requested document count.
    compressor = CohereRerank(
        model="rerank-multilingual-v3.0",
        top_n=retrieve_document_count,
    )
    return ContextualCompressionRetriever(
        base_compressor=compressor,
        base_retriever=retriever,
    )
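# TODO: finish the `hyde` retrieval variant drafted below; the draft still calls `compression_retriever`, which it never defines.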
# def hyde(agent: Agent, q, retrieve_document_count):
# retriever: VectorStoreRetriever = agent.embedding.get_vector_store().as_retriever(
# search_type="similarity",
# search_kwargs={"k": retrieve_document_count * 10}
# )
#
# context_doc = compression_retriever.get_relevant_documents(
# query=q,
# kwargs={"k": retrieve_document_count}
# )
#
# for doc in context_doc:
# text = doc.page_content
# print(" kontext: " + text.replace('\n', ' ').replace('\r', ' '))
#
# return context_doc
|