| from langchain_core.runnables import RunnablePassthrough
|
| from langchain_core.output_parsers import StrOutputParser
|
| from langchain_community.chat_models import ChatOllama
|
| from langchain_core.prompts import ChatPromptTemplate
|
| from langchain_pinecone import PineconeVectorStore
|
| from langchain_community.embeddings import SentenceTransformerEmbeddings
|
|
|
| import os
|
| from dotenv import load_dotenv
|
| from langchain.retrievers import BM25Retriever, EnsembleRetriever
|
| from kiwipiepy import Kiwi
|
| load_dotenv()  # python-dotenv: read variables from a .env file into the process environment
|
|
|
| kiwi = Kiwi()  # single module-level analyzer instance, reused by kiwi_tokenize on every call
|
|
|
def kiwi_tokenize(text):
    """Split *text* into token strings using the module-level ``kiwi`` analyzer.

    Args:
        text: The string to tokenize.

    Returns:
        A list holding the ``form`` attribute of every token yielded by
        ``kiwi.tokenize(text)``, in the order the analyzer produced them.
    """
    forms = []
    for morpheme in kiwi.tokenize(text):
        forms.append(morpheme.form)
    return forms
|
|
|
|
|
def retriever(pc, bm25, *, k=4, weights=(0.3, 0.7)):
    """Build a hybrid retriever that ensembles BM25 and vector-store search.

    A BM25 index is built from ``bm25`` on every call (tokenized with
    ``kiwi_tokenize``) and combined with the retriever exposed by ``pc``.

    Args:
        pc: Vector store providing ``as_retriever``. The module imports
            ``PineconeVectorStore``, so presumably that is what callers
            pass -- confirm against the call sites.
        bm25: Documents handed to ``BM25Retriever.from_documents`` to build
            the keyword index.
        k: Number of documents each underlying retriever returns.
        weights: Ensemble weights in the order (BM25, vector store).

    Returns:
        An ``EnsembleRetriever`` wrapping the BM25 retriever and the
        vector-store retriever, in that order.
    """
    dense_retriever = pc.as_retriever(search_kwargs={"k": k})

    sparse_retriever = BM25Retriever.from_documents(
        bm25,
        preprocess_func=kiwi_tokenize,
    )
    sparse_retriever.k = k

    # NOTE: an earlier version passed search_type="mmr" to EnsembleRetriever.
    # EnsembleRetriever declares no such field, so the argument had no effect
    # on retrieval; it has been dropped to avoid implying MMR is in use.
    # If MMR is actually wanted, request it from the vector store instead:
    # pc.as_retriever(search_type="mmr", search_kwargs={...}).
    return EnsembleRetriever(
        retrievers=[sparse_retriever, dense_retriever],
        weights=list(weights),
    )
|
|
|
|
|