Spaces:
Sleeping
Sleeping
xicocdi committed on
Commit ·
9c44a22
1
Parent(s): 2ab1d97
push embedding-model
Browse files
- app.py +11 -4
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -11,6 +11,8 @@ from langchain.prompts import PromptTemplate
|
|
| 11 |
from langchain.chains import ConversationalRetrievalChain
|
| 12 |
from langchain_community.vectorstores import Qdrant
|
| 13 |
from langchain.memory import ConversationBufferMemory
|
|
|
|
|
|
|
| 14 |
|
| 15 |
import chainlit as cl
|
| 16 |
|
|
@@ -27,13 +29,13 @@ for pdf_path in pdf_paths:
|
|
| 27 |
documents.extend(loader.load())
|
| 28 |
|
| 29 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 30 |
-
chunk_size=
|
| 31 |
-
chunk_overlap=
|
| 32 |
)
|
| 33 |
|
| 34 |
docs = text_splitter.split_documents(documents)
|
| 35 |
|
| 36 |
-
embedding =
|
| 37 |
|
| 38 |
vectorstore = Qdrant.from_documents(
|
| 39 |
documents=docs,
|
|
@@ -78,6 +80,11 @@ llm = ChatOpenAI(
|
|
| 78 |
streaming=True,
|
| 79 |
)
|
| 80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
@cl.on_chat_start
|
| 83 |
async def start_chat():
|
|
@@ -87,7 +94,7 @@ async def start_chat():
|
|
| 87 |
|
| 88 |
qa = ConversationalRetrievalChain.from_llm(
|
| 89 |
llm,
|
| 90 |
-
retriever=
|
| 91 |
memory=memory,
|
| 92 |
combine_docs_chain_kwargs={"prompt": PROMPT},
|
| 93 |
return_source_documents=True,
|
|
|
|
| 11 |
from langchain.chains import ConversationalRetrievalChain
|
| 12 |
from langchain_community.vectorstores import Qdrant
|
| 13 |
from langchain.memory import ConversationBufferMemory
|
| 14 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 15 |
+
from langchain.retrievers.multi_query import MultiQueryRetriever
|
| 16 |
|
| 17 |
import chainlit as cl
|
| 18 |
|
|
|
|
| 29 |
documents.extend(loader.load())
|
| 30 |
|
| 31 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 32 |
+
chunk_size=2000,
|
| 33 |
+
chunk_overlap=100,
|
| 34 |
)
|
| 35 |
|
| 36 |
docs = text_splitter.split_documents(documents)
|
| 37 |
|
| 38 |
+
embedding = HuggingFaceEmbeddings(model_name="XicoC/midterm-finetuned-arctic")
|
| 39 |
|
| 40 |
vectorstore = Qdrant.from_documents(
|
| 41 |
documents=docs,
|
|
|
|
| 80 |
streaming=True,
|
| 81 |
)
|
| 82 |
|
| 83 |
+
retriever_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
|
| 84 |
+
multiquery_retriever = MultiQueryRetriever.from_llm(
|
| 85 |
+
retriever=retriever, llm=retriever_llm
|
| 86 |
+
)
|
| 87 |
+
|
| 88 |
|
| 89 |
@cl.on_chat_start
|
| 90 |
async def start_chat():
|
|
|
|
| 94 |
|
| 95 |
qa = ConversationalRetrievalChain.from_llm(
|
| 96 |
llm,
|
| 97 |
+
retriever=multiquery_retriever,
|
| 98 |
memory=memory,
|
| 99 |
combine_docs_chain_kwargs={"prompt": PROMPT},
|
| 100 |
return_source_documents=True,
|
requirements.txt
CHANGED
|
@@ -8,4 +8,5 @@ langchain_openai==0.1.1
|
|
| 8 |
pypdf
|
| 9 |
chromadb
|
| 10 |
qdrant-client
|
| 11 |
-
importlib-metadata<7.0,>=6.0
|
|
|
|
|
|
| 8 |
pypdf
|
| 9 |
chromadb
|
| 10 |
qdrant-client
|
| 11 |
+
importlib-metadata<7.0,>=6.0
|
| 12 |
+
sentence-transformers
|