Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -31,8 +31,8 @@ def get_pdf_text(pdf_docs):
|
|
| 31 |
|
| 32 |
def get_text_chunks(text):
|
| 33 |
text_splitter = CharacterTextSplitter(separator="\n",
|
| 34 |
-
chunk_size=
|
| 35 |
-
chunk_overlap=
|
| 36 |
length_function=len
|
| 37 |
)
|
| 38 |
chunks = text_splitter.split_text(text)
|
|
@@ -43,13 +43,14 @@ def get_text_chunks(text):
|
|
| 43 |
def get_vectorstore(text_chunks):
|
| 44 |
#embeddings = OpenAIEmbeddings()
|
| 45 |
embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
|
| 46 |
-
#
|
| 47 |
vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
|
| 48 |
|
| 49 |
return vectorstore
|
| 50 |
|
| 51 |
|
| 52 |
def get_conversation_chain(vectorstore, model_name):
|
|
|
|
| 53 |
llm = LlamaCpp(model_path=model_name,
|
| 54 |
temperature=0.1,
|
| 55 |
top_k=30,
|
|
@@ -62,10 +63,12 @@ def get_conversation_chain(vectorstore, model_name):
|
|
| 62 |
|
| 63 |
#llm = ChatOpenAI()
|
| 64 |
|
| 65 |
-
memory = ConversationBufferMemory(memory_key='chat_history',
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm,
|
| 68 |
-
# condense_question_prompt=CONDENSE_QUESTION_PROMPT,
|
| 69 |
retriever=vectorstore.as_retriever(),
|
| 70 |
memory=memory,
|
| 71 |
return_source_documents=True
|
|
@@ -75,6 +78,7 @@ def get_conversation_chain(vectorstore, model_name):
|
|
| 75 |
|
| 76 |
|
| 77 |
def handle_userinput(user_question):
|
|
|
|
| 78 |
response = st.session_state.conversation({'question': user_question})
|
| 79 |
|
| 80 |
st.session_state.chat_history = response['chat_history']
|
|
@@ -111,7 +115,7 @@ if "chat_history" not in st.session_state:
|
|
| 111 |
st.session_state.chat_history = None
|
| 112 |
|
| 113 |
st.header("Chat with multiple PDFs :books:")
|
| 114 |
-
user_question = st.text_input("Ask a question about your documents:")
|
| 115 |
|
| 116 |
if user_question:
|
| 117 |
handle_userinput(user_question)
|
|
|
|
| 31 |
|
| 32 |
def get_text_chunks(text):
|
| 33 |
text_splitter = CharacterTextSplitter(separator="\n",
|
| 34 |
+
chunk_size=1000, # 1000
|
| 35 |
+
chunk_overlap=200, # 200
|
| 36 |
length_function=len
|
| 37 |
)
|
| 38 |
chunks = text_splitter.split_text(text)
|
|
|
|
| 43 |
def get_vectorstore(text_chunks):
|
| 44 |
#embeddings = OpenAIEmbeddings()
|
| 45 |
embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
|
| 46 |
+
#embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
|
| 47 |
vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
|
| 48 |
|
| 49 |
return vectorstore
|
| 50 |
|
| 51 |
|
| 52 |
def get_conversation_chain(vectorstore, model_name):
|
| 53 |
+
|
| 54 |
llm = LlamaCpp(model_path=model_name,
|
| 55 |
temperature=0.1,
|
| 56 |
top_k=30,
|
|
|
|
| 63 |
|
| 64 |
#llm = ChatOpenAI()
|
| 65 |
|
| 66 |
+
memory = ConversationBufferMemory(memory_key='chat_history',
|
| 67 |
+
input_key='question',
|
| 68 |
+
output_key='answer',
|
| 69 |
+
return_messages=True)
|
| 70 |
|
| 71 |
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm,
|
|
|
|
| 72 |
retriever=vectorstore.as_retriever(),
|
| 73 |
memory=memory,
|
| 74 |
return_source_documents=True
|
|
|
|
| 78 |
|
| 79 |
|
| 80 |
def handle_userinput(user_question):
|
| 81 |
+
|
| 82 |
response = st.session_state.conversation({'question': user_question})
|
| 83 |
|
| 84 |
st.session_state.chat_history = response['chat_history']
|
|
|
|
| 115 |
st.session_state.chat_history = None
|
| 116 |
|
| 117 |
st.header("Chat with multiple PDFs :books:")
|
| 118 |
+
user_question = st.text_input("Ask a question about your documents: ")
|
| 119 |
|
| 120 |
if user_question:
|
| 121 |
handle_userinput(user_question)
|