Spaces:
Runtime error
Runtime error
qorgh346 committed on
Commit ·
15d201d
1
Parent(s): dd9ce97
update app.py
Browse files
app.py
CHANGED
|
@@ -26,17 +26,17 @@ def get_pdf_text(pdf_docs):
|
|
| 26 |
temp_file.write(pdf_docs.getvalue())
|
| 27 |
temp_file.seek(0)
|
| 28 |
pdf_loader = PyPDFLoader(temp_file.name)
|
| 29 |
-
print('pdf_loader = ', pdf_loader)
|
| 30 |
pdf_doc = pdf_loader.load()
|
| 31 |
-
print('pdf_doc = ',pdf_doc)
|
| 32 |
return pdf_doc
|
| 33 |
|
| 34 |
|
| 35 |
-
def get_text_chunks(
|
| 36 |
-
|
| 37 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 38 |
-
chunk_size =
|
| 39 |
-
chunk_overlap =
|
| 40 |
length_function= len
|
| 41 |
)
|
| 42 |
# text_splitter = CharacterTextSplitter(
|
|
@@ -45,9 +45,9 @@ def get_text_chunks(text):
|
|
| 45 |
# chunk_overlap=200,
|
| 46 |
# length_function=len
|
| 47 |
# )
|
| 48 |
-
|
| 49 |
-
print('
|
| 50 |
-
return
|
| 51 |
|
| 52 |
|
| 53 |
def get_vectorstore(text_chunks):
|
|
@@ -58,7 +58,7 @@ def get_vectorstore(text_chunks):
|
|
| 58 |
# embeddings = OpenAIEmbeddings()sentence-transformers/all-MiniLM-L6-v2
|
| 59 |
# embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl",
|
| 60 |
# model_kwargs={'device':'cpu'})
|
| 61 |
-
vectorstore = FAISS.
|
| 62 |
# vectorstore = Chroma.from_texts(texts=text_chunks, embedding=embeddings)
|
| 63 |
|
| 64 |
return vectorstore
|
|
@@ -186,7 +186,7 @@ def main():
|
|
| 186 |
|
| 187 |
|
| 188 |
# get the text chunks
|
| 189 |
-
text_chunks = get_text_chunks(
|
| 190 |
|
| 191 |
# create vector store
|
| 192 |
vectorstore = get_vectorstore(text_chunks)
|
|
|
|
| 26 |
temp_file.write(pdf_docs.getvalue())
|
| 27 |
temp_file.seek(0)
|
| 28 |
pdf_loader = PyPDFLoader(temp_file.name)
|
| 29 |
+
# print('pdf_loader = ', pdf_loader)
|
| 30 |
pdf_doc = pdf_loader.load()
|
| 31 |
+
# print('pdf_doc = ',pdf_doc)
|
| 32 |
return pdf_doc
|
| 33 |
|
| 34 |
|
| 35 |
+
def get_text_chunks(documents):
|
| 36 |
+
|
| 37 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 38 |
+
chunk_size = 1000,
|
| 39 |
+
chunk_overlap = 200,
|
| 40 |
length_function= len
|
| 41 |
)
|
| 42 |
# text_splitter = CharacterTextSplitter(
|
|
|
|
| 45 |
# chunk_overlap=200,
|
| 46 |
# length_function=len
|
| 47 |
# )
|
| 48 |
+
documents = text_splitter.split_documents(documents)
|
| 49 |
+
print('documents = ', documents)
|
| 50 |
+
return documents
|
| 51 |
|
| 52 |
|
| 53 |
def get_vectorstore(text_chunks):
|
|
|
|
| 58 |
# embeddings = OpenAIEmbeddings()sentence-transformers/all-MiniLM-L6-v2
|
| 59 |
# embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl",
|
| 60 |
# model_kwargs={'device':'cpu'})
|
| 61 |
+
vectorstore = FAISS.from_documents(texts=text_chunks, embedding=embeddings)
|
| 62 |
# vectorstore = Chroma.from_texts(texts=text_chunks, embedding=embeddings)
|
| 63 |
|
| 64 |
return vectorstore
|
|
|
|
| 186 |
|
| 187 |
|
| 188 |
# get the text chunks
|
| 189 |
+
text_chunks = get_text_chunks(doc_list)
|
| 190 |
|
| 191 |
# create vector store
|
| 192 |
vectorstore = get_vectorstore(text_chunks)
|