Update app.py
Browse files
app.py
CHANGED
|
@@ -69,15 +69,18 @@ class CFG:
|
|
| 69 |
PDFs_path = './data'
|
| 70 |
Embeddings_path = './embeddings/input'
|
| 71 |
Output_folder = './ml-papers-vector'
|
| 72 |
-
|
|
|
|
| 73 |
loader = DirectoryLoader(CFG.PDFs_path, glob="./*.pdf", loader_cls=PyPDFLoader,use_multithreading=True)
|
| 74 |
|
|
|
|
| 75 |
documents = loader.load()
|
| 76 |
|
|
|
|
| 77 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size = CFG.split_chunk_size, chunk_overlap = CFG.split_overlap)
|
| 78 |
-
|
| 79 |
texts = text_splitter.split_documents(documents)
|
| 80 |
-
|
| 81 |
if not os.path.exists(CFG.Embeddings_path + '/index.faiss'):
|
| 82 |
embeddings = HuggingFaceInstructEmbeddings(model_name = CFG.embeddings_model_repo, model_kwargs={"device":"cuda"})
|
| 83 |
vectordb = FAISS.from_documents(documents=texts, embedding=embeddings)
|
|
|
|
| 69 |
PDFs_path = './data'
|
| 70 |
Embeddings_path = './embeddings/input'
|
| 71 |
Output_folder = './ml-papers-vector'
|
| 72 |
+
|
| 73 |
+
@spaces.GPU(duration=120)
|
| 74 |
loader = DirectoryLoader(CFG.PDFs_path, glob="./*.pdf", loader_cls=PyPDFLoader,use_multithreading=True)
|
| 75 |
|
| 76 |
+
@spaces.GPU(duration=120)
|
| 77 |
documents = loader.load()
|
| 78 |
|
| 79 |
+
@spaces.GPU(duration=120)
|
| 80 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size = CFG.split_chunk_size, chunk_overlap = CFG.split_overlap)
|
| 81 |
+
@spaces.GPU(duration=120)
|
| 82 |
texts = text_splitter.split_documents(documents)
|
| 83 |
+
@spaces.GPU(duration=120)
|
| 84 |
if not os.path.exists(CFG.Embeddings_path + '/index.faiss'):
|
| 85 |
embeddings = HuggingFaceInstructEmbeddings(model_name = CFG.embeddings_model_repo, model_kwargs={"device":"cuda"})
|
| 86 |
vectordb = FAISS.from_documents(documents=texts, embedding=embeddings)
|