Spaces:
Build error
Build error
Saving uploaded files to a temporary location for embeddings
Browse files
app.py
CHANGED
|
@@ -50,26 +50,28 @@ def get_huggingface_pipeline():
|
|
| 50 |
if st.button("Process PDFs") and uploaded_files:
|
| 51 |
all_documents = []
|
| 52 |
|
| 53 |
-
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
pdf_docs = loader.load()
|
| 56 |
|
|
|
|
| 57 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 58 |
chunk_size=1000,
|
| 59 |
-
chunk_overlap=
|
| 60 |
separators=["\n\n", "\n", " ", ""]
|
| 61 |
)
|
| 62 |
|
| 63 |
-
docs = []
|
| 64 |
for doc in pdf_docs:
|
| 65 |
chunks = text_splitter.split_text(doc.page_content)
|
| 66 |
for chunk in chunks:
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
"metadata": doc.metadata
|
| 70 |
-
})
|
| 71 |
-
|
| 72 |
-
all_documents.extend(docs)
|
| 73 |
|
| 74 |
# Create embeddings with Hugging Face
|
| 75 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
|
|
|
| 50 |
if st.button("Process PDFs") and uploaded_files:
|
| 51 |
all_documents = []
|
| 52 |
|
| 53 |
+
for file in uploaded_files:
|
| 54 |
+
# Save the file temporarily
|
| 55 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
|
| 56 |
+
temp_file.write(file.getvalue())
|
| 57 |
+
temp_file_path = temp_file.name
|
| 58 |
+
|
| 59 |
+
# Load the PDF using PyPDFLoader
|
| 60 |
+
loader = PyPDFLoader(temp_file_path)
|
| 61 |
pdf_docs = loader.load()
|
| 62 |
|
| 63 |
+
# Split text into manageable chunks
|
| 64 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 65 |
chunk_size=1000,
|
| 66 |
+
chunk_overlap=300,
|
| 67 |
separators=["\n\n", "\n", " ", ""]
|
| 68 |
)
|
| 69 |
|
|
|
|
| 70 |
for doc in pdf_docs:
|
| 71 |
chunks = text_splitter.split_text(doc.page_content)
|
| 72 |
for chunk in chunks:
|
| 73 |
+
# Create Document object for each chunk
|
| 74 |
+
all_documents.append(Document(page_content=chunk, metadata=doc.metadata))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
# Create embeddings with Hugging Face
|
| 77 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|