Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -169,13 +169,23 @@ Answer:
|
|
| 169 |
)
|
| 170 |
|
| 171 |
# Load PDF and split into chunks
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
def load_and_split_pdf(uploaded_file):
|
| 173 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
documents = loader.load()
|
| 175 |
-
|
|
|
|
|
|
|
| 176 |
chunks = text_splitter.split_documents(documents)
|
| 177 |
return chunks
|
| 178 |
-
|
| 179 |
# Build vectorstore from document chunks
|
| 180 |
def build_vectorstore(chunks):
|
| 181 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
|
|
|
| 169 |
)
|
| 170 |
|
| 171 |
# Load PDF and split into chunks
|
| 172 |
+
|
| 173 |
+
from langchain_community.document_loaders import PyPDFLoader
|
| 174 |
+
import tempfile
|
| 175 |
+
|
| 176 |
def load_and_split_pdf(uploaded_file):
    """Load an uploaded PDF and split it into overlapping text chunks.

    The upload's bytes are written to a temporary ``.pdf`` file, which is
    handed to ``PyPDFLoader`` by path. The temporary file is removed as soon
    as the loader has finished, so repeated uploads do not accumulate files
    in the system temp directory.

    Args:
        uploaded_file: Object whose ``read()`` returns the raw PDF bytes
            (presumably the upload widget's file object -- confirm against
            the caller).

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter`` with
        ``chunk_size=500`` and ``chunk_overlap=50``.
    """
    import os

    # delete=False keeps the file on disk after the handle is closed so the
    # loader can reopen it by name; cleanup is done explicitly below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(uploaded_file.read())
        tmp_file_path = tmp_file.name

    try:
        documents = PyPDFLoader(tmp_file_path).load()
    finally:
        # Remove the temporary copy whether or not loading succeeded.
        os.remove(tmp_file_path)

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    return text_splitter.split_documents(documents)
|
| 188 |
+
|
| 189 |
# Build vectorstore from document chunks
|
| 190 |
def build_vectorstore(chunks):
|
| 191 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|