Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -85,7 +85,12 @@ def upload_and_parse_documents(documents):
|
|
| 85 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
| 86 |
for doc in documents:
|
| 87 |
try:
|
| 88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
pages = loader.load()
|
| 90 |
document_names.append(doc.name)
|
| 91 |
page_contents = []
|
|
@@ -94,9 +99,14 @@ def upload_and_parse_documents(documents):
|
|
| 94 |
all_texts.extend(chunks)
|
| 95 |
page_contents.append(page.page_content)
|
| 96 |
document_pages.append(page_contents)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
except Exception as e:
|
| 98 |
st.error(f"Error parsing document {doc.name}: {e}")
|
| 99 |
return all_texts, document_names, document_pages
|
|
|
|
| 100 |
|
| 101 |
@st.cache_data
|
| 102 |
def parse_pdf_from_url(url):
|
|
|
|
| 85 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
| 86 |
for doc in documents:
|
| 87 |
try:
|
| 88 |
+
# Write the uploaded bytes to a temporary file on disk so PyPDFLoader can be given a file path
|
| 89 |
+
with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
|
| 90 |
+
tmp_file.write(doc.read())
|
| 91 |
+
tmp_file_path = tmp_file.name
|
| 92 |
+
|
| 93 |
+
loader = PyPDFLoader(tmp_file_path) # Use the temporary file path
|
| 94 |
pages = loader.load()
|
| 95 |
document_names.append(doc.name)
|
| 96 |
page_contents = []
|
|
|
|
| 99 |
all_texts.extend(chunks)
|
| 100 |
page_contents.append(page.page_content)
|
| 101 |
document_pages.append(page_contents)
|
| 102 |
+
|
| 103 |
+
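# Remove the temporary file manually (it was created with delete=False); NOTE: this runs inside the try body, so it is skipped if loading or parsing raises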
|
| 104 |
+
os.remove(tmp_file_path)
|
| 105 |
+
|
| 106 |
except Exception as e:
|
| 107 |
st.error(f"Error parsing document {doc.name}: {e}")
|
| 108 |
return all_texts, document_names, document_pages
|
| 109 |
+
|
| 110 |
|
| 111 |
@st.cache_data
|
| 112 |
def parse_pdf_from_url(url):
|