Spaces:
Runtime error
Runtime error
Update interim/app.py
Browse files
- interim/app.py +11 -1
interim/app.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import sys
|
| 2 |
import os
|
| 3 |
import re
|
|
@@ -61,7 +62,16 @@ def load_docs(document_path):
|
|
| 61 |
)
|
| 62 |
documents = loader.load()
|
| 63 |
text_splitter = NLTKTextSplitter(chunk_size=1000)
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
except Exception as e:
|
| 66 |
st.error(f"Failed to load and process PDF: {e}")
|
| 67 |
st.stop()
|
|
|
|
| 1 |
+
# to-do: Enable downloading multiple patent PDFs via corresponding links
|
| 2 |
import sys
|
| 3 |
import os
|
| 4 |
import re
|
|
|
|
| 62 |
)
|
| 63 |
documents = loader.load()
|
| 64 |
text_splitter = NLTKTextSplitter(chunk_size=1000)
|
| 65 |
+
split_docs = text_splitter.split_documents(documents)
|
| 66 |
+
|
| 67 |
+
# Filter metadata to only include str, int, float, or bool
|
| 68 |
+
for doc in split_docs:
|
| 69 |
+
if hasattr(doc, "metadata") and isinstance(doc.metadata, dict):
|
| 70 |
+
doc.metadata = {
|
| 71 |
+
k: v for k, v in doc.metadata.items()
|
| 72 |
+
if isinstance(v, (str, int, float, bool))
|
| 73 |
+
}
|
| 74 |
+
return split_docs
|
| 75 |
except Exception as e:
|
| 76 |
st.error(f"Failed to load and process PDF: {e}")
|
| 77 |
st.stop()
|