Spaces:
Runtime error
Runtime error
Update scripts/process.py
Browse files
- scripts/process.py +2 -13
scripts/process.py
CHANGED
|
@@ -91,19 +91,8 @@ def load_document(
|
|
| 91 |
documents.append(Document(content=text,
|
| 92 |
meta={"name": file_name},
|
| 93 |
id_hash_keys=id_hash_keys))
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
This can happen whith certain pdf types.'''
|
| 97 |
-
for i in documents:
|
| 98 |
-
if i.content == "":
|
| 99 |
-
st.write("using pdfplumber")
|
| 100 |
-
text = []
|
| 101 |
-
with pdfplumber.open(file_path) as pdf:
|
| 102 |
-
for page in pdf.pages:
|
| 103 |
-
text.append(page.extract_text())
|
| 104 |
-
i.content = ' '.join([page for page in text])
|
| 105 |
-
|
| 106 |
-
return documents
|
| 107 |
|
| 108 |
|
| 109 |
def preprocessing(document):
|
|
|
|
| 91 |
documents.append(Document(content=text,
|
| 92 |
meta={"name": file_name},
|
| 93 |
id_hash_keys=id_hash_keys))
|
| 94 |
+
|
| 95 |
+
return documents
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
|
| 97 |
|
| 98 |
def preprocessing(document):
|