Spaces:
Paused
Paused
Update pdftotext.py
Browse files- pdftotext.py +14 -0
pdftotext.py
CHANGED
|
@@ -139,6 +139,20 @@ def apiFiltering(apitext):
|
|
| 139 |
"bqcode": detail.get('bqcodelibrary', {}).get('bqcode')
|
| 140 |
})
|
| 141 |
return filtered_items
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
# import fitz
|
| 143 |
|
| 144 |
# import tsadropboxretrieval
|
|
|
|
| 139 |
"bqcode": detail.get('bqcodelibrary', {}).get('bqcode')
|
| 140 |
})
|
| 141 |
return filtered_items
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
import fitz
|
| 146 |
+
|
| 147 |
+
def texts_from_pdfAllText(input_pdf_data):
    """Extract the plain text of every page of an in-memory PDF.

    The earlier version rebound a single variable on each loop iteration,
    so only one page's text survived, and a document with no pages raised
    ``UnboundLocalError``. This version collects the text of all pages.

    Args:
        input_pdf_data: Raw PDF content handed to PyMuPDF as a stream
            (presumably ``bytes`` -- confirm against the caller).

    Returns:
        str: The text of all pages concatenated in page order, or an
        empty string when the document has no pages.
    """
    # Keyword form of the original ``fitz.open('pdf', data)`` call: the data
    # is the stream and 'pdf' is the file-type hint. The ``with`` block
    # closes the document even if text extraction fails.
    with fitz.open(stream=input_pdf_data, filetype='pdf') as pdf_document:
        page_texts = [page.get_text() for page in pdf_document]

    all_text = "".join(page_texts)
    # Kept from the original: the extracted text is echoed to stdout.
    print(all_text)
    return all_text
|
| 156 |
# import fitz
|
| 157 |
|
| 158 |
# import tsadropboxretrieval
|