Spaces:
Paused
Paused
Update pdftotext.py
Browse files- pdftotext.py +7 -0
pdftotext.py
CHANGED
|
@@ -2,10 +2,17 @@ import fitz # PyMuPDF
|
|
| 2 |
from io import BytesIO
|
| 3 |
import re
|
| 4 |
import requests
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
def texts_from_pdf(pdfshareablelinks, heading_to_search):
|
| 6 |
print('intexts',pdfshareablelinks)
|
| 7 |
|
| 8 |
|
|
|
|
| 9 |
|
| 10 |
# Case 1: If it's a shareable link
|
| 11 |
for link in pdfshareablelinks:
|
|
|
|
| 2 |
from io import BytesIO
|
| 3 |
import re
|
| 4 |
import requests
|
| 5 |
+
|
| 6 |
+
def split_links(links_string):
    """Split a comma-separated string of links into a list of clean links.

    Args:
        links_string: A string holding one or more links separated by
            commas. For backward compatibility an already-split iterable
            of link strings (e.g. a list or tuple) is also accepted and
            is cleaned the same way. ``None`` is treated as "no links".

    Returns:
        A list of links with surrounding whitespace removed. Empty entries
        (from an empty string, a trailing comma, or ``"a,,b"``) are dropped
        so callers never iterate over a blank link.
    """
    if links_string is None:
        return []

    # A plain string is split on commas; anything else is assumed to be an
    # iterable of link strings that was split by the caller already.
    if isinstance(links_string, str):
        candidates = links_string.split(',')
    else:
        candidates = links_string

    # Remove any extra whitespace around each link, then discard blanks.
    stripped = (link.strip() for link in candidates)
    return [link for link in stripped if link]
|
| 10 |
+
|
| 11 |
def texts_from_pdf(pdfshareablelinks, heading_to_search):
|
| 12 |
print('intexts',pdfshareablelinks)
|
| 13 |
|
| 14 |
|
| 15 |
+
pdfshareablelinks=split_links(pdfshareablelinks)
|
| 16 |
|
| 17 |
# Case 1: If it's a shareable link
|
| 18 |
for link in pdfshareablelinks:
|