Spaces:
Sleeping
Sleeping
File size: 543 Bytes
cadae78 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
import io
import requests
import pdfplumber
def fextractURL(pdf_path, timeout=30):
    """Download a PDF from a URL and return its text and table contents.

    For every page, the page text is emitted first (followed by a newline),
    then each row of each table found on that page as one tab-separated line.

    Args:
        pdf_path: URL of the PDF document to fetch (despite the name, this is
            passed straight to ``requests.get``, not opened as a local path).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        A single string with the extracted text and table rows of all pages.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(pdf_path, timeout=timeout)
    # Fail early on an error response instead of handing an HTML error page
    # to the PDF parser.
    response.raise_for_status()

    parts = []
    with pdfplumber.open(io.BytesIO(response.content)) as pdf:
        for page in pdf.pages:
            # Guard against a missing text layer: depending on the pdfplumber
            # version, extract_text() may return None for such pages, which
            # would make the string concatenation raise TypeError.
            page_text = page.extract_text() or ""
            parts.append(page_text + "\n")

            for table in page.extract_tables():
                for row in table:
                    # str() keeps the original rendering of non-string cells.
                    parts.append("\t".join(str(cell) for cell in row) + "\n")

    # Join once at the end rather than growing a string with repeated +=.
    return "".join(parts)
|