Marthee committed on
Commit
a57ba95
·
verified ·
1 Parent(s): 43bf96b

Update pdftotext.py

Browse files
Files changed (1) hide show
  1. pdftotext.py +7 -0
pdftotext.py CHANGED
@@ -2,10 +2,17 @@ import fitz # PyMuPDF
2
  from io import BytesIO
3
  import re
4
  import requests
 
 
 
 
 
 
5
  def texts_from_pdf(pdfshareablelinks, heading_to_search):
6
  print('intexts',pdfshareablelinks)
7
 
8
 
 
9
 
10
  # Case 1: If it's a shareable link
11
  for link in pdfshareablelinks:
 
2
  from io import BytesIO
3
  import re
4
  import requests
5
+
6
def split_links(links_string):
    """Split a comma-separated string of links into a list of clean links.

    Args:
        links_string: Text holding one or more links separated by commas.

    Returns:
        A list of links with surrounding whitespace removed. Segments that
        are empty after stripping (from an empty input, a trailing comma, or
        doubled commas) are dropped so callers never iterate over a blank
        link.
    """
    # Strip before filtering so whitespace-only segments are discarded too.
    stripped_links = (link.strip() for link in links_string.split(','))
    return [link for link in stripped_links if link]
10
+
11
  def texts_from_pdf(pdfshareablelinks, heading_to_search):
12
  print('intexts',pdfshareablelinks)
13
 
14
 
15
+ pdfshareablelinks=split_links(pdfshareablelinks)
16
 
17
  # Case 1: If it's a shareable link
18
  for link in pdfshareablelinks: