Marthee committed on
Commit
16c35c7
·
verified ·
1 Parent(s): 78a4008

Update InitialMarkups.py

Browse files
Files changed (1) hide show
  1. InitialMarkups.py +3 -3
InitialMarkups.py CHANGED
@@ -1924,7 +1924,7 @@ def extract_section_under_header_tobebilled2(pdf_path):
1924
 
1925
  def extract_section_under_header_tobebilled2marthe(multiplePDF_Paths):
1926
  # keywordstoSkip=["installation", "execution", "miscellaneous items", "workmanship", "testing", "labeling"]
1927
-
1928
  keywords = {'installation', 'execution', 'miscellaneous items', 'workmanship', 'testing', 'labeling'}
1929
  top_margin = 70
1930
  bottom_margin = 50
@@ -1939,7 +1939,7 @@ def extract_section_under_header_tobebilled2marthe(multiplePDF_Paths):
1939
  parsed_url = urlparse(pdf_path)
1940
  filename = os.path.basename(parsed_url.path)
1941
  filename = unquote(filename) # decode URL-encoded characters
1942
-
1943
  # Optimized URL handling
1944
  if pdf_path and ('http' in pdf_path or 'dropbox' in pdf_path):
1945
  pdf_path = pdf_path.replace('dl=0', 'dl=1')
@@ -2380,6 +2380,6 @@ def extract_section_under_header_tobebilled2marthe(multiplePDF_Paths):
2380
  pdf_bytes = BytesIO()
2381
  docHighlights.save(pdf_bytes)
2382
 
2383
- return pdf_bytes.getvalue(), docHighlights , json_output, Alltexttobebilled , filename
2384
 
2385
 
 
1924
 
1925
  def extract_section_under_header_tobebilled2marthe(multiplePDF_Paths):
1926
  # keywordstoSkip=["installation", "execution", "miscellaneous items", "workmanship", "testing", "labeling"]
1927
+ filenames=[]
1928
  keywords = {'installation', 'execution', 'miscellaneous items', 'workmanship', 'testing', 'labeling'}
1929
  top_margin = 70
1930
  bottom_margin = 50
 
1939
  parsed_url = urlparse(pdf_path)
1940
  filename = os.path.basename(parsed_url.path)
1941
  filename = unquote(filename) # decode URL-encoded characters
1942
+ filenames.append(filename)
1943
  # Optimized URL handling
1944
  if pdf_path and ('http' in pdf_path or 'dropbox' in pdf_path):
1945
  pdf_path = pdf_path.replace('dl=0', 'dl=1')
 
2380
  pdf_bytes = BytesIO()
2381
  docHighlights.save(pdf_bytes)
2382
 
2383
+ return pdf_bytes.getvalue(), docHighlights , json_output, Alltexttobebilled , filenames
2384
 
2385