Marthee committed on
Commit
6c5993a
·
verified ·
1 Parent(s): 4431ded

Update InitialMarkups.py

Browse files
Files changed (1) hide show
  1. InitialMarkups.py +5 -4
InitialMarkups.py CHANGED
@@ -2003,7 +2003,8 @@ def extract_section_under_header_tobebilled2(pdf_path):
2003
 
2004
 
2005
 
2006
- def extract_section_under_header_tobebilledMultiplePDFSmarthe(multiplePDF_Paths):
 
2007
  baselink = "https://findconsole-initialmarkups.hf.space/view-pdf?"
2008
  # keywordstoSkip=["installation", "execution", "miscellaneous items", "workmanship", "testing", "labeling"]
2009
  filenames=[]
@@ -2250,7 +2251,7 @@ def extract_section_under_header_tobebilledMultiplePDFSmarthe(multiplePDF_Paths)
2250
  # Alltexttobebilled+='\n'
2251
  matched_header_font_size = max(span["size"] for span in header_spans)
2252
 
2253
- # collected_lines.append(line_text)
2254
  valid_spans = [span for span in spans if span.get("bbox")]
2255
 
2256
  if valid_spans:
@@ -2351,7 +2352,7 @@ def extract_section_under_header_tobebilledMultiplePDFSmarthe(multiplePDF_Paths)
2351
  # Alltexttobebilled+= ' '+ combined_line_norm
2352
  matched_header_font_size = max(span["size"] for span in header_spans)
2353
  # if normalize_text(line_text)!=heading_norm:
2354
- # collected_lines.append(line_text)
2355
  valid_spans = [span for span in spans if span.get("bbox")]
2356
 
2357
  if valid_spans:
@@ -2492,7 +2493,7 @@ def extract_section_under_header_tobebilledMultiplePDFSmarthe(multiplePDF_Paths)
2492
  # if line_text.lower() != heading_norm.lower():
2493
  # print('checkk',line_text,heading_norm)
2494
  # collected_lines.append(line_text)
2495
- # collected_lines.append(line_text)
2496
  valid_spans = [span for span in spans if span.get("bbox")]
2497
  if valid_spans:
2498
  x0s = [span["bbox"][0] for span in valid_spans]
 
2003
 
2004
 
2005
 
2006
+
2007
+ def extract_section_under_header_tobebilledMultiplePDFS(multiplePDF_Paths):
2008
  baselink = "https://findconsole-initialmarkups.hf.space/view-pdf?"
2009
  # keywordstoSkip=["installation", "execution", "miscellaneous items", "workmanship", "testing", "labeling"]
2010
  filenames=[]
 
2251
  # Alltexttobebilled+='\n'
2252
  matched_header_font_size = max(span["size"] for span in header_spans)
2253
 
2254
+ collected_lines.append(line_text)
2255
  valid_spans = [span for span in spans if span.get("bbox")]
2256
 
2257
  if valid_spans:
 
2352
  # Alltexttobebilled+= ' '+ combined_line_norm
2353
  matched_header_font_size = max(span["size"] for span in header_spans)
2354
  # if normalize_text(line_text)!=heading_norm:
2355
+ collected_lines.append(line_text)
2356
  valid_spans = [span for span in spans if span.get("bbox")]
2357
 
2358
  if valid_spans:
 
2493
  # if line_text.lower() != heading_norm.lower():
2494
  # print('checkk',line_text,heading_norm)
2495
  # collected_lines.append(line_text)
2496
+ collected_lines.append(line_text)
2497
  valid_spans = [span for span in spans if span.get("bbox")]
2498
  if valid_spans:
2499
  x0s = [span["bbox"][0] for span in valid_spans]