Marthee committed on
Commit
9793cbf
·
verified ·
1 Parent(s): e14330f

Update InitialMarkups.py

Browse files
Files changed (1) hide show
  1. InitialMarkups.py +4 -1
InitialMarkups.py CHANGED
@@ -1479,6 +1479,7 @@ def extract_section_under_header_tobebilledOnly(pdf_path):
1479
  ########################################################################################################################################################
1480
 
1481
  def extract_section_under_headerRawan(pdf_path,headingjson,pagenum=0,incomingheader=0):
 
1482
  top_margin = 70
1483
  bottom_margin = 50
1484
  # Optimized URL handling
@@ -1637,6 +1638,7 @@ def extract_section_under_headerRawan(pdf_path,headingjson,pagenum=0,incominghea
1637
  and span['size'] < mainHeaderFontSize)
1638
  ]
1639
  if header_spans:
 
1640
  collecting = True
1641
  matched_header_font_size = max(span["size"] for span in header_spans)
1642
 
@@ -1717,6 +1719,7 @@ def extract_section_under_headerRawan(pdf_path,headingjson,pagenum=0,incominghea
1717
 
1718
  if header_spans and (meets_word_threshold or same_start_word(heading_to_search, combined_line_norm) ):
1719
  collecting = True
 
1720
  matched_header_font_size = max(span["size"] for span in header_spans)
1721
 
1722
  collected_lines.append(line_text)
@@ -1848,7 +1851,7 @@ def extract_section_under_headerRawan(pdf_path,headingjson,pagenum=0,incominghea
1848
 
1849
  pdf_bytes = BytesIO()
1850
  docHighlights.save(pdf_bytes)
1851
- return pdf_bytes.getvalue(), docHighlights , newjsonList
1852
 
1853
 
1854
 
 
1479
  ########################################################################################################################################################
1480
 
1481
  def extract_section_under_headerRawan(pdf_path,headingjson,pagenum=0,incomingheader=0):
1482
+ Alltexttobebilled=''
1483
  top_margin = 70
1484
  bottom_margin = 50
1485
  # Optimized URL handling
 
1638
  and span['size'] < mainHeaderFontSize)
1639
  ]
1640
  if header_spans:
1641
+ Alltexttobebilled+= ' '+ combined_line_norm
1642
  collecting = True
1643
  matched_header_font_size = max(span["size"] for span in header_spans)
1644
 
 
1719
 
1720
  if header_spans and (meets_word_threshold or same_start_word(heading_to_search, combined_line_norm) ):
1721
  collecting = True
1722
+ Alltexttobebilled+= ' '+ combined_line_norm
1723
  matched_header_font_size = max(span["size"] for span in header_spans)
1724
 
1725
  collected_lines.append(line_text)
 
1851
 
1852
  pdf_bytes = BytesIO()
1853
  docHighlights.save(pdf_bytes)
1854
+ return pdf_bytes.getvalue(), docHighlights , newjsonList, Alltexttobebilled
1855
 
1856
 
1857