Spaces:
Runtime error
Runtime error
Update InitialMarkups.py
Browse files- InitialMarkups.py +4 -1
InitialMarkups.py
CHANGED
|
@@ -1479,6 +1479,7 @@ def extract_section_under_header_tobebilledOnly(pdf_path):
|
|
| 1479 |
########################################################################################################################################################
|
| 1480 |
|
| 1481 |
def extract_section_under_headerRawan(pdf_path,headingjson,pagenum=0,incomingheader=0):
|
|
|
|
| 1482 |
top_margin = 70
|
| 1483 |
bottom_margin = 50
|
| 1484 |
# Optimized URL handling
|
|
@@ -1637,6 +1638,7 @@ def extract_section_under_headerRawan(pdf_path,headingjson,pagenum=0,incominghea
|
|
| 1637 |
and span['size'] < mainHeaderFontSize)
|
| 1638 |
]
|
| 1639 |
if header_spans:
|
|
|
|
| 1640 |
collecting = True
|
| 1641 |
matched_header_font_size = max(span["size"] for span in header_spans)
|
| 1642 |
|
|
@@ -1717,6 +1719,7 @@ def extract_section_under_headerRawan(pdf_path,headingjson,pagenum=0,incominghea
|
|
| 1717 |
|
| 1718 |
if header_spans and (meets_word_threshold or same_start_word(heading_to_search, combined_line_norm) ):
|
| 1719 |
collecting = True
|
|
|
|
| 1720 |
matched_header_font_size = max(span["size"] for span in header_spans)
|
| 1721 |
|
| 1722 |
collected_lines.append(line_text)
|
|
@@ -1848,7 +1851,7 @@ def extract_section_under_headerRawan(pdf_path,headingjson,pagenum=0,incominghea
|
|
| 1848 |
|
| 1849 |
pdf_bytes = BytesIO()
|
| 1850 |
docHighlights.save(pdf_bytes)
|
| 1851 |
-
return pdf_bytes.getvalue(), docHighlights , newjsonList
|
| 1852 |
|
| 1853 |
|
| 1854 |
|
|
|
|
| 1479 |
########################################################################################################################################################
|
| 1480 |
|
| 1481 |
def extract_section_under_headerRawan(pdf_path,headingjson,pagenum=0,incomingheader=0):
|
| 1482 |
+
Alltexttobebilled=''
|
| 1483 |
top_margin = 70
|
| 1484 |
bottom_margin = 50
|
| 1485 |
# Optimized URL handling
|
|
|
|
| 1638 |
and span['size'] < mainHeaderFontSize)
|
| 1639 |
]
|
| 1640 |
if header_spans:
|
| 1641 |
+
Alltexttobebilled+= ' '+ combined_line_norm
|
| 1642 |
collecting = True
|
| 1643 |
matched_header_font_size = max(span["size"] for span in header_spans)
|
| 1644 |
|
|
|
|
| 1719 |
|
| 1720 |
if header_spans and (meets_word_threshold or same_start_word(heading_to_search, combined_line_norm) ):
|
| 1721 |
collecting = True
|
| 1722 |
+
Alltexttobebilled+= ' '+ combined_line_norm
|
| 1723 |
matched_header_font_size = max(span["size"] for span in header_spans)
|
| 1724 |
|
| 1725 |
collected_lines.append(line_text)
|
|
|
|
| 1851 |
|
| 1852 |
pdf_bytes = BytesIO()
|
| 1853 |
docHighlights.save(pdf_bytes)
|
| 1854 |
+
return pdf_bytes.getvalue(), docHighlights , newjsonList, Alltexttobebilled
|
| 1855 |
|
| 1856 |
|
| 1857 |
|