Spaces:
Sleeping
Sleeping
Update InitialMarkups.py
Browse files- InitialMarkups.py +4 -4
InitialMarkups.py
CHANGED
|
@@ -1093,7 +1093,7 @@ def extract_section_under_header_withoutNot(pdf_path):
|
|
| 1093 |
return list(range(0, toc_pages[-1] +1)) if toc_pages else toc_pages
|
| 1094 |
|
| 1095 |
toc_pages = get_toc_page_numbers(doc)
|
| 1096 |
-
|
| 1097 |
|
| 1098 |
hierarchy = build_header_hierarchy(doc, toc_pages, most_common_font_size, most_common_color, most_common_font)
|
| 1099 |
listofHeaderstoMarkup = get_leaf_headers_with_paths(hierarchy)
|
|
@@ -1125,7 +1125,7 @@ def extract_section_under_header_withoutNot(pdf_path):
|
|
| 1125 |
if not spans:
|
| 1126 |
i += 1
|
| 1127 |
continue
|
| 1128 |
-
|
| 1129 |
y0 = spans[0]["bbox"][1]
|
| 1130 |
y1 = spans[0]["bbox"][3]
|
| 1131 |
if y0 < top_margin or y1 > (page_height - bottom_margin):
|
|
@@ -1142,7 +1142,7 @@ def extract_section_under_header_withoutNot(pdf_path):
|
|
| 1142 |
combined_line_norm = normalize_text(line_text + " " + next_line_text)
|
| 1143 |
else:
|
| 1144 |
combined_line_norm = line_text_norm
|
| 1145 |
-
|
| 1146 |
# Check if we should continue processing
|
| 1147 |
if combined_line_norm and combined_line_norm in paths[0]:
|
| 1148 |
|
|
@@ -1156,7 +1156,7 @@ def extract_section_under_header_withoutNot(pdf_path):
|
|
| 1156 |
stringtowrite='To be billed'
|
| 1157 |
if stringtowrite!='To be billed':
|
| 1158 |
alltextWithoutNotbilled+= combined_line_norm #################################################
|
| 1159 |
-
|
| 1160 |
return alltextWithoutNotbilled
|
| 1161 |
|
| 1162 |
|
|
|
|
| 1093 |
return list(range(0, toc_pages[-1] +1)) if toc_pages else toc_pages
|
| 1094 |
|
| 1095 |
toc_pages = get_toc_page_numbers(doc)
|
| 1096 |
+
print('here0')
|
| 1097 |
|
| 1098 |
hierarchy = build_header_hierarchy(doc, toc_pages, most_common_font_size, most_common_color, most_common_font)
|
| 1099 |
listofHeaderstoMarkup = get_leaf_headers_with_paths(hierarchy)
|
|
|
|
| 1125 |
if not spans:
|
| 1126 |
i += 1
|
| 1127 |
continue
|
| 1128 |
+
print('here1')
|
| 1129 |
y0 = spans[0]["bbox"][1]
|
| 1130 |
y1 = spans[0]["bbox"][3]
|
| 1131 |
if y0 < top_margin or y1 > (page_height - bottom_margin):
|
|
|
|
| 1142 |
combined_line_norm = normalize_text(line_text + " " + next_line_text)
|
| 1143 |
else:
|
| 1144 |
combined_line_norm = line_text_norm
|
| 1145 |
+
print('hereee2')
|
| 1146 |
# Check if we should continue processing
|
| 1147 |
if combined_line_norm and combined_line_norm in paths[0]:
|
| 1148 |
|
|
|
|
| 1156 |
stringtowrite='To be billed'
|
| 1157 |
if stringtowrite!='To be billed':
|
| 1158 |
alltextWithoutNotbilled+= combined_line_norm #################################################
|
| 1159 |
+
print('donee')
|
| 1160 |
return alltextWithoutNotbilled
|
| 1161 |
|
| 1162 |
|