Marthee committed on
Commit
890dce0
·
verified ·
1 Parent(s): 11c40be

Update InitialMarkups.py

Browse files
Files changed (1) hide show
  1. InitialMarkups.py +4 -4
InitialMarkups.py CHANGED
@@ -1093,7 +1093,7 @@ def extract_section_under_header_withoutNot(pdf_path):
1093
  return list(range(0, toc_pages[-1] +1)) if toc_pages else toc_pages
1094
 
1095
  toc_pages = get_toc_page_numbers(doc)
1096
-
1097
 
1098
  hierarchy = build_header_hierarchy(doc, toc_pages, most_common_font_size, most_common_color, most_common_font)
1099
  listofHeaderstoMarkup = get_leaf_headers_with_paths(hierarchy)
@@ -1125,7 +1125,7 @@ def extract_section_under_header_withoutNot(pdf_path):
1125
  if not spans:
1126
  i += 1
1127
  continue
1128
-
1129
  y0 = spans[0]["bbox"][1]
1130
  y1 = spans[0]["bbox"][3]
1131
  if y0 < top_margin or y1 > (page_height - bottom_margin):
@@ -1142,7 +1142,7 @@ def extract_section_under_header_withoutNot(pdf_path):
1142
  combined_line_norm = normalize_text(line_text + " " + next_line_text)
1143
  else:
1144
  combined_line_norm = line_text_norm
1145
-
1146
  # Check if we should continue processing
1147
  if combined_line_norm and combined_line_norm in paths[0]:
1148
 
@@ -1156,7 +1156,7 @@ def extract_section_under_header_withoutNot(pdf_path):
1156
  stringtowrite='To be billed'
1157
  if stringtowrite!='To be billed':
1158
  alltextWithoutNotbilled+= combined_line_norm #################################################
1159
-
1160
  return alltextWithoutNotbilled
1161
 
1162
 
 
1093
  return list(range(0, toc_pages[-1] +1)) if toc_pages else toc_pages
1094
 
1095
  toc_pages = get_toc_page_numbers(doc)
1096
+ print('here0')
1097
 
1098
  hierarchy = build_header_hierarchy(doc, toc_pages, most_common_font_size, most_common_color, most_common_font)
1099
  listofHeaderstoMarkup = get_leaf_headers_with_paths(hierarchy)
 
1125
  if not spans:
1126
  i += 1
1127
  continue
1128
+ print('here1')
1129
  y0 = spans[0]["bbox"][1]
1130
  y1 = spans[0]["bbox"][3]
1131
  if y0 < top_margin or y1 > (page_height - bottom_margin):
 
1142
  combined_line_norm = normalize_text(line_text + " " + next_line_text)
1143
  else:
1144
  combined_line_norm = line_text_norm
1145
+ print('hereee2')
1146
  # Check if we should continue processing
1147
  if combined_line_norm and combined_line_norm in paths[0]:
1148
 
 
1156
  stringtowrite='To be billed'
1157
  if stringtowrite!='To be billed':
1158
  alltextWithoutNotbilled+= combined_line_norm #################################################
1159
+ print('donee')
1160
  return alltextWithoutNotbilled
1161
 
1162