Spaces:
Runtime error
Runtime error
Update InitialMarkups.py
Browse files
- InitialMarkups.py +5 -4
InitialMarkups.py
CHANGED
|
@@ -2003,7 +2003,8 @@ def extract_section_under_header_tobebilled2(pdf_path):
|
|
| 2003 |
|
| 2004 |
|
| 2005 |
|
| 2006 |
-
|
|
|
|
| 2007 |
baselink = "https://findconsole-initialmarkups.hf.space/view-pdf?"
|
| 2008 |
# keywordstoSkip=["installation", "execution", "miscellaneous items", "workmanship", "testing", "labeling"]
|
| 2009 |
filenames=[]
|
|
@@ -2250,7 +2251,7 @@ def extract_section_under_header_tobebilledMultiplePDFSmarthe(multiplePDF_Paths)
|
|
| 2250 |
# Alltexttobebilled+='\n'
|
| 2251 |
matched_header_font_size = max(span["size"] for span in header_spans)
|
| 2252 |
|
| 2253 |
-
|
| 2254 |
valid_spans = [span for span in spans if span.get("bbox")]
|
| 2255 |
|
| 2256 |
if valid_spans:
|
|
@@ -2351,7 +2352,7 @@ def extract_section_under_header_tobebilledMultiplePDFSmarthe(multiplePDF_Paths)
|
|
| 2351 |
# Alltexttobebilled+= ' '+ combined_line_norm
|
| 2352 |
matched_header_font_size = max(span["size"] for span in header_spans)
|
| 2353 |
# if normalize_text(line_text)!=heading_norm:
|
| 2354 |
-
|
| 2355 |
valid_spans = [span for span in spans if span.get("bbox")]
|
| 2356 |
|
| 2357 |
if valid_spans:
|
|
@@ -2492,7 +2493,7 @@ def extract_section_under_header_tobebilledMultiplePDFSmarthe(multiplePDF_Paths)
|
|
| 2492 |
# if line_text.lower() != heading_norm.lower():
|
| 2493 |
# print('checkk',line_text,heading_norm)
|
| 2494 |
# collected_lines.append(line_text)
|
| 2495 |
-
|
| 2496 |
valid_spans = [span for span in spans if span.get("bbox")]
|
| 2497 |
if valid_spans:
|
| 2498 |
x0s = [span["bbox"][0] for span in valid_spans]
|
|
|
|
| 2003 |
|
| 2004 |
|
| 2005 |
|
| 2006 |
+
|
| 2007 |
+
def extract_section_under_header_tobebilledMultiplePDFS(multiplePDF_Paths):
|
| 2008 |
baselink = "https://findconsole-initialmarkups.hf.space/view-pdf?"
|
| 2009 |
# keywordstoSkip=["installation", "execution", "miscellaneous items", "workmanship", "testing", "labeling"]
|
| 2010 |
filenames=[]
|
|
|
|
| 2251 |
# Alltexttobebilled+='\n'
|
| 2252 |
matched_header_font_size = max(span["size"] for span in header_spans)
|
| 2253 |
|
| 2254 |
+
collected_lines.append(line_text)
|
| 2255 |
valid_spans = [span for span in spans if span.get("bbox")]
|
| 2256 |
|
| 2257 |
if valid_spans:
|
|
|
|
| 2352 |
# Alltexttobebilled+= ' '+ combined_line_norm
|
| 2353 |
matched_header_font_size = max(span["size"] for span in header_spans)
|
| 2354 |
# if normalize_text(line_text)!=heading_norm:
|
| 2355 |
+
collected_lines.append(line_text)
|
| 2356 |
valid_spans = [span for span in spans if span.get("bbox")]
|
| 2357 |
|
| 2358 |
if valid_spans:
|
|
|
|
| 2493 |
# if line_text.lower() != heading_norm.lower():
|
| 2494 |
# print('checkk',line_text,heading_norm)
|
| 2495 |
# collected_lines.append(line_text)
|
| 2496 |
+
collected_lines.append(line_text)
|
| 2497 |
valid_spans = [span for span in spans if span.get("bbox")]
|
| 2498 |
if valid_spans:
|
| 2499 |
x0s = [span["bbox"][0] for span in valid_spans]
|